In [1]:
# Mount Google Drive into the Colab runtime so the dataset stored there can be read
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
# Faz a importação das bibliotecas necessárias para o script
import pandas as pd
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.preprocessing import OrdinalEncoder

In [3]:
# Load the Australian fatal-crash dataset from the mounted Drive folder.
# The comma separator is the read_csv default, so it is not passed explicitly.
# low_memory=False disables chunked parsing, which avoids mixed-type inference
# within a column.
CRASH_DATA_CSV = '/content/gdrive/My Drive/topicos-iv/resources/crash_data.csv'
df = pd.read_csv(CRASH_DATA_CSV, low_memory=False)

In [4]:
# Drop every row that has at least one missing value. The explicit .copy()
# makes the result an independent frame for the column assignments below.
# NOTE: this cell overwrites `df`; re-run the load cell before re-running it,
# because the already-encoded columns no longer contain the original labels.
df = df.dropna().copy()

# Columns to ordinal-encode, in processing order.
#   * list  -> explicit category order; a value's position in the list becomes
#              its numeric code (e.g. 'Vic' -> 0.0, 'SA' -> 1.0, ...).
#   * None  -> categories are taken from the data in order of first appearance
#              (df[column].unique()).
# With an explicit list, OrdinalEncoder (default handle_unknown='error') raises
# a ValueError if the column contains a value that is not in the list.
CATEGORY_ORDER = {
    'State': ['Vic', 'SA', 'Qld', 'WA', 'NSW', 'NT', 'ACT', 'Tas'],
    'Dayweek': ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'],
    'Time': None,
    'Crash Type': ['Single', 'Multiple'],
    'Bus Involvement': ['No', 'Yes'],
    'Heavy Rigid Truck Involvement': ['No', 'Yes'],
    'Articulated Truck Involvement': ['No', 'Yes'],
    'Speed Limit': ['100', '60', '80', '50', '110', '70', '90', '130', '40', '10', '30', '25', '20', '<40', 'Unspecified', '75'],
    'Road User': ['Motorcycle rider', 'Driver', 'Passenger', 'Pedal cyclist', 'Pedestrian', 'Other/-9', 'Motorcycle pillion passenger'],
    'Gender': ['Male', 'Female'],
    'National Remoteness Areas': ['Major Cities of Australia', 'Inner Regional Australia', 'Outer Regional Australia', 'Remote Australia', 'Very Remote Australia'],
    'SA4 Name 2016': None,
    'National LGA Name 2017': None,
    'National Road Type': None,
    'Christmas Period': None,
    'Easter Period': None,
    'Age Group': ['17_to_25', '40_to_64', '26_to_39', '0_to_16', '65_to_74', '75_or_older'],
    'Day of week': ['Weekend', 'Weekday'],
    'Time of day': ['Night', 'Day'],
}

# Replace each categorical column with its numeric codes.
for column, categories in CATEGORY_ORDER.items():
    if categories is None:
        categories = df[column].unique()
    # A fresh encoder per column; after the loop `encoder` is the one fitted
    # on the last column ('Time of day'), as in the original cell.
    encoder = OrdinalEncoder(categories=[categories])
    # Assign with df[column] = ... rather than df.loc[:, column] = ...:
    # on pandas >= 2.0 the .loc form writes into the existing object-dtype
    # column in place, so the codes would stay dtype=object instead of float64.
    df[column] = encoder.fit_transform(df[[column]]).ravel()

print(df)

# Display the pairwise correlation matrix of the columns (DataFrame.corr()
# defaults to Pearson). NOTE(review): the codes of unordered categories such
# as State or Road User are arbitrary, so their coefficients depend on the
# category order chosen above.
df.corr()

      Crash ID State  Month  Year Dayweek   Time Crash Type Bus Involvement  \
5     20213034   2.0      9  2021     6.0    0.0        1.0             0.0   
9     20213092   2.0      9  2021     6.0    1.0        0.0             0.0   
10    20214053   1.0      9  2021     4.0    2.0        0.0             0.0   
11    20213178   2.0      9  2021     0.0    2.0        1.0             0.0   
13    20211068   4.0      9  2021     2.0    3.0        0.0             0.0   
...        ...   ...    ...   ...     ...    ...        ...             ...   
9091  20144083   1.0      1  2014     5.0  369.0        1.0             0.0   
9097  20145108   3.0      1  2014     3.0  520.0        0.0             0.0   
9106  20144022   1.0      1  2014     1.0   86.0        0.0             0.0   
9114  20145072   3.0      1  2014     2.0  280.0        0.0             0.0   
9116  20144007   1.0      1  2014     2.0   25.0        0.0             0.0   

     Heavy Rigid Truck Involvement Articulated Truc

Unnamed: 0,Crash ID,State,Month,Year,Dayweek,Time,Crash Type,Bus Involvement,Heavy Rigid Truck Involvement,Articulated Truck Involvement,...,Age,National Remoteness Areas,SA4 Name 2016,National LGA Name 2017,National Road Type,Christmas Period,Easter Period,Age Group,Day of week,Time of day
Crash ID,1.0,-0.093173,-0.10668,0.99613,0.014682,-0.269331,-0.0186,-0.026855,-0.027759,0.00728,...,-0.003117,-0.000736,-0.179724,-0.258746,-0.297229,-0.026154,0.020249,-0.008135,-0.004797,-0.021365
State,-0.093173,1.0,0.008716,-0.105868,0.017886,0.033087,-0.013887,0.007503,0.009267,-0.015734,...,-0.033296,0.166081,-0.089419,0.045194,0.071991,0.002199,0.012971,-0.031952,-0.01986,-0.013698
Month,-0.10668,0.008716,1.0,-0.10873,-0.004106,-0.001779,-0.014891,0.010349,-0.02127,-0.015218,...,-0.011006,0.042389,0.021141,0.026694,0.035987,0.146454,-0.090877,-8.6e-05,-0.013852,-0.007795
Year,0.99613,-0.105868,-0.10873,1.0,0.012971,-0.274122,-0.014445,-0.027327,-0.022878,0.010018,...,0.004642,-0.028345,-0.202855,-0.283863,-0.318278,-0.026892,0.017875,-0.002637,-0.003315,-0.016791
Dayweek,0.014682,0.017886,-0.004106,0.012971,1.0,0.018826,-0.017612,-0.022529,-0.003824,-0.004835,...,-0.015873,0.02519,0.032367,0.027082,0.004205,-0.016848,0.040228,-0.011513,-0.055712,-0.064016
Time,-0.269331,0.033087,-0.001779,-0.274122,0.018826,1.0,0.015661,0.037677,0.035135,-0.024118,...,-0.030625,-0.090844,0.204788,0.21967,0.151584,-0.007423,-0.01058,-0.017891,-0.00649,-0.061583
Crash Type,-0.0186,-0.013887,-0.014891,-0.014445,-0.017612,0.015661,1.0,0.07312,0.1937,0.22763,...,0.07871,-0.146991,-0.018434,-0.066841,0.032547,4.2e-05,-0.001762,0.047056,0.086033,0.176593
Bus Involvement,-0.026855,0.007503,0.010349,-0.027327,-0.022529,0.037677,0.07312,1.0,-0.033836,-0.027702,...,0.021902,-0.038027,0.024292,0.020097,0.012993,-0.013264,-0.00777,0.034921,0.030758,0.032688
Heavy Rigid Truck Involvement,-0.027759,0.009267,-0.02127,-0.022878,-0.003824,0.035135,0.1937,-0.033836,1.0,-0.021832,...,0.017719,-0.048169,0.007581,-0.023637,0.007469,-0.03316,-0.023018,8.9e-05,0.13194,0.114247
Articulated Truck Involvement,0.00728,-0.015734,-0.015218,0.010018,-0.004835,-0.024118,0.22763,-0.027702,-0.021832,1.0,...,0.013898,0.091604,-0.038294,-0.001597,0.051437,0.006378,-0.006136,-0.014129,0.103231,0.027805
