In [1]:
#1. Importar as bibliotecas necessárias

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.tree import export_graphviz
import graphviz


In [2]:
#2. Load the dataset and discard every row that contains a missing value
# NOTE(review): the class report further down only lists Mild/Moderate/Severe. If the CSV
# stores a textual "None" category, pandas may be parsing it as missing and dropna() would
# then remove those rows as well — TODO confirm against the raw file.

tabela = pd.read_csv("Dados_Exercício_3.csv").dropna()
tabela.head()


Unnamed: 0.1,Unnamed: 0,GoodStudent,Age,SocioEcon,RiskAversion,VehicleYear,ThisCarDam,RuggedAuto,Accident,MakeModel,...,HomeBase,AntiTheft,PropCost,OtherCarCost,OtherCar,MedCost,Cushioning,Airbag,ILiCost,DrivHist
0,1,False,Adult,Prole,Adventurous,Older,Moderate,EggShell,Mild,Economy,...,City,False,TenThou,Thousand,True,Thousand,Poor,False,Thousand,Many
4,5,False,Adolescent,Prole,Normal,Older,Moderate,Football,Moderate,Economy,...,City,False,TenThou,Thousand,False,Thousand,Fair,False,Thousand,Many
5,6,False,Adult,UpperMiddle,Normal,Current,Moderate,EggShell,Moderate,SportsCar,...,Suburb,True,HundredThou,HundredThou,True,TenThou,Poor,True,Thousand,Many
6,7,False,Senior,UpperMiddle,Normal,Current,Mild,Football,Mild,Economy,...,Secure,True,TenThou,Thousand,False,Thousand,Excellent,True,Thousand,Many
7,8,False,Adult,Prole,Normal,Older,Severe,EggShell,Severe,Economy,...,City,False,Million,HundredThou,True,Thousand,Fair,False,Thousand,Many


In [3]:
#3. Remove columns that are useless for the prediction
# "Unnamed: 0" is dropped from the frame (presumably a row counter written by an earlier
# export — verify against the CSV).
# `tabela` is reassigned in place, so running this cell a second time used to raise KeyError
# because the column was already gone; errors = "ignore" keeps the cell safely re-runnable.

tabela = tabela.drop(columns = ["Unnamed: 0"], errors = "ignore")
tabela.head()


Unnamed: 0,GoodStudent,Age,SocioEcon,RiskAversion,VehicleYear,ThisCarDam,RuggedAuto,Accident,MakeModel,DrivQuality,...,HomeBase,AntiTheft,PropCost,OtherCarCost,OtherCar,MedCost,Cushioning,Airbag,ILiCost,DrivHist
0,False,Adult,Prole,Adventurous,Older,Moderate,EggShell,Mild,Economy,Poor,...,City,False,TenThou,Thousand,True,Thousand,Poor,False,Thousand,Many
4,False,Adolescent,Prole,Normal,Older,Moderate,Football,Moderate,Economy,Poor,...,City,False,TenThou,Thousand,False,Thousand,Fair,False,Thousand,Many
5,False,Adult,UpperMiddle,Normal,Current,Moderate,EggShell,Moderate,SportsCar,Poor,...,Suburb,True,HundredThou,HundredThou,True,TenThou,Poor,True,Thousand,Many
6,False,Senior,UpperMiddle,Normal,Current,Mild,Football,Mild,Economy,Poor,...,Secure,True,TenThou,Thousand,False,Thousand,Excellent,True,Thousand,Many
7,False,Adult,Prole,Normal,Older,Severe,EggShell,Severe,Economy,Poor,...,City,False,Million,HundredThou,True,Thousand,Fair,False,Thousand,Many


In [4]:
#4. Split the dependent variable (target) from the independent ones (features)

coluna_alvo = 7  # positional index of the target column in `tabela`
# Feature positions: 0..26 with the target position left out.
colunas_atributos = [posicao for posicao in range(27) if posicao != coluna_alvo]

y = tabela.iloc[:, coluna_alvo].values
X = tabela.iloc[:, colunas_atributos].values


In [5]:
#5. Label-encode the features
# Each column of X whose dtype is object is overwritten, in place, with the integer codes
# produced by the encoder. The same encoder instance is refit for every column.

label = LabelEncoder()
total_colunas = X.shape[1]
for i in range(total_colunas):
    coluna = X[:, i]
    if coluna.dtype == "object":
        X[:, i] = label.fit_transform(coluna)

print(X)


[[0 1 1 ... 0 3 0]
 [0 0 1 ... 0 3 0]
 [0 1 2 ... 1 3 0]
 ...
 [0 1 1 ... 1 3 0]
 [0 1 2 ... 0 3 0]
 [0 1 1 ... 0 3 0]]


In [6]:
#6. Split the data into training and test sets
# X holds the independent variables, y the dependent one.
# 30% of the rows are held out for testing; random_state pins the shuffle.

X_treino, X_teste, y_treino, y_teste = train_test_split(
    X,
    y,
    test_size = 0.3,
    random_state = 1,
)


In [17]:
#7 Create and train the model

# Unrestricted alternative, kept for reference: DecisionTreeClassifier(random_state = 1)

# Tree limited to a maximum depth of 8 and at most 8 leaf nodes
modelo = DecisionTreeClassifier(
    max_depth = 8,
    max_leaf_nodes = 8,
    random_state = 1,
)
modelo.fit(X_treino, y_treino)


In [18]:
#8 Render the fitted decision tree to a PNG image

# X was built from every column of `tabela` except position 7 (the target), so the feature
# labels must skip that same position. The previous `tabela.columns[:-1]` removed the LAST
# column instead: the length still matched, but every feature from position 7 onwards was
# shown under the wrong name in the picture.
nomes_atributos = list(tabela.columns.delete(7))

# Label the nodes with the classes the model was fitted on (already in sorted order) rather
# than the symbolic "y[k]" placeholders produced by class_names = True.
nomes_classes = [str(classe) for classe in modelo.classes_]

imagem_arvore = export_graphviz(
    modelo,
    out_file = None,  # return the DOT source as a string instead of writing a file
    filled = True,
    feature_names = nomes_atributos,
    class_names = nomes_classes,
    rounded = True,
)

grafico = graphviz.Source(imagem_arvore)
grafico.render("Árvore_de_Decisão_Exercício_4", format = "png")


'Árvore_de_Decisão_Exercício_4.png'

In [19]:
#9 Predict the target for the test features (X_teste)

previsoes = modelo.predict(X = X_teste)
print(previsoes)


['Moderate' 'Severe' 'Mild' ... 'Severe' 'Severe' 'Moderate']


In [20]:
#10. Evaluate the model on the test set
# Accuracy is printed as a percentage; precision, recall and F1 use the "weighted" average
# across classes and are printed as fractions, one value per line.
media = {"average": "weighted"}

acuracia = accuracy_score(y_teste, previsoes)
precisao = precision_score(y_teste, previsoes, **media)
recall = recall_score(y_teste, previsoes, **media)
f1 = f1_score(y_teste, previsoes, **media)

print(
    f"{acuracia * 100:.4f}%",
    f"{precisao:.4f}",
    f"{recall:.4f}",
    f"{f1:.4f}",
    sep = "\n",
)


86.0902%
0.8639
0.8609
0.8608


In [21]:
#11. Model report
# Text summary with precision, recall, f1-score and support for each class.

verificacao = classification_report(y_true = y_teste, y_pred = previsoes)
print(verificacao)


              precision    recall  f1-score   support

        Mild       0.96      0.84      0.90       423
    Moderate       0.78      0.77      0.78       497
      Severe       0.87      0.93      0.90       676

    accuracy                           0.86      1596
   macro avg       0.87      0.85      0.86      1596
weighted avg       0.86      0.86      0.86      1596

