In [13]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [2]:

datas = load_wine()

df_wine = pd.DataFrame(np.c_[datas['data'], datas['target']],
                       columns=np.append(datas['feature_names'], ['target']))

In [3]:
df_wine.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,0.0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0,0.0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0,0.0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0,0.0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0,0.0


In [4]:
df_wine.shape

(178, 14)

In [6]:
df_wine.describe()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target
count,178.0,178.0,178.0,178.0,178.0,178.0,178.0,178.0,178.0,178.0,178.0,178.0,178.0,178.0
mean,13.000618,2.336348,2.366517,19.494944,99.741573,2.295112,2.02927,0.361854,1.590899,5.05809,0.957449,2.611685,746.893258,0.938202
std,0.811827,1.117146,0.274344,3.339564,14.282484,0.625851,0.998859,0.124453,0.572359,2.318286,0.228572,0.70999,314.907474,0.775035
min,11.03,0.74,1.36,10.6,70.0,0.98,0.34,0.13,0.41,1.28,0.48,1.27,278.0,0.0
25%,12.3625,1.6025,2.21,17.2,88.0,1.7425,1.205,0.27,1.25,3.22,0.7825,1.9375,500.5,0.0
50%,13.05,1.865,2.36,19.5,98.0,2.355,2.135,0.34,1.555,4.69,0.965,2.78,673.5,1.0
75%,13.6775,3.0825,2.5575,21.5,107.0,2.8,2.875,0.4375,1.95,6.2,1.12,3.17,985.0,2.0
max,14.83,5.8,3.23,30.0,162.0,3.88,5.08,0.66,3.58,13.0,1.71,4.0,1680.0,2.0


In [8]:
df_wine.dtypes

Unnamed: 0,0
alcohol,float64
malic_acid,float64
ash,float64
alcalinity_of_ash,float64
magnesium,float64
total_phenols,float64
flavanoids,float64
nonflavanoid_phenols,float64
proanthocyanins,float64
color_intensity,float64


In [9]:
for column in df_wine.columns:
  print(f'{column}: {df_wine[column].isnull().sum()}')

alcohol: 0
malic_acid: 0
ash: 0
alcalinity_of_ash: 0
magnesium: 0
total_phenols: 0
flavanoids: 0
nonflavanoid_phenols: 0
proanthocyanins: 0
color_intensity: 0
hue: 0
od280/od315_of_diluted_wines: 0
proline: 0
target: 0


In [10]:
df_wine['target'].value_counts()

Unnamed: 0_level_0,count
target,Unnamed: 1_level_1
1.0,71
0.0,59
2.0,48


In [11]:
X = df_wine.drop('target', axis=1)
y = df_wine['target']

X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size=0.3, random_state=42)

In [14]:
for i in range(1, 4):
  model = DecisionTreeClassifier(criterion='gini', max_depth=i)
  model.fit(X_train, y_train)
  y_pred = model.predict(X_test)
  print("="*100)
  print("Profundidade da Árvore:", i)
  print("Acurácia:", accuracy_score(y_test, y_pred))
  print("="*100)
  print("Matriz de Confusão:\n", confusion_matrix(y_test, y_pred))
  print("="*100)
  print("Relatório de Classificação:\n", classification_report(y_test,
  y_pred))
  print("="*100)

Profundidade da Árvore: 1
Acurácia: 0.6111111111111112
Matriz de Confusão:
 [[17  2  0]
 [ 5 16  0]
 [14  0  0]]
Relatório de Classificação:
               precision    recall  f1-score   support

         0.0       0.47      0.89      0.62        19
         1.0       0.89      0.76      0.82        21
         2.0       0.00      0.00      0.00        14

    accuracy                           0.61        54
   macro avg       0.45      0.55      0.48        54
weighted avg       0.51      0.61      0.54        54

Profundidade da Árvore: 2
Acurácia: 0.8518518518518519
Matriz de Confusão:
 [[17  2  0]
 [ 5 16  0]
 [ 1  0 13]]
Relatório de Classificação:
               precision    recall  f1-score   support

         0.0       0.74      0.89      0.81        19
         1.0       0.89      0.76      0.82        21
         2.0       1.00      0.93      0.96        14

    accuracy                           0.85        54
   macro avg       0.88      0.86      0.86        54
weighted 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [16]:
for i in range(1,4):
  rf = RandomForestClassifier(n_estimators = 3, max_depth=i, max_features= 20, criterion='gini')
  rf.fit(X_train, y_train)
  y_pred = rf.predict(X_test)
  accuracy = accuracy_score(y_test, y_pred)
  print("="*100)
  print("Profundidade da Árvore:", i)
  print("Acurácia:", accuracy_score(y_test, y_pred))
  print("Matriz de Confusão:\n", confusion_matrix(y_test, y_pred))
  print("Relatório de Classificação:\n", classification_report(y_test,
  y_pred))
  print("="*100)

Profundidade da Árvore: 1
Acurácia: 0.8333333333333334
Matriz de Confusão:
 [[17  2  0]
 [ 5 16  0]
 [ 2  0 12]]
Relatório de Classificação:
               precision    recall  f1-score   support

         0.0       0.71      0.89      0.79        19
         1.0       0.89      0.76      0.82        21
         2.0       1.00      0.86      0.92        14

    accuracy                           0.83        54
   macro avg       0.87      0.84      0.84        54
weighted avg       0.85      0.83      0.84        54

Profundidade da Árvore: 2
Acurácia: 0.8888888888888888
Matriz de Confusão:
 [[19  0  0]
 [ 5 16  0]
 [ 1  0 13]]
Relatório de Classificação:
               precision    recall  f1-score   support

         0.0       0.76      1.00      0.86        19
         1.0       1.00      0.76      0.86        21
         2.0       1.00      0.93      0.96        14

    accuracy                           0.89        54
   macro avg       0.92      0.90      0.90        54
weighted 

In [21]:
for i in range(1,6):
  knn = KNeighborsClassifier(n_neighbors=i)
  knn.fit(X_train, y_train)
  y_pred = knn.predict(X_test)
  accuracy = accuracy_score(y_test, y_pred)
  print("="*100)
  print("Número de vizinhos:", i)
  print("Acurácia:", accuracy_score(y_test, y_pred))
  print("Matriz de Confusão:\n", confusion_matrix(y_test, y_pred))
  print("Relatório de Classificação:\n", classification_report(y_test,
  y_pred))
  print("="*100)

Número de vizinhos: 1
Acurácia: 0.7962962962962963
Matriz de Confusão:
 [[17  0  2]
 [ 3 16  2]
 [ 1  3 10]]
Relatório de Classificação:
               precision    recall  f1-score   support

         0.0       0.81      0.89      0.85        19
         1.0       0.84      0.76      0.80        21
         2.0       0.71      0.71      0.71        14

    accuracy                           0.80        54
   macro avg       0.79      0.79      0.79        54
weighted avg       0.80      0.80      0.80        54

Número de vizinhos: 2
Acurácia: 0.7037037037037037
Matriz de Confusão:
 [[17  0  2]
 [ 4 16  1]
 [ 2  7  5]]
Relatório de Classificação:
               precision    recall  f1-score   support

         0.0       0.74      0.89      0.81        19
         1.0       0.70      0.76      0.73        21
         2.0       0.62      0.36      0.45        14

    accuracy                           0.70        54
   macro avg       0.69      0.67      0.66        54
weighted avg     