<h2>Importando dependências</h2>

In [62]:
import pandas as pd
import csv
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score,confusion_matrix,classification_report
from sklearn.preprocessing import Normalizer,MinMaxScaler
from sklearn.tree import DecisionTreeClassifier
from IPython.display import Image  
from sklearn import tree
import pydotplus
import numpy as np

<h2>Função de scores</h2>

In [69]:
def scores(y_test, prediction):
    """Evaluate predictions against the true labels and print a report.

    Args:
        y_test: True labels of the evaluated samples.
        prediction: Labels predicted for the same samples.

    Returns:
        list: ``[accuracy, precision, f1, recall]``, each multiplied by 100.
        Each metric is computed with scikit-learn's default settings.

    Side effects:
        Prints the scikit-learn classification report, labelling the two
        classes as "red wine" and "white wine".
    """
    # The order of this tuple defines the order of the returned list.
    metric_functions = (accuracy_score, precision_score, f1_score, recall_score)
    percentages = [metric(y_test, prediction) * 100 for metric in metric_functions]

    report = classification_report(
        y_test,
        prediction,
        target_names=["red wine", "white wine"],
    )
    print(report)
    return percentages

<h3>Função para salvar imagens das árvores de decisão</h3>

In [39]:
def save_decision_tree(i, dados, classes, model=None):
    """Render a fitted decision tree to ``tree_images/wines<i>.png``.

    Args:
        i: Identifier appended to the output file name (e.g. the fold number).
        dados: DataFrame whose first 12 column names label the tree features.
        classes: Class names, passed to ``export_graphviz`` as ``class_names``.
        model: Fitted decision tree to export. Defaults to the notebook-level
            ``clf`` so existing three-argument calls keep working.

    Side effects:
        Creates the ``tree_images`` directory if needed and writes a PNG file.
    """
    import os

    if model is None:
        # Fall back to the classifier defined at notebook level.
        model = clf

    # A plain list is used for feature_names because some scikit-learn
    # releases reject a pandas Index for this argument.
    dot_data = tree.export_graphviz(model, out_file=None,
                                    feature_names=list(dados.columns[0:12]),
                                    class_names=classes)

    # Make sure the target directory exists before writing the image.
    os.makedirs("tree_images", exist_ok=True)

    # Draw graph
    graph = pydotplus.graph_from_dot_data(dot_data)
    graph.write_png("tree_images/wines"+str(i)+".png")

<h2>Carregando dataset</h2>

In [64]:
# Load the dataset of white and red wines
wines = pd.read_csv("wines.csv", sep=",")
# Split into inputs (first 12 columns) and outputs (last column)
X, Y = wines.iloc[:, :12], wines.iloc[:, -1]

<h2>Normalizando dados</h2>

<h3>Pela norma l1</h3>

In [65]:
# Scale the 12 input columns with the L1 norm.
normalizer = Normalizer(norm='l1')
# Build a fresh DataFrame from the transformed values instead of writing
# floats into a copy of X through .iloc: that in-place write depends on pandas
# silently upcasting integer columns, which recent pandas versions deprecate.
winesl1 = pd.DataFrame(normalizer.transform(X), columns=X.columns, index=X.index)
winesl1.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,0.030496,0.000659,0.001236,0.056459,0.000231,0.135996,0.692343,0.004114,0.011951,0.001813,0.035853,0.028848
1,0.037031,0.002051,0.001766,0.077195,0.000302,0.113942,0.64377,0.005671,0.018231,0.00319,0.062668,0.034182
2,0.086411,0.003227,0.00406,0.024986,0.000812,0.176987,0.447672,0.010353,0.03446,0.008016,0.130137,0.072877
3,0.036697,0.003556,0.002356,0.009061,0.000553,0.16763,0.693174,0.004517,0.014543,0.003126,0.042134,0.022653
4,0.042753,0.001579,0.002499,0.006577,0.000178,0.203899,0.591964,0.006507,0.021311,0.002368,0.080902,0.039464


In [66]:
# Decision tree classifier using the Gini criterion, with a fixed random_state
clf = DecisionTreeClassifier(random_state=0, criterion='gini')

<h3>Kfold</h3>

In [67]:
def cross_validation(dados, folds=10, save_trees=False):
    """Run K-fold cross-validation of the notebook-level ``clf`` on ``dados``.

    For every fold the classifier is refitted on the training split, evaluated
    on the test split, and the list returned by ``scores`` is printed
    (``scores`` itself also prints a classification report).

    Args:
        dados: DataFrame of input features, row-aligned with the
            notebook-level target ``Y``.
        folds: Number of splits passed to ``KFold``.
        save_trees: When True, export each fold's fitted tree through
            ``save_decision_tree``. Off by default.

    Returns:
        None. Results are only printed.
    """
    kf = KFold(n_splits=folds)
    for fold, (train_index, test_index) in enumerate(kf.split(dados)):
        X_train, X_test = dados.iloc[train_index], dados.iloc[test_index]
        # KFold yields integer positions, so the target must be selected by
        # position as well. Plain Y[...] is label-based and only matches
        # while Y keeps a default 0..n-1 index.
        y_train, y_test = Y.iloc[train_index], Y.iloc[test_index]

        model = clf.fit(X_train, y_train)
        prediction = model.predict(X_test)

        if save_trees:
            classes = [str(label) for label in model.classes_]
            save_decision_tree(fold, dados, classes)

        print(scores(y_test, prediction))

In [70]:
# 12-fold cross-validation on the L1-normalized data
cross_validation(winesl1, folds=12)

              precision    recall  f1-score   support

    red wine       0.96      0.96      0.96       135
  white wine       0.99      0.99      0.99       407

   micro avg       0.98      0.98      0.98       542
   macro avg       0.97      0.97      0.97       542
weighted avg       0.98      0.98      0.98       542

[97.97047970479706, 98.76847290640394, 98.6469864698647, 98.52579852579852]
              precision    recall  f1-score   support

    red wine       0.96      0.98      0.97       138
  white wine       0.99      0.99      0.99       404

   micro avg       0.99      0.99      0.99       542
   macro avg       0.98      0.98      0.98       542
weighted avg       0.99      0.99      0.99       542

[98.5239852398524, 99.25373134328358, 99.00744416873448, 98.76237623762376]
              precision    recall  f1-score   support

    red wine       0.96      0.97      0.96       122
  white wine       0.99      0.99      0.99       420

   micro avg       0.98      0

<h3>Pela norma l2</h3>

In [71]:
# Scale the 12 input columns with the L2 norm.
normalizer = Normalizer(norm='l2')
# Build a fresh DataFrame from the transformed values instead of writing
# floats into a copy of X through .iloc: that in-place write depends on pandas
# silently upcasting integer columns, which recent pandas versions deprecate.
winesl2 = pd.DataFrame(normalizer.transform(X), columns=X.columns, index=X.index)
winesl2.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,0.042946,0.000929,0.001741,0.079509,0.000325,0.191518,0.975001,0.005793,0.01683,0.002554,0.050491,0.040625
1,0.055812,0.003091,0.002662,0.116346,0.000455,0.171728,0.970265,0.008547,0.027477,0.004808,0.094451,0.051519
2,0.16835,0.006288,0.00791,0.04868,0.001582,0.344813,0.872175,0.02017,0.067137,0.015618,0.253539,0.141982
3,0.051258,0.004968,0.003291,0.012656,0.000772,0.234141,0.968204,0.006309,0.020313,0.004366,0.058852,0.031641
4,0.06739,0.002488,0.00394,0.010368,0.00028,0.321397,0.933089,0.010256,0.033591,0.003732,0.127522,0.062206


In [72]:
# 12-fold cross-validation on the L2-normalized data
cross_validation(winesl2, folds=12)

              precision    recall  f1-score   support

    red wine       0.99      0.97      0.98       135
  white wine       0.99      1.00      0.99       407

   micro avg       0.99      0.99      0.99       542
   macro avg       0.99      0.98      0.99       542
weighted avg       0.99      0.99      0.99       542

[99.07749077490774, 99.02439024390245, 99.38800489596082, 99.75429975429975]
              precision    recall  f1-score   support

    red wine       0.98      0.97      0.97       138
  white wine       0.99      0.99      0.99       404

   micro avg       0.99      0.99      0.99       542
   macro avg       0.98      0.98      0.98       542
weighted avg       0.99      0.99      0.99       542

[98.70848708487084, 99.01234567901234, 99.13473423980223, 99.25742574257426]
              precision    recall  f1-score   support

    red wine       0.97      0.98      0.97       122
  white wine       0.99      0.99      0.99       420

   micro avg       0.99     

<h3>Pela norma max</h3>

In [73]:
# Scale the 12 input columns with the max norm.
normalizer = Normalizer(norm='max')
# Build a fresh DataFrame from the transformed values instead of writing
# floats into a copy of X through .iloc: that in-place write depends on pandas
# silently upcasting integer columns, which recent pandas versions deprecate.
winesmax = pd.DataFrame(normalizer.transform(X), columns=X.columns, index=X.index)
winesmax.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,0.044048,0.000952,0.001786,0.081548,0.000333,0.196429,1.0,0.005942,0.017262,0.002619,0.051786,0.041667
1,0.057522,0.003186,0.002743,0.119912,0.000469,0.176991,1.0,0.008809,0.028319,0.004956,0.097345,0.053097
2,0.193023,0.007209,0.00907,0.055814,0.001814,0.395349,1.0,0.023127,0.076977,0.017907,0.290698,0.162791
3,0.052941,0.005131,0.003399,0.013072,0.000797,0.24183,1.0,0.006516,0.02098,0.00451,0.060784,0.03268
4,0.072222,0.002667,0.004222,0.011111,0.0003,0.344444,1.0,0.010992,0.036,0.004,0.136667,0.066667


In [74]:
# 12-fold cross-validation on the max-normalized data
cross_validation(winesmax, folds=12)

              precision    recall  f1-score   support

    red wine       0.98      0.97      0.98       135
  white wine       0.99      1.00      0.99       407

   micro avg       0.99      0.99      0.99       542
   macro avg       0.99      0.98      0.99       542
weighted avg       0.99      0.99      0.99       542

[98.8929889298893, 99.02200488997555, 99.26470588235293, 99.5085995085995]
              precision    recall  f1-score   support

    red wine       0.97      0.98      0.97       138
  white wine       0.99      0.99      0.99       404

   micro avg       0.99      0.99      0.99       542
   macro avg       0.98      0.98      0.98       542
weighted avg       0.99      0.99      0.99       542

[98.70848708487084, 99.25558312655087, 99.13258983890955, 99.00990099009901]
              precision    recall  f1-score   support

    red wine       0.97      0.98      0.97       122
  white wine       0.99      0.99      0.99       420

   micro avg       0.99      0

<h3>Por MinMaxScaler</h3>

In [76]:
# Min-max scale the input columns.
# NOTE(review): the (0, 12) range is kept from the original; confirm it is
# intentional rather than the more common (0, 1).
scaller = MinMaxScaler(feature_range=(0, 12))
# Cast to float before fitting: the recorded output of this cell shows a
# warning raised from the scaler's fit, presumably caused by integer input
# columns being converted (verify against the installed scikit-learn).
# Build a fresh DataFrame instead of writing floats into a copy of X through
# .iloc, which depends on pandas silently upcasting integer columns.
winesminmax = pd.DataFrame(
    scaller.fit_transform(X.astype("float64")),
    columns=X.columns,
    index=X.index,
)
winesminmax.head()

  return self.partial_fit(X, y)


Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,3.570248,0.64,2.168675,2.411043,0.936877,1.333333,4.479263,2.577212,1.674419,1.483146,1.217391,8.0
1,2.677686,2.24,2.240964,2.383436,0.877076,0.791667,2.958525,1.927126,4.465116,2.292135,5.217391,6.0
2,4.46281,1.84,2.819277,0.331288,1.375415,0.666667,1.023041,1.695778,5.488372,3.707865,7.826087,8.0
3,4.264463,5.64,3.759036,0.257669,2.252492,1.5,4.064516,2.264893,4.55814,3.168539,2.26087,4.0
4,2.677686,1.28,2.746988,0.07362,0.358804,1.25,2.322581,0.497397,4.837209,0.94382,7.478261,6.0


In [77]:
# 12-fold cross-validation on the min-max scaled data
cross_validation(winesminmax, folds=12)

              precision    recall  f1-score   support

    red wine       0.98      0.98      0.98       135
  white wine       0.99      0.99      0.99       407

   micro avg       0.99      0.99      0.99       542
   macro avg       0.99      0.99      0.99       542
weighted avg       0.99      0.99      0.99       542

[98.8929889298893, 99.26289926289927, 99.26289926289927, 99.26289926289927]
              precision    recall  f1-score   support

    red wine       0.99      0.97      0.98       138
  white wine       0.99      1.00      0.99       404

   micro avg       0.99      0.99      0.99       542
   macro avg       0.99      0.98      0.99       542
weighted avg       0.99      0.99      0.99       542

[98.8929889298893, 99.01477832512316, 99.25925925925925, 99.5049504950495]
              precision    recall  f1-score   support

    red wine       0.98      0.98      0.98       122
  white wine       1.00      0.99      0.99       420

   micro avg       0.99      0.