In [1]:
import pandas as pd

In [39]:
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import time

In [3]:
df_wines = pd.read_csv('completoWine.csv')

In [4]:
X = df_wines.drop('quality', axis=1)
Y = df_wines.quality

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.9)



In [5]:
df_wines.groupby('quality').size()

quality
3      30
4     216
5    2138
6    2836
7    1079
8     193
9       5
dtype: int64

In [38]:
df_scores = pd.DataFrame({})

In [36]:
MAX_DEPTH = 50
MIN_SAMPLES_SPLIT = [2,4,6,8,10,12]
MIN_SAMPLES_LEAF = [1,2,4,6,8,9]
MAX_FEATURES = [2,3,4,5,6,7,8,9,10,11,12,13]
N_ESTIMATORS=[100,200,300,400,500,600,700,800,900,1000]

In [None]:
for min_samples_split in MIN_SAMPLES_SPLIT:
    for min_samples_leaf in MIN_SAMPLES_LEAF:
        for max_feature in MAX_FEATURES:
            for n_estimators in N_ESTIMATORS:
                start = time.clock()
                decision_tree_classifier=DecisionTreeClassifier(max_depth=MAX_DEPTH, min_samples_split=min_samples_split, max_features=max_feature, min_samples_leaf=min_samples_leaf, criterion='entropy')
                clf = AdaBoostClassifier(n_estimators=n_estimators, learning_rate=0.01, base_estimator=decision_tree_classifier, algorithm='SAMME')
                scores_train = cross_val_score(clf, X_train, Y_train, scoring='f1_micro', cv=3)
                end = time.clock()

                Y_predict = cross_val_predict(clf, X_test, Y_test, cv=3)
                f1_score_test = f1_score(Y_test, Y_predict, average='micro')
                print(min_samples_split, min_samples_leaf, max_feature, n_estimators, scores_train.mean(), f1_score_test)
                df_scores = df_scores.append({'time': start-end,
                                              'min_samples_leaf': min_samples_leaf,
                                              'min_samples_split': min_samples_split,
                                              'max_feature': max_feature,
                                              'n_estimators': n_estimators,
                                              'f1_micro_mean_train': scores_train.mean(),
                                              'f1_micro_mean_test': f1_score_test,
                                              'f1_micro_std_train': scores_train.std()}, 
                                             ignore_index=True)
                                



2 1 2 100 0.5370202154121392 0.4292307692307692
2 1 2 200 0.5365079203301719 0.4107692307692308




2 1 2 300 0.5370339794275181 0.41846153846153844
2 1 2 400 0.5430089178645167 0.44153846153846155




2 1 2 500 0.5452328037197537 0.37846153846153846
2 1 2 600 0.5216315428502193 0.42153846153846153




2 1 2 700 0.5341142235961154 0.4015384615384615
2 1 2 800 0.5430083017561685 0.39692307692307693




2 1 2 900 0.5498522319326516 0.4323076923076923
2 1 2 1000 0.5373601590012947 0.4092307692307692




2 1 3 100 0.5455772250352637 0.43538461538461537




2 1 3 200 0.5310361511516343 0.4169230769230769




2 1 3 300 0.5510576706978371 0.3953846153846154




2 1 3 400 0.5513886768438818 0.41999999999999993




2 1 3 500 0.5452264068906346 0.4276923076923077




2 1 3 600 0.5546371618678522 0.42615384615384616




2 1 3 700 0.5390807097296729 0.4015384615384615




2 1 3 800 0.5488197472614275 0.38153846153846155




2 1 3 900 0.5490061237286931 0.4107692307692308




2 1 3 1000 0.5454094427575646 0.43538461538461537




2 1 4 100 0.547807704204334 0.41999999999999993




2 1 4 200 0.5438715970598009 0.4292307692307692




2 1 4 300 0.5537895577866826 0.4061538461538461




2 1 4 400 0.5387510971663226 0.43538461538461537




2 1 4 500 0.5315496802512575 0.44769230769230767




2 1 4 600 0.5551577845879416 0.43538461538461537




2 1 4 700 0.5495098230096237 0.43076923076923074




2 1 4 800 0.5347949170641298 0.4000000000000001




2 1 4 900 0.5361729646792824 0.4323076923076923




2 1 4 1000 0.5419922177054727 0.4169230769230769




2 1 5 100 0.5460858378748209 0.4461538461538462




2 1 5 200 0.5356534754832921 0.44461538461538463




2 1 5 300 0.5589093554859342 0.4153846153846154




2 1 5 400 0.5435272645316089 0.43846153846153846




2 1 5 500 0.550871559692046 0.41384615384615386




2 1 5 600 0.5519057966971572 0.42615384615384616




2 1 5 700 0.5460924149800159 0.40307692307692305




2 1 5 800 0.5460853141557106 0.41230769230769226




2 1 5 900 0.5481329134209697 0.4338461538461538




2 1 5 1000 0.5407929175117707 0.41230769230769226




2 1 6 100 0.5471173647956228 0.43076923076923074




2 1 6 200 0.5561814070961746 0.4323076923076923




2 1 6 300 0.5525807067429612 0.4153846153846154




2 1 6 400 0.5448871555904268 0.4092307692307692




2 1 6 500 0.5442086529900108 0.41846153846153844




2 1 6 600 0.5565234626708896 0.40307692307692305




2 1 6 700 0.5551537507981782 0.4169230769230769




2 1 6 800 0.5613017303154963 0.4000000000000001




2 1 6 900 0.5578833645354662 0.4276923076923077




2 1 6 1000 0.5455866172207247 0.44461538461538463




2 1 7 100 0.5484866381346224 0.4538461538461538




2 1 7 200 0.5433543893196872 0.44153846153846155




2 1 7 300 0.540277197544549 0.40307692307692305




2 1 7 400 0.5460888206244441 0.43846153846153846




2 1 7 500 0.5385523574702115 0.42615384615384616




2 1 7 600 0.5412955621506416 0.4092307692307692




2 1 7 700 0.5430183862305772 0.4276923076923077




2 1 7 800 0.5445462389426904 0.4092307692307692




2 1 7 900 0.5432009001793565 0.4230769230769231




2 1 7 1000 0.5498585390739725 0.41384615384615386




2 1 8 100 0.5536066922905499 0.3861538461538461




2 1 8 200 0.5445526411746887 0.41230769230769226




2 1 8 300 0.552748745477336 0.44




2 1 8 400 0.5481485230599533 0.44769230769230767




2 1 8 500 0.5510469701150805 0.4369230769230769




2 1 8 600 0.5383986970586039 0.4046153846153846




2 1 8 700 0.5353188776831154 0.44769230769230767




2 1 8 800 0.5527521685616309 0.4046153846153846




2 1 8 900 0.5389125777055803 0.4153846153846154




2 1 8 1000 0.5443772244482951 0.44




2 1 9 100 0.5416396346202381 0.39076923076923076




2 1 9 200 0.5460868979197601 0.4076923076923077




2 1 9 300 0.547634042513267 0.38461538461538464




2 1 9 400 0.5382291543426988 0.41999999999999993




2 1 9 500 0.5433483413364818 0.4523076923076923




2 1 9 600 0.550021591671041 0.44153846153846155




2 1 9 700 0.5438714221866041 0.44




2 1 9 800 0.5549876365694472 0.4061538461538461




2 1 9 900 0.5408017760728422 0.41846153846153844




2 1 9 1000 0.5483106976891184 0.4015384615384615




2 1 10 100 0.5467707463091548 0.4461538461538462




2 1 10 200 0.5454040996700256 0.4292307692307692




2 1 10 300 0.5483171851065154 0.4153846153846154




2 1 10 400 0.5421621011629723 0.44461538461538463




2 1 10 500 0.5476370234619303 0.4092307692307692




2 1 10 600 0.544031576318968 0.38




2 1 10 700 0.5450673862823698 0.41384615384615386




2 1 10 800 0.538727341065773 0.3830769230769231




2 1 10 900 0.5455816130738201 0.41999999999999993




2 1 10 1000 0.5507193834514109 0.41846153846153844




2 1 11 100 0.5542913298605295 0.40307692307692305




2 1 11 200 0.5436928514393101 0.4046153846153846




2 1 11 300 0.5385650767488096 0.4369230769230769




2 1 11 400 0.5483225344974137 0.44769230769230767




2 1 11 500 0.5394231132498214 0.43076923076923074




2 1 11 600 0.5353155433861388 0.39230769230769236




2 1 11 700 0.5491775030634037 0.39076923076923076




2 1 11 800 0.5498602107248602 0.4276923076923077




2 1 11 900 0.5421535080633754 0.4061538461538461




2 1 11 1000 0.5423276073876743 0.4153846153846154




2 1 12 100 0.5460965393580574 0.3953846153846154




2 1 12 200 0.54864450991063 0.4276923076923077




2 1 12 300 0.5452414058241496 0.41846153846153844




2 1 12 400 0.5519007997540921 0.41846153846153844




2 1 12 500 0.5472899691431904 0.41230769230769226




2 1 12 600 0.5462523951396631 0.41846153846153844




2 1 12 700 0.5421467551845037 0.44153846153846155




2 1 12 800 0.5305283319950617 0.43076923076923074




2 1 12 900 0.5435251561479694 0.3892307692307692




2 1 12 1000 0.5508816450669346 0.41846153846153844




2 1 13 100 0.5359981658621966 0.4076923076923077




2 1 13 200 0.5383934355545438 0.4169230769230769




2 1 13 300 0.5387355781156171 0.4153846153846154




2 1 13 400 0.5382217064734397 0.4169230769230769




2 1 13 500 0.5412952151056878 0.4230769230769231




2 1 13 600 0.5392455935426675 0.4169230769230769




2 1 13 700 0.5322314265603718 0.4000000000000001




2 1 13 800 0.541815313206187 0.4076923076923077




2 1 13 900 0.5387315470272934 0.4107692307692308




2 1 13 1000 0.5322372996703806 0.4153846153846154




2 2 2 100 0.6572596256287518 0.5538461538461539




2 2 2 200 0.6534934970323957 0.5430769230769231




2 2 2 300 0.6589653468939325 0.5446153846153846




2 2 2 400 0.6579361956192047 0.5476923076923077




2 2 2 500 0.6582800044282864 0.5492307692307692




2 2 2 600 0.6574272303318146 0.5553846153846154




2 2 2 700 0.6591397053763468 0.54




2 2 2 800 0.6596523511051874 0.5553846153846154




2 2 2 900 0.6603334813059537 0.5507692307692308




2 2 2 1000 0.6606761529890166 0.5446153846153846




2 2 3 100 0.6504180647949837 0.5353846153846153




2 2 3 200 0.6521271149542613 0.5292307692307693




2 2 3 300 0.6536632119200578 0.5292307692307693




2 2 3 400 0.6562285453459807 0.5461538461538461




2 2 3 500 0.6550319651418669 0.54




2 2 3 600 0.6565709524680489 0.5415384615384615




2 2 3 700 0.6552068491443513 0.536923076923077




2 2 3 800 0.6584539270784288 0.5415384615384615




2 2 3 900 0.6558880672319559 0.5446153846153846




2 2 3 1000 0.6610205770059664 0.5492307692307692




2 2 4 100 0.643405648345615 0.5261538461538462




2 2 4 200 0.6471722118737632 0.5323076923076923




2 2 4 300 0.6495635347627053 0.5353846153846153




2 2 4 400 0.651443879971775 0.5338461538461539




2 2 4 500 0.6519623136252256 0.5323076923076923




2 2 4 600 0.6516156978401973 0.5384615384615384




2 2 4 700 0.6517882170023664 0.5430769230769231




2 2 4 800 0.6570850016848627 0.5323076923076923




2 2 4 900 0.6531543363204658 0.5384615384615384




2 2 4 1000 0.6553792813201619 0.5446153846153846




2 2 5 100 0.6432357576842762 0.5307692307692308




2 2 5 200 0.6456293620290949 0.5384615384615384




2 2 5 300 0.6471740538954481 0.5276923076923077




2 2 5 400 0.6478537824092009 0.5276923076923077




2 2 5 500 0.6488791608532405 0.52




2 2 5 600 0.6483701991677698 0.536923076923077




2 2 5 700 0.6497393873213709 0.5292307692307693




2 2 5 800 0.6538382752981385 0.5292307692307693




2 2 5 900 0.653839677885632 0.536923076923077




2 2 5 1000 0.6509338663456843 0.5215384615384615




2 2 6 100 0.6403274925166951 0.5169230769230769




2 2 6 200 0.6452879216622481 0.5276923076923077




2 2 6 300 0.6442654362800339 0.5061538461538462




2 2 6 400 0.6485369286043691 0.5138461538461538




2 2 6 500 0.648201361347531 0.5215384615384615


In [15]:
decision_tree_classifier=DecisionTreeClassifier(max_depth=100, min_samples_split=6, min_samples_leaf=6, criterion='entropy')

In [20]:
clf = AdaBoostClassifier(n_estimators=200, learning_rate=0.01, base_estimator=decision_tree_classifier, algorithm='SAMME')

#clf.fit(X_train, Y_train)

#Y_predict = clf.predict(X_test)

#f1_score(Y_test, Y_predict, average='micro')

In [21]:
scores = cross_val_score(clf, X_train, Y_train, scoring='f1_micro', cv=3)
scores.mean()

0.6239098489561236

In [10]:
Y_predict = cross_val_predict(clf, X_test, Y_test, cv=3)



In [12]:
f1_score(Y_test, Y_predict, average='micro')

0.49538461538461537

In [14]:
confusion_matrix(Y_test, Y_predict)

array([[  0,   0,   1,   2,   0,   0,   0],
       [  0,   0,   7,   7,   0,   0,   0],
       [  0,   1, 105,  90,   8,   0,   0],
       [  0,   3,  65, 185,  38,   5,   1],
       [  0,   0,   7,  75,  27,   0,   0],
       [  0,   0,   0,  10,   6,   5,   0],
       [  0,   0,   0,   2,   0,   0,   0]])

In [269]:
clf.classes_

AttributeError: 'AdaBoostClassifier' object has no attribute 'classes_'

In [209]:
clf.feature_importances_

array([ 0.05966777,  0.07152935,  0.09708522,  0.06851571,  0.07382377,
        0.07950466,  0.10435181,  0.08513251,  0.06454349,  0.07548736,
        0.080881  ,  0.13813066,  0.00134668])

In [202]:
[clf.tree_.max_depth for clf in clf.estimators_]

[24,
 23,
 23,
 23,
 22,
 26,
 22,
 24,
 23,
 22,
 22,
 21,
 24,
 24,
 24,
 23,
 23,
 24,
 22,
 22,
 24,
 22,
 23,
 22,
 23,
 21,
 25,
 26,
 26,
 26,
 30,
 30,
 30,
 24,
 22,
 22,
 22,
 23,
 22,
 23,
 22,
 25,
 22,
 21,
 24,
 22,
 23,
 26,
 25,
 22,
 29,
 29,
 23,
 29,
 29,
 29,
 26,
 29,
 28,
 29,
 28,
 25,
 30,
 21,
 23,
 22,
 22,
 25,
 21,
 22,
 26,
 29,
 26,
 22,
 27,
 22,
 26,
 23,
 23,
 23,
 26,
 22,
 22,
 22,
 22,
 22,
 23,
 28,
 27,
 22,
 22,
 24,
 25,
 25,
 23,
 28,
 22,
 24,
 21,
 23,
 28,
 28,
 29,
 29,
 21,
 28,
 27,
 27,
 27,
 27,
 25,
 26,
 26,
 26,
 25,
 25,
 28,
 25,
 26,
 27,
 27,
 27,
 27,
 27,
 27,
 31,
 27,
 27,
 27,
 30,
 27,
 27,
 27,
 27,
 27,
 27,
 27,
 24,
 26,
 25,
 26,
 25,
 28,
 26,
 26,
 27,
 26,
 27,
 27,
 23,
 29,
 29,
 32,
 29,
 32,
 26,
 25,
 25,
 25,
 27,
 26,
 26,
 27,
 27,
 26,
 27,
 26,
 29,
 28,
 26,
 23,
 26,
 23,
 24,
 23,
 24,
 24,
 26,
 23,
 25,
 23,
 23,
 24,
 24,
 24,
 25,
 29,
 25,
 29,
 29,
 25,
 26,
 26,
 27,
 25,
 27,
 24,
 27,
 27,
 27,


In [None]:
clf.estimators