In [10]:
import pandas as pd

In [11]:
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier, RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import time

In [12]:
df_wines = pd.read_csv('completoWine.csv')

In [13]:
X = df_wines.drop('quality', axis=1)
Y = df_wines.quality

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.8)



In [14]:
df_wines.groupby('quality').size()

quality
3      30
4     216
5    2138
6    2836
7    1079
8     193
9       5
dtype: int64

In [15]:
df_scores = pd.DataFrame({})
models = []

In [16]:
MAX_DEPTH = 50
CRITERIA=['gini', 'entropy']
MIN_SAMPLES_SPLIT = [2,4,8,12]
MIN_SAMPLES_LEAF = [2,4,8]
MAX_FEATURES = [2,4,6,8,10,12,13]
N_ESTIMATORS=[100,300,500,700,800,900]

In [None]:
for min_samples_split in MIN_SAMPLES_SPLIT:
    for min_samples_leaf in MIN_SAMPLES_LEAF:
        for max_feature in MAX_FEATURES:
            for criteria in CRITERIA:
                for n_estimators in N_ESTIMATORS:
                    start = time.clock()
                    clf = RandomForestClassifier(n_estimators=n_estimators, warm_start=True,
                                                 criterion=criteria, min_samples_split=min_samples_split,
                                                 min_samples_leaf=min_samples_leaf)
                    scores_train = cross_val_score(clf, X_train, Y_train, scoring='f1_micro', cv=3)
                    end = time.clock()
                    models.append(clf)
                    Y_predict = cross_val_predict(clf, X_test, Y_test, cv=3)
                    f1_score_test = f1_score(Y_test, Y_predict, average='micro')
                    print(min_samples_split, min_samples_leaf, max_feature, n_estimators, scores_train.mean(), f1_score_test)
                    df_scores = df_scores.append({'time': start-end,
                                                  'min_samples_leaf': min_samples_leaf,
                                                  'min_samples_split': min_samples_split,
                                                  'max_feature': max_feature,
                                                  'n_estimators': n_estimators,
                                                  'f1_micro_mean_train': scores_train.mean(),
                                                  'f1_micro_mean_test': f1_score_test,
                                                  'f1_micro_std_train': scores_train.std(),
                                                  'depth': clf.max_depth}, 
                                                 ignore_index=True)
                                



2 2 2 100 0.640184701081244 0.5330769230769231




2 2 2 300 0.6476928118829649 0.5407692307692308




2 2 2 500 0.6424969481703121 0.546923076923077




2 2 2 700 0.6434582265931912 0.54




2 2 2 800 0.6444210624613259 0.5415384615384615




2 2 2 900 0.6440379270812723 0.5430769230769231




2 2 2 100 0.6428755291931604 0.5407692307692308




2 2 2 300 0.6436481246300908 0.5430769230769231




2 2 2 500 0.6444203949847878 0.55




2 2 2 700 0.6473035704711406 0.5484615384615384




2 2 2 800 0.6467240905516753 0.5438461538461539




2 2 2 900 0.6463407326794423 0.5430769230769231




2 2 4 100 0.6390270742718264 0.5292307692307693




2 2 4 300 0.6444172858649667 0.5361538461538462




2 2 4 500 0.6428797488503148 0.5430769230769231




2 2 4 700 0.6417254555764612 0.536923076923077




2 2 4 800 0.6451909976099209 0.5353846153846153




2 2 4 900 0.6448056411552004 0.5353846153846153




2 2 4 100 0.6490296821109965 0.5376923076923077




2 2 4 300 0.6478847142730425 0.54




2 2 4 500 0.6465341925147757 0.5461538461538461




2 2 4 700 0.6486446545856362 0.546923076923077




2 2 4 800 0.6459552678639773 0.5423076923076923




2 2 4 900 0.6480722867216577 0.5484615384615384




2 2 6 100 0.6432619961852145 0.5361538461538462




2 2 6 300 0.6409529685002754 0.54




2 2 6 500 0.6419190237725392 0.5361538461538462




2 2 6 700 0.6432664364109849 0.5407692307692308




2 2 6 800 0.6432647725285482 0.5407692307692308




2 2 6 900 0.6423031574820549 0.5384615384615384




2 2 6 100 0.6490283529286107 0.5430769230769231




2 2 6 300 0.647877385496067 0.5523076923076923




2 2 6 500 0.6469162173574959 0.5438461538461539




2 2 6 700 0.6488423321545608 0.55




2 2 6 800 0.64518433438592 0.5415384615384615




2 2 6 900 0.6498036086538762 0.5453846153846154




2 2 8 100 0.6411519752515858 0.5438461538461539




2 2 8 300 0.6438428052870656 0.5484615384615384




2 2 8 500 0.6451939964454342 0.5407692307692308




2 2 8 700 0.6446147390181484 0.5353846153846153




2 2 8 800 0.6442257200985034 0.54




2 2 8 900 0.6449963208000732 0.5392307692307692




2 2 8 100 0.6467194316808524 0.5407692307692308




2 2 8 300 0.6469196554066773 0.5484615384615384




2 2 8 500 0.6474949156689881 0.54




2 2 8 700 0.6492224667755377 0.5453846153846154




2 2 8 800 0.6467245393831612 0.54




2 2 8 900 0.6486497642115084 0.5415384615384615




2 2 10 100 0.6419168007743087 0.5461538461538461




2 2 10 300 0.6426898488898519 0.5423076923076923




2 2 10 500 0.6449966535765604 0.5423076923076923




2 2 10 700 0.64423093808512 0.54




2 2 10 800 0.6436491287302433 0.5376923076923077




2 2 10 900 0.6405732750165302 0.5361538461538462




2 2 10 100 0.640950410801994 0.5376923076923077




2 2 10 300 0.6465319714401088 0.5438461538461539




2 2 10 500 0.6480730625589404 0.5376923076923077




2 2 10 700 0.6474931415022432 0.5392307692307692




2 2 10 800 0.6453768984818454 0.5423076923076923




2 2 10 900 0.6461449472558244 0.546923076923077




2 2 12 100 0.6403800415205031 0.54




2 2 12 300 0.6457674786938732 0.5361538461538462




2 2 12 500 0.646153494930878 0.5423076923076923




2 2 12 700 0.6436543467168598 0.5407692307692308




2 2 12 800 0.6417345662144357 0.5430769230769231




2 2 12 900 0.6421090359408737 0.5415384615384615




2 2 12 100 0.6478767180195287 0.5484615384615384




2 2 12 300 0.6455742413507192 0.5407692307692308




2 2 12 500 0.6465314142478785 0.5438461538461539




2 2 12 700 0.6434556708184732 0.5423076923076923




2 2 12 800 0.6480713948293766 0.5523076923076923




2 2 12 900 0.6469161051496245 0.5438461538461539




2 2 13 100 0.6407629582555043 0.5384615384615384




2 2 13 300 0.6426911780722376 0.5384615384615384




2 2 13 500 0.6461553870761846 0.5392307692307692




2 2 13 700 0.6430727598541627 0.5430769230769231




2 2 13 800 0.6455761315724623 0.5423076923076923




2 2 13 900 0.6424980548605186 0.536923076923077




2 2 13 100 0.6486450976464315 0.5492307692307692




2 2 13 300 0.6446080796412744 0.5538461538461539




2 2 13 500 0.650578761789088 0.5492307692307692




2 2 13 700 0.6461478338834661 0.5407692307692308




2 2 13 800 0.6494204771209496 0.5438461538461539




2 2 13 900 0.6457652537720792 0.5384615384615384




2 4 2 100 0.6211298905624446 0.5330769230769231




2 4 2 300 0.6251812352686293 0.5238461538461539




2 4 2 500 0.6232563451403329 0.5269230769230769




2 4 2 700 0.6219080463802965 0.5246153846153846




2 4 2 800 0.624018508451157 0.53




2 4 2 900 0.6232526807518451 0.5276923076923077




2 4 2 100 0.6261311935202482 0.5407692307692308




2 4 2 300 0.6265205471399438 0.5315384615384615




2 4 2 500 0.6284506571784204 0.5323076923076923




2 4 2 700 0.6247917732881891 0.5269230769230769




2 4 2 800 0.6247889969448553 0.53




2 4 2 900 0.6269077899692806 0.5330769230769231




2 4 4 100 0.6201717174122597 0.5384615384615384




2 4 4 300 0.6222971775078129 0.5215384615384615




2 4 4 500 0.6203717263401591 0.5276923076923077




2 4 4 700 0.6232526807518451 0.5353846153846153




2 4 4 800 0.6228658809833036 0.5238461538461539




2 4 4 900 0.6257564974545035 0.5253846153846153




2 4 4 100 0.6255580459718599 0.5253846153846153




2 4 4 300 0.6286347838836001 0.536923076923077




2 4 4 500 0.627292589231771 0.5323076923076923




2 4 4 700 0.6282545370547515 0.5284615384615384




2 4 4 800 0.6271004662730774 0.5338461538461539




2 4 4 900 0.6278669614489276 0.5284615384615384




2 4 6 100 0.6238283879220781 0.5230769230769231




2 4 6 300 0.6232525704675371 0.5261538461538462




2 4 6 500 0.6219077116802456 0.5238461538461539




2 4 6 700 0.6220999487703742 0.5315384615384615




2 4 6 800 0.6220990588016567 0.5284615384615384




2 4 6 900 0.620562295700724 0.5284615384615384




2 4 6 100 0.6219028226229895 0.53




2 4 6 300 0.6270996885122312 0.5307692307692308




2 4 6 500 0.6255618187210922 0.5353846153846153




2 4 6 700 0.6319133106606749 0.5376923076923077




2 4 6 800 0.6288313509151912 0.536923076923077




2 4 6 900 0.6278666286724403 0.53




2 4 8 100 0.6236335892865412 0.52




2 4 8 300 0.6201702760220025 0.53




2 4 8 500 0.622867326220688 0.5269230769230769




2 4 8 700 0.6234464714401023 0.5323076923076923




2 4 8 800 0.6203656203083886 0.53




2 4 8 900 0.6228678795657911 0.5223076923076924




2 4 8 100 0.6276711760253096 0.5307692307692308




2 4 8 300 0.6284481014037026 0.5307692307692308




2 4 8 500 0.6267152220262281 0.5353846153846153




2 4 8 700 0.6247907730351635 0.54




2 4 8 800 0.6290210322306018 0.5315384615384615




2 4 8 900 0.6270999129279741 0.53




2 4 10 100 0.6251738000544728 0.5307692307692308




2 4 10 300 0.6249873419903181 0.5292307692307693




2 4 10 500 0.620561187086954 0.536923076923077




2 4 10 700 0.6259460684856063 0.5323076923076923




2 4 10 800 0.6222918434661978 0.5276923076923077




2 4 10 900 0.6234431379045384 0.5330769230769231




2 4 10 100 0.6224839702720185 0.5261538461538462




2 4 10 300 0.6251731306543711 0.5261538461538462




2 4 10 500 0.6269051239102548 0.5346153846153846




2 4 10 700 0.6240211706630557 0.5361538461538462




2 4 10 800 0.625368809640808 0.5284615384615384




2 4 10 900 0.6272939241848473 0.53




2 4 12 100 0.6226708733205318 0.5315384615384615




2 4 12 300 0.6192125517033032 0.5253846153846153




2 4 12 500 0.6230625582992025 0.5230769230769231




2 4 12 700 0.6217144743370917 0.5284615384615384




2 4 12 800 0.6217119243330643 0.5315384615384615




2 4 12 900 0.6228672140128165 0.5261538461538462




2 4 12 100 0.6234402512768967 0.5392307692307692




2 4 12 300 0.6263315333010714 0.5284615384615384




2 4 12 500 0.6259460646384793 0.5323076923076923




2 4 12 700 0.6292150396403481 0.5353846153846153




2 4 12 800 0.6284468844291881 0.5307692307692308




2 4 12 900 0.6290238085739355 0.5269230769230769




2 4 13 100 0.6236388111202847 0.5261538461538462




2 4 13 300 0.6257556094093494 0.52




2 4 13 500 0.6257571630074782 0.5307692307692308




2 4 13 700 0.6215250174374237 0.5284615384615384




2 4 13 800 0.6251783524881147 0.53




2 4 13 900 0.6226799781878157 0.5261538461538462




2 4 13 100 0.6259411775047865 0.536923076923077




2 4 13 300 0.625944623248222 0.5292307692307693




2 4 13 500 0.6284437714622401 0.5353846153846153




2 4 13 700 0.6295986200047604 0.5307692307692308




2 4 13 800 0.6244055307118779 0.5353846153846153




2 4 13 900 0.6263279791968915 0.53




2 8 2 100 0.6016992734405674 0.5292307692307693




2 8 2 300 0.6032366982473477 0.5315384615384615




2 8 2 500 0.6026612193399846 0.5284615384615384




2 8 2 700 0.6009276744095357 0.5253846153846153




2 8 2 800 0.5986219744894699 0.5253846153846153




2 8 2 900 0.601119793521102 0.5238461538461539




2 8 2 100 0.6043886601622265 0.5253846153846153




2 8 2 300 0.6003495275195833 0.5223076923076924




2 8 2 500 0.6032395867985528 0.5284615384615384




2 8 2 700 0.6018911777542085 0.5276923076923077




2 8 2 800 0.5991972366753441 0.53




2 8 2 900 0.6013139169858467 0.5292307692307693




2 8 4 100 0.6040085236176859 0.5253846153846153




2 8 4 300 0.60112345790959 0.5269230769230769




2 8 4 500 0.6026625542930609 0.5246153846153846




2 8 4 700 0.6040111877531481 0.53




2 8 4 800 0.6018923966522864 0.5253846153846153




2 8 4 900 0.5988153201933684 0.5276923076923077




2 8 4 100 0.6018866233970032 0.5292307692307693




2 8 4 300 0.6038154023295302 0.5315384615384615




2 8 4 500 0.6016986059640291 0.5353846153846153




2 8 4 700 0.6041996482469174 0.5230769230769231




2 8 4 800 0.6045846700015872 0.5269230769230769




2 8 4 900 0.603044909988705 0.5276923076923077




2 8 6 100 0.602653780278701 0.52




2 8 6 300 0.6040098547236353 0.5246153846153846




2 8 6 500 0.6003554149063014 0.5269230769230769




2 8 6 700 0.6003558560435333 0.5261538461538462




2 8 6 800 0.6009277846938436 0.5269230769230769




2 8 6 900 0.6003501949961214 0.53




2 8 6 100 0.5980438333702081 0.5261538461538462




2 8 6 300 0.6061235400457519 0.5307692307692308




2 8 6 500 0.604199646323354 0.5315384615384615




2 8 6 700 0.6047782362741015 0.5253846153846153




2 8 6 800 0.5999638382883753 0.5315384615384615




2 8 6 900 0.6013140272701547 0.5238461538461539




2 8 8 100 0.6007368806332278 0.5261538461538462




2 8 8 300 0.6040041936762234 0.5284615384615384




2 8 8 500 0.600350307203993 0.5315384615384615




2 8 8 700 0.5991994558264476 0.5284615384615384




2 8 8 800 0.6005489811788158 0.5253846153846153




2 8 8 900 0.5992004560794731 0.5269230769230769




2 8 8 100 0.5976601427214877 0.5246153846153846




2 8 8 300 0.6041994238311745 0.5269230769230769




2 8 8 500 0.6015050416150783 0.5284615384615384




2 8 8 700 0.5997762677633235 0.5269230769230769




2 8 8 800 0.5999680579455298 0.5346153846153846




2 8 8 900 0.5995823687143218 0.5307692307692308




2 8 10 100 0.5991975713753949 0.5323076923076923




2 8 10 300 0.5988113230283932 0.5261538461538462




2 8 10 500 0.6032395867985528 0.5261538461538462




2 8 10 700 0.603430378651297 0.5292307692307693




2 8 10 800 0.6030463552260893 0.52




2 8 10 900 0.600931116305844 0.5253846153846153




2 8 10 100 0.6030454671809352 0.5307692307692308




2 8 10 300 0.6015018202873857 0.53




2 8 10 500 0.6045847802858951 0.5292307692307693




2 8 10 700 0.6028554511654737 0.5307692307692308




2 8 10 800 0.5997724911669642 0.5284615384615384




2 8 10 900 0.6011234598331535 0.5307692307692308




2 8 12 100 0.6022815277798029 0.5384615384615384




2 8 12 300 0.6009297851998946 0.5261538461538462




2 8 12 500 0.6028538975673449 0.5261538461538462




2 8 12 700 0.6022746420636226 0.5261538461538462




2 8 12 800 0.6005474237335601 0.5238461538461539




2 8 12 900 0.6007369928410993 0.5253846153846153




2 8 12 100 0.6013113612111289 0.5246153846153846




2 8 12 300 0.6005448679588422 0.5253846153846153




2 8 12 500 0.5993879124730898 0.5307692307692308




2 8 12 700 0.6001584028903517 0.5307692307692308




2 8 12 800 0.5988144302246509 0.5276923076923077


In [None]:
df_scores.sort_values('f1_micro_mean_test', ascending=False)

In [15]:
decision_tree_classifier=DecisionTreeClassifier(max_depth=100, min_samples_split=6, min_samples_leaf=6, criterion='entropy')

In [20]:
clf = AdaBoostClassifier(n_estimators=200, learning_rate=0.01, base_estimator=decision_tree_classifier, algorithm='SAMME')

#clf.fit(X_train, Y_train)

#Y_predict = clf.predict(X_test)

#f1_score(Y_test, Y_predict, average='micro')

In [21]:
scores = cross_val_score(clf, X_train, Y_train, scoring='f1_micro', cv=3)
scores.mean()

0.6239098489561236

In [10]:
Y_predict = cross_val_predict(clf, X_test, Y_test, cv=3)



In [12]:
f1_score(Y_test, Y_predict, average='micro')

0.49538461538461537

In [14]:
confusion_matrix(Y_test, Y_predict)

array([[  0,   0,   1,   2,   0,   0,   0],
       [  0,   0,   7,   7,   0,   0,   0],
       [  0,   1, 105,  90,   8,   0,   0],
       [  0,   3,  65, 185,  38,   5,   1],
       [  0,   0,   7,  75,  27,   0,   0],
       [  0,   0,   0,  10,   6,   5,   0],
       [  0,   0,   0,   2,   0,   0,   0]])

In [269]:
clf.classes_

AttributeError: 'AdaBoostClassifier' object has no attribute 'classes_'

In [209]:
clf.feature_importances_

array([ 0.05966777,  0.07152935,  0.09708522,  0.06851571,  0.07382377,
        0.07950466,  0.10435181,  0.08513251,  0.06454349,  0.07548736,
        0.080881  ,  0.13813066,  0.00134668])

In [11]:
[clf.tree_.max_depth for clf in clf.estimators_]

AttributeError: 'RandomForestClassifier' object has no attribute 'estimators_'

In [21]:
clf = RandomForestClassifier(n_estimators=n_estimators, warm_start=True,
                                                 criterion=criteria, min_samples_split=min_samples_split,
                                                 min_samples_leaf=min_samples_leaf)

In [22]:
clf.max_depth