# Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
from datetime import datetime
from datetime import timedelta
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import f1_score, accuracy_score, recall_score, precision_recall_fscore_support
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

# Data

In [2]:
data = pd.read_csv('dataset_finale.csv', header ='infer').iloc[:,1:]
#data's classes were initially (1,2,3,4) hence change to (0,1,2,3)
data['class']=data['class']-1 

## Feature engineering

In [3]:
data['log(views)/log(com)']=(np.log(2+data['views']))/np.log((2+data['n_comments']))
data['log(com)/log(views)']=(np.log(2+data['n_comments']))/np.log((2+data['views']))

## Duration- minutes only

In [4]:
data['durata']=round(data['durata']/60)

## Titles elaboration

In [5]:
import nltk
from nltk.tokenize import RegexpTokenizer
from nltk.stem import *
from nltk.corpus import stopwords
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /home/simone/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [6]:
def clean(string):
    tokenizer = RegexpTokenizer(r'\w+')
    string=tokenizer.tokenize(string)
    for word in range(len(string)):
        string[word] = string[word].lower() 
    #Remove stopwords
    string = [word for word in string if not word in stopwords.words()]
    #STEMMING
    stemmer = PorterStemmer()
    string = [stemmer.stem(word) for word in string]
    return " ".join(string)

In [7]:
#remove all the bad stuff we don't need
titles=list(data['titolo'])
for i,title in tqdm(enumerate(titles)):
    titles[i]=clean(title)

13374it [04:24, 50.64it/s]


In [8]:
#create dictionary with key the word and value the number of times it has been seen
diz={}
for title in titles:
    for word in title.split(' '):
        if word in diz:
            diz[word]+=1
        else:
            diz[word]=1  
print('the total number of different words is: ',len(diz.keys()))
print('the total number of words is: ',np.sum(list(diz.values())))

the total number of different words is:  30244
the total number of words is:  76584


In [9]:
#update value of the dictionary with its prob computed as #(times is observed)/#(total words)
tot=np.sum(list(diz.values()))
for key in diz.keys():
    diz[key]=diz[key]/tot

In [10]:
prob_title=[]
for title in tqdm(titles):
    somma=0
    for word in title.split(' '):
        somma+=(diz[word])
    prob_title.append(somma)

100%|█████████████████████████████████| 13374/13374 [00:00<00:00, 283508.06it/s]


In [11]:
data=data.drop(columns = ['titolo'])
data['titolo']=prob_title
data['titolo']=(data['titolo']-data['titolo'].min())/(data['titolo'].max()-data['titolo'].min())

# Classification Model

### Train Test Splitting

In [12]:
from sklearn import model_selection
from sklearn.preprocessing import LabelEncoder

In [13]:
data.columns

Index(['durata', 'views', 'n_comments', 'n_like', 'genere', 'subscribers',
       'publ', 'max_quality', 'timedelta', 'score', 'class',
       'log(views)/log(com)', 'log(com)/log(views)', 'titolo'],
      dtype='object')

In [14]:
Y = data['class'] # Extract the target feature

In [15]:
X = data.drop(columns = ['class','score','publ','timedelta']) # Remove from the data useful to the analysis: "score, publ, titolo, class"

In [16]:
le = LabelEncoder()
X['genere'] = le.fit_transform(X['genere']) # Transform the Categorical genere feature in a numerical feature

In [17]:
X_train,X_test,y_train,y_test = train_test_split(X,Y, train_size=.8, random_state=42) # Split dataset in train and test

In [18]:
X_train,X_val,y_train,y_val = train_test_split(X_train,y_train, train_size = .8, random_state = 42) # Split train in train and validation

### Excursus

We want to predict the class of a given video. The classes were defined using a home-made score.

To predict we will try different models.
Let's start with SVM

### SVC

In [19]:
from sklearn.svm import SVC

REMINDER OF THE PARAMETERS


C=1.0, kernel='rbf', degree=3, gamma='scale', coef0=0.0, shrinking=True, probability=False, tol=0.001, cache_size=200, class_weight=None, verbose=False, max_iter=- 1, decision_function_shape='ovr', break_ties=False, random_state=None

Try to use SVC without scaling data

In [20]:
clf = SVC(gamma = 'auto', random_state = 42)
clf.fit(X_train,y_train)
y_pred = clf.predict(X_val)
print(f1_score(y_val, y_pred, average = 'macro'))
print(accuracy_score(y_val, y_pred), recall_score(y_val, y_pred, average='macro'))

0.167131569327672
0.4897196261682243 0.2514992503748126


Try to use SVC with Standard Scaler

In [21]:
clf = make_pipeline(StandardScaler(), SVC(gamma='auto',random_state = 42))
clf.fit(X_train,y_train)
y_pred = clf.predict(X_val)
print(f1_score(y_val, y_pred, average = 'macro'))
print(accuracy_score(y_val, y_pred), recall_score(y_val, y_pred, average='macro'))

0.4152634868298607
0.5682242990654206 0.40886480885536736


Try to use SVC with MinMaxScaler

In [22]:
clf = make_pipeline(MinMaxScaler(), SVC(gamma='auto',random_state = 42))
clf.fit(X_train,y_train)
y_pred = clf.predict(X_val)
print(f1_score(y_val, y_pred, average = 'macro'))
print(accuracy_score(y_val, y_pred), recall_score(y_val, y_pred, average='macro'))

0.3065007325181236
0.5532710280373832 0.34307846076961523


Try to use SVC with MinMaxScaler and Standard Scaler (in that order)

In [23]:
clf = make_pipeline(MinMaxScaler(),StandardScaler(), SVC(gamma='auto',random_state = 42))
clf.fit(X_train,y_train)
y_pred = clf.predict(X_val)
print(f1_score(y_val, y_pred, average = 'macro'))
print(accuracy_score(y_val, y_pred), recall_score(y_val, y_pred, average='macro'))

0.4152634868298607
0.5682242990654206 0.40886480885536736


Try to use SVC with Standard Scaler and MinMaxScaler (in that order)

In [24]:
clf = make_pipeline(StandardScaler(),MinMaxScaler(), SVC(gamma='auto',random_state = 42))
clf.fit(X_train,y_train)
y_pred = clf.predict(X_val)
print(f1_score(y_val, y_pred, average = 'macro'))
print(accuracy_score(y_val, y_pred), recall_score(y_val, y_pred, average='macro'))

0.3065007325181236
0.5532710280373832 0.34307846076961523


Same Results as One Scaler.


Best Results without Scaling.

In [25]:
#svc = SVC()
#
##def gridsearch
#params = {
#    'kernel' : ('poly', 'rbf', 'sigmoid'),
#    'C' : np.linspace(1, 100, num=5), 
#    #'degree' : [3,5,8],
#    #'gamma' : ('auto','scale')
#    
#}
#
#search = GridSearchCV(svc,param_grid=params,scoring='f1_macro',
#                                  n_jobs=-1, refit=True, verbose=10, pre_dispatch='10*n_jobs',
#                                  return_train_score=True)
#search.fit(X_train,y_train)

In [26]:
#best_one=search.best_estimator_
#y_pred = best_one.predict(X_val)

In [27]:
#f1_score(y_val, y_pred, average = 'macro')

### Random Forest

In [28]:
from sklearn.ensemble import RandomForestClassifier as RFC

In [29]:
clf = RFC(random_state = 42)
clf.fit(X_train,y_train)
y_pred = clf.predict(X_val)
print('f1-score using Random Forest without scaling data: ',f1_score(y_val, y_pred, average = 'macro'))
print(accuracy_score(y_val, y_pred), recall_score(y_val, y_pred, average='macro'))

f1-score using Random Forest without scaling data:  0.6341116494268896
0.677570093457944 0.6125594601734696


In [30]:
clf = make_pipeline(MinMaxScaler(), RFC(random_state = 42))
clf.fit(X_train,y_train)
y_pred = clf.predict(X_val)
print(f1_score(y_val, y_pred, average = 'macro'))
print(accuracy_score(y_val, y_pred), recall_score(y_val, y_pred, average='macro'))

0.6323011282860391
0.6719626168224299 0.6144606772864322


In [31]:
clf = make_pipeline(StandardScaler(), RFC(random_state = 42))
clf.fit(X_train,y_train)
y_pred = clf.predict(X_val)
print(f1_score(y_val, y_pred, average = 'macro'))
print(accuracy_score(y_val, y_pred), recall_score(y_val, y_pred, average='macro'))

0.6290272219642454
0.677570093457944 0.6053535822504661


In [32]:
clf = make_pipeline(MinMaxScaler(), RFC(random_state = 42, n_jobs = -1))

#def gridsearch
params = {'randomforestclassifier__criterion' : ['gini','entropy'],
          'randomforestclassifier__max_features' : [None],
          'randomforestclassifier__n_estimators' : [50,100,200]
         }

search = GridSearchCV(clf,param_grid=params,scoring='f1_macro',
                                  n_jobs=-1,refit=True,verbose=10, pre_dispatch='10*n_jobs',
                                  return_train_score=True)
search.fit(X_train,y_train)

Fitting 5 folds for each of 6 candidates, totalling 30 fits


GridSearchCV(estimator=Pipeline(steps=[('minmaxscaler', MinMaxScaler()),
                                       ('randomforestclassifier',
                                        RandomForestClassifier(n_jobs=-1,
                                                               random_state=42))]),
             n_jobs=-1,
             param_grid={'randomforestclassifier__criterion': ['gini',
                                                               'entropy'],
                         'randomforestclassifier__max_features': [None],
                         'randomforestclassifier__n_estimators': [50, 100,
                                                                  200]},
             pre_dispatch='10*n_jobs', return_train_score=True,
             scoring='f1_macro', verbose=10)

In [33]:
best_one=search.best_estimator_
y_pred = best_one.predict(X_val)
print(accuracy_score(y_val, y_pred), recall_score(y_val, y_pred, average='macro'))

0.6785046728971963 0.6211939095250446


In [34]:
f1_score(y_val, y_pred, average = 'macro')

0.6370255990039098

### Logistic Regression

In [35]:
from sklearn.linear_model import LogisticRegression

In [36]:
clf = make_pipeline(StandardScaler(), LogisticRegression(random_state=42, multi_class = 'multinomial',
                                                         solver = 'newton-cg',
                                                         n_jobs = -1))
clf.fit(X_train,y_train)
y_pred = clf.predict(X_val)
print('F1 SCORE: ',f1_score(y_val, y_pred, average = 'macro'))
print('PRECISION:',precision_recall_fscore_support(y_val, y_pred, average='macro')[0])
print('RECALL:',precision_recall_fscore_support(y_val, y_pred, average='macro')[1])
print('ACCURACY:', sum(y_pred == y_val)/len(y_val))

F1 SCORE:  0.37530499008824464
PRECISION: 0.449174794397171
RECALL: 0.36641039938138226
ACCURACY: 0.5504672897196262


### KNN

In [37]:
from sklearn.neighbors import KNeighborsClassifier

In [38]:
clf = make_pipeline(StandardScaler(),KNeighborsClassifier(n_jobs = -1) )
clf.fit(X_train,y_train)
y_pred = clf.predict(X_val)
print('F1 SCORE: ',f1_score(y_val, y_pred, average = 'macro'))
print('PRECISION:',precision_recall_fscore_support(y_val, y_pred, average='macro')[0])
print('RECALL:',precision_recall_fscore_support(y_val, y_pred, average='macro')[1])
print('ACCURACY:', sum(y_pred == y_val)/len(y_val))

F1 SCORE:  0.44377398370086324
PRECISION: 0.5451408831045041
RECALL: 0.42072701443130156
ACCURACY: 0.5514018691588785


### Decision Tree Classifier

In [39]:
from sklearn.tree import DecisionTreeClassifier

In [40]:
clf = make_pipeline(StandardScaler(),DecisionTreeClassifier(random_state = 42) )
clf.fit(X_train,y_train)
y_pred = clf.predict(X_val)
print('F1 SCORE: ',f1_score(y_val, y_pred, average = 'macro'))
print('PRECISION:',precision_recall_fscore_support(y_val, y_pred, average='macro')[0])
print('RECALL:',precision_recall_fscore_support(y_val, y_pred, average='macro')[1])
print('ACCURACY:', sum(y_pred == y_val)/len(y_val))

F1 SCORE:  0.5445718983668641
PRECISION: 0.5523157838773175
RECALL: 0.5381562036944156
ACCURACY: 0.5934579439252337


### XGBoost

In [41]:
from xgboost import XGBClassifier

In [42]:
clf = XGBClassifier(random_state=42)
clf.fit(X_train,y_train)
y_pred = clf.predict(X_val)
print('f1-score using XGBoost without scaling data: ',f1_score(y_val, y_pred, average = 'macro'))
print(accuracy_score(y_val, y_pred), recall_score(y_val, y_pred, average='macro'))

f1-score using XGBoost without scaling data:  0.6371253033381947
0.6780373831775701 0.6244349246413563


In [43]:
clf = make_pipeline(MinMaxScaler(), XGBClassifier(random_state=42))
clf.fit(X_train,y_train)
y_pred = clf.predict(X_val)
print('f1-score using XGBoost using MinMaxScaler: ',f1_score(y_val, y_pred, average = 'macro'))
print(accuracy_score(y_val, y_pred), recall_score(y_val, y_pred, average='macro'))

f1-score using XGBoost using MinMaxScaler:  0.6375949578127305
0.6785046728971963 0.6248097372350594


In [44]:
clf = make_pipeline(StandardScaler(), XGBClassifier(random_state=42))
clf.fit(X_train,y_train)
y_pred = clf.predict(X_val)
print('f1-score using XGBoost using StandardScaler: ',f1_score(y_val, y_pred, average = 'macro'))
print(accuracy_score(y_val, y_pred), recall_score(y_val, y_pred, average='macro'))

f1-score using XGBoost using StandardScaler:  0.6341134136963655
0.6771028037383178 0.6200598849158905


In [49]:
clf = make_pipeline(MinMaxScaler(), XGBClassifier(use_label_encoder=False,random_state=42))

#def gridsearch
params = {'xgbclassifier__learning_rate' : [.2,.3,.4],
          'xgbclassifier__n_estimators' : [600,800,1000]
         }

search = GridSearchCV(clf,param_grid=params,scoring='f1_macro',
                                  n_jobs=-1,refit=True,verbose=10, pre_dispatch='10*n_jobs',
                                  return_train_score=True)
search.fit(X_train,y_train)

Fitting 5 folds for each of 9 candidates, totalling 45 fits


  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index
  from pandas import MultiIndex, Int64Index




GridSearchCV(estimator=Pipeline(steps=[('minmaxscaler', MinMaxScaler()),
                                       ('xgbclassifier',
                                        XGBClassifier(base_score=None,
                                                      booster=None,
                                                      colsample_bylevel=None,
                                                      colsample_bynode=None,
                                                      colsample_bytree=None,
                                                      enable_categorical=False,
                                                      gamma=None, gpu_id=None,
                                                      importance_type=None,
                                                      interaction_constraints=None,
                                                      learning_rate=None,
                                                      max_delta_step=None,
                               

In [50]:
xgb_params=search.best_params_
xgb_params

{'xgbclassifier__learning_rate': 0.3, 'xgbclassifier__n_estimators': 600}

In [51]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_val, y_pred)

array([[455, 203,   9,   0],
       [192, 755,  94,   3],
       [  6, 123, 199,  22],
       [  1,   9,  29,  40]])

In [52]:
best_one=search.best_estimator_
y_pred = best_one.predict(X_val)
print(accuracy_score(y_val, y_pred), recall_score(y_val, y_pred, average='macro'), f1_score(y_val, y_pred, average = 'macro'))

0.6677570093457944 0.6096385396813347 0.6243253215664104


### One vs Rest Classifier - ORCO

In [53]:
from sklearn.multiclass import OneVsRestClassifier as ORC

In [54]:
clf=make_pipeline(MinMaxScaler(), XGBClassifier(learning_rate=xgb_params['xgbclassifier__learning_rate'],n_estimators=xgb_params['xgbclassifier__n_estimators'],random_state=42))
classifier=ORC(clf)
classifier.fit(X_train, y_train)



OneVsRestClassifier(estimator=Pipeline(steps=[('minmaxscaler', MinMaxScaler()),
                                              ('xgbclassifier',
                                               XGBClassifier(base_score=None,
                                                             booster=None,
                                                             colsample_bylevel=None,
                                                             colsample_bynode=None,
                                                             colsample_bytree=None,
                                                             enable_categorical=False,
                                                             gamma=None,
                                                             gpu_id=None,
                                                             importance_type=None,
                                                             interaction_constraints=None,
                                          

In [55]:
y_pred = classifier.predict(X_val)
print(accuracy_score(y_val, y_pred), recall_score(y_val, y_pred, average='macro'), f1_score(y_val, y_pred, average = 'macro'))

0.6696261682242991 0.6159125703875006 0.6308186703437165


### One vs One Classifier - ORCO2

In [56]:
from sklearn.multiclass import OneVsOneClassifier as OOC

In [57]:
clf=make_pipeline(MinMaxScaler(), XGBClassifier(learning_rate=xgb_params['xgbclassifier__learning_rate'],n_estimators=xgb_params['xgbclassifier__n_estimators'],random_state=42))
classifier=OOC(clf)
classifier.fit(X_train, y_train)



OneVsOneClassifier(estimator=Pipeline(steps=[('minmaxscaler', MinMaxScaler()),
                                             ('xgbclassifier',
                                              XGBClassifier(base_score=None,
                                                            booster=None,
                                                            colsample_bylevel=None,
                                                            colsample_bynode=None,
                                                            colsample_bytree=None,
                                                            enable_categorical=False,
                                                            gamma=None,
                                                            gpu_id=None,
                                                            importance_type=None,
                                                            interaction_constraints=None,
                                                      

In [58]:
y_pred = classifier.predict(X_val)
print(accuracy_score(y_val, y_pred), recall_score(y_val, y_pred, average='macro'), f1_score(y_val, y_pred, average = 'macro'))

0.6677570093457944 0.6128365323064808 0.6235577443586411


### Gradient Boosting

In [59]:
from sklearn.ensemble import GradientBoostingClassifier

In [60]:
clf = make_pipeline(StandardScaler(), GradientBoostingClassifier(random_state = 42))
clf.fit(X_train,y_train)
y_pred = clf.predict(X_val)
precision_recall = precision_recall_fscore_support(y_val, y_pred, average='macro')
print('F1 SCORE: ',f1_score(y_val, y_pred, average = 'macro'))
print('PRECISION:',precision_recall[0])
print('RECALL:',precision_recall[1])
print('ACCURACY:', sum(y_pred == y_val)/len(y_val))

F1 SCORE:  0.6558215690265023
PRECISION: 0.6860272016066233
RECALL: 0.6337452446169922
ACCURACY: 0.6962616822429907


In [61]:
clf = make_pipeline(MinMaxScaler(), GradientBoostingClassifier(random_state = 42))
clf.fit(X_train,y_train)
y_pred = clf.predict(X_val)
precision_recall = precision_recall_fscore_support(y_val, y_pred, average='macro')
print('F1 SCORE: ',f1_score(y_val, y_pred, average = 'macro'))
print('PRECISION:',precision_recall[0])
print('RECALL:',precision_recall[1])
print('ACCURACY:', sum(y_pred == y_val)/len(y_val))

F1 SCORE:  0.6483922876166766
PRECISION: 0.6633200839334152
RECALL: 0.6358695049702394
ACCURACY: 0.6873831775700935


In [62]:
clf = make_pipeline(StandardScaler(), GradientBoostingClassifier(random_state = 42))

#def gridsearch
params = {'gradientboostingclassifier__learning_rate' : [.25,.3,.35],#.285
          'gradientboostingclassifier__n_estimators' : [70,75,80],
          #'gradientboostingclassifier__n_iter_no_change':[2,5,10],
          'gradientboostingclassifier__max_depth':[3,4,5]
         }

search = GridSearchCV(clf,param_grid=params,scoring='f1_macro',
                                  n_jobs=-1,refit=True,verbose=1, pre_dispatch='10*n_jobs',
                                  return_train_score=True)
search.fit(X_train,y_train)

Fitting 5 folds for each of 27 candidates, totalling 135 fits


GridSearchCV(estimator=Pipeline(steps=[('standardscaler', StandardScaler()),
                                       ('gradientboostingclassifier',
                                        GradientBoostingClassifier(random_state=42))]),
             n_jobs=-1,
             param_grid={'gradientboostingclassifier__learning_rate': [0.25,
                                                                       0.3,
                                                                       0.35],
                         'gradientboostingclassifier__max_depth': [3, 4, 5],
                         'gradientboostingclassifier__n_estimators': [70, 75,
                                                                      80]},
             pre_dispatch='10*n_jobs', return_train_score=True,
             scoring='f1_macro', verbose=1)

In [63]:
gb_params=search.best_params_
gb_params

{'gradientboostingclassifier__learning_rate': 0.25,
 'gradientboostingclassifier__max_depth': 3,
 'gradientboostingclassifier__n_estimators': 70}

In [64]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_val, y_pred)

array([[447, 209,  10,   1],
       [175, 776,  91,   2],
       [  4, 119, 205,  22],
       [  0,   4,  32,  43]])

In [65]:
best_one=search.best_estimator_
y_pred = best_one.predict(X_val)
print(accuracy_score(y_val, y_pred), recall_score(y_val, y_pred, average='macro'), f1_score(y_val, y_pred, average = 'macro'))

0.6878504672897197 0.6301900067145451 0.6442553895245835


### One vs Rest Classifier (GB) - ORCO

In [66]:
clf=make_pipeline(StandardScaler(), GradientBoostingClassifier(learning_rate=gb_params['gradientboostingclassifier__learning_rate'],n_estimators=gb_params['gradientboostingclassifier__n_estimators'],max_depth=gb_params['gradientboostingclassifier__max_depth'],random_state=42))
classifier=ORC(clf)
classifier.fit(X_train, y_train)

OneVsRestClassifier(estimator=Pipeline(steps=[('standardscaler',
                                               StandardScaler()),
                                              ('gradientboostingclassifier',
                                               GradientBoostingClassifier(learning_rate=0.25,
                                                                          n_estimators=70,
                                                                          random_state=42))]))

In [67]:
y_pred = classifier.predict(X_val)
print(accuracy_score(y_val, y_pred), recall_score(y_val, y_pred, average='macro'), f1_score(y_val, y_pred, average = 'macro'))

0.6911214953271028 0.6221435288986007 0.6456388047178951


### One vs One Classifier (GB) - ORCO2

In [68]:
clf=make_pipeline(StandardScaler(), GradientBoostingClassifier(learning_rate=gb_params['gradientboostingclassifier__learning_rate'],n_estimators=gb_params['gradientboostingclassifier__n_estimators'],max_depth=gb_params['gradientboostingclassifier__max_depth'],random_state=42))
classifier=OOC(clf)
classifier.fit(X_train, y_train)

OneVsOneClassifier(estimator=Pipeline(steps=[('standardscaler',
                                              StandardScaler()),
                                             ('gradientboostingclassifier',
                                              GradientBoostingClassifier(learning_rate=0.25,
                                                                         n_estimators=70,
                                                                         random_state=42))]))

In [69]:
y_pred = classifier.predict(X_val)
print(accuracy_score(y_val, y_pred), recall_score(y_val, y_pred, average='macro'), f1_score(y_val, y_pred, average = 'macro'))

0.6948598130841122 0.6459549550119454 0.6646510085722817
[CV 5/5; 2/9] START xgbclassifier__learning_rate=0.2, xgbclassifier__n_estimators=800
[CV 5/5; 2/9] END xgbclassifier__learning_rate=0.2, xgbclassifier__n_estimators=800;, score=(train=1.000, test=0.602) total time= 3.4min
[CV 2/5; 6/9] START xgbclassifier__learning_rate=0.3, xgbclassifier__n_estimators=1000
[CV 2/5; 6/9] END xgbclassifier__learning_rate=0.3, xgbclassifier__n_estimators=1000;, score=(train=1.000, test=0.613) total time= 3.5min
[CV 5/5; 9/9] START xgbclassifier__learning_rate=0.4, xgbclassifier__n_estimators=1000
[CV 5/5; 9/9] END xgbclassifier__learning_rate=0.4, xgbclassifier__n_estimators=1000;, score=(train=1.000, test=0.593) total time= 1.5min
[CV 1/5; 2/9] START xgbclassifier__learning_rate=0.2, xgbclassifier__n_estimators=800
[CV 1/5; 2/9] END xgbclassifier__learning_rate=0.2, xgbclassifier__n_estimators=800;, score=(train=1.000, test=0.619) total time= 3.3min
[CV 3/5; 5/9] START xgbclassifier__learning_rat