# CSI5386 Natural Language Processing
## Project - Automatic Classification of Poems by Themes
### Aaditya Suri, Ranjan Goyal, Paritosh Pal Singh

In [2]:
#import data
import pandas as pd
import numpy as np

In [3]:
import os
pardir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
topics = os.path.join(pardir, "Project", "data", "topics")
anger = os.path.join(topics, "anger")
animals = os.path.join(topics, "animal")
children = os.path.join(topics, "children")
father = os.path.join(topics, "father")
friend = os.path.join(topics, "friend")
river = os.path.join(topics, "river")
romance = os.path.join(topics, "romance")
sky = os.path.join(topics, "sky")
water = os.path.join(topics, "water")
winter = os.path.join(topics, "winter")

In [4]:
def fetch_poems(path):
    poems_arr = []
    for p in os.listdir(path):
        poem_path = os.path.join(path, p)
        f = open(poem_path)
        poem_text = ""
        for i in f:
            poem_text += i.strip() + "\t"
        poems_arr.append(poem_text)
        f.close()
    return poems_arr

In [5]:
p_anger = fetch_poems(anger)
p_animal = fetch_poems(animals)
p_children = fetch_poems(children)
p_father = fetch_poems(father)
p_friend = fetch_poems(friend)
p_river = fetch_poems(river)
p_romance = fetch_poems(romance)
p_sky = fetch_poems(sky)
p_water = fetch_poems(water)
p_winter = fetch_poems(winter)

In [6]:
data = []
p_array = [p_anger, p_animal, p_children, p_father, p_friend, p_river, p_romance, p_sky, p_water, p_winter]
data_labels = []
for i in range(0, len(p_array)):
    data.extend(p_array[i])
    data_labels.extend([i]*len(p_array[i]))
data = np.array(data)
data_labels = np.array(data_labels)

In [7]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data, data_labels, test_size = 0.2)

In [8]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((797,), (200,), (797,), (200,))

In [9]:
# run this in conda terminal
# conda install -c conda-forge spacy
# python -m spacy download en_core_web_sm

In [10]:
def vectors(nlp, data):
    vectors = []
    for row in data:
        row = str(row)
        doc = nlp(row)
        vectors.append(doc.vector)
    return np.array(vectors)

In [11]:
import spacy
nlp = spacy.load('en_core_web_md')
train_vectors = vectors(nlp, X_train)

In [12]:
test_vectors = vectors(nlp, X_test)

In [13]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
def evaluate(y_true, y_pred):
    # print evaluation results for model
    # print confusion matrix
    print("Confusion Matrix:")
    print(confusion_matrix(y_true, y_pred))
    # accuracy
    print("Accuracy: ", accuracy_score(y_true, y_pred))
    # precision
    print("Precision: ", precision_score(y_true, y_pred, average = 'weighted'))
    # recall
    print("Recall ", recall_score(y_true, y_pred, average = 'weighted'))
    # f-measure
    print("F1: ", f1_score(y_true, y_pred, average = 'weighted'))

## Decision Tree Classifier

In [14]:
from sklearn.tree import DecisionTreeClassifier
dt_model = DecisionTreeClassifier()
dt_model.fit(train_vectors, y_train)
y_pred_dt = dt_model.predict(test_vectors)

In [15]:
evaluate(y_test, y_pred_dt)

Confusion Matrix:
[[ 7  1  1  0  2  1  3  0  0  0]
 [ 0  8  3  1  1  1  2  2  0  0]
 [ 2  4 10  0  1  1  3  0  2  1]
 [ 3  3  2  6  2  0  2  0  0  3]
 [ 2  4  1  6  5  0  3  1  0  0]
 [ 0  0  1  2  1  8  0  1  3  4]
 [ 1  2  1  3  1  1  5  2  1  1]
 [ 0  3  0  2  1  0  5  6  6  2]
 [ 1  1  1  0  2  4  1  0  8  1]
 [ 1  0  2  0  0  6  1  2  1  5]]
Accuracy:  0.34
Precision:  0.34959124453977397
Recall  0.34
F1:  0.3385940563380786


## Random Forest Classifier

In [16]:
from sklearn.ensemble import RandomForestClassifier
rf_model = RandomForestClassifier(n_estimators = 200)
rf_model.fit(train_vectors, y_train)
y_pred_rf = rf_model.predict(test_vectors)

In [17]:
evaluate(y_test, y_pred_rf)

Confusion Matrix:
[[13  0  1  1  0  0  0  0  0  0]
 [ 1 14  1  0  1  0  1  0  0  0]
 [ 2  1 15  0  3  0  0  0  0  3]
 [ 0  0  3 13  2  0  1  0  0  2]
 [ 0  3  4  4  9  0  1  0  1  0]
 [ 1  1  0  0  1 10  1  0  5  1]
 [ 2  2  2  0  2  2  6  2  0  0]
 [ 1  0  1  1  1  2  1 14  1  3]
 [ 1  0  0  1  0  3  0  1 11  2]
 [ 0  1  1  1  0  0  0  0  0 15]]
Accuracy:  0.6
Precision:  0.6059265235968023
Recall  0.6
F1:  0.5921690083721202


## SVM Classifier

In [18]:
from sklearn import svm
svm_model = svm.SVC()
svm_model.fit(train_vectors, y_train)
y_pred_svm = svm_model.predict(test_vectors)

In [19]:
evaluate(y_test, y_pred_svm)

Confusion Matrix:
[[13  0  1  0  0  0  1  0  0  0]
 [ 0 12  2  1  1  0  2  0  0  0]
 [ 0  2 14  0  3  0  3  0  0  2]
 [ 0  1  2  9  7  0  0  0  0  2]
 [ 0  1  4  1 11  0  2  0  1  2]
 [ 1  0  0  1  3  5  1  0  1  8]
 [ 1  4  2  0  2  1  5  1  0  2]
 [ 1  0  2  0  2  3  2 11  1  3]
 [ 0  0  0  0  0  5  2  0  7  5]
 [ 0  0  1  0  1  0  2  0  0 14]]
Accuracy:  0.505
Precision:  0.5664763471177945
Recall  0.505
F1:  0.5064542730182225


## XGBoost Classifier

In [22]:
from xgboost import XGBClassifier
xg_model = XGBClassifier()
xg_model.fit(train_vectors, y_train)
y_pred_xg = xg_model.predict(test_vectors)



In [23]:
evaluate(y_test, y_pred_xg)

Confusion Matrix:
[[11  0  0  1  0  0  0  1  2  0]
 [ 1 13  0  0  2  0  1  1  0  0]
 [ 2  2 13  1  2  0  0  1  1  2]
 [ 0  2  3  9  6  0  1  0  0  0]
 [ 0  2  4  4  9  0  2  0  1  0]
 [ 1  2  0  0  2  8  1  1  4  1]
 [ 2  1  0  0  3  1  9  2  0  0]
 [ 0  0  3  1  1  0  1 14  2  3]
 [ 1  0  0  0  1  1  1  1 13  1]
 [ 0  2  1  0  0  2  0  0  0 13]]
Accuracy:  0.56
Precision:  0.5695434085841695
Recall  0.56
F1:  0.5571678533185801


## CatBoost Classifier

In [20]:
from catboost import CatBoostClassifier
cb_model = CatBoostClassifier()
cb_model.fit(train_vectors, y_train)
y_pred_cb = cb_model.predict(test_vectors)

Learning rate set to 0.078216
0:	learn: 2.2676639	total: 197ms	remaining: 3m 16s
1:	learn: 2.2408256	total: 339ms	remaining: 2m 49s
2:	learn: 2.2084886	total: 471ms	remaining: 2m 36s
3:	learn: 2.1721006	total: 610ms	remaining: 2m 31s
4:	learn: 2.1458795	total: 753ms	remaining: 2m 29s
5:	learn: 2.1115766	total: 899ms	remaining: 2m 28s
6:	learn: 2.0827590	total: 1.04s	remaining: 2m 27s
7:	learn: 2.0549665	total: 1.18s	remaining: 2m 25s
8:	learn: 2.0226124	total: 1.32s	remaining: 2m 25s
9:	learn: 1.9957879	total: 1.46s	remaining: 2m 24s
10:	learn: 1.9716602	total: 1.61s	remaining: 2m 24s
11:	learn: 1.9441094	total: 1.75s	remaining: 2m 24s
12:	learn: 1.9198009	total: 1.9s	remaining: 2m 24s
13:	learn: 1.8973747	total: 2.05s	remaining: 2m 24s
14:	learn: 1.8723057	total: 2.19s	remaining: 2m 24s
15:	learn: 1.8459118	total: 2.34s	remaining: 2m 23s
16:	learn: 1.8257906	total: 2.48s	remaining: 2m 23s
17:	learn: 1.8022903	total: 2.63s	remaining: 2m 23s
18:	learn: 1.7776916	total: 2.78s	remaining: 

158:	learn: 0.4821937	total: 23.1s	remaining: 2m 2s
159:	learn: 0.4786763	total: 23.3s	remaining: 2m 2s
160:	learn: 0.4744785	total: 23.4s	remaining: 2m 2s
161:	learn: 0.4706625	total: 23.6s	remaining: 2m 2s
162:	learn: 0.4690005	total: 23.7s	remaining: 2m 1s
163:	learn: 0.4651219	total: 23.9s	remaining: 2m 1s
164:	learn: 0.4618905	total: 24s	remaining: 2m 1s
165:	learn: 0.4590778	total: 24.2s	remaining: 2m 1s
166:	learn: 0.4565217	total: 24.3s	remaining: 2m 1s
167:	learn: 0.4535207	total: 24.5s	remaining: 2m 1s
168:	learn: 0.4508508	total: 24.6s	remaining: 2m
169:	learn: 0.4480228	total: 24.7s	remaining: 2m
170:	learn: 0.4441776	total: 24.9s	remaining: 2m
171:	learn: 0.4416290	total: 25s	remaining: 2m
172:	learn: 0.4389079	total: 25.2s	remaining: 2m
173:	learn: 0.4355825	total: 25.3s	remaining: 2m
174:	learn: 0.4322754	total: 25.5s	remaining: 2m
175:	learn: 0.4297672	total: 25.6s	remaining: 1m 59s
176:	learn: 0.4266531	total: 25.8s	remaining: 1m 59s
177:	learn: 0.4242374	total: 25.9s	

314:	learn: 0.2034670	total: 46s	remaining: 1m 40s
315:	learn: 0.2024604	total: 46.2s	remaining: 1m 39s
316:	learn: 0.2013891	total: 46.3s	remaining: 1m 39s
317:	learn: 0.2003298	total: 46.4s	remaining: 1m 39s
318:	learn: 0.1998416	total: 46.6s	remaining: 1m 39s
319:	learn: 0.1990343	total: 46.7s	remaining: 1m 39s
320:	learn: 0.1981688	total: 46.8s	remaining: 1m 39s
321:	learn: 0.1971966	total: 47s	remaining: 1m 38s
322:	learn: 0.1967249	total: 47.1s	remaining: 1m 38s
323:	learn: 0.1958921	total: 47.3s	remaining: 1m 38s
324:	learn: 0.1947643	total: 47.4s	remaining: 1m 38s
325:	learn: 0.1942406	total: 47.5s	remaining: 1m 38s
326:	learn: 0.1936833	total: 47.7s	remaining: 1m 38s
327:	learn: 0.1925475	total: 47.8s	remaining: 1m 37s
328:	learn: 0.1917756	total: 47.9s	remaining: 1m 37s
329:	learn: 0.1907643	total: 48.1s	remaining: 1m 37s
330:	learn: 0.1902843	total: 48.2s	remaining: 1m 37s
331:	learn: 0.1894755	total: 48.3s	remaining: 1m 37s
332:	learn: 0.1889207	total: 48.5s	remaining: 1m 3

470:	learn: 0.1153744	total: 1m 8s	remaining: 1m 17s
471:	learn: 0.1151997	total: 1m 9s	remaining: 1m 17s
472:	learn: 0.1148663	total: 1m 9s	remaining: 1m 17s
473:	learn: 0.1145697	total: 1m 9s	remaining: 1m 16s
474:	learn: 0.1141329	total: 1m 9s	remaining: 1m 16s
475:	learn: 0.1136711	total: 1m 9s	remaining: 1m 16s
476:	learn: 0.1135186	total: 1m 9s	remaining: 1m 16s
477:	learn: 0.1131342	total: 1m 9s	remaining: 1m 16s
478:	learn: 0.1128806	total: 1m 10s	remaining: 1m 16s
479:	learn: 0.1126617	total: 1m 10s	remaining: 1m 16s
480:	learn: 0.1124168	total: 1m 10s	remaining: 1m 16s
481:	learn: 0.1120873	total: 1m 10s	remaining: 1m 15s
482:	learn: 0.1119413	total: 1m 10s	remaining: 1m 15s
483:	learn: 0.1117272	total: 1m 10s	remaining: 1m 15s
484:	learn: 0.1113945	total: 1m 11s	remaining: 1m 15s
485:	learn: 0.1111197	total: 1m 11s	remaining: 1m 15s
486:	learn: 0.1106306	total: 1m 11s	remaining: 1m 15s
487:	learn: 0.1103421	total: 1m 11s	remaining: 1m 15s
488:	learn: 0.1100250	total: 1m 11s	

626:	learn: 0.0779589	total: 1m 31s	remaining: 54.7s
627:	learn: 0.0777817	total: 1m 32s	remaining: 54.5s
628:	learn: 0.0775875	total: 1m 32s	remaining: 54.4s
629:	learn: 0.0774039	total: 1m 32s	remaining: 54.3s
630:	learn: 0.0772319	total: 1m 32s	remaining: 54.1s
631:	learn: 0.0771584	total: 1m 32s	remaining: 54s
632:	learn: 0.0769692	total: 1m 32s	remaining: 53.8s
633:	learn: 0.0767263	total: 1m 32s	remaining: 53.7s
634:	learn: 0.0764932	total: 1m 33s	remaining: 53.5s
635:	learn: 0.0763624	total: 1m 33s	remaining: 53.3s
636:	learn: 0.0761475	total: 1m 33s	remaining: 53.2s
637:	learn: 0.0759806	total: 1m 33s	remaining: 53s
638:	learn: 0.0758096	total: 1m 33s	remaining: 52.9s
639:	learn: 0.0758044	total: 1m 33s	remaining: 52.7s
640:	learn: 0.0757462	total: 1m 33s	remaining: 52.6s
641:	learn: 0.0755678	total: 1m 34s	remaining: 52.4s
642:	learn: 0.0755168	total: 1m 34s	remaining: 52.3s
643:	learn: 0.0753164	total: 1m 34s	remaining: 52.1s
644:	learn: 0.0751614	total: 1m 34s	remaining: 52s

783:	learn: 0.0608610	total: 1m 54s	remaining: 31.6s
784:	learn: 0.0607287	total: 1m 54s	remaining: 31.5s
785:	learn: 0.0605901	total: 1m 55s	remaining: 31.3s
786:	learn: 0.0604966	total: 1m 55s	remaining: 31.2s
787:	learn: 0.0604347	total: 1m 55s	remaining: 31s
788:	learn: 0.0603351	total: 1m 55s	remaining: 30.9s
789:	learn: 0.0602215	total: 1m 55s	remaining: 30.8s
790:	learn: 0.0601294	total: 1m 55s	remaining: 30.6s
791:	learn: 0.0600380	total: 1m 56s	remaining: 30.5s
792:	learn: 0.0598776	total: 1m 56s	remaining: 30.3s
793:	learn: 0.0597517	total: 1m 56s	remaining: 30.2s
794:	learn: 0.0596802	total: 1m 56s	remaining: 30s
795:	learn: 0.0595532	total: 1m 56s	remaining: 29.9s
796:	learn: 0.0594408	total: 1m 56s	remaining: 29.8s
797:	learn: 0.0593238	total: 1m 56s	remaining: 29.6s
798:	learn: 0.0592393	total: 1m 57s	remaining: 29.5s
799:	learn: 0.0592091	total: 1m 57s	remaining: 29.3s
800:	learn: 0.0591077	total: 1m 57s	remaining: 29.2s
801:	learn: 0.0590326	total: 1m 57s	remaining: 29s

941:	learn: 0.0491749	total: 2m 17s	remaining: 8.44s
942:	learn: 0.0490876	total: 2m 17s	remaining: 8.29s
943:	learn: 0.0490340	total: 2m 17s	remaining: 8.15s
944:	learn: 0.0489782	total: 2m 17s	remaining: 8s
945:	learn: 0.0489211	total: 2m 17s	remaining: 7.86s
946:	learn: 0.0489193	total: 2m 17s	remaining: 7.71s
947:	learn: 0.0489025	total: 2m 17s	remaining: 7.56s
948:	learn: 0.0488381	total: 2m 18s	remaining: 7.42s
949:	learn: 0.0488340	total: 2m 18s	remaining: 7.27s
950:	learn: 0.0487688	total: 2m 18s	remaining: 7.13s
951:	learn: 0.0487027	total: 2m 18s	remaining: 6.98s
952:	learn: 0.0486480	total: 2m 18s	remaining: 6.83s
953:	learn: 0.0486367	total: 2m 18s	remaining: 6.69s
954:	learn: 0.0485341	total: 2m 18s	remaining: 6.54s
955:	learn: 0.0485003	total: 2m 18s	remaining: 6.4s
956:	learn: 0.0484979	total: 2m 19s	remaining: 6.25s
957:	learn: 0.0484071	total: 2m 19s	remaining: 6.11s
958:	learn: 0.0483139	total: 2m 19s	remaining: 5.96s
959:	learn: 0.0483121	total: 2m 19s	remaining: 5.8

In [24]:
evaluate(y_test, y_pred_cb)

Confusion Matrix:
[[14  0  0  1  0  0  0  0  0  0]
 [ 0 16  0  0  1  0  0  1  0  0]
 [ 1  2 16  0  1  0  0  1  0  3]
 [ 1  0  2 15  1  0  0  1  0  1]
 [ 0  1  2  6 10  0  1  0  1  1]
 [ 1  1  0  0  1 10  0  1  5  1]
 [ 1  3  0  0  3  0  9  1  1  0]
 [ 1  0  1  0  1  0  2 18  0  2]
 [ 1  0  0  0  0  2  0  0 14  2]
 [ 0  2  0  0  1  0  0  0  0 15]]
Accuracy:  0.685
Precision:  0.6970069709847742
Recall  0.685
F1:  0.6787651257326527


In [25]:
## Taking only 3 labels instead

In [26]:
data = []
p_array = [p_anger, p_sky, p_water]
data_labels = []
for i in range(0, len(p_array)):
    data.extend(p_array[i])
    data_labels.extend([i]*len(p_array[i]))
data = np.array(data)
data_labels = np.array(data_labels)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data, data_labels, test_size = 0.2)

In [27]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((238,), (60,), (238,), (60,))

In [28]:
train_vectors = vectors(nlp, X_train)
test_vectors = vectors(nlp, X_test)

## Decision Tree Classifier

In [31]:
from sklearn.tree import DecisionTreeClassifier
dt_model = DecisionTreeClassifier()
dt_model.fit(train_vectors, y_train)
y_pred_dt = dt_model.predict(test_vectors)

In [32]:
evaluate(y_test, y_pred_dt)

Confusion Matrix:
[[14  5  3]
 [ 1 17  8]
 [ 2  2  8]]
Accuracy:  0.65
Precision:  0.6931157550739594
Recall  0.65
F1:  0.6611403363661429


## Random Forest Classifier

In [33]:
from sklearn.ensemble import RandomForestClassifier
rf_model = RandomForestClassifier(n_estimators = 200)
rf_model.fit(train_vectors, y_train)
y_pred_rf = rf_model.predict(test_vectors)

In [34]:
evaluate(y_test, y_pred_rf)

Confusion Matrix:
[[21  1  0]
 [ 1 20  5]
 [ 0  1 11]]
Accuracy:  0.8666666666666667
Precision:  0.881439393939394
Recall  0.8666666666666667
F1:  0.8682539682539683


## SVM Classifier

In [35]:
from sklearn import svm
svm_model = svm.SVC()
svm_model.fit(train_vectors, y_train)
y_pred_svm = svm_model.predict(test_vectors)

In [36]:
evaluate(y_test, y_pred_svm)

Confusion Matrix:
[[21  1  0]
 [ 2 17  7]
 [ 0  0 12]]
Accuracy:  0.8333333333333334
Precision:  0.8703576574285956
Recall  0.8333333333333334
F1:  0.8319094167481265


## XGBoost Classifier

In [37]:
from xgboost import XGBClassifier
xg_model = XGBClassifier()
xg_model.fit(train_vectors, y_train)
y_pred_xg = xg_model.predict(test_vectors)





In [38]:
evaluate(y_test, y_pred_xg)

Confusion Matrix:
[[22  0  0]
 [ 1 21  4]
 [ 0  1 11]]
Accuracy:  0.9
Precision:  0.9110276679841898
Recall  0.9
F1:  0.9006481481481481


## CatBoost Classifier

In [39]:
cb_model = CatBoostClassifier()
cb_model.fit(train_vectors, y_train)
y_pred_cb = cb_model.predict(test_vectors)

Learning rate set to 0.073541
0:	learn: 1.0663043	total: 63.1ms	remaining: 1m 3s
1:	learn: 1.0351491	total: 113ms	remaining: 56.2s
2:	learn: 1.0046522	total: 160ms	remaining: 53.2s
3:	learn: 0.9827246	total: 206ms	remaining: 51.2s
4:	learn: 0.9549297	total: 257ms	remaining: 51.1s
5:	learn: 0.9267618	total: 307ms	remaining: 50.8s
6:	learn: 0.9024306	total: 353ms	remaining: 50.1s
7:	learn: 0.8825979	total: 400ms	remaining: 49.6s
8:	learn: 0.8561100	total: 457ms	remaining: 50.3s
9:	learn: 0.8374655	total: 504ms	remaining: 49.9s
10:	learn: 0.8172720	total: 553ms	remaining: 49.8s
11:	learn: 0.7956737	total: 599ms	remaining: 49.3s
12:	learn: 0.7775129	total: 642ms	remaining: 48.8s
13:	learn: 0.7633607	total: 687ms	remaining: 48.4s
14:	learn: 0.7496336	total: 735ms	remaining: 48.2s
15:	learn: 0.7354645	total: 778ms	remaining: 47.9s
16:	learn: 0.7209015	total: 824ms	remaining: 47.6s
17:	learn: 0.7086350	total: 868ms	remaining: 47.4s
18:	learn: 0.6939853	total: 919ms	remaining: 47.5s
19:	learn:

164:	learn: 0.0979296	total: 7.99s	remaining: 40.5s
165:	learn: 0.0969311	total: 8.04s	remaining: 40.4s
166:	learn: 0.0958206	total: 8.09s	remaining: 40.3s
167:	learn: 0.0949506	total: 8.13s	remaining: 40.3s
168:	learn: 0.0943050	total: 8.18s	remaining: 40.2s
169:	learn: 0.0932970	total: 8.22s	remaining: 40.2s
170:	learn: 0.0920522	total: 8.27s	remaining: 40.1s
171:	learn: 0.0912205	total: 8.31s	remaining: 40s
172:	learn: 0.0904723	total: 8.36s	remaining: 40s
173:	learn: 0.0894171	total: 8.4s	remaining: 39.9s
174:	learn: 0.0888360	total: 8.45s	remaining: 39.8s
175:	learn: 0.0880721	total: 8.5s	remaining: 39.8s
176:	learn: 0.0874804	total: 8.55s	remaining: 39.7s
177:	learn: 0.0868616	total: 8.59s	remaining: 39.7s
178:	learn: 0.0860455	total: 8.64s	remaining: 39.6s
179:	learn: 0.0852778	total: 8.68s	remaining: 39.6s
180:	learn: 0.0844922	total: 8.73s	remaining: 39.5s
181:	learn: 0.0837133	total: 8.78s	remaining: 39.4s
182:	learn: 0.0828618	total: 8.83s	remaining: 39.4s
183:	learn: 0.0823

327:	learn: 0.0345401	total: 15.6s	remaining: 31.9s
328:	learn: 0.0343557	total: 15.6s	remaining: 31.8s
329:	learn: 0.0342169	total: 15.6s	remaining: 31.8s
330:	learn: 0.0340848	total: 15.7s	remaining: 31.7s
331:	learn: 0.0339541	total: 15.7s	remaining: 31.7s
332:	learn: 0.0338288	total: 15.8s	remaining: 31.6s
333:	learn: 0.0337078	total: 15.8s	remaining: 31.6s
334:	learn: 0.0335533	total: 15.9s	remaining: 31.5s
335:	learn: 0.0334031	total: 15.9s	remaining: 31.5s
336:	learn: 0.0332879	total: 16s	remaining: 31.4s
337:	learn: 0.0331721	total: 16s	remaining: 31.4s
338:	learn: 0.0330251	total: 16.1s	remaining: 31.3s
339:	learn: 0.0329127	total: 16.1s	remaining: 31.2s
340:	learn: 0.0327556	total: 16.1s	remaining: 31.2s
341:	learn: 0.0326490	total: 16.2s	remaining: 31.1s
342:	learn: 0.0325308	total: 16.2s	remaining: 31.1s
343:	learn: 0.0323580	total: 16.3s	remaining: 31s
344:	learn: 0.0322586	total: 16.3s	remaining: 31s
345:	learn: 0.0321601	total: 16.4s	remaining: 30.9s
346:	learn: 0.032053

486:	learn: 0.0199285	total: 22.9s	remaining: 24.1s
487:	learn: 0.0198773	total: 22.9s	remaining: 24.1s
488:	learn: 0.0198422	total: 23s	remaining: 24s
489:	learn: 0.0197869	total: 23s	remaining: 24s
490:	learn: 0.0197401	total: 23.1s	remaining: 23.9s
491:	learn: 0.0196856	total: 23.1s	remaining: 23.9s
492:	learn: 0.0196387	total: 23.2s	remaining: 23.8s
493:	learn: 0.0195653	total: 23.2s	remaining: 23.8s
494:	learn: 0.0195217	total: 23.3s	remaining: 23.7s
495:	learn: 0.0194714	total: 23.3s	remaining: 23.7s
496:	learn: 0.0194048	total: 23.4s	remaining: 23.6s
497:	learn: 0.0193591	total: 23.4s	remaining: 23.6s
498:	learn: 0.0192848	total: 23.4s	remaining: 23.5s
499:	learn: 0.0192342	total: 23.5s	remaining: 23.5s
500:	learn: 0.0191846	total: 23.5s	remaining: 23.4s
501:	learn: 0.0191359	total: 23.6s	remaining: 23.4s
502:	learn: 0.0190926	total: 23.6s	remaining: 23.3s
503:	learn: 0.0190373	total: 23.7s	remaining: 23.3s
504:	learn: 0.0189881	total: 23.7s	remaining: 23.2s
505:	learn: 0.018929

649:	learn: 0.0135467	total: 30.7s	remaining: 16.5s
650:	learn: 0.0135239	total: 30.7s	remaining: 16.5s
651:	learn: 0.0134981	total: 30.7s	remaining: 16.4s
652:	learn: 0.0134736	total: 30.8s	remaining: 16.4s
653:	learn: 0.0134514	total: 30.8s	remaining: 16.3s
654:	learn: 0.0134233	total: 30.9s	remaining: 16.3s
655:	learn: 0.0133852	total: 30.9s	remaining: 16.2s
656:	learn: 0.0133549	total: 31s	remaining: 16.2s
657:	learn: 0.0133191	total: 31s	remaining: 16.1s
658:	learn: 0.0132969	total: 31.1s	remaining: 16.1s
659:	learn: 0.0132702	total: 31.1s	remaining: 16s
660:	learn: 0.0132365	total: 31.2s	remaining: 16s
661:	learn: 0.0131908	total: 31.2s	remaining: 15.9s
662:	learn: 0.0131684	total: 31.2s	remaining: 15.9s
663:	learn: 0.0131408	total: 31.3s	remaining: 15.8s
664:	learn: 0.0131214	total: 31.3s	remaining: 15.8s
665:	learn: 0.0130943	total: 31.4s	remaining: 15.7s
666:	learn: 0.0130690	total: 31.4s	remaining: 15.7s
667:	learn: 0.0130333	total: 31.5s	remaining: 15.6s
668:	learn: 0.013002

808:	learn: 0.0101771	total: 38s	remaining: 8.96s
809:	learn: 0.0101634	total: 38s	remaining: 8.92s
810:	learn: 0.0101507	total: 38.1s	remaining: 8.87s
811:	learn: 0.0101397	total: 38.1s	remaining: 8.82s
812:	learn: 0.0101286	total: 38.1s	remaining: 8.78s
813:	learn: 0.0101141	total: 38.2s	remaining: 8.73s
814:	learn: 0.0101015	total: 38.2s	remaining: 8.68s
815:	learn: 0.0100911	total: 38.3s	remaining: 8.63s
816:	learn: 0.0100781	total: 38.3s	remaining: 8.59s
817:	learn: 0.0100658	total: 38.4s	remaining: 8.54s
818:	learn: 0.0100517	total: 38.4s	remaining: 8.49s
819:	learn: 0.0100246	total: 38.5s	remaining: 8.45s
820:	learn: 0.0100056	total: 38.5s	remaining: 8.4s
821:	learn: 0.0099944	total: 38.6s	remaining: 8.35s
822:	learn: 0.0099779	total: 38.6s	remaining: 8.31s
823:	learn: 0.0099655	total: 38.7s	remaining: 8.26s
824:	learn: 0.0099467	total: 38.7s	remaining: 8.21s
825:	learn: 0.0099319	total: 38.8s	remaining: 8.17s
826:	learn: 0.0099132	total: 38.8s	remaining: 8.12s
827:	learn: 0.009

970:	learn: 0.0081312	total: 45.8s	remaining: 1.37s
971:	learn: 0.0081199	total: 45.9s	remaining: 1.32s
972:	learn: 0.0081119	total: 45.9s	remaining: 1.27s
973:	learn: 0.0081040	total: 46s	remaining: 1.23s
974:	learn: 0.0080946	total: 46.1s	remaining: 1.18s
975:	learn: 0.0080836	total: 46.1s	remaining: 1.13s
976:	learn: 0.0080727	total: 46.2s	remaining: 1.09s
977:	learn: 0.0080631	total: 46.2s	remaining: 1.04s
978:	learn: 0.0080541	total: 46.3s	remaining: 993ms
979:	learn: 0.0080440	total: 46.4s	remaining: 946ms
980:	learn: 0.0080370	total: 46.4s	remaining: 899ms
981:	learn: 0.0080219	total: 46.5s	remaining: 851ms
982:	learn: 0.0080150	total: 46.5s	remaining: 804ms
983:	learn: 0.0080047	total: 46.6s	remaining: 757ms
984:	learn: 0.0079970	total: 46.6s	remaining: 710ms
985:	learn: 0.0079863	total: 46.7s	remaining: 663ms
986:	learn: 0.0079740	total: 46.8s	remaining: 616ms
987:	learn: 0.0079666	total: 46.8s	remaining: 569ms
988:	learn: 0.0079573	total: 46.9s	remaining: 521ms
989:	learn: 0.

In [40]:
evaluate(y_test, y_pred_cb)

Confusion Matrix:
[[22  0  0]
 [ 0 21  5]
 [ 0  1 11]]
Accuracy:  0.9
Precision:  0.9178030303030303
Recall  0.9
F1:  0.9029761904761905
