# CSI5386 Natural Language Processing
## Project - Automatic Classification of Poems by Themes
### Aaditya Suri, Ranjan Goyal, Paritosh Pal Singh

In [4]:
#import data
import pandas as pd
import numpy as np

In [25]:
import os
pardir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
topics = os.path.join(pardir, "Project", "data", "topics")
anger = os.path.join(topics, "anger")
animals = os.path.join(topics, "animal")
children = os.path.join(topics, "children")
father = os.path.join(topics, "father")
friend = os.path.join(topics, "friend")
river = os.path.join(topics, "river")
romance = os.path.join(topics, "romance")
sky = os.path.join(topics, "sky")
water = os.path.join(topics, "water")
winter = os.path.join(topics, "winter")

In [28]:
def fetch_poems(path):
    poems_arr = []
    for p in os.listdir(path):
        poem_path = os.path.join(path, p)
        f = open(poem_path)
        poem_text = ""
        for i in f:
            poem_text += i.strip() + "\t"
        poems_arr.append(poem_text)
        f.close()
    return poems_arr

In [29]:
p_anger = fetch_poems(anger)
p_animal = fetch_poems(animals)
p_children = fetch_poems(children)
p_father = fetch_poems(father)
p_friend = fetch_poems(friend)
p_river = fetch_poems(river)
p_romance = fetch_poems(romance)
p_sky = fetch_poems(sky)
p_water = fetch_poems(water)
p_winter = fetch_poems(winter)

In [30]:
data = []
p_array = [p_anger, p_animal, p_children, p_father, p_friend, p_river, p_romance, p_sky, p_water, p_winter]
data_labels = []
for i in range(0, len(p_array)):
    data.extend(p_array[i])
    data_labels.extend([i]*len(p_array[i]))
data = np.array(data)
data_labels = np.array(data_labels)

In [90]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data, data_labels, test_size = 0.2)

In [91]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((797,), (200,), (797,), (200,))

In [92]:
# run this in conda terminal
# conda install -c conda-forge spacy
# python -m spacy download en_core_web_sm

In [93]:
def vectors(nlp, data):
    vectors = []
    for row in data:
        row = str(row)
        doc = nlp(row)
        vectors.append(doc.vector)
    return np.array(vectors)

In [94]:
import spacy
nlp = spacy.load('en_core_web_md')
train_vectors = vectors(nlp, X_train)

In [95]:
test_vectors = vectors(nlp, X_test)

## Random Forest Classifier

In [96]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
def evaluate(y_true, y_pred):
    # print evaluation results for model
    # print confusion matrix
    print("Confusion Matrix:")
    print(confusion_matrix(y_true, y_pred))
    # accuracy
    print("Accuracy: ", accuracy_score(y_true, y_pred))
    # precision
    print("Precision: ", precision_score(y_true, y_pred, average = 'weighted'))
    # recall
    print("Recall ", recall_score(y_true, y_pred, average = 'weighted'))
    # f-measure
    print("F1: ", f1_score(y_true, y_pred, average = 'weighted'))

In [97]:
from sklearn.ensemble import RandomForestClassifier
rf_model = RandomForestClassifier(n_estimators = 200)
rf_model.fit(train_vectors, y_train)
y_pred_rf = rf_model.predict(test_vectors)

In [98]:
evaluate(y_test, y_pred_rf)

Confusion Matrix:
[[20  2  1  0  0  0  0  0  0  0]
 [ 4 15  1  0  1  0  3  0  0  0]
 [ 1  2 13  3  0  0  1  0  0  2]
 [ 1  0  2 10  4  0  0  0  0  1]
 [ 2  0  2  1  7  3  1  1  1  1]
 [ 1  0  0  0  0 14  1  2  1  2]
 [ 0  0  0  2  3  1  8  2  0  1]
 [ 0  0  0  0  1  1  1  9  0  2]
 [ 1  0  1  1  3  4  0  1 10  2]
 [ 0  0  0  0  2  1  1  2  0 13]]
Accuracy:  0.595
Precision:  0.6156118421052632
Recall  0.595
F1:  0.5936339546017354


## SVM Classifier

In [99]:
from sklearn import svm
svm_model = svm.SVC()
svm_model.fit(train_vectors, y_train)
y_pred_svm = svm_model.predict(test_vectors)

In [100]:
evaluate(y_test, y_pred_svm)

Confusion Matrix:
[[18  2  1  0  1  0  0  0  0  1]
 [ 3 11  2  2  3  0  3  0  0  0]
 [ 0  0 17  0  2  0  1  0  0  2]
 [ 0  0  1  7  9  0  0  0  0  1]
 [ 1  0  2  1  8  3  2  1  0  1]
 [ 1  0  0  0  1 12  0  2  0  5]
 [ 0  0  0  1  4  1  8  1  0  2]
 [ 0  1  0  0  1  0  2  6  0  4]
 [ 0  0  1  0  3  4  2  1  6  6]
 [ 0  0  0  0  2  1  1  1  0 14]]
Accuracy:  0.535
Precision:  0.6245619675302337
Recall  0.535
F1:  0.5389408665273593


## CatBoost Classifier

In [101]:
from catboost import CatBoostClassifier
cb_model = CatBoostClassifier()
cb_model.fit(train_vectors, y_train)
y_pred_cb = cb_model.predict(test_vectors)

Learning rate set to 0.078216
0:	learn: 2.2676887	total: 130ms	remaining: 2m 10s
1:	learn: 2.2363092	total: 265ms	remaining: 2m 12s
2:	learn: 2.2009664	total: 397ms	remaining: 2m 11s
3:	learn: 2.1636493	total: 530ms	remaining: 2m 11s
4:	learn: 2.1376829	total: 657ms	remaining: 2m 10s
5:	learn: 2.1093778	total: 797ms	remaining: 2m 12s
6:	learn: 2.0815135	total: 931ms	remaining: 2m 12s
7:	learn: 2.0526458	total: 1.06s	remaining: 2m 11s
8:	learn: 2.0229530	total: 1.19s	remaining: 2m 10s
9:	learn: 1.9936711	total: 1.32s	remaining: 2m 10s
10:	learn: 1.9682733	total: 1.47s	remaining: 2m 12s
11:	learn: 1.9407877	total: 1.63s	remaining: 2m 13s
12:	learn: 1.9205791	total: 1.77s	remaining: 2m 14s
13:	learn: 1.8966448	total: 1.94s	remaining: 2m 16s
14:	learn: 1.8700209	total: 2.1s	remaining: 2m 17s
15:	learn: 1.8453529	total: 2.24s	remaining: 2m 17s
16:	learn: 1.8207772	total: 2.38s	remaining: 2m 17s
17:	learn: 1.7959569	total: 2.53s	remaining: 2m 18s
18:	learn: 1.7748922	total: 2.67s	remaining: 

159:	learn: 0.4725023	total: 23.1s	remaining: 2m 1s
160:	learn: 0.4685324	total: 23.3s	remaining: 2m 1s
161:	learn: 0.4654748	total: 23.4s	remaining: 2m 1s
162:	learn: 0.4615867	total: 23.5s	remaining: 2m
163:	learn: 0.4575537	total: 23.7s	remaining: 2m
164:	learn: 0.4543087	total: 23.8s	remaining: 2m
165:	learn: 0.4516844	total: 23.9s	remaining: 2m
166:	learn: 0.4480558	total: 24.1s	remaining: 2m
167:	learn: 0.4449699	total: 24.2s	remaining: 1m 59s
168:	learn: 0.4414010	total: 24.4s	remaining: 1m 59s
169:	learn: 0.4392561	total: 24.5s	remaining: 1m 59s
170:	learn: 0.4361948	total: 24.6s	remaining: 1m 59s
171:	learn: 0.4316876	total: 24.8s	remaining: 1m 59s
172:	learn: 0.4283400	total: 24.9s	remaining: 1m 58s
173:	learn: 0.4248920	total: 25s	remaining: 1m 58s
174:	learn: 0.4227611	total: 25.2s	remaining: 1m 58s
175:	learn: 0.4188546	total: 25.3s	remaining: 1m 58s
176:	learn: 0.4154432	total: 25.4s	remaining: 1m 58s
177:	learn: 0.4129723	total: 25.6s	remaining: 1m 58s
178:	learn: 0.4093

315:	learn: 0.1941246	total: 45.9s	remaining: 1m 39s
316:	learn: 0.1933861	total: 46s	remaining: 1m 39s
317:	learn: 0.1923208	total: 46.1s	remaining: 1m 38s
318:	learn: 0.1913711	total: 46.3s	remaining: 1m 38s
319:	learn: 0.1906072	total: 46.4s	remaining: 1m 38s
320:	learn: 0.1898614	total: 46.6s	remaining: 1m 38s
321:	learn: 0.1890851	total: 46.7s	remaining: 1m 38s
322:	learn: 0.1882602	total: 46.9s	remaining: 1m 38s
323:	learn: 0.1874566	total: 47s	remaining: 1m 38s
324:	learn: 0.1864790	total: 47.2s	remaining: 1m 37s
325:	learn: 0.1854285	total: 47.3s	remaining: 1m 37s
326:	learn: 0.1847749	total: 47.5s	remaining: 1m 37s
327:	learn: 0.1842233	total: 47.6s	remaining: 1m 37s
328:	learn: 0.1832132	total: 47.7s	remaining: 1m 37s
329:	learn: 0.1824049	total: 47.9s	remaining: 1m 37s
330:	learn: 0.1815909	total: 48s	remaining: 1m 37s
331:	learn: 0.1806537	total: 48.2s	remaining: 1m 36s
332:	learn: 0.1800414	total: 48.3s	remaining: 1m 36s
333:	learn: 0.1794306	total: 48.5s	remaining: 1m 36s

471:	learn: 0.1091946	total: 1m 7s	remaining: 1m 15s
472:	learn: 0.1087778	total: 1m 7s	remaining: 1m 15s
473:	learn: 0.1084360	total: 1m 8s	remaining: 1m 15s
474:	learn: 0.1080928	total: 1m 8s	remaining: 1m 15s
475:	learn: 0.1077454	total: 1m 8s	remaining: 1m 15s
476:	learn: 0.1073291	total: 1m 8s	remaining: 1m 14s
477:	learn: 0.1070094	total: 1m 8s	remaining: 1m 14s
478:	learn: 0.1067567	total: 1m 8s	remaining: 1m 14s
479:	learn: 0.1063527	total: 1m 8s	remaining: 1m 14s
480:	learn: 0.1059925	total: 1m 8s	remaining: 1m 14s
481:	learn: 0.1057207	total: 1m 9s	remaining: 1m 14s
482:	learn: 0.1053539	total: 1m 9s	remaining: 1m 14s
483:	learn: 0.1051913	total: 1m 9s	remaining: 1m 13s
484:	learn: 0.1049275	total: 1m 9s	remaining: 1m 13s
485:	learn: 0.1046700	total: 1m 9s	remaining: 1m 13s
486:	learn: 0.1043218	total: 1m 9s	remaining: 1m 13s
487:	learn: 0.1040950	total: 1m 9s	remaining: 1m 13s
488:	learn: 0.1040436	total: 1m 9s	remaining: 1m 13s
489:	learn: 0.1037073	total: 1m 10s	remaining:

627:	learn: 0.0745429	total: 1m 29s	remaining: 53.1s
628:	learn: 0.0743647	total: 1m 29s	remaining: 53s
629:	learn: 0.0741772	total: 1m 30s	remaining: 52.9s
630:	learn: 0.0740163	total: 1m 30s	remaining: 52.7s
631:	learn: 0.0738866	total: 1m 30s	remaining: 52.6s
632:	learn: 0.0737376	total: 1m 30s	remaining: 52.4s
633:	learn: 0.0735921	total: 1m 30s	remaining: 52.3s
634:	learn: 0.0734233	total: 1m 30s	remaining: 52.2s
635:	learn: 0.0732875	total: 1m 30s	remaining: 52s
636:	learn: 0.0731423	total: 1m 31s	remaining: 51.9s
637:	learn: 0.0729782	total: 1m 31s	remaining: 51.7s
638:	learn: 0.0727931	total: 1m 31s	remaining: 51.6s
639:	learn: 0.0726266	total: 1m 31s	remaining: 51.5s
640:	learn: 0.0724804	total: 1m 31s	remaining: 51.3s
641:	learn: 0.0724518	total: 1m 31s	remaining: 51.2s
642:	learn: 0.0724214	total: 1m 31s	remaining: 51.1s
643:	learn: 0.0722102	total: 1m 32s	remaining: 50.9s
644:	learn: 0.0720398	total: 1m 32s	remaining: 50.8s
645:	learn: 0.0718422	total: 1m 32s	remaining: 50.

783:	learn: 0.0572713	total: 1m 52s	remaining: 30.9s
784:	learn: 0.0572448	total: 1m 52s	remaining: 30.7s
785:	learn: 0.0572199	total: 1m 52s	remaining: 30.6s
786:	learn: 0.0571320	total: 1m 52s	remaining: 30.4s
787:	learn: 0.0570678	total: 1m 52s	remaining: 30.3s
788:	learn: 0.0569825	total: 1m 52s	remaining: 30.2s
789:	learn: 0.0569047	total: 1m 52s	remaining: 30s
790:	learn: 0.0567899	total: 1m 53s	remaining: 29.9s
791:	learn: 0.0566678	total: 1m 53s	remaining: 29.7s
792:	learn: 0.0565911	total: 1m 53s	remaining: 29.6s
793:	learn: 0.0565735	total: 1m 53s	remaining: 29.5s
794:	learn: 0.0565589	total: 1m 53s	remaining: 29.3s
795:	learn: 0.0564501	total: 1m 53s	remaining: 29.2s
796:	learn: 0.0563226	total: 1m 54s	remaining: 29s
797:	learn: 0.0562006	total: 1m 54s	remaining: 28.9s
798:	learn: 0.0561791	total: 1m 54s	remaining: 28.8s
799:	learn: 0.0560361	total: 1m 54s	remaining: 28.6s
800:	learn: 0.0560039	total: 1m 54s	remaining: 28.5s
801:	learn: 0.0559990	total: 1m 54s	remaining: 28.

941:	learn: 0.0466474	total: 2m 15s	remaining: 8.37s
942:	learn: 0.0466425	total: 2m 16s	remaining: 8.22s
943:	learn: 0.0466325	total: 2m 16s	remaining: 8.08s
944:	learn: 0.0465720	total: 2m 16s	remaining: 7.94s
945:	learn: 0.0465361	total: 2m 16s	remaining: 7.79s
946:	learn: 0.0465054	total: 2m 16s	remaining: 7.65s
947:	learn: 0.0464977	total: 2m 16s	remaining: 7.5s
948:	learn: 0.0464161	total: 2m 16s	remaining: 7.36s
949:	learn: 0.0463286	total: 2m 17s	remaining: 7.22s
950:	learn: 0.0462613	total: 2m 17s	remaining: 7.07s
951:	learn: 0.0461924	total: 2m 17s	remaining: 6.93s
952:	learn: 0.0461411	total: 2m 17s	remaining: 6.78s
953:	learn: 0.0460729	total: 2m 17s	remaining: 6.64s
954:	learn: 0.0460041	total: 2m 17s	remaining: 6.5s
955:	learn: 0.0459665	total: 2m 18s	remaining: 6.35s
956:	learn: 0.0459250	total: 2m 18s	remaining: 6.21s
957:	learn: 0.0458490	total: 2m 18s	remaining: 6.06s
958:	learn: 0.0457706	total: 2m 18s	remaining: 5.92s
959:	learn: 0.0456918	total: 2m 18s	remaining: 5

In [102]:
evaluate(y_test, y_pred_cb)

Confusion Matrix:
[[20  2  0  0  1  0  0  0  0  0]
 [ 3 17  0  0  2  0  2  0  0  0]
 [ 2  4 12  2  0  0  0  0  0  2]
 [ 1  0  0 13  3  0  0  0  0  1]
 [ 2  0  1  2  7  2  1  1  2  1]
 [ 1  0  0  0  0 15  1  2  2  0]
 [ 0  0  0  1  3  0 11  1  0  1]
 [ 0  2  0  0  0  0  0  9  0  3]
 [ 2  0  1  1  1  3  0  1 13  1]
 [ 0  1  0  1  0  0  1  2  0 14]]
Accuracy:  0.655
Precision:  0.666888211620283
Recall  0.655
F1:  0.6522211786240669


In [103]:
## Taking only 3 labels instead

In [109]:
data = []
p_array = [p_anger, p_sky, p_water]
data_labels = []
for i in range(0, len(p_array)):
    data.extend(p_array[i])
    data_labels.extend([i]*len(p_array[i]))
data = np.array(data)
data_labels = np.array(data_labels)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data, data_labels, test_size = 0.2)

In [110]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((238,), (60,), (238,), (60,))

In [111]:
train_vectors = vectors(nlp, X_train)
test_vectors = vectors(nlp, X_test)

In [112]:
cb_model = CatBoostClassifier()
cb_model.fit(train_vectors, y_train)
y_pred_cb = cb_model.predict(test_vectors)

Learning rate set to 0.073541
0:	learn: 1.0714889	total: 58.6ms	remaining: 58.5s
1:	learn: 1.0424510	total: 104ms	remaining: 52.1s
2:	learn: 1.0117271	total: 160ms	remaining: 53.1s
3:	learn: 0.9871531	total: 209ms	remaining: 51.9s
4:	learn: 0.9649110	total: 259ms	remaining: 51.6s
5:	learn: 0.9383411	total: 307ms	remaining: 50.8s
6:	learn: 0.9159394	total: 356ms	remaining: 50.4s
7:	learn: 0.8884664	total: 407ms	remaining: 50.4s
8:	learn: 0.8675221	total: 456ms	remaining: 50.2s
9:	learn: 0.8463423	total: 502ms	remaining: 49.7s
10:	learn: 0.8287984	total: 548ms	remaining: 49.3s
11:	learn: 0.8071475	total: 597ms	remaining: 49.1s
12:	learn: 0.7866946	total: 646ms	remaining: 49s
13:	learn: 0.7708619	total: 700ms	remaining: 49.3s
14:	learn: 0.7589102	total: 753ms	remaining: 49.5s
15:	learn: 0.7455580	total: 805ms	remaining: 49.5s
16:	learn: 0.7306769	total: 857ms	remaining: 49.6s
17:	learn: 0.7160676	total: 911ms	remaining: 49.7s
18:	learn: 0.6984614	total: 969ms	remaining: 50s
19:	learn: 0.6

161:	learn: 0.0966272	total: 7.92s	remaining: 41s
162:	learn: 0.0957513	total: 7.97s	remaining: 40.9s
163:	learn: 0.0947876	total: 8.02s	remaining: 40.9s
164:	learn: 0.0938918	total: 8.06s	remaining: 40.8s
165:	learn: 0.0929123	total: 8.11s	remaining: 40.8s
166:	learn: 0.0920822	total: 8.16s	remaining: 40.7s
167:	learn: 0.0910511	total: 8.21s	remaining: 40.6s
168:	learn: 0.0901589	total: 8.25s	remaining: 40.6s
169:	learn: 0.0893630	total: 8.3s	remaining: 40.5s
170:	learn: 0.0883408	total: 8.34s	remaining: 40.4s
171:	learn: 0.0876499	total: 8.38s	remaining: 40.4s
172:	learn: 0.0868241	total: 8.43s	remaining: 40.3s
173:	learn: 0.0861442	total: 8.48s	remaining: 40.2s
174:	learn: 0.0852458	total: 8.52s	remaining: 40.2s
175:	learn: 0.0846813	total: 8.56s	remaining: 40.1s
176:	learn: 0.0839442	total: 8.61s	remaining: 40s
177:	learn: 0.0831284	total: 8.66s	remaining: 40s
178:	learn: 0.0826118	total: 8.71s	remaining: 39.9s
179:	learn: 0.0817588	total: 8.75s	remaining: 39.9s
180:	learn: 0.08110

321:	learn: 0.0348777	total: 15.2s	remaining: 32.1s
322:	learn: 0.0347615	total: 15.3s	remaining: 32s
323:	learn: 0.0346299	total: 15.3s	remaining: 32s
324:	learn: 0.0344898	total: 15.4s	remaining: 31.9s
325:	learn: 0.0343876	total: 15.4s	remaining: 31.9s
326:	learn: 0.0342067	total: 15.5s	remaining: 31.8s
327:	learn: 0.0340992	total: 15.5s	remaining: 31.8s
328:	learn: 0.0339768	total: 15.6s	remaining: 31.7s
329:	learn: 0.0338387	total: 15.6s	remaining: 31.7s
330:	learn: 0.0337083	total: 15.7s	remaining: 31.7s
331:	learn: 0.0336053	total: 15.7s	remaining: 31.6s
332:	learn: 0.0334512	total: 15.8s	remaining: 31.6s
333:	learn: 0.0333577	total: 15.8s	remaining: 31.5s
334:	learn: 0.0332258	total: 15.9s	remaining: 31.5s
335:	learn: 0.0330876	total: 15.9s	remaining: 31.4s
336:	learn: 0.0329097	total: 16s	remaining: 31.4s
337:	learn: 0.0327703	total: 16s	remaining: 31.3s
338:	learn: 0.0326478	total: 16.1s	remaining: 31.3s
339:	learn: 0.0325184	total: 16.1s	remaining: 31.3s
340:	learn: 0.032383

483:	learn: 0.0200852	total: 23s	remaining: 24.5s
484:	learn: 0.0199933	total: 23.1s	remaining: 24.5s
485:	learn: 0.0199257	total: 23.1s	remaining: 24.4s
486:	learn: 0.0198815	total: 23.2s	remaining: 24.4s
487:	learn: 0.0198149	total: 23.2s	remaining: 24.3s
488:	learn: 0.0197654	total: 23.2s	remaining: 24.3s
489:	learn: 0.0196938	total: 23.3s	remaining: 24.2s
490:	learn: 0.0196240	total: 23.3s	remaining: 24.2s
491:	learn: 0.0195471	total: 23.4s	remaining: 24.2s
492:	learn: 0.0194978	total: 23.4s	remaining: 24.1s
493:	learn: 0.0194338	total: 23.5s	remaining: 24.1s
494:	learn: 0.0193811	total: 23.5s	remaining: 24s
495:	learn: 0.0193206	total: 23.6s	remaining: 24s
496:	learn: 0.0192741	total: 23.6s	remaining: 23.9s
497:	learn: 0.0192254	total: 23.7s	remaining: 23.9s
498:	learn: 0.0191794	total: 23.7s	remaining: 23.8s
499:	learn: 0.0191432	total: 23.8s	remaining: 23.8s
500:	learn: 0.0191037	total: 23.8s	remaining: 23.7s
501:	learn: 0.0190547	total: 23.9s	remaining: 23.7s
502:	learn: 0.0190

644:	learn: 0.0138900	total: 30.7s	remaining: 16.9s
645:	learn: 0.0138707	total: 30.7s	remaining: 16.8s
646:	learn: 0.0138441	total: 30.8s	remaining: 16.8s
647:	learn: 0.0138182	total: 30.8s	remaining: 16.7s
648:	learn: 0.0137715	total: 30.9s	remaining: 16.7s
649:	learn: 0.0137470	total: 30.9s	remaining: 16.6s
650:	learn: 0.0137242	total: 31s	remaining: 16.6s
651:	learn: 0.0136975	total: 31s	remaining: 16.6s
652:	learn: 0.0136773	total: 31.1s	remaining: 16.5s
653:	learn: 0.0136525	total: 31.1s	remaining: 16.5s
654:	learn: 0.0136321	total: 31.2s	remaining: 16.4s
655:	learn: 0.0136122	total: 31.2s	remaining: 16.4s
656:	learn: 0.0135909	total: 31.2s	remaining: 16.3s
657:	learn: 0.0135694	total: 31.3s	remaining: 16.3s
658:	learn: 0.0135496	total: 31.3s	remaining: 16.2s
659:	learn: 0.0135280	total: 31.4s	remaining: 16.2s
660:	learn: 0.0135005	total: 31.4s	remaining: 16.1s
661:	learn: 0.0134757	total: 31.5s	remaining: 16.1s
662:	learn: 0.0134503	total: 31.5s	remaining: 16s
663:	learn: 0.0134

807:	learn: 0.0104129	total: 38.4s	remaining: 9.13s
808:	learn: 0.0104002	total: 38.5s	remaining: 9.08s
809:	learn: 0.0103796	total: 38.5s	remaining: 9.03s
810:	learn: 0.0103655	total: 38.6s	remaining: 8.99s
811:	learn: 0.0103538	total: 38.6s	remaining: 8.94s
812:	learn: 0.0103383	total: 38.7s	remaining: 8.89s
813:	learn: 0.0103170	total: 38.7s	remaining: 8.84s
814:	learn: 0.0103040	total: 38.7s	remaining: 8.79s
815:	learn: 0.0102822	total: 38.8s	remaining: 8.75s
816:	learn: 0.0102654	total: 38.8s	remaining: 8.7s
817:	learn: 0.0102547	total: 38.9s	remaining: 8.65s
818:	learn: 0.0102434	total: 38.9s	remaining: 8.61s
819:	learn: 0.0102267	total: 39s	remaining: 8.56s
820:	learn: 0.0102097	total: 39s	remaining: 8.51s
821:	learn: 0.0101880	total: 39.1s	remaining: 8.46s
822:	learn: 0.0101691	total: 39.1s	remaining: 8.41s
823:	learn: 0.0101555	total: 39.2s	remaining: 8.37s
824:	learn: 0.0101410	total: 39.2s	remaining: 8.32s
825:	learn: 0.0101302	total: 39.3s	remaining: 8.27s
826:	learn: 0.010

969:	learn: 0.0083489	total: 46.2s	remaining: 1.43s
970:	learn: 0.0083402	total: 46.2s	remaining: 1.38s
971:	learn: 0.0083269	total: 46.3s	remaining: 1.33s
972:	learn: 0.0083179	total: 46.3s	remaining: 1.28s
973:	learn: 0.0083079	total: 46.4s	remaining: 1.24s
974:	learn: 0.0082939	total: 46.4s	remaining: 1.19s
975:	learn: 0.0082742	total: 46.5s	remaining: 1.14s
976:	learn: 0.0082664	total: 46.5s	remaining: 1.09s
977:	learn: 0.0082511	total: 46.6s	remaining: 1.05s
978:	learn: 0.0082390	total: 46.6s	remaining: 1s
979:	learn: 0.0082280	total: 46.7s	remaining: 952ms
980:	learn: 0.0082178	total: 46.7s	remaining: 905ms
981:	learn: 0.0082068	total: 46.8s	remaining: 857ms
982:	learn: 0.0081952	total: 46.8s	remaining: 810ms
983:	learn: 0.0081831	total: 46.9s	remaining: 762ms
984:	learn: 0.0081750	total: 46.9s	remaining: 714ms
985:	learn: 0.0081646	total: 47s	remaining: 667ms
986:	learn: 0.0081572	total: 47s	remaining: 619ms
987:	learn: 0.0081477	total: 47.1s	remaining: 571ms
988:	learn: 0.00813

In [113]:
evaluate(y_test, y_pred_cb)

Confusion Matrix:
[[22  0  0]
 [ 0 19  1]
 [ 2  3 13]]
Accuracy:  0.9
Precision:  0.9025613275613276
Recall  0.9
F1:  0.8960619392684611
