In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
dataset = pd.read_csv("youtoxic_english_1000.csv")

### Preprocessing Dataset


In [3]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

In [4]:
# dataset.replace({False: 0, True: 1}, inplace=True)
# dataset = dataset[dataset['IsToxic']== 1]
# dataset = dataset.reset_index(drop=True)
# dataset.info()


In [5]:
corpus = []
for i in range(0, 1000):
    review = re.sub('[^a-zA-Z]', ' ', dataset['Text'][i])
    review = review.lower()
    review = review.split()
    ps = PorterStemmer()
    review = [ps.stem(word) for word in review if not word in set(stopwords.words('english'))]
    review = ' '.join(review)
    corpus.append(review)


### Vectorization

In [6]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features = 1500)
X = cv.fit_transform(corpus).toarray()
y = dataset.iloc[:, 1].values

### Training and Classification


In [7]:
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)


### Multinomial NB



In [8]:
# Multinomial NB

# Fitting Naive Bayes to the Training set
from sklearn.naive_bayes import MultinomialNB
classifier = MultinomialNB(alpha=0.1)
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print ("Confusion Matrix:\n",cm)

# Accuracy, Precision and Recall
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
score1 = accuracy_score(y_test,y_pred)
score2 = precision_score(y_test,y_pred,average='micro')
score3= recall_score(y_test,y_pred,average='micro')
print("\n")
print("Accuracy is ",round(score1*100,2),"%")
print("Precision is ",round(score2,2))
print("Recall is ",round(score3,2))


Confusion Matrix:
 [[ 0  2  0  0  0  0  0  0  0  0  1  0  0]
 [ 0 18  2  0  3 10  3  5  2  2  3  0  1]
 [ 0  1  2  0  1  2  0  0  0  0  0  0  2]
 [ 0  2  0  0  0  0  0  2  0  0  1  0  0]
 [ 0  1  0  0  1  3  0  2  0  1  3  0  1]
 [ 2  9  3  1  1 50  0  8  1  2  2  3  4]
 [ 1  3  0  0  1  4  0  0  0  0  0  0  0]
 [ 0  9  2  0  1  2  0 12  1  2  3  1  3]
 [ 0  0  1  0  1  2  0  0  0  0  0  0  0]
 [ 0  1  0  0  0  4  0  1  0  4  1  0  1]
 [ 0  5  1  0  0  5  4  4  0  0 23  1  0]
 [ 0  1  0  0  0  2  0  0  0  0  0  1  0]
 [ 0  3  1  0  1  7  0  2  0  0  0  1 14]]


Accuracy is  41.67 %
Precision is  0.42
Recall is  0.42


### Bernoulli NB

In [9]:
# Bernoulli NB

# Fitting Naive Bayes to the Training set
from sklearn.naive_bayes import BernoulliNB
classifier = BernoulliNB(alpha=0.8)
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print ("Confusion Matrix:\n",cm)

# Accuracy, Precision and Recall
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
score1 = accuracy_score(y_test,y_pred)
score2 = precision_score(y_test,y_pred,average='micro')
score3= recall_score(y_test,y_pred,average='micro')
print("\n")
print("Accuracy is ",round(score1*100,2),"%")
print("Precision is ",round(score2,2))
print("Recall is ",round(score3,2))

Confusion Matrix:
 [[ 0  0  0  0  0  0  0  0  0  0  3  0  0]
 [ 0 13  1  0  0  9  0  4  0  1 21  0  0]
 [ 0  3  0  0  0  1  0  0  0  0  2  1  1]
 [ 0  1  0  0  0  0  0  0  0  0  4  0  0]
 [ 0  0  0  0  0  3  0  0  0  0  9  0  0]
 [ 0  5  2  0  0 55  0  2  0  0 18  0  4]
 [ 0  1  0  0  0  3  0  0  0  0  5  0  0]
 [ 0  4  0  0  0  4  0  5  0  0 23  0  0]
 [ 0  0  1  0  0  0  0  0  0  0  3  0  0]
 [ 0  0  0  0  0  2  0  2  0  0  8  0  0]
 [ 0  1  0  0  0  2  0  0  0  0 40  0  0]
 [ 0  0  0  0  0  1  0  0  0  0  3  0  0]
 [ 0  0  0  0  0  1  0  1  0  0 15  0 12]]


Accuracy is  41.67 %
Precision is  0.42
Recall is  0.42


### Logistic Regression



In [10]:
# Logistic Regression

# Fitting Logistic Regression to the Training set
from sklearn import linear_model
classifier = linear_model.LogisticRegression(C=1.5)
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print ("Confusion Matrix:\n",cm)

# Accuracy, Precision and Recall
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
score1 = accuracy_score(y_test,y_pred)
score2 = precision_score(y_test,y_pred,average='micro')
score3= recall_score(y_test,y_pred,average='micro')
print("\n")
print("Accuracy is ",round(score1*100,2),"%")
print("Precision is ",round(score2,2))
print("Recall is ",round(score3,2))

Confusion Matrix:
 [[ 0  1  0  0  0  1  0  0  0  0  1  0  0]
 [ 0 22  2  0  0 10  1  2  0  0 11  0  1]
 [ 0  1  1  0  1  2  0  1  0  0  2  0  0]
 [ 1  1  0  0  0  0  0  1  0  0  2  0  0]
 [ 0  2  0  0  0  4  0  0  0  0  5  0  1]
 [ 0 12  1  0  0 55  1  6  0  0  8  0  3]
 [ 0  2  0  0  0  3  0  2  0  0  2  0  0]
 [ 1  8  0  0  1 10  0  6  0  0  7  0  3]
 [ 0  0  0  0  0  3  0  0  0  0  1  0  0]
 [ 0  2  0  0  0  5  0  1  0  1  3  0  0]
 [ 0  5  0  0  0  6  0  2  0  0 30  0  0]
 [ 0  1  0  0  0  2  0  0  0  0  1  0  0]
 [ 0  1  0  0  0  4  0  0  0  0  3  0 21]]


Accuracy is  45.33 %
Precision is  0.45
Recall is  0.45


### KNeighborsClassifier KNN算法

In [13]:
from sklearn.neighbors import KNeighborsClassifier #KNN算法
classifier = KNeighborsClassifier()
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print ("Confusion Matrix:\n",cm)

# Accuracy, Precision and Recall
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
score1 = accuracy_score(y_test,y_pred)
score2 = precision_score(y_test,y_pred,average='micro')
score3= recall_score(y_test,y_pred,average='micro')
print("\n")
print("Accuracy is ",round(score1*100,2),"%")
print("Precision is ",round(score2,2))
print("Recall is ",round(score3,2))

Confusion Matrix:
 [[ 0  0  0  0  0  0  0  0  0  3  0  0  0]
 [ 6 11  1  0  4  2  0  5  0  8  5  0  7]
 [ 0  1  0  0  1  1  0  0  0  4  0  0  1]
 [ 2  0  0  0  0  0  0  1  0  1  1  0  0]
 [ 2  2  0  0  1  0  0  1  0  4  1  0  1]
 [10  7  2  0  8 20  1  4  0 16 13  0  5]
 [ 2  0  0  0  1  0  0  0  0  3  2  0  1]
 [ 3  5  0  1  5  1  1  3  0 10  7  0  0]
 [ 1  0  0  0  0  1  0  0  0  1  1  0  0]
 [ 1  1  0  0  0  1  0  0  0  6  2  0  1]
 [ 4  0  0  0  5  0  0  2  0 19 13  0  0]
 [ 2  0  0  0  0  0  0  0  0  2  0  0  0]
 [ 3  3  0  0  1  0  0  4  0  8  2  0  8]]


Accuracy is  20.67 %
Precision is  0.21
Recall is  0.21


### DecisionTreeClassifier 决策树分类器

In [14]:
from sklearn.tree import DecisionTreeClassifier #决策树分类器

classifier = DecisionTreeClassifier(random_state=0)
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print ("Confusion Matrix:\n",cm)

# Accuracy, Precision and Recall
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
score1 = accuracy_score(y_test,y_pred)
score2 = precision_score(y_test,y_pred,average='micro')
score3= recall_score(y_test,y_pred,average='micro')
print("\n")
print("Accuracy is ",round(score1*100,2),"%")
print("Precision is ",round(score2,2))
print("Recall is ",round(score3,2))

Confusion Matrix:
 [[ 0  0  0  0  0  2  0  0  0  0  1  0  0]
 [ 0 17  1  0  0 10  2  4  0  5  7  0  3]
 [ 0  1  0  0  3  1  0  1  0  2  0  0  0]
 [ 0  1  0  0  0  0  2  1  0  0  1  0  0]
 [ 0  1  0  0  4  2  0  0  0  3  2  0  0]
 [ 0 10  3  0  1 40  2 10  0 10  6  1  3]
 [ 0  1  0  0  0  3  0  1  0  3  1  0  0]
 [ 0  9  0  0  1 11  1  3  0  5  5  0  1]
 [ 0  0  0  0  0  1  0  0  1  1  1  0  0]
 [ 0  0  0  0  0  3  0  1  0  3  2  0  3]
 [ 0  2  0  0  0  2  3  1  0  7 27  0  1]
 [ 0  1  0  0  0  0  0  0  0  2  1  0  0]
 [ 0  1  0  0  0  3  1  4  0  3  2  1 14]]


Accuracy is  36.33 %
Precision is  0.36
Recall is  0.36


### SVC 支持向量机

In [15]:
from sklearn.svm import SVC,LinearSVC #支持向量机

classifier = SVC()
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print ("Confusion Matrix:\n",cm)

# Accuracy, Precision and Recall
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
score1 = accuracy_score(y_test,y_pred)
score2 = precision_score(y_test,y_pred,average='micro')
score3= recall_score(y_test,y_pred,average='micro')
print("\n")
print("Accuracy is ",round(score1*100,2),"%")
print("Precision is ",round(score2,2))
print("Recall is ",round(score3,2))

Confusion Matrix:
 [[ 0  0  0  0  0  2  0  0  0  0  1  0  0]
 [ 0 10  0  0  0 31  0  0  0  0  7  0  1]
 [ 0  0  0  0  0  8  0  0  0  0  0  0  0]
 [ 0  2  0  0  0  1  0  0  0  0  2  0  0]
 [ 0  1  0  0  0  8  0  0  0  0  3  0  0]
 [ 0  2  0  0  0 75  0  0  0  0  9  0  0]
 [ 0  0  0  0  0  8  0  0  0  0  1  0  0]
 [ 0  4  0  0  0 27  0  0  0  0  5  0  0]
 [ 0  0  0  0  0  4  0  0  0  0  0  0  0]
 [ 0  1  0  0  0  9  0  0  0  0  2  0  0]
 [ 0  2  0  0  0 13  0  0  0  0 28  0  0]
 [ 0  1  0  0  0  3  0  0  0  0  0  0  0]
 [ 0  0  0  0  0 16  0  0  0  0  3  0 10]]


Accuracy is  41.0 %
Precision is  0.41
Recall is  0.41


### GaussianNB 朴素贝叶斯

In [18]:
from sklearn.naive_bayes import GaussianNB #朴素贝叶斯

classifier = GaussianNB()
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print ("Confusion Matrix:\n",cm)

# Accuracy, Precision and Recall
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
score1 = accuracy_score(y_test,y_pred)
score2 = precision_score(y_test,y_pred,average='micro')
score3= recall_score(y_test,y_pred,average='micro')
print("\n")
print("Accuracy is ",round(score1*100,2),"%")
print("Precision is ",round(score2,2))
print("Recall is ",round(score3,2))

Confusion Matrix:
 [[ 0  2  0  0  0  0  0  0  0  0  1  0  0]
 [ 0 19  1  2  0 17  1  2  0  1  4  1  1]
 [ 0  2  1  0  0  3  0  0  0  0  0  0  2]
 [ 0  1  0  0  0  1  0  2  0  1  0  0  0]
 [ 1  2  1  0  1  1  0  2  0  2  1  0  1]
 [ 0 16  1  3  1 54  0  4  0  3  1  0  3]
 [ 1  3  0  1  0  2  0  1  0  0  0  0  1]
 [ 0 10  1  2  0  9  0  8  0  2  3  0  1]
 [ 0  0  1  0  0  2  0  0  0  0  1  0  0]
 [ 0  3  0  0  0  4  0  1  0  3  0  0  1]
 [ 0  6  0  1  1 12  1  4  0  5 13  0  0]
 [ 0  2  0  0  0  1  0  0  1  0  0  0  0]
 [ 0  6  1  0  1  9  1  0  0  0  0  0 11]]


Accuracy is  36.67 %
Precision is  0.37
Recall is  0.37


### CatBoostClassifier

In [21]:
from catboost import CatBoostClassifier

classifier = CatBoostClassifier()
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print ("Confusion Matrix:\n",cm)

# Accuracy, Precision and Recall
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
score1 = accuracy_score(y_test,y_pred)
score2 = precision_score(y_test,y_pred,average='micro')
score3= recall_score(y_test,y_pred,average='micro')
print("\n")
print("Accuracy is ",round(score1*100,2),"%")
print("Precision is ",round(score2,2))
print("Recall is ",round(score3,2))

Learning rate set to 0.0777
0:	learn: 2.4816223	total: 48.3ms	remaining: 48.3s
1:	learn: 2.4210570	total: 83.3ms	remaining: 41.6s
2:	learn: 2.3720533	total: 115ms	remaining: 38.3s
3:	learn: 2.3320723	total: 150ms	remaining: 37.5s
4:	learn: 2.2980187	total: 185ms	remaining: 36.7s
5:	learn: 2.2686063	total: 222ms	remaining: 36.8s
6:	learn: 2.2337770	total: 257ms	remaining: 36.5s
7:	learn: 2.1993571	total: 291ms	remaining: 36.1s
8:	learn: 2.1779045	total: 322ms	remaining: 35.5s
9:	learn: 2.1554979	total: 351ms	remaining: 34.7s
10:	learn: 2.1337870	total: 378ms	remaining: 34s
11:	learn: 2.1140719	total: 404ms	remaining: 33.2s
12:	learn: 2.0986318	total: 432ms	remaining: 32.8s
13:	learn: 2.0814183	total: 467ms	remaining: 32.9s
14:	learn: 2.0696986	total: 508ms	remaining: 33.4s
15:	learn: 2.0579921	total: 544ms	remaining: 33.5s
16:	learn: 2.0472001	total: 579ms	remaining: 33.5s
17:	learn: 2.0364725	total: 615ms	remaining: 33.6s
18:	learn: 2.0249983	total: 655ms	remaining: 33.8s
19:	learn: 2.

162:	learn: 1.5507097	total: 5.87s	remaining: 30.1s
163:	learn: 1.5495272	total: 5.9s	remaining: 30.1s
164:	learn: 1.5466298	total: 5.93s	remaining: 30s
165:	learn: 1.5445005	total: 5.96s	remaining: 29.9s
166:	learn: 1.5436620	total: 5.98s	remaining: 29.9s
167:	learn: 1.5408540	total: 6.02s	remaining: 29.8s
168:	learn: 1.5378793	total: 6.05s	remaining: 29.7s
169:	learn: 1.5341727	total: 6.08s	remaining: 29.7s
170:	learn: 1.5328388	total: 6.11s	remaining: 29.6s
171:	learn: 1.5303080	total: 6.14s	remaining: 29.5s
172:	learn: 1.5294582	total: 6.16s	remaining: 29.5s
173:	learn: 1.5274763	total: 6.19s	remaining: 29.4s
174:	learn: 1.5255167	total: 6.22s	remaining: 29.3s
175:	learn: 1.5250514	total: 6.25s	remaining: 29.3s
176:	learn: 1.5232863	total: 6.28s	remaining: 29.2s
177:	learn: 1.5212069	total: 6.31s	remaining: 29.1s
178:	learn: 1.5197342	total: 6.34s	remaining: 29.1s
179:	learn: 1.5141815	total: 6.37s	remaining: 29s
180:	learn: 1.5131113	total: 6.39s	remaining: 28.9s
181:	learn: 1.511

326:	learn: 1.3256088	total: 11.7s	remaining: 24.1s
327:	learn: 1.3246675	total: 11.7s	remaining: 24.1s
328:	learn: 1.3241307	total: 11.8s	remaining: 24s
329:	learn: 1.3230581	total: 11.8s	remaining: 24s
330:	learn: 1.3224578	total: 11.9s	remaining: 24s
331:	learn: 1.3220248	total: 11.9s	remaining: 24s
332:	learn: 1.3210202	total: 12s	remaining: 24s
333:	learn: 1.3204874	total: 12s	remaining: 24s
334:	learn: 1.3183279	total: 12.1s	remaining: 24s
335:	learn: 1.3179595	total: 12.1s	remaining: 23.9s
336:	learn: 1.3175486	total: 12.1s	remaining: 23.9s
337:	learn: 1.3171861	total: 12.2s	remaining: 23.9s
338:	learn: 1.3152452	total: 12.2s	remaining: 23.8s
339:	learn: 1.3148748	total: 12.3s	remaining: 23.8s
340:	learn: 1.3141368	total: 12.3s	remaining: 23.8s
341:	learn: 1.3136914	total: 12.4s	remaining: 23.8s
342:	learn: 1.3119217	total: 12.4s	remaining: 23.8s
343:	learn: 1.3110413	total: 12.5s	remaining: 23.7s
344:	learn: 1.3092471	total: 12.5s	remaining: 23.7s
345:	learn: 1.3067186	total: 1

489:	learn: 1.2030156	total: 17.2s	remaining: 17.9s
490:	learn: 1.2023417	total: 17.2s	remaining: 17.9s
491:	learn: 1.2014431	total: 17.3s	remaining: 17.8s
492:	learn: 1.2011451	total: 17.3s	remaining: 17.8s
493:	learn: 1.2008136	total: 17.3s	remaining: 17.8s
494:	learn: 1.2001611	total: 17.4s	remaining: 17.7s
495:	learn: 1.1994716	total: 17.4s	remaining: 17.7s
496:	learn: 1.1986920	total: 17.4s	remaining: 17.6s
497:	learn: 1.1984229	total: 17.5s	remaining: 17.6s
498:	learn: 1.1978024	total: 17.5s	remaining: 17.6s
499:	learn: 1.1968434	total: 17.5s	remaining: 17.5s
500:	learn: 1.1956952	total: 17.6s	remaining: 17.5s
501:	learn: 1.1952615	total: 17.6s	remaining: 17.5s
502:	learn: 1.1942665	total: 17.7s	remaining: 17.5s
503:	learn: 1.1937533	total: 17.7s	remaining: 17.4s
504:	learn: 1.1932929	total: 17.9s	remaining: 17.5s
505:	learn: 1.1928491	total: 17.9s	remaining: 17.5s
506:	learn: 1.1923269	total: 18s	remaining: 17.5s
507:	learn: 1.1918083	total: 18s	remaining: 17.5s
508:	learn: 1.18

649:	learn: 1.1076474	total: 23.6s	remaining: 12.7s
650:	learn: 1.1070524	total: 23.6s	remaining: 12.7s
651:	learn: 1.1067481	total: 23.6s	remaining: 12.6s
652:	learn: 1.1061407	total: 23.7s	remaining: 12.6s
653:	learn: 1.1058984	total: 23.7s	remaining: 12.5s
654:	learn: 1.1057124	total: 23.7s	remaining: 12.5s
655:	learn: 1.1050763	total: 23.7s	remaining: 12.4s
656:	learn: 1.1046484	total: 23.8s	remaining: 12.4s
657:	learn: 1.1044558	total: 23.8s	remaining: 12.4s
658:	learn: 1.1040286	total: 23.8s	remaining: 12.3s
659:	learn: 1.1028966	total: 23.9s	remaining: 12.3s
660:	learn: 1.1026302	total: 23.9s	remaining: 12.2s
661:	learn: 1.1022670	total: 23.9s	remaining: 12.2s
662:	learn: 1.1020394	total: 23.9s	remaining: 12.2s
663:	learn: 1.1017873	total: 24s	remaining: 12.1s
664:	learn: 1.1012528	total: 24s	remaining: 12.1s
665:	learn: 1.1008291	total: 24s	remaining: 12s
666:	learn: 1.1002528	total: 24.1s	remaining: 12s
667:	learn: 1.0999989	total: 24.1s	remaining: 12s
668:	learn: 1.0996642	to

810:	learn: 1.0375031	total: 28.9s	remaining: 6.75s
811:	learn: 1.0371668	total: 29s	remaining: 6.71s
812:	learn: 1.0367962	total: 29s	remaining: 6.67s
813:	learn: 1.0357067	total: 29s	remaining: 6.63s
814:	learn: 1.0354507	total: 29.1s	remaining: 6.6s
815:	learn: 1.0351035	total: 29.1s	remaining: 6.56s
816:	learn: 1.0347281	total: 29.1s	remaining: 6.52s
817:	learn: 1.0345056	total: 29.2s	remaining: 6.49s
818:	learn: 1.0336896	total: 29.2s	remaining: 6.45s
819:	learn: 1.0331019	total: 29.2s	remaining: 6.41s
820:	learn: 1.0325834	total: 29.2s	remaining: 6.37s
821:	learn: 1.0317164	total: 29.3s	remaining: 6.34s
822:	learn: 1.0309017	total: 29.3s	remaining: 6.3s
823:	learn: 1.0305001	total: 29.3s	remaining: 6.26s
824:	learn: 1.0302139	total: 29.3s	remaining: 6.22s
825:	learn: 1.0298760	total: 29.4s	remaining: 6.19s
826:	learn: 1.0295434	total: 29.4s	remaining: 6.15s
827:	learn: 1.0292236	total: 29.4s	remaining: 6.11s
828:	learn: 1.0284787	total: 29.5s	remaining: 6.08s
829:	learn: 1.028285

972:	learn: 0.9741322	total: 34.3s	remaining: 951ms
973:	learn: 0.9738511	total: 34.3s	remaining: 915ms
974:	learn: 0.9735873	total: 34.3s	remaining: 880ms
975:	learn: 0.9733528	total: 34.4s	remaining: 845ms
976:	learn: 0.9730672	total: 34.4s	remaining: 810ms
977:	learn: 0.9727840	total: 34.4s	remaining: 775ms
978:	learn: 0.9723274	total: 34.5s	remaining: 740ms
979:	learn: 0.9716078	total: 34.5s	remaining: 705ms
980:	learn: 0.9714294	total: 34.6s	remaining: 670ms
981:	learn: 0.9712572	total: 34.6s	remaining: 634ms
982:	learn: 0.9709982	total: 34.6s	remaining: 599ms
983:	learn: 0.9706620	total: 34.7s	remaining: 564ms
984:	learn: 0.9694069	total: 34.7s	remaining: 529ms
985:	learn: 0.9691877	total: 34.8s	remaining: 494ms
986:	learn: 0.9689225	total: 34.8s	remaining: 459ms
987:	learn: 0.9686875	total: 34.9s	remaining: 424ms
988:	learn: 0.9684634	total: 35s	remaining: 389ms
989:	learn: 0.9682794	total: 35s	remaining: 354ms
990:	learn: 0.9680665	total: 35s	remaining: 318ms
991:	learn: 0.9677

### GradientBoostingClassifier

In [23]:
from sklearn.ensemble import GradientBoostingClassifier

classifier = GradientBoostingClassifier()
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print ("Confusion Matrix:\n",cm)

# Accuracy, Precision and Recall
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
score1 = accuracy_score(y_test,y_pred)
score2 = precision_score(y_test,y_pred,average='micro')
score3= recall_score(y_test,y_pred,average='micro')
print("\n")
print("Accuracy is ",round(score1*100,2),"%")
print("Precision is ",round(score2,2))
print("Recall is ",round(score3,2))

Confusion Matrix:
 [[ 0  1  0  0  0  2  0  0  0  0  0  0  0]
 [ 1 18  2  0  1 20  2  1  0  1  2  0  1]
 [ 0  0  1  0  0  4  0  2  0  0  0  0  1]
 [ 0  1  0  0  0  1  0  1  0  0  2  0  0]
 [ 0  1  0  0  0  7  0  0  0  0  3  1  0]
 [ 0  2  2  2  0 69  1  3  2  2  1  1  1]
 [ 0  2  0  0  1  5  0  1  0  0  0  0  0]
 [ 0  2  0  2  0 20  0  4  0  2  4  0  2]
 [ 0  0  0  0  0  4  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  7  0  0  0  3  0  0  2]
 [ 0  1  0  0  1 19  0  2  0  1 18  0  1]
 [ 0  1  0  0  0  3  0  0  0  0  0  0  0]
 [ 0  0  0  1  1 10  0  2  0  0  1  2 12]]


Accuracy is  41.67 %
Precision is  0.42
Recall is  0.42
