In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split, cross_val_score, cross_validate, cross_val_predict, KFold, StratifiedKFold
from sklearn.naive_bayes import BernoulliNB, MultinomialNB
from sklearn.metrics import confusion_matrix
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score, recall_score, f1_score, classification_report, confusion_matrix, make_scorer, precision_score
from sklearn.ensemble import AdaBoostClassifier
from sklearn.datasets import make_classification
from numpy import mean

In [2]:
#Input Dataset
dataset = pd.read_csv('hasilpreproces_balance.csv', delimiter = ';', quoting = 3)
corpus = dataset['text'].tolist()
# corpus = ['suka tidak bisa sesuai tingkat bawa rasa',
#           'tidak usah alas guna tidak tahu sopan santun',
#           'suka sama sini tampar',
#          ]

In [3]:
#TF IDF
vectorizer = TfidfVectorizer()
x = vectorizer.fit_transform(corpus).toarray()
y = dataset.iloc[:, 1].values
x

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [4]:
#----------Uncomment model yang akan digunakan-----------------
# model =  AdaBoostClassifier(base_estimator = MultinomialNB())
model = MultinomialNB()
fold = StratifiedKFold(10,shuffle=False)
for train_index, test_index in fold.split(x, y):
    X_train, X_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    cm = confusion_matrix(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred,average='macro',zero_division=0)
    recall = recall_score(y_test, y_pred, average='macro',zero_division=0)
    f1 = f1_score(y_test, y_pred, average='macro',zero_division=0)
    print(cm)
    print('akurasi = ',accuracy)
    print('precision = ',precision)
    print('recall = ',recall)
    print('f1 = ',f1)
    print('========================')

[[87  2 31]
 [35 35 50]
 [24  3 93]]
akurasi =  0.5972222222222222
precision =  0.6684577231931979
recall =  0.5972222222222222
f1 =  0.5747627998567849
[[ 86   1  33]
 [ 36  30  54]
 [ 15   3 102]]
akurasi =  0.6055555555555555
precision =  0.6832575690454608
recall =  0.6055555555555555
f1 =  0.5730217549189255
[[96  3 21]
 [24 81 15]
 [29  2 89]]
akurasi =  0.7388888888888889
precision =  0.7660519223765672
recall =  0.7388888888888889
f1 =  0.7422310086917793
[[ 93   2  25]
 [  8 110   2]
 [ 34   1  85]]
akurasi =  0.8
precision =  0.8070895959170296
recall =  0.7999999999999999
f1 =  0.8021254646597429
[[ 83   2  35]
 [  8 107   5]
 [ 13   2 105]]
akurasi =  0.8194444444444444
precision =  0.82872627269179
recall =  0.8194444444444445
f1 =  0.8199770617223447
[[ 97   2  21]
 [  5 111   4]
 [ 14   0 106]]
akurasi =  0.8722222222222222
precision =  0.8758893622836625
recall =  0.8722222222222222
f1 =  0.8731483706067077
[[ 90   2  28]
 [  5 115   0]
 [  7   1 112]]
akurasi =  0.8805

In [4]:
model_boost =  AdaBoostClassifier()
fold = StratifiedKFold(10,shuffle=False)
for train_index, test_index in fold.split(x, y):
    X_train, X_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]
    model_boost.fit(X_train, y_train)
    y_pred = model_boost.predict(X_test)
    cm = confusion_matrix(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred,average='macro',zero_division=0)
    recall = recall_score(y_test, y_pred, average='macro',zero_division=0)
    f1 = f1_score(y_test, y_pred, average='macro',zero_division=0)
    print(cm)
    print('akurasi = ',accuracy)
    print('precision = ',precision)
    print('recall = ',recall)
    print('f1 = ',f1)
    print('========================')

[[63 19 38]
 [23 85 12]
 [42 20 58]]
akurasi =  0.5722222222222222
precision =  0.5715694693349264
recall =  0.5722222222222223
f1 =  0.5711859191430012
[[ 79   9  32]
 [  5 106   9]
 [ 50  17  53]]
akurasi =  0.6611111111111111
precision =  0.6521374430234386
recall =  0.6611111111111111
f1 =  0.6528813960560226
[[ 47   3  70]
 [  3 108   9]
 [ 31   6  83]]
akurasi =  0.6611111111111111
precision =  0.6718898385565053
recall =  0.6611111111111111
f1 =  0.655902192958361
[[ 61   3  56]
 [  1 119   0]
 [ 25  11  84]]
akurasi =  0.7333333333333333
precision =  0.7319620891308731
recall =  0.7333333333333334
f1 =  0.7254124297602558
[[ 69   5  46]
 [  1 119   0]
 [ 32   8  80]]
akurasi =  0.7444444444444445
precision =  0.7376354582236936
recall =  0.7444444444444445
f1 =  0.7388241900437023
[[ 46   5  69]
 [  1 119   0]
 [ 17   3 100]]
akurasi =  0.7361111111111112
precision =  0.749157950115703
recall =  0.7361111111111112
f1 =  0.7185347585092622
[[ 31   4  85]
 [  1 119   0]
 [  4   6

In [4]:
#Cross Val NBC
classifierNB = MultinomialNB()
cv_nb = cross_val_score(classifierNB, x, y, cv = 10)
print(cv_nb)
print("Rata - Rata Akurasi : ",mean(cv_nb))

[0.59722222 0.60555556 0.73888889 0.8        0.81944444 0.87222222
 0.88055556 0.79166667 0.78333333 0.69444444]
Rata - Rata Akurasi :  0.7583333333333334


In [43]:
#Cross Val Adaboost NBC
classifierBoost = AdaBoostClassifier(base_estimator = classifierNB, algorithm = "SAMME")
cv_ada = cross_val_score(classifierBoost, x, y, cv = 10)
print(cv_ada)
print("Rata - Rata Akurasi : ",mean(cv_ada))

[0.46666667 0.49722222 0.62222222 0.73333333 0.76388889 0.82777778
 0.83055556 0.81944444 0.78888889 0.61944444]
Rata - Rata Akurasi :  0.6969444444444444


In [5]:
scoring = {'accuracy' : make_scorer(accuracy_score), 
           'precision' : make_scorer(precision_score),
           'recall' : make_scorer(recall_score), 
           'f1_score' : make_scorer(f1_score)}

kfold = model_selection.KFold(n_splits=10)
results = model_selection.cross_val_score(estimator=classifierBoost,
                                          X=x,
                                          y=y,
                                          cv=kfold,
                                          scoring=scoring)

NameError: name 'model_selection' is not defined

In [29]:
#Split
x_train,x_test, y_train, y_test = train_test_split(x, y, train_size = 0.9)

In [30]:
#Klasifikasi Data NBC
classifierNB.fit(x_train, y_train)
data_pred = classifierNB.predict(x)

In [31]:
#Confusion Matrix Data NBC
cm_Dnb = confusion_matrix(y, data_pred)
print('Confusion Matrix Data Real Naive Bayes :\n\n', cm_Dnb)

Confusion Matrix Data Real Naive Bayes :

 [[1124   13   63]
 [  50 1106   44]
 [  55    1 1144]]


In [32]:
#Nilai Data Confusion Matrix NBC
score_Dnb = classification_report(y, data_pred, zero_division=0)
print('Score Confusion Matrix Data Real Naive Bayes :\n\n', score_Dnb)

Score Confusion Matrix Data Real Naive Bayes :

               precision    recall  f1-score   support

          -1       0.91      0.94      0.93      1200
           0       0.99      0.92      0.95      1200
           1       0.91      0.95      0.93      1200

    accuracy                           0.94      3600
   macro avg       0.94      0.94      0.94      3600
weighted avg       0.94      0.94      0.94      3600



In [33]:
#Klasifikasi Data Adaboost NBC
classifierBoost.fit(x_train, y_train)
Dpred_boost = classifierBoost.predict(x)

In [34]:
#Confusion Matrix Test Adaboost NBC
cm_Dad = confusion_matrix(y, Dpred_boost)
print('Confusion Matrix Data Real Adaboost Naive Bayes :\n\n', cm_Dad)

Confusion Matrix Data Real Adaboost Naive Bayes :

 [[1063   86   51]
 [ 387  802   11]
 [ 579    1  620]]


In [35]:
#Nilai Data Confusion Matrix Adaboost NBC
score_Dad =classification_report(y,Dpred_boost, zero_division=0)
print('Score Confusion Matrix Data Real Adaboost Naive Bayes :\n\n', score_Dad)

Score Confusion Matrix Data Real Adaboost Naive Bayes :

               precision    recall  f1-score   support

          -1       0.52      0.89      0.66      1200
           0       0.90      0.67      0.77      1200
           1       0.91      0.52      0.66      1200

    accuracy                           0.69      3600
   macro avg       0.78      0.69      0.70      3600
weighted avg       0.78      0.69      0.70      3600

