In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split, cross_val_score, cross_validate, cross_val_predict, KFold, StratifiedKFold
from sklearn.naive_bayes import BernoulliNB, MultinomialNB
from sklearn.metrics import confusion_matrix
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score, recall_score, f1_score, classification_report, confusion_matrix, make_scorer, precision_score
from sklearn.ensemble import AdaBoostClassifier
from sklearn.datasets import make_classification
from numpy import mean

In [2]:
#Input Dataset
dataset = pd.read_csv('hasilpreproces_imba.csv', delimiter = ';', quoting = 3)
corpus = dataset['text'].tolist()

In [3]:
#TF IDF
vectorizer = TfidfVectorizer()
x = vectorizer.fit_transform(corpus).toarray()
y = dataset.iloc[:, 1].values

In [4]:
model = MultinomialNB()
fold = StratifiedKFold(10,shuffle=False)
for train_index, test_index in fold.split(x, y):
    X_train, X_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    cm = confusion_matrix(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred,average='weighted',zero_division=0)
    recall = recall_score(y_test, y_pred, average='weighted',zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted',zero_division=0)
    print(cm)
    print('akurasi = ',accuracy)
    print('precision = ',precision)
    print('recall = ',recall)
    print('f1 = ',f1)
    print('========================')

[[  0   0  47]
 [  0   0  50]
 [  0   0 370]]
akurasi =  0.7922912205567452
precision =  0.627725378171297
recall =  0.7922912205567452
f1 =  0.7004725247455095
[[  0   0  47]
 [  0   0  50]
 [  0   0 370]]
akurasi =  0.7922912205567452
precision =  0.627725378171297
recall =  0.7922912205567452
f1 =  0.7004725247455095
[[  0   0  47]
 [  0   0  50]
 [  0   0 370]]
akurasi =  0.7922912205567452
precision =  0.627725378171297
recall =  0.7922912205567452
f1 =  0.7004725247455095
[[  0   0  47]
 [  0   2  48]
 [  0   0 370]]
akurasi =  0.7965738758029979
precision =  0.7374916534272754
recall =  0.7965738758029979
f1 =  0.7103861787214568
[[  0   0  47]
 [  0   0  50]
 [  0   0 370]]
akurasi =  0.7922912205567452
precision =  0.627725378171297
recall =  0.7922912205567452
f1 =  0.7004725247455095
[[  0   0  47]
 [  0   0  50]
 [  0   0 370]]
akurasi =  0.7922912205567452
precision =  0.627725378171297
recall =  0.7922912205567452
f1 =  0.7004725247455095
[[  0   0  47]
 [  0   0  49]
 [ 

In [5]:
model_boost =  AdaBoostClassifier(base_estimator = MultinomialNB())
fold = StratifiedKFold(10,shuffle=False)
for train_index, test_index in fold.split(x, y):
    X_train, X_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]
    model_boost.fit(X_train, y_train)
    y_pred = model_boost.predict(X_test)
    cm = confusion_matrix(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred,average='weighted',zero_division=0)
    recall = recall_score(y_test, y_pred, average='weighted',zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted',zero_division=0)
    print(cm)
    print('akurasi = ',accuracy)
    print('precision = ',precision)
    print('recall = ',recall)
    print('f1 = ',f1)
    print('========================')

[[  0   0  47]
 [  0   0  50]
 [  0   0 370]]
akurasi =  0.7922912205567452
precision =  0.627725378171297
recall =  0.7922912205567452
f1 =  0.7004725247455095
[[  0   0  47]
 [  0   0  50]
 [  0   0 370]]
akurasi =  0.7922912205567452
precision =  0.627725378171297
recall =  0.7922912205567452
f1 =  0.7004725247455095
[[  0   0  47]
 [  0   0  50]
 [  0   0 370]]
akurasi =  0.7922912205567452
precision =  0.627725378171297
recall =  0.7922912205567452
f1 =  0.7004725247455095
[[  0   0  47]
 [  0   0  50]
 [  0   0 370]]
akurasi =  0.7922912205567452
precision =  0.627725378171297
recall =  0.7922912205567452
f1 =  0.7004725247455095
[[  0   0  47]
 [  0   0  50]
 [  0   0 370]]
akurasi =  0.7922912205567452
precision =  0.627725378171297
recall =  0.7922912205567452
f1 =  0.7004725247455095
[[  0   0  47]
 [  0   0  50]
 [  0   0 370]]
akurasi =  0.7922912205567452
precision =  0.627725378171297
recall =  0.7922912205567452
f1 =  0.7004725247455095
[[  0   0  47]
 [  0   0  49]
 [  

In [28]:
#Cross Val NB
classifierNB = MultinomialNB()
cv_nb = cross_val_score(classifierNB, x, y, cv = 10)
print(cv_nb)
print("Rata - Rata Akurasi : ",mean(cv_nb))

[0.70152091 0.70342205 0.70532319 0.70532319 0.70912548 0.70722433
 0.70912548 0.71102662 0.70722433 0.71047619]
Rata - Rata Akurasi :  0.7069791779829803


In [19]:
#Cross Val Adaboost NB 
classifierBoost = AdaBoostClassifier(base_estimator = classifierNB, algorithm="SAMME")
cv_ada = cross_val_score(classifierBoost, x, y, cv = 10)
print(cv_ada)
print("Rata - Rata Akurasi : ",mean(cv_ada))

[0.70342205 0.70342205 0.70342205 0.70342205 0.70342205 0.70532319
 0.70342205 0.70342205 0.67870722 0.7047619 ]
Rata - Rata Akurasi :  0.701274669563643


In [13]:
#Split
x_train,x_test, y_train, y_test = train_test_split(x, y, test_size = 0.1)

In [14]:
#Test Klasifikasi NB
classifierNB.fit(x_train, y_train)
preds = classifierNB.predict(x)

In [15]:
#Nilai Test Confusion Matrix NB
score_nb = classification_report(y, preds, zero_division=0)
print('Score Confusion Matrix Data Test Naive Bayes :\n\n', score_nb)
print('Accuracy NB : ',accuracy_score(y, preds))

Score Confusion Matrix Data Test Naive Bayes :

               precision    recall  f1-score   support

          -1       1.00      0.14      0.25      1062
           0       1.00      0.03      0.06       499
           1       0.73      1.00      0.84      3698

    accuracy                           0.73      5259
   macro avg       0.91      0.39      0.38      5259
weighted avg       0.81      0.73      0.65      5259

Accuracy NB :  0.7347404449515117


In [16]:
#Klasifikasi Test Adaboost NB
classifierBoost.fit(x_train, y_train)
pred_boost = classifierBoost.predict(x)

In [17]:
#Nilai Test Confusion Matrix Adaboost NB
score_ad = classification_report(y,pred_boost, zero_division=0)
print('Score Confusion Matrix Data Test Adaboost Naive Bayes :\n\n', score_ad)
print('Accuracy Adaboost NB : ',accuracy_score(y, pred_boost))

Score Confusion Matrix Data Test Adaboost Naive Bayes :

               precision    recall  f1-score   support

          -1       0.85      0.12      0.20      1062
           0       0.00      0.00      0.00       499
           1       0.72      0.99      0.83      3698

    accuracy                           0.72      5259
   macro avg       0.52      0.37      0.35      5259
weighted avg       0.68      0.72      0.63      5259

Accuracy Adaboost NB :  0.7223806807377828
