In [28]:
import pickle

import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split, KFold
from sklearn.naive_bayes import MultinomialNB
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC

from PlatinumGroup1BaseData import fullcleanse, textcleansing

In [29]:
# Location of the pre-cleaned dataset.
# NOTE(review): this is an absolute, machine-specific path; anyone else running
# the notebook has to point DATA_PATH at their own copy of the file.
DATA_PATH = '/Users/januardopanggabean/Challenge Platinum Binar/data/processed_data.csv'

# Fixed seed so the train/validation/test partitions (and therefore every
# metric computed from them) are the same on each Restart & Run All.
RANDOM_STATE = 0

processed_data = pd.read_csv(DATA_PATH)

# First hold out 20% of all rows as the test set, then hold out 20% of the
# remainder as the validation set (i.e. 64% train / 16% val / 20% test).
train_data, test_data = train_test_split(
    processed_data, test_size=0.2, random_state=RANDOM_STATE
)
train_data, val_data = train_test_split(
    train_data, test_size=0.2, random_state=RANDOM_STATE
)

# Train + validation rows stacked back together (everything except the test set).
merged_data = pd.concat((train_data, val_data), axis=0)

In [30]:
# Bag-of-words features: the vocabulary is learned from the training split only,
# and the validation/test splits are projected onto that same vocabulary.
cv = CountVectorizer()
train_transformed = cv.fit_transform(train_data['clean_text'])
val_transformed, test_transformed = (
    cv.transform(split['clean_text']) for split in (val_data, test_data)
)

In [31]:
# Encode the string labels as integer class ids. The mapping is learned from
# the training split and reused unchanged for validation and test.
le = LabelEncoder()
train_label = le.fit_transform(train_data['labels'])
val_label, test_label = (
    le.transform(split['labels']) for split in (val_data, test_data)
)

In [32]:
# Candidate classifiers to compare on the bag-of-words features.
mnb = MultinomialNB()
# The MLP's weight initialisation and batch shuffling are random; pin the seed
# so its scores are reproducible between runs.
mlp = MLPClassifier(random_state=0)
# max_iter raised from the library default to give the solver room to converge.
lr = LogisticRegression(max_iter=1000)
svc = SVC()

In [33]:
# Fit every candidate on the training split, in the order MLP, logistic
# regression, naive Bayes, SVC. Each *_model name is bound to whatever the
# corresponding estimator's fit() call returns.
MLPC_model, LR_Model, MNB_model, SVC_model = (
    estimator.fit(train_transformed, train_label)
    for estimator in (mlp, lr, mnb, svc)
)

In [34]:
# Persist the fitted MLP. The context manager guarantees the file handle is
# flushed and closed even if pickling raises.
# NOTE(review): the file is a pickle despite the .h5 extension; the name is
# kept as-is so any existing loader still finds it.
with open('model_mlpc.h5', 'wb') as model_file:
    pickle.dump(MLPC_model, model_file)

In [35]:
# Persist the fitted CountVectorizer so inference code can rebuild the same
# feature space. The context manager closes the file handle deterministically.
with open('CountVectorizer.pkl', 'wb') as vectorizer_file:
    pickle.dump(cv, vectorizer_file)

In [36]:
# Persist the fitted LabelEncoder so predicted class ids can be mapped back to
# label strings later. The context manager closes the file handle deterministically.
with open('LabelEncoder.pkl', 'wb') as encoder_file:
    pickle.dump(le, encoder_file)

In [37]:
# Score every fitted model on the validation features.
y_predict_mlp, y_predict_lr, y_predict_mnb, y_predict_svc = (
    estimator.predict(val_transformed) for estimator in (mlp, lr, mnb, svc)
)

In [38]:
# Print one classification report per model on the validation split, each
# preceded by a separator line and a heading, with a closing separator at the end.
# The heading strings are reproduced verbatim from the original cell.
separator = "############################################################################"
validation_reports = (
    ('Accuracy for Logistic Regression', y_predict_lr),
    ('Accuracy for Multi Layer Process', y_predict_mlp),
    ('Accuracy for Naive Bayes Method', y_predict_mnb),
    ('Accuracy for SVC', y_predict_svc),
)

for heading, predictions in validation_reports:
    print(separator)
    print(heading)
    print(classification_report(y_pred=predictions, y_true=val_label))

print(separator)


############################################################################
Accuracy for Logistic Regression
              precision    recall  f1-score   support

           0       0.81      0.84      0.83       545
           1       0.82      0.79      0.81       189
           2       0.92      0.91      0.91      1026

    accuracy                           0.88      1760
   macro avg       0.85      0.85      0.85      1760
weighted avg       0.88      0.88      0.88      1760

############################################################################
Accuracy for Multi Layer Process
              precision    recall  f1-score   support

           0       0.79      0.80      0.80       545
           1       0.82      0.73      0.77       189
           2       0.89      0.91      0.90      1026

    accuracy                           0.86      1760
   macro avg       0.84      0.81      0.82      1760
weighted avg       0.85      0.86      0.85      1760

##################

In [39]:
# 5-fold splitter; rows are shuffled with a fixed seed so the folds are the
# same on every run.
k_fold = KFold(
    n_splits=5,
    shuffle=True,
    random_state=0,
)

In [40]:
# Cross-validate the MLP pipeline on the training split.
#
# Each fold fits its OWN vectorizer, label encoder and classifier, created with
# clone() (unfitted copies carrying the same hyper-parameters). Refitting the
# shared `cv`, `le` and `mlp` objects here would silently overwrite the fitted
# state that later cells rely on: `MLPC_model` is the object returned by
# `mlp.fit`, which in scikit-learn is the estimator itself, so after the loop
# it would hold a model trained on the last fold only.
for train_index, test_index in k_fold.split(train_data):

    train_data_fold = train_data.iloc[train_index]
    test_data_fold = train_data.iloc[test_index]

    # Fresh, unfitted copies so nothing leaks between folds or out of the loop.
    fold_cv = clone(cv)
    fold_le = clone(le)
    fold_mlp = clone(mlp)

    # Vocabulary and label mapping are learned from this fold's training rows only.
    train_kdata_transformed = fold_cv.fit_transform(train_data_fold['clean_text'])
    test_kdata_transformed = fold_cv.transform(test_data_fold['clean_text'])

    train_klabel = fold_le.fit_transform(train_data_fold['labels'])
    test_klabel = fold_le.transform(test_data_fold['labels'])

    fold_mlp.fit(train_kdata_transformed, train_klabel)
    y_kpred = fold_mlp.predict(test_kdata_transformed)
    accuracy = accuracy_score(y_pred=y_kpred, y_true=test_klabel)

    # Per-fold report: full classification report followed by plain accuracy.
    print("#######################################################")
    print(" ")
    print(classification_report(y_pred = y_kpred, y_true=test_klabel))

    print("Nilai akurasi model adalah: ")
    print(accuracy)
    print(" ")
    print("#######################################################")

#######################################################
 
              precision    recall  f1-score   support

           0       0.76      0.80      0.78       460
           1       0.78      0.59      0.67       165
           2       0.87      0.89      0.88       783

    accuracy                           0.83      1408
   macro avg       0.80      0.76      0.78      1408
weighted avg       0.82      0.83      0.82      1408

Nilai akurasi model adalah: 
0.8252840909090909
 
#######################################################
#######################################################
 
              precision    recall  f1-score   support

           0       0.75      0.78      0.77       414
           1       0.80      0.59      0.68       145
           2       0.88      0.90      0.89       849

    accuracy                           0.83      1408
   macro avg       0.81      0.76      0.78      1408
weighted avg       0.83      0.83      0.83      1408

Nilai akurasi mo

In [41]:
# Smoke-test the pipeline end to end on a single hand-written sentence.
original_text = "saya pergi ke kantor untuk bekerja"

# Clean the raw sentence, then vectorise it as a one-document batch.
cleaned_text = textcleansing(original_text)
text = cv.transform([cleaned_text])

# Take the prediction for the only document and map the class id back to its label.
result = MLPC_model.predict(text)[0]
decoded_result = le.inverse_transform([result])

# Heading and value are printed on separate lines.
print("Sentiment Encoded:", result, sep="\n")
print("Sentiment Decoded:", decoded_result, sep="\n")

# Progress markers showing the notebook ran through to this point.
for marker in ('Safe Progress', 'Progress aman sampai disini'):
    print(marker)


Sentiment Encoded:
1
Sentiment Decoded:
['neutral']
Safe Progress
Progress aman sampai disini
