In [3]:
# Input: every path and constant the notebook reads, gathered in one place.

# Raw labelled datasets
path_train = '/content/emotion-labels-train.csv'
path_test = '/content/emotion-labels-test.csv'

# Train / validation splits (features and targets stored separately)
path_x_train = '/content/X_train.csv'
path_x_val = '/content/X_val.csv'
path_y_train = '/content/y_train.csv'
path_y_val = '/content/y_val.csv'

# Serialized model and tokenizer
path_model = '/content/text_classifier_lstm.hdf5'
path_tokenizer = '/content/text_emotion_tokenizer.pickle'

# Emotion labels; this order is reused for the probability columns and
# for the confusion-matrix row/column names further down.
classes = ['anger', 'fear', 'joy', 'sadness']

In [4]:
import pickle
import pandas as pd

from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from modules.tf_modules import clean_text, predict_text_emotion

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


In [7]:
# Load data
# The split CSVs carry their original row index in the 'Unnamed: 0' column,
# so it is restored as the DataFrame index on read.
X_train = pd.read_csv(path_x_train, index_col='Unnamed: 0')
X_val = pd.read_csv(path_x_val, index_col='Unnamed: 0')

# Collapse each target frame to one label per row: idxmax(axis=1) yields the
# name of the column holding the row's largest value
# (presumably one-hot encoded targets -- TODO confirm).
y_train = pd.read_csv(path_y_train, index_col='Unnamed: 0').idxmax(axis=1)
y_val = pd.read_csv(path_y_val, index_col='Unnamed: 0').idxmax(axis=1)

# The test set is read as-is with the default integer index.
test = pd.read_csv(path_test)

X_train.head(3)

Unnamed: 0,cleaned_text
2729,best revenge massive success
3359,im do piano blue play smooth talk ass nexttt
2614,time wound heelsnn drunkjesus rt lol wisdom qu...


In [9]:
# Load model and tokenizer
model = load_model(path_model)

# Open the tokenizer file in a context manager so the handle is closed as soon
# as unpickling finishes (a bare open() passed to pickle.load is never closed
# explicitly).
# NOTE: pickle.load can execute arbitrary code -- only load tokenizer files
# that come from a trusted source.
with open(path_tokenizer, 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

# Train

In [42]:
# Predict the dataset
# Run the classifier on every cleaned training text. As the displayed output
# shows, each result is a (label, per-class scores) tuple.
pred_train = X_train['cleaned_text'].apply(
    lambda text: predict_text_emotion(text, tokenizer, model, classes)
)

# Keep the raw tuples next to the text; the following cell unpacks them.
X_train['pred'] = pred_train
X_train.head(3)

Unnamed: 0,cleaned_text,pred
2729,best revenge massive success,"(anger, [0.88, 0.15, 0.28, 0.44])"
3359,im do piano blue play smooth talk ass nexttt,"(sadness, [0.33, 0.47, 0.2, 0.85])"
2614,time wound heelsnn drunkjesus rt lol wisdom qu...,"(anger, [0.98, 0.03, 0.22, 0.21])"


In [46]:
# Split each (label, scores) tuple into a 'pred' column and a 'prob' column.
pred_train = pd.DataFrame(
    X_train['pred'].tolist(),
    index=X_train.index,
    columns=['pred', 'prob'],
)

# Expand the score lists into one column per class, in `classes` order.
prob = pd.DataFrame(
    pred_train['prob'].to_list(),
    index=pred_train.index,
    columns=classes,
)

# Final layout: predicted label followed by the per-class score columns.
pred_train = pd.concat([pred_train['pred'], prob], axis=1)
print(pred_train.shape)
pred_train.head(3)

(2890, 5)


Unnamed: 0,pred,anger,fear,joy,sadness
2729,anger,0.88,0.15,0.28,0.44
3359,sadness,0.33,0.47,0.2,0.85
2614,anger,0.98,0.03,0.22,0.21


In [50]:
# Model evaluation
# Pass labels=classes explicitly: without it scikit-learn orders the matrix by
# the sorted labels found in the data, which only lines up with the
# index/column names attached below while `classes` happens to be sorted and
# every class is present. With it, the matrix is always len(classes) square
# and in `classes` order.
cm_train = confusion_matrix(y_train, pred_train['pred'], labels=classes)

print('Confusion Matrix')
print(pd.DataFrame(cm_train, index=classes, columns=classes))
print('')
print('Classification Report')
# Same explicit label order for the per-class report rows.
print(classification_report(y_train, pred_train['pred'], labels=classes))

Confusion Matrix
         anger  fear  joy  sadness
anger      674     1    1       10
fear         4   893    0       20
joy          0     4  654        0
sadness      6     5    0      618

Classification Report
              precision    recall  f1-score   support

       anger       0.99      0.98      0.98       686
        fear       0.99      0.97      0.98       917
         joy       1.00      0.99      1.00       658
     sadness       0.95      0.98      0.97       629

    accuracy                           0.98      2890
   macro avg       0.98      0.98      0.98      2890
weighted avg       0.98      0.98      0.98      2890



In [51]:
# Save
# Write the training predictions (label + per-class scores) to a CSV in the
# current working directory.
pred_train.to_csv(path_or_buf='nlp_pred_train.csv')

# Validation

In [85]:
# Predict the dataset
# Run the classifier on every cleaned validation text and store the raw
# (label, per-class scores) result alongside it; the following cell unpacks it.
X_val['pred'] = X_val['cleaned_text'].apply(
    lambda text: predict_text_emotion(text, tokenizer, model, classes)
)
X_val.head()

Unnamed: 0,cleaned_text,pred
715,breezy today cant go back school night bare le...,"(joy, [0.09, 0.82, 0.95, 0.28])"
284,watch amaze lively broadcast musically,"(joy, [0.03, 0.48, 1.0, 0.02])"
2266,everyone wigan biggest club bitter,"(anger, [0.93, 0.09, 0.24, 0.44])"
1241,understand concern look foundation contributio...,"(fear, [0.12, 0.95, 0.63, 0.47])"
3042,couldnt get either bits start droop shouldnt d...,"(fear, [0.06, 0.97, 0.26, 0.79])"


In [103]:
# Split each (label, scores) tuple into a 'pred' column and a 'prob' column.
pred_val = pd.DataFrame(
    X_val['pred'].tolist(),
    index=X_val.index,
    columns=['pred', 'prob'],
)

# Expand the score lists into one column per class, in `classes` order.
prob = pd.DataFrame(
    pred_val['prob'].to_list(),
    index=pred_val.index,
    columns=classes,
)

# Final layout: predicted label followed by the per-class score columns.
pred_val = pd.concat([pred_val['pred'], prob], axis=1)
print(pred_val.shape)
pred_val.head(3)

(723, 5)


Unnamed: 0,pred,anger,fear,joy,sadness
715,joy,0.09,0.82,0.95,0.28
284,joy,0.03,0.48,1.0,0.02
2266,anger,0.93,0.09,0.24,0.44


In [104]:
# Model evaluation
# Pass labels=classes explicitly: without it scikit-learn orders the matrix by
# the sorted labels found in the data, which only lines up with the
# index/column names attached below while `classes` happens to be sorted and
# every class is present. With it, the matrix is always len(classes) square
# and in `classes` order.
cm_val = confusion_matrix(y_val, pred_val['pred'], labels=classes)

print('Confusion Matrix')
print(pd.DataFrame(cm_val, index=classes, columns=classes))
print('')
print('Classification Report')
# Same explicit label order for the per-class report rows.
print(classification_report(y_val, pred_val['pred'], labels=classes))

Confusion Matrix
         anger  fear  joy  sadness
anger      143    12    3       13
fear        12   196    6       16
joy          4     8  151        2
sadness     11    19    8      119

Classification Report
              precision    recall  f1-score   support

       anger       0.84      0.84      0.84       171
        fear       0.83      0.85      0.84       230
         joy       0.90      0.92      0.91       165
     sadness       0.79      0.76      0.78       157

    accuracy                           0.84       723
   macro avg       0.84      0.84      0.84       723
weighted avg       0.84      0.84      0.84       723



In [110]:
# Save
# Write the validation predictions (label + per-class scores) to a CSV in the
# current working directory.
pred_val.to_csv(path_or_buf='nlp_pred_val.csv')

# Test

In [106]:
# Predict the dataset
# NOTE(review): the train/validation cells feed the pre-processed
# 'cleaned_text' column, whereas this cell feeds the raw 'text' column.
# Presumably predict_text_emotion cleans its input itself -- confirm, since
# otherwise the test metrics are not comparable with the other two splits.
test['pred'] = test['text'].apply(
    lambda text: predict_text_emotion(text, tokenizer, model, classes)
)
test.head(3)

Unnamed: 0,text,label,pred
0,You must be knowing #blithe means (adj.) Happ...,joy,"(joy, [0.18, 0.29, 0.99, 0.07])"
1,Old saying 'A #smile shared is one gained for ...,joy,"(joy, [0.28, 0.45, 0.91, 0.27])"
2,Bridget Jones' Baby was bloody hilarious 😅 #Br...,joy,"(joy, [0.39, 0.27, 0.93, 0.14])"


In [107]:
# Split each (label, scores) tuple into a 'pred' column and a 'prob' column.
pred_test = pd.DataFrame(
    test['pred'].tolist(),
    index=test.index,
    columns=['pred', 'prob'],
)

# Expand the score lists into one column per class, in `classes` order.
prob = pd.DataFrame(
    pred_test['prob'].to_list(),
    index=pred_test.index,
    columns=classes,
)

# Final layout: predicted label followed by the per-class score columns.
pred_test = pd.concat([pred_test['pred'], prob], axis=1)
print(pred_test.shape)
pred_test.head(3)

(3142, 5)


Unnamed: 0,pred,anger,fear,joy,sadness
0,joy,0.18,0.29,0.99,0.07
1,joy,0.28,0.45,0.91,0.27
2,joy,0.39,0.27,0.93,0.14


In [108]:
# Model evaluation
# Pass labels=classes explicitly: without it scikit-learn orders the matrix by
# the sorted labels found in the data, which only lines up with the
# index/column names attached below while `classes` happens to be sorted and
# every class is present. With it, the matrix is always len(classes) square
# and in `classes` order.
cm_test = confusion_matrix(test['label'], pred_test['pred'], labels=classes)

print('Confusion Matrix')
print(pd.DataFrame(cm_test, index=classes, columns=classes))
print('')
print('Classification Report')
# Same explicit label order for the per-class report rows.
print(classification_report(test['label'], pred_test['pred'], labels=classes))

Confusion Matrix
         anger  fear  joy  sadness
anger      573    62   55       70
fear        38   766   76      115
joy         14    59  608       33
sadness     51    70   33      519

Classification Report
              precision    recall  f1-score   support

       anger       0.85      0.75      0.80       760
        fear       0.80      0.77      0.78       995
         joy       0.79      0.85      0.82       714
     sadness       0.70      0.77      0.74       673

    accuracy                           0.78      3142
   macro avg       0.78      0.79      0.78      3142
weighted avg       0.79      0.78      0.79      3142



In [109]:
# Save
# Write the test predictions (label + per-class scores) to a CSV in the
# current working directory.
pred_test.to_csv(path_or_buf='nlp_pred_test.csv')