In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score

In [10]:
# Load the labelled statements, clean them, and create the train/test split.
# Produces: msglbl_data (cleaned frame), X / Y (messages / numeric labels) and
# Xtrain, Xtest, Ytrain, Ytest (80/20 split, fixed seed).

# Path to the labelled statements CSV; change this to match your machine.
DATA_PATH = 'C:/Users/Asus/Downloads/Statements_data.csv'

# Passing `names` without `header` makes pandas treat every line of the file,
# including the first one, as data.
msglbl_data = pd.read_csv(DATA_PATH, names=['Message', 'Label'])
print("Initial dataset shape: ", msglbl_data.shape)

msglbl_data = msglbl_data.dropna(subset=['Label'])
print("Dataset shape after removing NaN labels: ", msglbl_data.shape)

# Normalise case and surrounding whitespace before mapping, so that values such
# as ' pos' or 'Pos' are not silently mapped to NaN and dropped below.
normalised_labels = msglbl_data['Label'].astype(str).str.strip().str.lower()
msglbl_data = msglbl_data.assign(labelnum=normalised_labels.map({'pos': 1, 'neg': 0}))
msglbl_data = msglbl_data.dropna(subset=['labelnum'])
print("Dataset shape after removing NaN labelnum: ", msglbl_data.shape)

# Rows without message text cannot be vectorised (CountVectorizer rejects NaN
# documents), so remove them here rather than failing later during training.
msglbl_data = msglbl_data.dropna(subset=['Message'])
print("Dataset shape after removing NaN messages: ", msglbl_data.shape)

# Fail fast: continuing without data would leave X/Y and the split undefined
# (or stale from an earlier run), which makes the following cells misbehave.
if msglbl_data.empty:
    raise ValueError("The dataset is empty after cleaning. Please check the data.")

# astype(str) guards against messages that pandas parsed as numbers.
X = msglbl_data["Message"].astype(str)
Y = msglbl_data.labelnum
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.2, random_state=42)

Initial dataset shape:  (18, 2)
Dataset shape after removing NaN labels:  (18, 2)
Dataset shape after removing NaN labelnum:  (17, 3)


In [11]:
# Train a small MLP on bag-of-words counts, report test metrics, then classify
# one statement typed in by the user.

# The vocabulary is learned from the training split only and reused unchanged
# for the test split and for the user's statement.
count_vect = CountVectorizer()
Xtrain_dims = count_vect.fit_transform(Xtrain)
Xtest_dims = count_vect.transform(Xtest)

# Two hidden layers (5 and 2 units); random_state fixes the weight initialisation.
clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
clf.fit(Xtrain_dims, Ytrain)
prediction = clf.predict(Xtest_dims)

print('******** Accuracy Metrics *********')
print('Accuracy : ', accuracy_score(Ytest, prediction))
# zero_division=0 reports 0 (without a warning) when the positive class is
# absent from the labels or the predictions, which is likely on a tiny test split.
print('Recall : ', recall_score(Ytest, prediction, zero_division=0))
print('Precision : ', precision_score(Ytest, prediction, zero_division=0))
# Explicit labels keep the matrix 2x2 (rows/cols ordered neg=0, pos=1) even if
# only one class shows up in the test split.
print('Confusion Matrix : \n', confusion_matrix(Ytest, prediction, labels=[0, 1]))
print(10 * "-")

# NOTE: input() blocks until the user answers, so this part needs an interactive kernel.
test_stmt = [input("Enter any statement to predict: ")]
test_dims = count_vect.transform(test_stmt)
if test_dims.nnz == 0:
    # Blank input or only unseen words: the feature vector is all zeros, so the
    # model's answer does not depend on the text that was entered.
    print("Note: the statement contains no words from the training vocabulary; "
          "the prediction below is not informed by its text.")
pred = clf.predict(test_dims)
for stmt, lbl in zip(test_stmt, pred):
    if lbl == 1:
        print(f"Statement: '{stmt}' is Positive")
    else:
        print(f"Statement: '{stmt}' is Negative")

******** Accuracy Metrics *********
Accuracy :  0.75
Recall :  0.6666666666666666
Precision :  1.0
Confusion Matrix : 
 [[1 0]
 [1 2]]
----------


Enter any statement to predict:  I feel very happy


Statement: 'I feel very happy' is Positive
