In [8]:
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, f1_score, classification_report
import joblib

In [2]:
df = pd.read_csv('nlp_dataset.csv')
df.head(10)

Unnamed: 0,Comment,Emotion
0,i seriously hate one subject to death but now ...,fear
1,im so full of life i feel appalled,anger
2,i sit here to write i start to dig out my feel...,fear
3,ive been really angry with r and i feel like a...,joy
4,i feel suspicious if there is no one outside l...,fear
5,i feel jealous becasue i wanted that kind of l...,anger
6,when a friend of mine keeps telling me morbid ...,anger
7,i finally fell asleep feeling angry useless an...,anger
8,i feel a bit annoyed and antsy in a good way,anger
9,i feel like i ve regained another vital part o...,joy


In [3]:
stop_words = set(stopwords.words('english'))
def clean_text(text):
    text = text.lower()
    text = re.sub(r'[^a-z\s]', '', text)
    tokens = text.split()
    tokens = [w for w in tokens if w not in stop_words]
    return " ".join(tokens)

In [5]:
df.columns

Index(['Comment', 'Emotion'], dtype='object')

In [6]:
df['cleaned'] = df['Comment'].apply(clean_text)

In [7]:
df

Unnamed: 0,Comment,Emotion,cleaned
0,i seriously hate one subject to death but now ...,fear,seriously hate one subject death feel reluctan...
1,im so full of life i feel appalled,anger,im full life feel appalled
2,i sit here to write i start to dig out my feel...,fear,sit write start dig feelings think afraid acce...
3,ive been really angry with r and i feel like a...,joy,ive really angry r feel like idiot trusting fi...
4,i feel suspicious if there is no one outside l...,fear,feel suspicious one outside like rapture happe...
...,...,...,...
5932,i begun to feel distressed for you,fear,begun feel distressed
5933,i left feeling annoyed and angry thinking that...,anger,left feeling annoyed angry thinking center stu...
5934,i were to ever get married i d have everything...,joy,ever get married everything ready offer got to...
5935,i feel reluctant in applying there because i w...,fear,feel reluctant applying want able find company...


Implement feature extraction using CountVectorizer or TfidfVectorizer. Describe how the
chosen method transforms the text data into numerical features.

In [9]:
tfidf = TfidfVectorizer(max_features=5000)
x = tfidf.fit_transform(df['cleaned'])

Here the tfidf vectorizer will Converts text into TF-IDF scores (Term Frequency–Inverse Document Frequency) instead of raw counts.

In [11]:
le = LabelEncoder()
y = le.fit_transform(df['Emotion'])

In [12]:
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

Train the following machine learning models

In [13]:
nb_model = MultinomialNB()
nb_model.fit(X_train , y_train)

0,1,2
,alpha,1.0
,force_alpha,True
,fit_prior,True
,class_prior,


In [16]:
svm_model = LinearSVC()
svm_model.fit(X_train,y_train)

0,1,2
,penalty,'l2'
,loss,'squared_hinge'
,dual,'auto'
,tol,0.0001
,C,1.0
,multi_class,'ovr'
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,verbose,0


In [20]:
print('===== NAVIS BAYIS ====== ')
print('-------------------------------------')
y_pred_nb = nb_model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred_nb))
print("F1-score:", f1_score(y_test, y_pred_nb, average='weighted'))
print(classification_report(y_test, y_pred_nb, target_names=le.classes_))

-------------------------------------
Accuracy: 0.9099326599326599
F1-score: 0.909789362255079
              precision    recall  f1-score   support

       anger       0.88      0.95      0.91       392
        fear       0.92      0.92      0.92       416
         joy       0.95      0.86      0.90       380

    accuracy                           0.91      1188
   macro avg       0.91      0.91      0.91      1188
weighted avg       0.91      0.91      0.91      1188



In [21]:
print('===== SUPPORT VECTOR MACHINE ====== ')
print('-------------------------------------')
y_pred_nb = svm_model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred_nb))
print("F1-score:", f1_score(y_test, y_pred_nb, average='weighted'))
print(classification_report(y_test, y_pred_nb, target_names=le.classes_))

-------------------------------------
Accuracy: 0.952020202020202
F1-score: 0.9519894469368267
              precision    recall  f1-score   support

       anger       0.94      0.96      0.95       392
        fear       0.97      0.93      0.95       416
         joy       0.95      0.97      0.96       380

    accuracy                           0.95      1188
   macro avg       0.95      0.95      0.95      1188
weighted avg       0.95      0.95      0.95      1188



In [22]:
joblib.dump(nb_model, "naive_bayes_model.pkl")
joblib.dump(svm_model, "svm_model.pkl")
joblib.dump(tfidf, "tfidf_vectorizer.pkl")
joblib.dump(le, "label_encoder.pkl")

['label_encoder.pkl']

In [30]:
nb_model = joblib.load("naive_bayes_model.pkl")
svm_model = joblib.load("svm_model.pkl")
tfidf = joblib.load("tfidf_vectorizer.pkl")
le = joblib.load("label_encoder.pkl")



In [33]:
new_text = [
    "I am feeling very sad today",                  # Sad
    "This is the best day of my life!",            # Happy
    "I am so angry at what happened",              # Angry
    "I feel scared walking alone at night",        # Fear
    "The movie was touching and heartwarming",     # Happy
    "I am frustrated with my work deadlines",      # Angry / Frustrated
    "I am so excited about my new project",       # Happy / Excited
    "I feel lonely and depressed",                 # Sad
    "I am nervous about the exam tomorrow",        # Fear / Anxiety
    "The performance was terrible and disappointing",  # Angry / Negative
    "that was very bad performance" ,
    "this movie is so good right"
]


In [None]:
cleaned = [clean_text(t) for t in new_text]
X_new = tfidf.transform(cleaned)


pred_nb = le.inverse_transform(nb_model.predict(X_new))

pred_svm = le.inverse_transform(svm_model.predict(X_new))

for i in range(len(new_text)):
    print(new_text[i])
    print("Naive Bayes Prediction:", pred_nb[i])
    print("SVM Prediction:", pred_svm[i])
    print("-"*40)

I am feeling very sad today
Naive Bayes Prediction: anger
SVM Prediction: joy
----------------------------------------
This is the best day of my life!
Naive Bayes Prediction: joy
SVM Prediction: joy
----------------------------------------
I am so angry at what happened
Naive Bayes Prediction: anger
SVM Prediction: anger
----------------------------------------
I feel scared walking alone at night
Naive Bayes Prediction: fear
SVM Prediction: fear
----------------------------------------
The movie was touching and heartwarming
Naive Bayes Prediction: anger
SVM Prediction: anger
----------------------------------------
I am frustrated with my work deadlines
Naive Bayes Prediction: anger
SVM Prediction: anger
----------------------------------------
I am so excited about my new project
Naive Bayes Prediction: joy
SVM Prediction: joy
----------------------------------------
I feel lonely and depressed
Naive Bayes Prediction: anger
SVM Prediction: anger
------------------------------------