In [1]:
import pandas as pd
from nltk import *
import string
from nltk.corpus import stopwords
from textblob import TextBlob

In [2]:
df = pd.read_csv('mood_data.txt', names=['Text', 'Emotion'], sep=';')
df
print()

Unnamed: 0,Text,Emotion
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger
...,...,...
15995,i just had a very brief time in the beanbag an...,sadness
15996,i am now turning and i feel pathetic that i am...,sadness
15997,i feel strong and good overall,joy
15998,i feel like this was such a rude comment and i...,anger


In [3]:
def clean(word):
    token=word_tokenize(word)
    text=" ".join(token)

    text=[char for char in text if char not in string.punctuation ]
    text="".join(text)

    text=[char for char in text.split() if text.lower() not in stopwords.words("english")]
    return " ".join(text)

df["cleaned_text"]=df["Text"].apply(clean)

In [4]:
df

Unnamed: 0,Text,Emotion,cleaned_text
0,i didnt feel humiliated,sadness,i didnt feel humiliated
1,i can go from feeling so hopeless to so damned...,sadness,i can go from feeling so hopeless to so damned...
2,im grabbing a minute to post i feel greedy wrong,anger,im grabbing a minute to post i feel greedy wrong
3,i am ever feeling nostalgic about the fireplac...,love,i am ever feeling nostalgic about the fireplac...
4,i am feeling grouchy,anger,i am feeling grouchy
...,...,...,...
15995,i just had a very brief time in the beanbag an...,sadness,i just had a very brief time in the beanbag an...
15996,i am now turning and i feel pathetic that i am...,sadness,i am now turning and i feel pathetic that i am...
15997,i feel strong and good overall,joy,i feel strong and good overall
15998,i feel like this was such a rude comment and i...,anger,i feel like this was such a rude comment and i...


In [5]:
# Doing regularization
features = df['cleaned_text']
processed_features = []

for sentence in range(0, len(features)):
   
    processed_feature = re.sub(r'\W', ' ', str(features[sentence]))
    

    processed_feature= re.sub(r'\s+[a-zA-Z]\s+', ' ', processed_feature)
    

    processed_feature = re.sub(r'\^[a-zA-Z]\s+', ' ', processed_feature) 
    

    processed_feature = re.sub(r'\s+', ' ', processed_feature, flags=re.I)
    

    processed_feature = processed_feature.lower()

    processed_features.append(processed_feature)

In [6]:
df['processed_text'] = processed_features
df

Unnamed: 0,Text,Emotion,cleaned_text,processed_text
0,i didnt feel humiliated,sadness,i didnt feel humiliated,i didnt feel humiliated
1,i can go from feeling so hopeless to so damned...,sadness,i can go from feeling so hopeless to so damned...,i can go from feeling so hopeless to so damned...
2,im grabbing a minute to post i feel greedy wrong,anger,im grabbing a minute to post i feel greedy wrong,im grabbing minute to post feel greedy wrong
3,i am ever feeling nostalgic about the fireplac...,love,i am ever feeling nostalgic about the fireplac...,i am ever feeling nostalgic about the fireplac...
4,i am feeling grouchy,anger,i am feeling grouchy,i am feeling grouchy
...,...,...,...,...
15995,i just had a very brief time in the beanbag an...,sadness,i just had a very brief time in the beanbag an...,i just had very brief time in the beanbag and ...
15996,i am now turning and i feel pathetic that i am...,sadness,i am now turning and i feel pathetic that i am...,i am now turning and feel pathetic that am sti...
15997,i feel strong and good overall,joy,i feel strong and good overall,i feel strong and good overall
15998,i feel like this was such a rude comment and i...,anger,i feel like this was such a rude comment and i...,i feel like this was such rude comment and im ...


In [7]:
final_df = df[["processed_text","Emotion"]]
final_df

Unnamed: 0,processed_text,Emotion
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing minute to post feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger
...,...,...
15995,i just had very brief time in the beanbag and ...,sadness
15996,i am now turning and feel pathetic that am sti...,sadness
15997,i feel strong and good overall,joy
15998,i feel like this was such rude comment and im ...,anger


In [8]:

from nltk.tokenize import TweetTokenizer
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split

In [9]:
def tokenize(text): 
    tk = TweetTokenizer()
    return tk.tokenize(text)

vectorizer = CountVectorizer(analyzer = 'word',tokenizer = tokenize,lowercase = True,ngram_range=(1, 1))

In [10]:
def polarity(text):
    sentiment=TextBlob(text).sentiment
    print(sentiment)
    return sentiment

In [11]:
count= vectorizer.fit_transform(final_df['processed_text'])



In [12]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, precision_score, recall_score

In [13]:
X = final_df['processed_text'].values
y = final_df['Emotion'].values

X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=100, test_size=0.2)

In [14]:
vectorizer = TfidfVectorizer(max_features=1000)
X_train_idf = vectorizer.fit_transform(X_train)
X_test_idf = vectorizer.transform(X_test)

In [15]:

df_idf = pd.DataFrame(vectorizer.idf_, index=vectorizer.get_feature_names_out(),columns=["idf_weights"])

df_idf.sort_values(by=['idf_weights'],ascending = False).head()

Unnamed: 0,idf_weights
chest,7.624065
computer,7.412756
dream,7.366236
voice,7.366236
church,7.321784


In [16]:
mnb = MultinomialNB()
mnb.fit(X_train_idf, y_train)
result=[]

In [17]:
pred_mnb = mnb.predict(X_test_idf)

acc = accuracy_score(y_test, pred_mnb)

result.append(["Multinomial Naive Bayes",acc])



In [18]:
from sklearn.ensemble import RandomForestClassifier
clf_rf = RandomForestClassifier()
clf_rf.fit(X_train_idf, y_train)


y_pred_rf = clf_rf.predict(X_test_idf)

acc = accuracy_score(y_test, y_pred_rf)

result.append(["'Random Forest(Gini)",acc])



In [19]:
from sklearn.ensemble import RandomForestClassifier
clf_rf = RandomForestClassifier(criterion='entropy')
clf_rf.fit(X_train_idf, y_train)

y_pred_rf = clf_rf.predict(X_test_idf)

acc = accuracy_score(y_test, y_pred_rf)

model_results = pd.DataFrame([['Random Forest(Entropy)', acc]],
               columns = ['Model', 'Accuracy'])
result.append(['Random Forest(Entropy)',acc])



In [20]:
from sklearn.svm import SVC
clf_svc = SVC()
clf_rf.fit(X_train_idf, y_train)

y_pred_rf = clf_rf.predict(X_test_idf)

acc = accuracy_score(y_test, y_pred_rf)

model_results = pd.DataFrame([['SVC by SVM ', acc]],
               columns = ['Model', 'Accuracy'])
result.append(["SVC by SVM",acc])

print(result)

[['Multinomial Naive Bayes', 0.743125], ["'Random Forest(Gini)", 0.83375], ['Random Forest(Entropy)', 0.8203125], ['SVC by SVM', 0.8196875]]


In [21]:
results=pd.DataFrame(result,columns=["Model","Accuracy"])
results

Unnamed: 0,Model,Accuracy
0,Multinomial Naive Bayes,0.743125
1,'Random Forest(Gini),0.83375
2,Random Forest(Entropy),0.820312
3,SVC by SVM,0.819688


In [23]:

new_text = "I am feeling very happy today!"

new_text_cleaned = clean(new_text)

new_text_vectorized = vectorizer.transform([new_text_cleaned])

predicted_sentiment = mnb.predict(new_text_vectorized)

print("Predicted Sentiment:", predicted_sentiment[0])

Predicted Sentiment: joy
