# Sentimental Analysis.: Using techniques to extract opinion related information. such as emotion, mood to understand the sentiment.

In [1]:
# importing the libraries
import pandas as pd
import numpy as np
import matplotlib as plt
import re
import nltk

In [2]:
df = pd.read_csv("Downloads/mood_data.txt", names = ["Text","Emotion"], sep=";")
df.head()

Unnamed: 0,Text,Emotion
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger


In [3]:
df.shape

(16000, 2)

In [4]:
# generating tokens
import string
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

In [5]:
def clean_txt(mood):
  mood=word_tokenize(mood)
  mood=" ".join(mood)
  mood=[char for char in mood if char not in string.punctuation]
  mood=''.join(mood)
  mood=[word for word in mood.split() if mood.lower() not in stopwords.words('english')]
  return ' '.join(mood)

In [None]:
# df["cleaned_txt"] = df["Text"].apply(clean_txt)

In [20]:
# apply the function to clean the text
import nltk
nltk.download("punkt_tab")
nltk.download("stopwords")

[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\mutyalasravanthi\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\mutyalasravanthi\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [19]:
df["cleaned_txt"] = df["Text"].apply(clean_txt)
df.head()

Unnamed: 0,Text,Emotion,cleaned_txt,process_txt
0,i didnt feel humiliated,sadness,i didnt feel humiliated,i didnt feel humiliated
1,i can go from feeling so hopeless to so damned...,sadness,i can go from feeling so hopeless to so damned...,i can go from feeling so hopeless to so damned...
2,im grabbing a minute to post i feel greedy wrong,anger,im grabbing a minute to post i feel greedy wrong,im grabbing minute to post feel greedy wrong
3,i am ever feeling nostalgic about the fireplac...,love,i am ever feeling nostalgic about the fireplac...,i am ever feeling nostalgic about the fireplac...
4,i am feeling grouchy,anger,i am feeling grouchy,i am feeling grouchy


In [21]:
feature=df['cleaned_txt']
processes_features=[]
for sentence in range (0,len(feature)):
  process_feature=re.sub(r'\W',' ',str(feature[sentence]))
  process_feature=re.sub(r'\s+[a-zA-Z]\s+', ' ',process_feature)
  process_feature=re.sub(r'\^[a-zA-Z]\s+',' ',process_feature)
  process_feature=re.sub(r'\s+',' ',process_feature,flags=re.I)
  process_feature=process_feature.lower()
  # Add the process_feature string to the processes_features list
  processes_features.append(process_feature)

In [22]:
processes_features[:5]

['i didnt feel humiliated',
 'i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake',
 'im grabbing minute to post feel greedy wrong',
 'i am ever feeling nostalgic about the fireplace will know that it is still on the property',
 'i am feeling grouchy']

In [23]:
df["process_txt"] = processes_features
df.head()


Unnamed: 0,Text,Emotion,cleaned_txt,process_txt
0,i didnt feel humiliated,sadness,i didnt feel humiliated,i didnt feel humiliated
1,i can go from feeling so hopeless to so damned...,sadness,i can go from feeling so hopeless to so damned...,i can go from feeling so hopeless to so damned...
2,im grabbing a minute to post i feel greedy wrong,anger,im grabbing a minute to post i feel greedy wrong,im grabbing minute to post feel greedy wrong
3,i am ever feeling nostalgic about the fireplac...,love,i am ever feeling nostalgic about the fireplac...,i am ever feeling nostalgic about the fireplac...
4,i am feeling grouchy,anger,i am feeling grouchy,i am feeling grouchy


In [24]:
final_df = df[["process_txt","Emotion"]]
final_df.head()

Unnamed: 0,process_txt,Emotion
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing minute to post feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger


In [25]:
# generation tokens
from nltk.tokenize import TweetTokenizer
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer, TfidfVectorizer
from sklearn.model_selection import train_test_split


In [26]:
def tokenize(text):
    tk = TweetTokenizer()
    return tk.tokenize(text)
vectorizer = CountVectorizer(analyzer = "word", tokenizer=tokenize,lowercase = True, ngram_range=(1,1))


In [27]:
print(vectorizer)

CountVectorizer(tokenizer=<function tokenize at 0x00000196F2947BA0>)


In [29]:
count = vectorizer.fit_transform(final_df["process_txt"])
count

<16000x15206 sparse matrix of type '<class 'numpy.int64'>'
	with 264527 stored elements in Compressed Sparse Row format>

In [30]:
count.shape

(16000, 15206)

In [31]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import confusion_matrix, recall_score, precision_score, f1_score, accuracy_score

X = final_df["process_txt"].values
print(X)

['i didnt feel humiliated'
 'i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake'
 'im grabbing minute to post feel greedy wrong' ...
 'i feel strong and good overall'
 'i feel like this was such rude comment and im glad that t'
 'i know lot but feel so stupid because can not portray it']


In [32]:
y = final_df["Emotion"].values
y

array(['sadness', 'sadness', 'anger', ..., 'joy', 'anger', 'sadness'],
      dtype=object)

In [33]:
X_train, X_test, y_train, y_test = train_test_split(X,y,random_state =100, test_size=0.3)


In [34]:

vectorizer = TfidfVectorizer(max_features=1000)
X_train_df = vectorizer.fit_transform(X_train)
X_test_df = vectorizer.transform(X_test)


In [37]:
df1 = pd.DataFrame(vectorizer.idf_, index = vectorizer.get_feature_names_out(), columns = ["idf_weights"])
df1.head()

Unnamed: 0,idf_weights
able,5.503477
about,3.266721
absolutely,6.517096
abused,6.991554
acceptable,6.922561


In [38]:
df1.sort_values(by=["idf_weights"],ascending =False).head()

Unnamed: 0,idf_weights
blah,7.758809
chest,7.684701
pregnant,7.433387
computer,7.379319
dream,7.379319


In [39]:
# naive bayes
Mnb = MultinomialNB()
Mnb.fit(X_train_df,y_train)

pred_mnb = Mnb.predict(X_test_df)
acc =accuracy_score (y_test,pred_mnb)
result = pd.DataFrame([["Multinomial Naive Bayes",acc]],columns=["model","Accuracy"])
print(result)

                     model  Accuracy
0  Multinomial Naive Bayes  0.740625


In [40]:
# Random Forest
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier()
clf.fit(X_train_df,y_train)

y_pred_clf = clf.predict(X_test_df)
acc_1 = accuracy_score(y_test,y_pred_clf)
model2=pd.DataFrame([["Random Forest",acc_1]],columns=["model","Accuracy"])
print(model2)

           model  Accuracy
0  Random Forest  0.832708


In [41]:
# Random Forest Entropy
from sklearn.ensemble import RandomForestClassifier
clf_1= RandomForestClassifier(criterion = "entropy")
clf_1.fit(X_train_df,y_train)

y_pred_clf_1 = clf_1.predict(X_test_df)
acc_2 = accuracy_score(y_test,y_pred_clf_1)
model3=pd.DataFrame([["Random Forest",acc_2]],columns=["model","Accuracy"])
print(model3)

           model  Accuracy
0  Random Forest  0.810208


In [42]:
from sklearn.svm import SVC
svc = SVC()
svc.fit(X_train_df, y_train)

y_pred_svc = svc.predict(X_test_df)
acc_3 =accuracy_score(y_test, y_pred_svc)
model_4 =pd.DataFrame([["SVC",acc_3]],columns=["model","Accuracy"])
model_4

Unnamed: 0,model,Accuracy
0,SVC,0.81375


In [43]:
model_results = pd.concat([result,model2,model3,model_4],ignore_index = True)
model_results

Unnamed: 0,model,Accuracy
0,Multinomial Naive Bayes,0.740625
1,Random Forest,0.832708
2,Random Forest,0.810208
3,SVC,0.81375


In [44]:
confusion_matrix(y_test,y_pred_clf)

array([[ 498,   13,   67,    3,   30,    0],
       [  26,  448,   94,    4,   28,   15],
       [  20,   11, 1484,   31,   50,    7],
       [   4,    3,   97,  266,    3,    0],
       [  37,   33,  137,   12, 1183,    5],
       [   1,   39,   28,    0,    5,  118]], dtype=int64)

In [45]:
import joblib
joblib.dump(clf,r"C:\Users\mutyalasravanthi\OneDrive\Desktop\Sentiment_Analysis\model.h5")

['C:\\Users\\mutyalasravanthi\\OneDrive\\Desktop\\Sentiment_Analysis\\model.h5']

In [46]:
joblib.dump(vectorizer,r"C:\Users\mutyalasravanthi\OneDrive\Desktop\Sentiment_Analysis\tdfvectorizer.pkl")

['C:\\Users\\mutyalasravanthi\\OneDrive\\Desktop\\Sentiment_Analysis\\tdfvectorizer.pkl']