In [1]:
import pandas as pd, re, string
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score,classification_report

In [2]:
df= pd.read_csv('cleaned data.csv')

In [3]:
df.head()

Unnamed: 0,Text,Classification
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger


In [4]:
df.tail()

Unnamed: 0,Text,Classification
16958,i just keep feeling like someone is being unki...,anger
16959,im feeling a little cranky negative after this...,anger
16960,i feel that i am useful to my people and that ...,joy
16961,im feeling more comfortable with derby i feel ...,joy
16962,i feel all weird when i have to meet w people ...,fear


In [5]:
df.shape

(16963, 2)

In [6]:
df.dtypes

Text              object
Classification    object
dtype: object

In [7]:
df.isnull().sum()

Text              0
Classification    0
dtype: int64

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16963 entries, 0 to 16962
Data columns (total 2 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Text            16963 non-null  object
 1   Classification  16963 non-null  object
dtypes: object(2)
memory usage: 265.2+ KB


In [9]:
df.nunique()

Text              16963
Classification        6
dtype: int64

In [10]:
df.rename(columns={'Classification':'Sentiment',"Text":"review"},inplace=True)
df.head()

Unnamed: 0,review,Sentiment
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger


In [11]:
df['Sentiment'].unique()

array(['sadness', 'anger', 'love', 'surprise', 'fear', 'joy'],
      dtype=object)

In [12]:
df['label'] = df['Sentiment'].map({"love":1,"surprise":0,"joy":2,"fear":4,"anger":5,"sadness":6})

In [13]:
def clean_text(t):
    t = t.lower()
    t = re.sub(r"<.*?>"," ",t)
    t = t.translate(str.maketrans(string.punctuation, " "*len(string.punctuation)))

    return re.sub(r"\s+"," ",t).strip()
df["cleaned"]= df["review"].apply(clean_text)

In [14]:
x_train,x_test,y_train,y_test = train_test_split(df["cleaned"],df["label"],test_size =0.2, random_state = 42)
tfidf = TfidfVectorizer(max_features=20000, ngram_range=(1, 2), min_df=5)

In [15]:
x_train_tfidf = tfidf.fit_transform(x_train)
x_test_tfidf = tfidf.transform(x_test)

In [16]:
nb=MultinomialNB()
nb.fit(x_train_tfidf,y_train)

In [17]:
y_pred=nb.predict(x_test_tfidf)

In [18]:
print('Accuracy:',accuracy_score(y_test, y_pred))

Accuracy: 0.6746242263483643


In [19]:
target_names = ['very negative', 'negative', 'neutral', 'positive', 'very positive', 'mixed']
print(classification_report(y_test, y_pred, target_names=target_names))

               precision    recall  f1-score   support

very negative       1.00      0.03      0.05       114
     negative       1.00      0.04      0.07       277
      neutral       0.61      0.98      0.75      1180
     positive       0.92      0.32      0.47       408
very positive       0.94      0.31      0.47       427
        mixed       0.71      0.87      0.78       987

     accuracy                           0.67      3393
    macro avg       0.86      0.42      0.43      3393
 weighted avg       0.76      0.67      0.61      3393



In [20]:
sample=input("Enter the Review")
sample_tfidf=tfidf.transform([sample])
pred_num=nb.predict(sample_tfidf)[0]
pred_label='Negative' if pred_num==1 else 'positive'
print(f"Predicted Sentimental {pred_label}.")

Enter the Review positive


Predicted Sentimental positive.


In [21]:
from sklearn.linear_model import LogisticRegression

In [22]:
lr=LogisticRegression()
lr.fit(x_train_tfidf,y_train)

In [23]:
y_pred=lr.predict(x_test_tfidf)

In [24]:
print('Accuracy:',accuracy_score(y_test, y_pred))

Accuracy: 0.8275862068965517


In [25]:
target_names = ['very negative', 'negative', 'neutral', 'positive', 'very positive', 'mixed']
print(classification_report(y_test, y_pred, target_names=target_names))

               precision    recall  f1-score   support

very negative       0.92      0.30      0.45       114
     negative       0.92      0.44      0.60       277
      neutral       0.76      0.96      0.85      1180
     positive       0.90      0.73      0.80       408
very positive       0.89      0.72      0.80       427
        mixed       0.86      0.92      0.89       987

     accuracy                           0.83      3393
    macro avg       0.88      0.68      0.73      3393
 weighted avg       0.84      0.83      0.82      3393



In [26]:
sample=input("Enter the Review")
sample_tfidf=tfidf.transform([sample])
pred_num=lr.predict(sample_tfidf)[0]
pred_label='negative' if pred_num==1 else 'Positive'
print(f"Predicted Sentimental {pred_label}.")

Enter the Review negative


Predicted Sentimental Positive.


In [27]:
from sklearn.svm import SVC

In [None]:
svc=SVC()
svc.fit(x_train_tfidf,y_train)

In [29]:
y_pred=svc.predict(x_test_tfidf)

In [30]:
print('Accuracy:',accuracy_score(y_test, y_pred))

Accuracy: 0.8107869142351901


In [31]:
target_names = ['very negative', 'negative', 'neutral', 'positive', 'very positive', 'mixed']
print(classification_report(y_test, y_pred, target_names=target_names))

               precision    recall  f1-score   support

very negative       0.97      0.32      0.48       114
     negative       0.94      0.39      0.55       277
      neutral       0.73      0.97      0.83      1180
     positive       0.91      0.68      0.78       408
very positive       0.90      0.66      0.76       427
        mixed       0.86      0.91      0.88       987

     accuracy                           0.81      3393
    macro avg       0.89      0.66      0.71      3393
 weighted avg       0.84      0.81      0.80      3393



In [32]:
sample=input("Enter the Review")
sample_tfidf=tfidf.transform([sample])
pred_num=svc.predict(sample_tfidf)[0]
pred_label='Positive' if pred_num==1 else 'Negative'
print(f"Predicted Sentimental {pred_label}.")

Enter the Review bed


Predicted Sentimental Negative.
