In [2]:
import re
from collections import Counter

import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report


In [3]:
# Load the labelled comments and keep only rows where both the text and the
# label are present.
df = (
    pd.read_csv("sentiment_data.csv")
    .loc[:, ['Comment', 'Sentiment']]
    .dropna()
)

# Words that `preprocess` discards when tokenising a comment.
stopwords = {
    "the", "a", "an", "is", "and", "or", "was", "it", "this", "that",
    "of", "to", "very", "but", "be", "in", "on", "for", "so", "at",
    "as", "are", "were", "from",
}

df.head()

Unnamed: 0,Comment,Sentiment
0,lets forget apple pay required brand new iphon...,1
1,nz retailers don’t even contactless credit car...,0
2,forever acknowledge channel help lessons ideas...,2
3,whenever go place doesn’t take apple pay doesn...,0
4,apple pay convenient secure easy use used kore...,2


In [4]:
def preprocess(t, stop_words=None):
    """Turn a raw comment into a list of lowercase word tokens.

    The text is lowercased, every character that is not ``a``-``z`` or
    whitespace is deleted (so digits, punctuation, apostrophes and any
    non-ASCII letters vanish: ``"don't"`` becomes ``"dont"``), the result is
    split on whitespace, and stop words are dropped.

    Parameters
    ----------
    t : str or any
        The comment. ``None`` yields an empty token list; any other
        non-string value (e.g. a numeric cell read from the CSV) is converted
        with ``str()`` instead of raising ``AttributeError``.
    stop_words : collection of str, optional
        Words to discard. Defaults to the notebook-level ``stopwords`` set.

    Returns
    -------
    list of str
        The surviving tokens, in their original order.
    """
    if t is None:
        return []
    if not isinstance(t, str):
        t = str(t)
    if stop_words is None:
        # Fall back to the notebook-wide set; looked up at call time so a
        # re-run of the cell that defines it is picked up.
        stop_words = stopwords
    t = re.sub(r"[^a-z\s]", "", t.lower())
    return [w for w in t.split() if w not in stop_words]
# Tokenise every comment once and store the token lists next to the raw text.
df["tokens"] = df["Comment"].apply(preprocess)

# Corpus-wide word frequencies over all token lists.
word_counts = Counter(word for tokens in df["tokens"] for word in tokens)

# Map the 5000 most frequent words to consecutive feature indices
# (index 0 is the most frequent word).
vocab = {
    word: idx
    for idx, (word, _count) in enumerate(word_counts.most_common(5000))
}

def vectorize(t, vocabulary=None):
    """Convert a token list into a bag-of-words count vector.

    Parameters
    ----------
    t : iterable of str
        Tokens of one document.
    vocabulary : dict of str -> int, optional
        Mapping from word to feature index. Defaults to the notebook-level
        ``vocab``; passing it explicitly lets the function be used (and
        tested) without relying on global notebook state.

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``len(vocabulary)`` where position ``i``
        holds how many times the word with index ``i`` occurs in ``t``.
        Tokens that are not in the vocabulary are ignored.
    """
    if vocabulary is None:
        vocabulary = vocab
    v = np.zeros(len(vocabulary))
    for w in t:
        idx = vocabulary.get(w)  # single lookup instead of `in` + indexing
        if idx is not None:
            v[idx] += 1
    return v

In [5]:
# Build the document-term count matrix in sparse (CSR) form.
# A dense float64 matrix costs rows * len(vocab) * 8 bytes, and building it
# from a list of per-row arrays holds that amount twice; with a vocabulary of
# up to 5000 words almost every entry is zero. CSR stores only the non-zero
# counts and is accepted directly by train_test_split and MultinomialNB.
row_starts = [0]   # CSR indptr: where each document's entries begin
col_indices = []   # CSR indices: vocabulary index of each non-zero entry
counts = []        # CSR data: how often that word occurs in the document
for tokens in df["tokens"]:
    row_counts = Counter(vocab[w] for w in tokens if w in vocab)
    # Sorted so every row has ascending, duplicate-free column indices.
    for col, n in sorted(row_counts.items()):
        col_indices.append(col)
        counts.append(n)
    row_starts.append(len(col_indices))

X = csr_matrix(
    (counts, col_indices, row_starts),
    shape=(len(df), len(vocab)),
    dtype=np.float64,  # same value dtype as the vectors from `vectorize`
)
y = df["Sentiment"].values

In [6]:

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit multinomial Naive Bayes on the training counts, then predict labels
# for the held-out rows.
model = MultinomialNB().fit(X_train, y_train)
y_pred = model.predict(X_test)

In [7]:
# Overall accuracy on the held-out rows, followed by the per-class
# precision / recall / F1 table.
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

per_class_report = classification_report(y_test, y_pred)
print(per_class_report)

Accuracy: 0.6892250861246005
              precision    recall  f1-score   support

           0       0.66      0.63      0.65     11023
           1       0.68      0.64      0.66     16631
           2       0.71      0.76      0.73     20532

    accuracy                           0.69     48186
   macro avg       0.68      0.68      0.68     48186
weighted avg       0.69      0.69      0.69     48186



In [9]:
def predict_sentiment(s):
    """Classify one raw sentence and return its sentiment as a word.

    The sentence goes through the same `preprocess` and `vectorize` steps
    used to build the training features, is reshaped to a single-row 2-D
    array, and is passed to the fitted `model`.

    Returns "Negative", "Neutral" or "Positive" for predicted labels
    0, 1 and 2 respectively.
    """
    label_names = {0:"Negative",1:"Neutral",2:"Positive"}
    features = vectorize(preprocess(s)).reshape(1, -1)
    predicted = model.predict(features)
    return label_names[predicted[0]]

# Quick manual check of the end-to-end prediction path on one sentence.
sample_text = "Oh Today is Monday"
print(predict_sentiment(sample_text))

Neutral
