In [1]:
import pandas as pd
from textblob import TextBlob

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

In [2]:
d = pd.read_csv("processed_tweets.csv")
d = d.drop(["Unnamed: 0","index"],axis=1)
d = d.rename(columns={"0":"Text"})
d.head(10)

Unnamed: 0,Text
0,account grow difficult keep track people activ...
1,release lyric video collab song bipolar sunshi...
2,stargalaxy genuinely cant even imagine second ...
3,theres call call spark call world always
4,scary plausible
5,track trace utterly useless proof covid spread...
6,camp never lose faith team proud aggie remembe...
7,bhpv merge bhel say announce lok sabha ncbn lo...
8,check result last two weeks arround time sum n...
9,see cabinet minister speak television deep thi...


In [3]:
d.isna().sum()

Text    0
dtype: int64

In [4]:
d.dropna(inplace=True)

In [5]:
d.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6294 entries, 0 to 6293
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Text    6294 non-null   object
dtypes: object(1)
memory usage: 98.3+ KB


In [6]:
n = len(d)
d["Sentiment"]=[None for i in range(n)]
for i in range(n):
    
    s = TextBlob(d["Text"].iloc[i]).sentiment
    if(s[0]>=0):
        d["Sentiment"].iloc[i] = 0
    else:
        d["Sentiment"].iloc[i] = 1

In [7]:
len(d[d["Sentiment"]==0]), len(d[d["Sentiment"]==1])

(4667, 1627)

In [8]:
no = len(d[d["Sentiment"]==1])

In [9]:
t = d[d["Sentiment"]==0][:no]
s = d[d["Sentiment"]==1]

In [10]:
df = pd.concat([s,t],ignore_index="True")
len(df)

3254

In [11]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

x1 = CountVectorizer().fit_transform(df["Text"])
x2 = TfidfVectorizer().fit_transform(df["Text"])
y = df["Sentiment"]
y=y.astype('int')

In [12]:
from sklearn.model_selection import train_test_split

x_train,x_test, y_train, y_test = train_test_split(x1,y,test_size=0.2,random_state=84)
X_train, X_test, Y_train, Y_test = train_test_split(x2,y,test_size=0.2,random_state=84)

In [13]:
from sklearn import metrics

accuracy = {}

# SVM

### CountVectorizer

In [14]:
from sklearn import svm
from sklearn import metrics

classifier=svm.SVC()
classifier.fit(x_train,y_train)
y_predict1=classifier.predict(x_test)

b1=metrics.accuracy_score(y_test, y_predict1)
recall = metrics.recall_score(y_test, y_predict1)
x = "Support Vector Machine with CountVectorizer"
accuracy[x] = b1

print(x)
print("Accuracy: {0:.4f}".format(b1))
print("Recall  : {0:.4f}".format(recall))

Support Vector Machine with CountVectorizer
Accuracy: 0.8679
Recall  : 0.7814


### TfidfVectorizer

In [15]:
classifier=svm.SVC()
classifier.fit(X_train,Y_train)
Y_predict1=classifier.predict(X_test)

b2=metrics.accuracy_score(Y_test, Y_predict1)
recall = metrics.recall_score(y_test, y_predict1)
x = "Support Vector Machine with TfidfVectorizer"
accuracy[x] = b2

print(x)
print("Accuracy: {0:.4f}".format(b2))
print("Recall  : {0:.4f}".format(recall))

Support Vector Machine with TfidfVectorizer
Accuracy: 0.8786
Recall  : 0.7814


# Multinomial Naive Bayes

### CountVectorizer

In [16]:
from sklearn.naive_bayes import MultinomialNB

nb = MultinomialNB()
nb.fit(x_train,y_train)
y_predict2=classifier.predict(x_test)

b3=metrics.accuracy_score(y_test, y_predict2)
recall = metrics.recall_score(y_test, y_predict1)
x = "Multinomial Naive Bayes with CountVectorizer"
accuracy[x] = b3

print(x)
print("Accuracy: {0:.4f}".format(b3))
print("Recall  : {0:.4f}".format(recall))

Multinomial Naive Bayes with CountVectorizer
Accuracy: 0.5499
Recall  : 0.7814


### TfidfVectorizer

In [17]:
nb = MultinomialNB()
nb.fit(X_train,Y_train)
Y_predict2=classifier.predict(X_test)

b4=metrics.accuracy_score(Y_test,Y_predict2)
recall = metrics.recall_score(y_test, y_predict1)
x = "Multinomial Naive Bayes with TfidfVectorizer"
accuracy[x] = b4

print(x)
print("Accuracy: {0:.4f}".format(b4))
print("Recall  : {0:.4f}".format(recall))

Multinomial Naive Bayes with TfidfVectorizer
Accuracy: 0.8786
Recall  : 0.7814
