## <font size=5> <strong>Sentiment analysis

## I. Importing essential libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import  svm
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

setting the seed value to ensure that you get the same sequence of random numbers every time

In [2]:
np.random.seed(500)

## II. Importing and understanding our dataset 

In [3]:
data = pd.read_csv('training.csv',encoding='latin1')
#print(data['Sentence'])
data.dropna(inplace=True)
#Change all the text to lower case. This is required as python interprets 'bin' and 'BIN' differently
data['Sentence'] = [entry.lower() for entry in data['Sentence']]
data['Sentiment'] = np.where(data['Sentiment'].str.contains('positive'), 1, 0)


shape of data

In [4]:
data.shape

(218, 2)

In [5]:
data.describe()

Unnamed: 0,Sentiment
count,218.0
mean,0.412844
std,0.493478
min,0.0
25%,0.0
50%,0.0
75%,1.0
max,1.0


## III. Train Test split

In [7]:
Train_X, Test_X, Train_Y, Test_Y = train_test_split(data['Sentence'],data['Sentiment'],test_size=0.3)
#70-30

## IV. Tokenization and removing stop words

In [10]:
Encoder = LabelEncoder()
Train_Y = Encoder.fit_transform(Train_Y)
Test_Y = Encoder.fit_transform(Test_Y)
vectorizer = TfidfVectorizer(stop_words='english')
vectorizer.fit_transform(data['Sentence'])
feature_names = vectorizer.get_feature_names_out()
#print(response)
Train_X_Tfidf = vectorizer.transform(Train_X)
Test_X_Tfidf = vectorizer.transform(Test_X)
print(Train_X_Tfidf.shape,Train_Y.shape)


(152, 361) (152,)


## V. Model Fitting

### Support Vector Machine

In [20]:
SVM = svm.SVC(C=1.0, kernel='linear', degree=3, gamma='auto')
SVM.fit(Train_X_Tfidf,Train_Y)
# predict the labels on validation dataset
predictions_SVM = SVM.predict(Test_X_Tfidf)
# Use accuracy_score function to get the accuracy
print("SVM Accuracy Score -> ",accuracy_score(predictions_SVM, Test_Y)*100)
#print(Test_Y,predictions_SVM)

SVM Accuracy Score ->  66.66666666666666


### Naive Bayes Model

In [17]:
NB = MultinomialNB()
NB.fit(Train_X_Tfidf, Train_Y)
predictions_NB = NB.predict(Test_X_Tfidf)
print("Naive Bayes Accuracy Score -> ", accuracy_score(predictions_NB, Test_Y) * 100)

Naive Bayes Accuracy Score ->  68.18181818181817


## VI. Testing data with user inputs

In [16]:
print("Enter sentences: ")
sentences = []
for i in range(2):
    sentence = input()
    sentences.append(sentence)

test_data = vectorizer.transform(sentences)

predictions_SVM = SVM.predict(test_data)
predictions_NB = NB.predict(test_data)

for prediction in predictions_SVM:
    if prediction == 1:
        print("---- SVM: positive")
    else:
        print("---- SVM: negative")

for prediction in predictions_NB:
    if prediction == 1:
        print("---- Naive Bayes: positive")
    else:
        print("---- Naive Bayes: negative")

Enter sentences: 


 inspiring good
 very bad


---- SVM: positive
---- SVM: negative
---- Naive Bayes: positive
---- Naive Bayes: negative
