In [36]:
import spacy
from spacy import displacy
from spacy.lang.en.stop_words import STOP_WORDS

# Keep a handle on the loaded pipeline: text_data_cleaning() calls nlp(sentence),
# and discarding the return value of spacy.load() leaves `nlp` undefined.
nlp = spacy.load("en_core_web_sm")

In [37]:
#importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score

In [38]:
# Navigating to file location
import os

# The working directory defaults to the original "E:/" location but can be
# overridden by setting the DATA_DIR environment variable, so the notebook can
# run on machines that do not have that drive.
DATA_DIR = os.environ.get("DATA_DIR", "E:/")
os.chdir(DATA_DIR)

In [39]:
# Load the labelled reviews workbook from the current working directory
workbook_name = "Book1.xlsx"
data = pd.read_excel(workbook_name)

In [40]:
# Preview the first ten rows of the loaded frame
data.head(10)

Unnamed: 0,Reviews,Sentiments
0,We do not handle this issue. I'd now transfer ...,neutral
1,"Not really, but they do seem to have more 'jui...",neutral
2,We use alot of batteries between my boys and t...,neutral
3,I bought this to replace a damaged much more e...,neutral
4,Love this amazon did a great job with design f...,happy
5,Completely satisfied with the Amazon Fire TV. ...,happy
6,"Fun to use for recipes, conversions while cook...",happy
7,"Convenient to use for music, news reports and ...",happy
8,The music and weather reports are terrific in ...,happy
9,great for finding local info while traveling. ...,happy


In [41]:
# (rows, columns) of the loaded frame
data.shape

(35, 2)

In [42]:
# Class balance: number of reviews per sentiment label
sentiment_labels = data["Sentiments"]
sentiment_labels.value_counts()

disgust     7
fear        7
happy       6
anger       5
neutral     4
surprise    3
sad         3
Name: Sentiments, dtype: int64

In [43]:
# Count missing values in each column
data.isna().sum()

Reviews       0
Sentiments    0
dtype: int64

In [44]:
# Punctuation characters that text_data_cleaning() filters out of the token list
import string

Punct = string.punctuation
Punct

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [45]:
# Data cleaning
def text_data_cleaning(sentence):
    """Lemmatise ``sentence`` and drop stop words and punctuation.

    Relies on the module-level names ``nlp`` (a loaded spaCy pipeline),
    ``STOP_WORDS`` and ``Punct``.

    Parameters
    ----------
    sentence : str
        Raw text to clean.

    Returns
    -------
    list of str
        Lower-cased, stripped lemmas in document order, excluding stop words
        and punctuation. An empty list is returned when no token survives.
    """
    doc = nlp(sentence)

    tokens = []
    for token in doc:
        # spaCy v2 assigns the placeholder lemma "-PRON-" to pronouns; keep the
        # lower-cased surface form for those. The bare "PRON" value the original
        # code compared against is still honoured.
        if token.lemma_ in ("-PRON-", "PRON"):
            temp = token.lower_
        else:
            temp = token.lemma_.lower().strip()
        tokens.append(temp)

    # Filter only after every token has been collected, and return once at the
    # end. Previously the filter loop and the return sat inside the token loop,
    # so the function returned after looking at the first token only.
    # `Punct` is a plain string, so `not in Punct` is a substring test; it also
    # discards empty strings left over from whitespace tokens.
    cleaned_tokens = [
        token
        for token in tokens
        if token not in STOP_WORDS and token not in Punct
    ]
    return cleaned_tokens

In [51]:
# Model Building
# TF-IDF features feeding a linear support-vector classifier.
from sklearn.svm import LinearSVC
# NOTE(review): the vectoriser is created with default settings, so
# text_data_cleaning() is not used as its tokenizer anywhere in this notebook.
tfidf=TfidfVectorizer()
model=LinearSVC()

In [52]:
# Features are the raw review texts; targets are the sentiment labels
x, y = data["Reviews"], data["Sentiments"]

In [329]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=71,
)

In [330]:
# Chain vectorisation and classification so raw text can be passed straight to fit/predict
pipeline_steps = [
    ("tfidf", tfidf),
    ("model", model),
]
Classifier = Pipeline(pipeline_steps)

In [331]:
# Fit the vectoriser and the classifier on the training split
Classifier.fit(x_train,y_train)

Pipeline(steps=[('tfidf', TfidfVectorizer()), ('model', LinearSVC())])

In [332]:
# Predict sentiment labels for the held-out reviews
y_pred=Classifier.predict(x_test)

In [339]:
# Per-class precision/recall/F1 followed by the overall accuracy on the held-out split
report_text = classification_report(y_test, y_pred)
overall_accuracy = accuracy_score(y_test, y_pred)
print(report_text, overall_accuracy)

              precision    recall  f1-score   support

     disgust       1.00      0.50      0.67         2
        fear       0.50      1.00      0.67         1
       happy       1.00      1.00      1.00         2
     neutral       1.00      1.00      1.00         1
    surprise       1.00      1.00      1.00         1

    accuracy                           0.86         7
   macro avg       0.90      0.90      0.87         7
weighted avg       0.93      0.86      0.86         7
 0.8571428571428571


In [353]:
# Test: spot-check the trained classifier on individual sentences

In [341]:
# Spot check: a complaint about being kept waiting
sample_text = "He made me wait for 1 hour without informing"
print(Classifier.predict([sample_text]))

['anger']


In [344]:
# Spot check: an informal, exclamatory message
sample_text = "wow officially i have no life this month! LOL my friends dont believe me when i say they need to BOOK IN ADVANCE!"
print(Classifier.predict([sample_text]))

['surprise']


In [345]:
# Spot check: a sentence that explicitly mentions being afraid
sample_text = "there was a swelling which  is dangerous to the patient hence I became afraid."
print(Classifier.predict([sample_text]))

['fear']


In [346]:
# Spot check: a sentence about slaughter-houses and torture
sample_text = "When I take a look into a slaughter-house and when listening to stories about torturing people or animals."
print(Classifier.predict([sample_text]))

['disgust']


In [348]:
# Spot check on a battery review.
# NOTE(review): this sentence appears to match a row shown in the data.head(10)
# preview, so it may be part of the training split - confirm before reading the
# result as evidence of generalisation.
sample_text = "Not really, but they do seem to have more 'juice' than the national brands, if that is possible."
print(Classifier.predict([sample_text]))

['neutral']


In [351]:
# Spot check on a Fire TV review.
# NOTE(review): this sentence appears to match a row shown in the data.head(10)
# preview, so it may be part of the training split - confirm before reading the
# result as evidence of generalisation.
sample_text = "Completely satisfied with the Amazon Fire TV. I'll never go back to cable TV again."
print(Classifier.predict([sample_text]))

['happy']


In [352]:
# Spot check on a travel/news review.
# NOTE(review): this sentence appears to match a row shown in the data.head(10)
# preview, so it may be part of the training split - confirm before reading the
# result as evidence of generalisation.
sample_text = "great for finding local info while traveling. Useful for weather and morning news brief."
print(Classifier.predict([sample_text]))

['happy']
