In [1]:
# Natural Language Processing

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [38]:
# Load the reviews from the tab-separated file.
# quoting=3 is csv.QUOTE_NONE: quote characters inside a review are kept as plain text.
# The Liked column is the label: 1 = positive review, 0 = negative review.
dataset = pd.read_csv('social_media_reviews2.tsv', sep='\t', quoting=3)
dataset.head(7)

Unnamed: 0,Review,Liked
0,Wow... This Product is amazing.,1
1,Product is not good.,0
2,Not what I ordered for and the customer servic...,0
3,Easy to use. As a beginner I found this pad ve...,1
4,I am very happy with this tablet.,1
5,Now I am getting angry and I want my damn mone...,0
6,I was really hoping that this would be faster ...,0


In [39]:
# (rows, columns) of the loaded review frame.
dataset.shape

(117, 2)

In [40]:
# Text-cleaning dependencies: regular expressions, NLTK's stop-word list and the Porter stemmer.
import re
import nltk
# Fetch the stop-word corpus that nltk.corpus.stopwords reads from.
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [42]:
# Build the cleaned corpus: one space-joined string of stemmed words per review.
#
# The stop-word set and the stemmer are created once, outside the loop, instead of
# being rebuilt for every word / every review.
# NOTE(review): the English stop-word list contains negations such as "not", so
# "Product is not good" is reduced to "product good" -- consider keeping negations
# if the classifier should see them.
english_stopwords = set(stopwords.words('english'))
ps = PorterStemmer()

corpus = []
# Iterate over the column itself rather than a hard-coded row count, so the cell
# keeps working when the dataset grows or shrinks.
for raw_review in dataset['Review']:
    # Replace every non-letter with a space, lower-case, then split into words.
    words = re.sub('[^a-zA-Z]', ' ', raw_review).lower().split()
    # Drop stop words and reduce the remaining words to their stems.
    stems = [ps.stem(word) for word in words if word not in english_stopwords]
    corpus.append(' '.join(stems))
print(corpus)

['wow product amaz', 'product good', 'order custom servic nasti', 'easi use beginn found pad good enjoy kindl app', 'happi tablet', 'get angri want damn money back', 'realli hope would faster inexspens tablet', 'need work batteri', 'product good', 'realli work expect', 'good qualiti', 'would go back vendor', 'laptop light issu bright', 'product good', 'work fine', 'work well gb instead gb indic', 'highli recommend', 'deliveri realli slow', 'batteri health came still confus', 'like', 'strong batteri life', 'work perfectli', 'great meet need', 'good product batteri also good', 'yeah like conveni use qualiti excel', 'leather strong expect', 'nice product', 'hate honestli bad', 'made good qualiti leather', 'deliv someth differ pictur even origin', 'found vendor accid could happier', 'qualiti need upgrad', 'nice soft light comfi', 'met expect', 'good', 'bad qualiti wast money', 'beauti leather', 'deliveri suck', 'eri cool', 'deal good enough would drag vendor', 'hard judg whether side good 

In [44]:
# Bag of Words: turn each cleaned review into a vector of word counts,
# keeping at most the 1500 most frequent words as features.
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features=1500)
sparse_counts = cv.fit_transform(corpus)
X = sparse_counts.toarray()
print(X)

# Labels: the second column of the dataset, as a NumPy array.
y = dataset.iloc[:, 1].values
print(y)

[[0 0 0 ... 0 1 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
[1 0 0 1 1 0 0 0 1 1 1 0 0 1 0 0 1 0 0 0 1 1 1 1 1 0 1 0 1 0 1 0 1 1 1 0 1
 0 1 0 0 1 1 0 0 1 1 1 1 1 1 1 0 1 1 1 0 0 0 1 0 0 1 1 1 1 1 1 1 1 0 1 1 1
 0 0 0 0 0 1 1 1 0 0 0 1 0 1 0 1 1 1 1 1 1 0 1 1 0 0 1 1 0 1 1 1 1 1 1 1 1
 1 1 1 0 0 1]


In [45]:
# Hold out 20% of the reviews for testing; the fixed random_state keeps the split repeatable.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=0,
)

# Fit a Gaussian Naive Bayes classifier on the training portion.
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

In [46]:
# Predict labels for the held-out reviews.
y_pred = classifier.predict(X_test)

# Confusion matrix of true labels against predicted labels.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)


[[ 3  4]
 [ 1 16]]


In [47]:
# Check the accuracy of the model on the held-out reviews.
# accuracy_score expects (y_true, y_pred); the true labels go first, matching the
# confusion_matrix call. The value is the same either way for accuracy, but the
# swapped order would give wrong results if reused with an asymmetric metric.
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)

0.7916666666666666


In [49]:
# Goal: classify a new comment as positive or negative, add it to the account's
# database, and then measure the vendor's share of good ratings out of all the
# ratings received.

# First, count the positive ratings (label 1).
count = sum(1 for label in y if label == 1)
print(count)

72
117


In [52]:
# Share of positive reviews among all reviews (a fraction between 0 and 1).
total_ratings = len(y)
level_good_rating = count / total_ratings
print(level_good_rating)

0.6153846153846154


In [51]:
# From the output above, 61.54% of this vendor's reviews are positive.
# Placeholder value for the star rating; the next cell assigns the real one.
rating = 0

In [55]:
# Map the share of positive reviews onto a 1-5 star rating, one star per 20% band.
# Bands are upper-inclusive: <= 0.2 -> 1, (0.2, 0.4] -> 2, (0.4, 0.6] -> 3,
# (0.6, 0.8] -> 4, anything above 0.8 -> 5.
# The branches are tested in ascending order, so each one only needs its upper
# bound. The original second branch tested "> 0.2 and <= 0.2", which can never be
# true, so shares in (0.2, 0.4] wrongly fell through to 5 stars.
if level_good_rating <= 0.2:
    rating = 1
elif level_good_rating <= 0.4:
    rating = 2
elif level_good_rating <= 0.6:
    rating = 3
elif level_good_rating <= 0.8:
    rating = 4
else:
    rating = 5

print(rating)

4
