In [1]:
import os
import re   # regular expressions, used to strip non-letter characters from reviews

import nltk    # Natural Language Toolkit: stop-word list and stemmer
import numpy as np
import pandas as pd
from nltk.corpus import stopwords   # lazily loaded corpus; safe to import before the download below
from nltk.stem.porter import PorterStemmer   # reduces inflected words to a common stem

# Location of the tab-separated reviews file. Set the REVIEWS_TSV environment
# variable to run the notebook on another machine; the default is the path the
# notebook was originally written against.
REVIEWS_PATH = os.environ.get('REVIEWS_TSV', 'C:\\Users\\ashim\\Downloads\\reviews.tsv')

dataset = pd.read_csv(REVIEWS_PATH, delimiter = '\t')

nltk.download('stopwords')

ps = PorterStemmer()

# English stop words supplied by NLTK, minus 'not': negation is kept so it
# survives the stop-word filter applied to the reviews.
all_stopwords = stopwords.words('english')
all_stopwords.remove('not')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ashim\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
# Build the cleaned corpus: one normalised, stemmed string per review.
# Every row of the dataset is processed (instead of a hard-coded count) so the
# corpus always has as many entries as there are labels.
stop_words = set(all_stopwords)   # built once; set membership tests are O(1)

words = []
for raw_review in dataset['Review']:
    # Replace everything except ASCII letters with a space, lower-case, tokenise.
    review = re.sub('[^a-zA-Z]', ' ', raw_review).lower().split()
    # Drop stop words, stem what is left, and glue the tokens back together.
    review = ' '.join(ps.stem(word) for word in review if word not in stop_words)
    words.append(review)
  

In [None]:
words[:5]   # preview a few cleaned reviews instead of dumping the whole corpus

In [3]:
from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words vectoriser: each vocabulary term becomes a column and each text
# sample a row of the resulting count matrix. max_features caps the vocabulary
# at 1420 terms.
cv = CountVectorizer(max_features = 1420)

In [4]:
# Learn the vocabulary from the cleaned corpus and build the dense count matrix.
X = cv.fit_transform(words).toarray()
# Labels are the last column of the dataset. Slice to len(words) so X and y
# always have the same number of rows, even if only part of the dataset was
# cleaned into `words`.
y = dataset.iloc[:len(words), -1].values

In [5]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for evaluation; random_state=0 makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)

In [6]:
# Show the held-out feature matrix produced by the split above.
X_test

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [7]:
from tkinter import *
from tkinter.ttk import *
import cv2
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image, ImageTk
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.naive_bayes import BernoulliNB
from sklearn import linear_model



In [8]:
def gaussian(root, input):
    """Classify one review with Gaussian Naive Bayes and show the verdict in ``root``.

    A fresh ``GaussianNB`` is fitted on ``X_train``/``y_train`` on every call.
    The text is cleaned with the same steps used to build the training corpus
    (letters only, lower-case, stop-word removal, Porter stemming) before it is
    vectorised with the fitted ``cv``; otherwise inflected words in the input
    never match the stemmed vocabulary the model was trained on.

    Parameters
    ----------
    root : tkinter widget
        Container in which the result label is placed (relx=0.45, rely=0.4).
    input : str
        Raw review text. The name shadows the builtin but is kept so existing
        callers are unaffected.
    """
    classifier = GaussianNB()
    classifier.fit(X_train, y_train)

    # Mirror the training-time cleaning pipeline.
    stop_words = set(all_stopwords)
    tokens = re.sub('[^a-zA-Z]', ' ', input).lower().split()
    cleaned = ' '.join(ps.stem(tok) for tok in tokens if tok not in stop_words)

    features = cv.transform([cleaned]).toarray()
    # Class 0 is reported as a bad review, anything else as a good one.
    verdict = 'Bad  review' if classifier.predict(features)[0] == 0 else 'Good review'

    # Keep a single result label per window and update its text, rather than
    # stacking a new widget on top of the previous one on every submit.
    result_label = getattr(root, '_gaussian_result', None)
    if result_label is None:
        result_label = Label(root, font="times 15")
        result_label.place(relx=0.45, rely=0.4)
        root._gaussian_result = result_label
    result_label.config(text=verdict)

    


In [9]:
def multinom(root, input):
    """Classify one review with Multinomial Naive Bayes and show the verdict in ``root``.

    A fresh ``MultinomialNB(alpha=0.1)`` is fitted on ``X_train``/``y_train``
    on every call. The text is cleaned with the same steps used to build the
    training corpus (letters only, lower-case, stop-word removal, Porter
    stemming) before it is vectorised with the fitted ``cv``; otherwise
    inflected words in the input never match the stemmed vocabulary.

    Parameters
    ----------
    root : tkinter widget
        Container in which the result label is placed (relx=0.45, rely=0.5).
    input : str
        Raw review text. The name shadows the builtin but is kept so existing
        callers are unaffected.
    """
    classifier = MultinomialNB(alpha=0.1)
    classifier.fit(X_train, y_train)

    # Mirror the training-time cleaning pipeline.
    stop_words = set(all_stopwords)
    tokens = re.sub('[^a-zA-Z]', ' ', input).lower().split()
    cleaned = ' '.join(ps.stem(tok) for tok in tokens if tok not in stop_words)

    features = cv.transform([cleaned]).toarray()
    # Class 0 is reported as a bad review, anything else as a good one.
    verdict = 'Bad  review' if classifier.predict(features)[0] == 0 else 'Good review'

    # Keep a single result label per window and update its text, rather than
    # stacking a new widget on top of the previous one on every submit.
    result_label = getattr(root, '_multinom_result', None)
    if result_label is None:
        result_label = Label(root, font="times 15")
        result_label.place(relx=0.45, rely=0.5)
        root._multinom_result = result_label
    result_label.config(text=verdict)

        


In [10]:
def bernoulli(root, input):
    """Classify one review with Bernoulli Naive Bayes and show the verdict in ``root``.

    A fresh ``BernoulliNB(alpha=0.8)`` is fitted on ``X_train``/``y_train`` on
    every call. The text is cleaned with the same steps used to build the
    training corpus (letters only, lower-case, stop-word removal, Porter
    stemming) before it is vectorised with the fitted ``cv``; otherwise
    inflected words in the input never match the stemmed vocabulary.

    Parameters
    ----------
    root : tkinter widget
        Container in which the result label is placed (relx=0.45, rely=0.6).
    input : str
        Raw review text. The name shadows the builtin but is kept so existing
        callers are unaffected.
    """
    classifier = BernoulliNB(alpha=0.8)
    classifier.fit(X_train, y_train)

    # Mirror the training-time cleaning pipeline.
    stop_words = set(all_stopwords)
    tokens = re.sub('[^a-zA-Z]', ' ', input).lower().split()
    cleaned = ' '.join(ps.stem(tok) for tok in tokens if tok not in stop_words)

    features = cv.transform([cleaned]).toarray()
    # Class 0 is reported as a bad review, anything else as a good one.
    verdict = 'Bad  review' if classifier.predict(features)[0] == 0 else 'Good review'

    # Keep a single result label per window and update its text, rather than
    # stacking a new widget on top of the previous one on every submit.
    result_label = getattr(root, '_bernoulli_result', None)
    if result_label is None:
        result_label = Label(root, font="times 15")
        result_label.place(relx=0.45, rely=0.6)
        root._bernoulli_result = result_label
    result_label.config(text=verdict)

    

In [11]:
def logistic(root, input):
    """Classify one review with logistic regression and show the verdict in ``root``.

    A fresh ``LogisticRegression(C=1.5)`` is fitted on ``X_train``/``y_train``
    on every call. The text is cleaned with the same steps used to build the
    training corpus (letters only, lower-case, stop-word removal, Porter
    stemming) before it is vectorised with the fitted ``cv``; otherwise
    inflected words in the input never match the stemmed vocabulary.

    Parameters
    ----------
    root : tkinter widget
        Container in which the result label is placed (relx=0.45, rely=0.7).
    input : str
        Raw review text. The name shadows the builtin but is kept so existing
        callers are unaffected.
    """
    classifier = linear_model.LogisticRegression(C=1.5)
    classifier.fit(X_train, y_train)

    # Mirror the training-time cleaning pipeline.
    stop_words = set(all_stopwords)
    tokens = re.sub('[^a-zA-Z]', ' ', input).lower().split()
    cleaned = ' '.join(ps.stem(tok) for tok in tokens if tok not in stop_words)

    features = cv.transform([cleaned]).toarray()
    # Class 0 is reported as a bad review, anything else as a good one.
    verdict = 'Bad  review' if classifier.predict(features)[0] == 0 else 'Good review'

    # Keep a single result label per window and update its text, rather than
    # stacking a new widget on top of the previous one on every submit.
    result_label = getattr(root, '_logistic_result', None)
    if result_label is None:
        result_label = Label(root, font="times 15")
        result_label.place(relx=0.45, rely=0.7)
        root._logistic_result = result_label
    result_label.config(text=verdict)
    

In [12]:
def gaus_acc(root):
    """Show Gaussian Naive Bayes test-set accuracy, precision and recall in ``root``.

    A fresh ``GaussianNB`` is fitted on ``X_train``/``y_train`` and evaluated
    on ``X_test``/``y_test``. Each score is displayed as a percentage rounded
    to two decimals at relx=0.85 and rely=0.45 / 0.55 / 0.65.

    Parameters
    ----------
    root : tkinter widget
        Container in which the three metric labels are placed.
    """
    classifier = GaussianNB()
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)

    # (metric function, vertical position of its value label)
    metrics = (
        (accuracy_score, 0.45),
        (precision_score, 0.55),
        (recall_score, 0.65),
    )

    # Create the value labels once per window and only update their text
    # afterwards, so earlier numbers cannot show through from underneath.
    value_labels = getattr(root, '_metric_labels', None)
    if value_labels is None:
        value_labels = []
        for _, rely in metrics:
            value_label = Label(root, relief="solid")
            value_label.place(relx=0.85, rely=rely)
            value_labels.append(value_label)
        root._metric_labels = value_labels

    for value_label, (metric, _) in zip(value_labels, metrics):
        value_label.config(text=round(metric(y_test, y_pred) * 100, 2))
        # Raise above any other widget that was placed at the same spot.
        value_label.lift()

In [13]:
def mult_acc(root):
    """Show Multinomial Naive Bayes test-set accuracy, precision and recall in ``root``.

    A fresh ``MultinomialNB(alpha=0.1)`` is fitted on ``X_train``/``y_train``
    and evaluated on ``X_test``/``y_test``. Each score is displayed as a
    percentage rounded to two decimals at relx=0.85 and rely=0.45 / 0.55 / 0.65.

    Parameters
    ----------
    root : tkinter widget
        Container in which the three metric labels are placed.
    """
    classifier = MultinomialNB(alpha=0.1)
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)

    # (metric function, vertical position of its value label)
    metrics = (
        (accuracy_score, 0.45),
        (precision_score, 0.55),
        (recall_score, 0.65),
    )

    # Create the value labels once per window and only update their text
    # afterwards, so earlier numbers cannot show through from underneath.
    value_labels = getattr(root, '_metric_labels', None)
    if value_labels is None:
        value_labels = []
        for _, rely in metrics:
            value_label = Label(root, relief="solid")
            value_label.place(relx=0.85, rely=rely)
            value_labels.append(value_label)
        root._metric_labels = value_labels

    for value_label, (metric, _) in zip(value_labels, metrics):
        value_label.config(text=round(metric(y_test, y_pred) * 100, 2))
        # Raise above any other widget that was placed at the same spot.
        value_label.lift()

In [14]:
def bern_acc(root):
    """Show Bernoulli Naive Bayes test-set accuracy, precision and recall in ``root``.

    A fresh ``BernoulliNB(alpha=0.8)`` is fitted on ``X_train``/``y_train``
    and evaluated on ``X_test``/``y_test``. Each score is displayed as a
    percentage rounded to two decimals at relx=0.85 and rely=0.45 / 0.55 / 0.65.

    Parameters
    ----------
    root : tkinter widget
        Container in which the three metric labels are placed.
    """
    classifier = BernoulliNB(alpha=0.8)
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)

    # (metric function, vertical position of its value label)
    metrics = (
        (accuracy_score, 0.45),
        (precision_score, 0.55),
        (recall_score, 0.65),
    )

    # Create the value labels once per window and only update their text
    # afterwards, so earlier numbers cannot show through from underneath.
    value_labels = getattr(root, '_metric_labels', None)
    if value_labels is None:
        value_labels = []
        for _, rely in metrics:
            value_label = Label(root, relief="solid")
            value_label.place(relx=0.85, rely=rely)
            value_labels.append(value_label)
        root._metric_labels = value_labels

    for value_label, (metric, _) in zip(value_labels, metrics):
        value_label.config(text=round(metric(y_test, y_pred) * 100, 2))
        # Raise above any other widget that was placed at the same spot.
        value_label.lift()


    

In [15]:
def log_acc(root):
    """Show logistic-regression test-set accuracy, precision and recall in ``root``.

    A fresh ``LogisticRegression(C=1.5)`` is fitted on ``X_train``/``y_train``
    and evaluated on ``X_test``/``y_test``. Each score is displayed as a
    percentage rounded to two decimals at relx=0.85 and rely=0.45 / 0.55 / 0.65.

    Parameters
    ----------
    root : tkinter widget
        Container in which the three metric labels are placed.
    """
    classifier = linear_model.LogisticRegression(C=1.5)
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)

    # (metric function, vertical position of its value label)
    metrics = (
        (accuracy_score, 0.45),
        (precision_score, 0.55),
        (recall_score, 0.65),
    )

    # Create the value labels once per window and only update their text
    # afterwards, so earlier numbers cannot show through from underneath.
    value_labels = getattr(root, '_metric_labels', None)
    if value_labels is None:
        value_labels = []
        for _, rely in metrics:
            value_label = Label(root, relief="solid")
            value_label.place(relx=0.85, rely=rely)
            value_labels.append(value_label)
        root._metric_labels = value_labels

    for value_label, (metric, _) in zip(value_labels, metrics):
        value_label.config(text=round(metric(y_test, y_pred) * 100, 2))
        # Raise above any other widget that was placed at the same spot.
        value_label.lift()


In [19]:
def evaluate2():
    """Submit-button callback: run every classifier on the text in the ``rev`` entry.

    Reads the current entry text once and passes it, together with
    ``main_window``, to the Gaussian, Multinomial, Bernoulli and logistic
    predictors in that order.
    """
    review_text = rev.get()
    for predictor in (gaussian, multinom, bernoulli, logistic):
        predictor(main_window, review_text)

def gaus_acc2():
    """Zero-argument button callback: run ``gaus_acc`` against ``main_window``."""
    gaus_acc(main_window)

def mult_acc2():
    """Zero-argument button callback: run ``mult_acc`` against ``main_window``."""
    mult_acc(main_window)

def bern_acc2():
    """Zero-argument button callback: run ``bern_acc`` against ``main_window``."""
    bern_acc(main_window)

def log_acc2():
    """Zero-argument button callback: run ``log_acc`` against ``main_window``."""
    log_acc(main_window)

from tkinter import *
from tkinter import ttk

# ---- Main window ----------------------------------------------------------
main_window = Tk()
main_window.geometry("900x600")
main_window.configure(background='#99ff99')

bold_font = "times 15 bold"
underlined_font = "times 15 bold underline"

# ---- Left side: review entry and per-model prediction captions ------------
Label(main_window, text="Review Sentiment Predictor", font=bold_font).place(relx=0.19, rely=0.08)
Label(main_window, text="Enter a review:", font=bold_font).place(relx=0.1, rely=0.2)

# Entry read by evaluate2() when the Submit button is pressed.
rev = ttk.Entry(main_window, width=40)
rev.place(relx=0.3, rely=0.2)

Button(main_window, text="Submit", command=evaluate2).place(relx=0.3, rely=0.3)

# One caption per model; the predictors place their verdicts on the same rows.
for caption, row in (
    ("Prediction using Gaussian Model", 0.4),
    ("Prediction using Multinomial Model", 0.5),
    ("Prediction using Bernoulli Model", 0.6),
    ("Prediction using Logistic Model", 0.7),
):
    Label(main_window, text=caption, font=bold_font).place(relx=0.1, rely=row)

# ---- Right side: model evaluation buttons and metric captions -------------
Label(main_window, text="Check Accuracy of models", font=bold_font).place(relx=0.7, rely=0.08)

for caption, callback, col, row in (
    ("Gaussian", gaus_acc2, 0.7, 0.2),
    ("Multinomial", mult_acc2, 0.7, 0.3),
    ("Bernoulli", bern_acc2, 0.8, 0.2),
    ("Logistic", log_acc2, 0.8, 0.3),
):
    Button(main_window, text=caption, command=callback).place(relx=col, rely=row)

# Metric names; the *_acc functions place the values beside them at relx=0.85.
for caption, row in (
    ("Accuracy", 0.45),
    ("Precision", 0.55),
    ("Recall", 0.65),
):
    Label(main_window, text=caption, font=underlined_font).place(relx=0.7, rely=row)

# Blocks this cell until the window is closed.
main_window.mainloop()