In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt


from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix,classification_report
from sklearn.feature_extraction.text import CountVectorizer
import tensorflow as tf


import re
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

In [None]:
# Load the tab-separated reviews file. quoting=3 is csv.QUOTE_NONE, so quote
# characters inside a review are read as ordinary text instead of being
# interpreted as field quoting.
data = pd.read_csv('Restaurant_Reviews.tsv', delimiter='\t', quoting=3)

In [None]:
# Preview the first rows of the loaded table.
data.head()

In [None]:
# (rows, columns) of the loaded table.
data.shape

In [None]:
# data cleaning
#
# Each review is reduced to alphabetic, lower-cased tokens; English stop words
# are dropped and the remaining tokens are Porter-stemmed. 'not' is removed
# from the stop-word list so it survives cleaning (presumably because negation
# matters for sentiment).

# Loop-invariant setup: build the stemmer and the stop-word set once instead
# of once per review / once per word.
ps = PorterStemmer()
all_words = stopwords.words('english')
all_words.remove('not')
stop_words = set(all_words)

corpus = []

# Iterate over every row that is actually present rather than a hard-coded
# 1000, so the corpus always has as many entries as the label column.
# The per-review names avoid `review`, which a later cell defines as a function.
for raw_text in data['Review']:
  tokens = re.sub('[^a-zA-Z]', ' ', raw_text).lower().split()
  stems = [ps.stem(word) for word in tokens if word not in stop_words]
  corpus.append(' '.join(stems))

In [None]:
# Show the cleaned reviews.
# NOTE(review): this prints the whole corpus list; a slice such as corpus[:5]
# would keep the notebook output short.
print(corpus)

In [None]:
# bag of words model
#
# Limit the vocabulary to at most 1500 terms (max_features) and materialise
# the document-term counts as a dense array.
# NOTE(review): the vocabulary is fitted on the full corpus, i.e. before the
# train/test split performed further down.
cv = CountVectorizer(max_features=1500)
X = cv.fit_transform(corpus).toarray()

In [None]:
# Display the bag-of-words feature matrix.
X

In [None]:
# Labels: the last column of the table, as a NumPy array.
y = data.iloc[:,-1].values

In [None]:
# Display the label array.
y

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle, and therefore the split, repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = .2, random_state = 365)

In [None]:
# Baseline model: Gaussian Naive Bayes fitted on the training split.
# NOTE(review): this import lives outside the notebook's import cell.
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train)

In [None]:
# Naive Bayes predictions for the held-out test rows.
y_pred = classifier.predict(X_test)

In [None]:
# Display the Naive Bayes predictions.
y_pred

In [None]:
# Accuracy of the Naive Bayes predictions against the true test labels.
accuracy_score(y_test, y_pred)

In [None]:
# Confusion matrix for the Naive Bayes predictions (true labels passed first).
confusion_matrix(y_test, y_pred)

In [None]:
# Text report of the per-class metrics for the Naive Bayes predictions.
print(classification_report(y_test, y_pred))

In [None]:
# Side-by-side view: column 0 holds the predicted label, column 1 the true label.
print(np.column_stack((y_pred, y_test)))

In [None]:
# Feed-forward classifier: three 500-unit ReLU hidden layers followed by a
# single sigmoid output unit.
model = tf.keras.models.Sequential([
  *(tf.keras.layers.Dense(units=500, activation='relu') for _ in range(3)),
  tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, and accuracy
# tracked as a metric.
model.compile(optimizer='adam', loss='binary_crossentropy',metrics=['accuracy'])

In [None]:
# Train for 100 epochs with batches of 128. R keeps the object returned by
# fit(); its .history dict is what the plotting cell reads later.
R = model.fit(X_train, y_train, batch_size=128, epochs=100)

In [None]:
# Raw network outputs for the held-out test rows.
y_pred_2 = model.predict(X_test)

In [None]:
# Display the raw network outputs.
y_pred_2

In [None]:
# Network outputs for the test rows, to be thresholded in the next cell.
# NOTE(review): this is the same predict call that produced y_pred_2.
prediction = model.predict(X_test)

In [None]:
# Threshold the outputs at 0.5, turning them into booleans.
prediction = (prediction>0.5)

In [None]:
# Display the thresholded predictions.
prediction

In [None]:
def review(pred):
  """Print a sentiment label for every value in ``pred``.

  Args:
    pred: Iterable of scores or booleans. A value greater than 0.5 prints
      'positive review'; anything else prints 'negative review'.
  """
  for score in pred:
    label = 'positive review' if score > 0.5 else 'negative review'
    print(label)

In [None]:
# Print the label for the test sample at index 6.
review(prediction[6])

In [None]:
# Training curves from the fit history: accuracy on the left, loss on the right.
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(16,6))

ax_acc.plot(R.history['accuracy'], label='accuracy during model training', color='green')
ax_acc.set_xlabel('Epochs')
ax_acc.set_title('ACCURACY', fontweight='bold')
ax_acc.legend()

ax_loss.plot(R.history['loss'], label='loss during model training', color='r')
ax_loss.set_xlabel('Epochs')
ax_loss.set_title('LOSS', fontweight='bold')
ax_loss.legend()

fig.tight_layout()

In [None]:
def review_system(rvs=None):
  """Classify a single restaurant review as positive or negative.

  The text receives the same cleaning as the training corpus (letters only,
  lower-cased, English stop words removed except 'not', Porter stemming), is
  vectorised with the fitted ``cv`` and scored by the trained ``model``.

  Args:
    rvs: Review text to classify. When left as None the user is prompted for
      it, so existing ``review_system()`` calls keep working interactively.

  Returns:
    'Positive Review' when the model output is above 0.5, otherwise
    'Negative Review'.
  """
  if rvs is None:
    rvs = input('please enter your review: ')
  rvs = re.sub('[^a-zA-Z]', ' ', rvs)
  rvs = rvs.lower()
  rvs = rvs.split()
  ps = PorterStemmer()
  # Build the stop-word set once rather than once per word.
  stop_words = set(stopwords.words('english')) - {'not'}
  rvs = [ps.stem(word) for word in rvs if word not in stop_words]
  rvs = ' '.join(rvs)
  new_X_test = cv.transform([rvs]).toarray()
  # A single input row yields a single output value, so read it directly
  # instead of looping and returning on the first iteration.
  score = model.predict(new_X_test)[0][0]
  if score > 0.5:
    return 'Positive Review'
  return 'Negative Review'

In [None]:
# Interactive check: prompts for a review and shows the predicted label.
review_system()

In [None]:
# Interactive check: prompts for a review and shows the predicted label.
review_system()

In [None]:
# Interactive check: prompts for a review and shows the predicted label.
review_system()

In [None]:
# Interactive check: prompts for a review and shows the predicted label.
review_system()

In [None]:
# Persist the trained network to 'Review_System.h5' in the working directory.
# NOTE(review): the .h5 suffix presumably selects Keras' HDF5 format — confirm
# that is the intended format for the installed Keras version.
model.save('Review_System.h5')

In [None]:
# Interactive check after saving: prompts for a review and shows the predicted label.
review_system()