In [None]:
# Import required libraries

import pandas as pd
import numpy as np
import nltk
import re  # Remove unwanted char.

nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


# Text processing (example)

In [None]:
# Sample sentence used to demonstrate each text-preprocessing step below
a = "Hii all!!!, I'm Hari Prabu. Welcome to AI Session"
a

"Hii all!!!, I'm Hari Prabu. Welcome to AI Session"

In [None]:
# Step 1 - Strip special characters with a regular expression:
# every character that is not an ASCII letter is replaced by a single space.

reg = re.compile('[^a-zA-Z]').sub(' ', a)
reg

'Hii all     I m Hari Prabu  Welcome to AI Session'

In [None]:
# Step 2 - Convert the entire text to lowercase, so words that differ only
# in capitalisation end up as the same token

lw = reg.lower()
lw

'hii all     i m hari prabu  welcome to ai session'

In [None]:
# Step 3 - Split the text into a list of words.
# split() with no arguments splits on any run of whitespace, so the repeated
# spaces left by the character removal do not produce empty tokens.

wd = lw.split()
wd

['hii', 'all', 'i', 'm', 'hari', 'prabu', 'welcome', 'to', 'ai', 'session']

In [None]:
# Show NLTK's English stop-word list (common words such as 'i', 'the', 'to')
print(stopwords.words('english'))

['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', '

In [None]:
# Step 4 - Removing stopwords
# The stop-word set is built once, outside the comprehension, so the word list
# is not re-read and re-converted to a set for every token being checked.

english_stopwords = set(stopwords.words('english'))
sw = [w for w in wd if w not in english_stopwords]
sw

['hii', 'hari', 'prabu', 'welcome', 'ai', 'session']

In [None]:
# Example: a plain for-loop that prints each element multiplied by 20,
# shown for contrast with the list comprehension in the next cell.

l1 = [1, 2, 3]

for n in l1:
  print(20 * n)

20
40
60


In [None]:
# List comprehension: the same values the loop above printed, collected
# into a new list in a single expression
o = [i*20 for i in l1]
o

[20, 40, 60]

In [None]:
# Step 5 - Stemming: reduce every remaining word to its stem
# (for example 'welcome' becomes 'welcom').

ps = PorterStemmer()
p = list(map(ps.stem, sw))
p

['hii', 'hari', 'prabu', 'welcom', 'ai', 'session']

In [None]:
# Step 6 - Joining the words
# The stemmed tokens are glued back into one space-separated string.

st = ' '.join(p)
st

'hii hari prabu welcom ai session'

In [None]:
# Step 7 - Convert text to numbers (word counts per document)

from sklearn.feature_extraction.text import CountVectorizer

cv = CountVectorizer()
# fit_transform takes a collection of documents, hence the one-element list.
# toarray() turns the result into a regular NumPy array for display; the
# result is only displayed here, not stored.
cv.fit_transform([st]).toarray()

array([[1, 1, 1, 1, 1, 1]])

# Model Building

In [None]:
# Load the restaurant reviews from a tab-separated file into a DataFrame
df = pd.read_csv('/content/Restaurant_Reviews.tsv', sep='\t')
df

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1
...,...,...
995,I think food should have flavor and texture an...,0
996,Appetite instantly gone.,0
997,Overall I was not impressed and would not go b...,0
998,"The whole experience was underwhelming, and I ...",0


In [None]:
# Clean every review with the same steps demonstrated in the example section.
# - The stop-word set is built once, instead of once per word of every review.
# - The loop walks the 'Review' column itself rather than a hard-coded
#   range(0, 1000), so it covers exactly the rows present in df and stays
#   aligned with the labels taken from the same frame.
english_stopwords = set(stopwords.words('english'))

data = []

for raw_review in df['Review']:
  letters_only = re.sub('[^a-zA-Z]',' ',raw_review) # Replace every non-letter with a space
  words = letters_only.lower().split() # Lowercase, then split on whitespace
  stems = [ps.stem(w) for w in words if w not in english_stopwords] # Drop stop words, stem the rest
  data.append(' '.join(stems)) # Store the cleaned review as one string

data

['wow love place',
 'crust good',
 'tasti textur nasti',
 'stop late may bank holiday rick steve recommend love',
 'select menu great price',
 'get angri want damn pho',
 'honeslti tast fresh',
 'potato like rubber could tell made ahead time kept warmer',
 'fri great',
 'great touch',
 'servic prompt',
 'would go back',
 'cashier care ever say still end wayyy overpr',
 'tri cape cod ravoli chicken cranberri mmmm',
 'disgust pretti sure human hair',
 'shock sign indic cash',
 'highli recommend',
 'waitress littl slow servic',
 'place worth time let alon vega',
 'like',
 'burritto blah',
 'food amaz',
 'servic also cute',
 'could care less interior beauti',
 'perform',
 'right red velvet cake ohhh stuff good',
 'never brought salad ask',
 'hole wall great mexican street taco friendli staff',
 'took hour get food tabl restaur food luke warm sever run around like total overwhelm',
 'worst salmon sashimi',
 'also combo like burger fri beer decent deal',
 'like final blow',
 'found place acc

In [None]:
# Build the feature matrix from the cleaned reviews.
# fit_transform re-fits cv on `data`, so later cv.transform(...) calls use the
# vocabulary learned here; toarray() converts the result to a NumPy array.
x = cv.fit_transform(data).toarray()
x

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [None]:
# Target labels: the 'Liked' column as a NumPy array
# (presumably 1 = positive review, 0 = negative review; confirm against the dataset)
y = df['Liked'].values
y

array([1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1,
       0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1,
       1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1,
       1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0,
       0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1,
       0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1,
       0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1,

In [None]:
# ANN Model

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [None]:
# ANN Block
# Feed-forward network: two ReLU hidden layers (1500 and 3000 units) followed
# by a single sigmoid unit.

model = Sequential([
    Dense(1500, activation='relu'),
    Dense(3000, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss,
# and accuracy reported as the metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs on the full feature matrix and label array.
# NOTE(review): no validation split or held-out test set is passed here, so the
# accuracy reported during training is measured on the training data only.
model.fit(x,y,epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f65f5c88b10>

In [None]:
# Persist the trained model to the file 'NLP.h5' (relative path)
model.save('NLP.h5')

# Testing

In [None]:
# Test 1 - run a positive review through the same preprocessing steps used for
# training, printing the intermediate result of every step, then predict.

# Built once here rather than once per word inside the comprehension below.
english_stopwords = set(stopwords.words('english'))

text = 'Awesome food!!! I loved the food!!! I liked it'
print(text)
print('*'*50)
text = re.sub('[^a-zA-Z]',' ',text)  # replace non-letters with spaces
print(text)
print('*'*50)
text = text.lower()
print(text)
print('*'*50)
text = text.split()
print(text)
print('*'*50)
text = [ps.stem(w) for w in text if w not in english_stopwords]  # drop stop words, stem the rest
print(text)
print('*'*50)
text = ' '.join(text)
print(text)
print('*'*50)
# transform (not fit_transform): reuse the vocabulary cv learned from the training reviews
text = cv.transform([text]).toarray()
print(text)
print('*'*50)
pred = model.predict(text)
print(pred)
print('*'*50)
# predict returns a 2-D array with one row per input; pull the single
# probability out as a scalar before comparing it with the 0.5 threshold.
if pred[0][0] > 0.5:
  print('Positive')
else:
  print('Negative')



Awesome food!!! I loved the food!!! I liked it
**************************************************
Awesome food    I loved the food    I liked it
**************************************************
awesome food    i loved the food    i liked it
**************************************************
['awesome', 'food', 'i', 'loved', 'the', 'food', 'i', 'liked', 'it']
**************************************************
['awesom', 'food', 'love', 'food', 'like']
**************************************************
awesom food love food like
**************************************************
[[0 0 0 ... 0 0 0]]
**************************************************
[[0.99999076]]
**************************************************
Possitive


In [None]:
# Test 2 - run a negative review through the same preprocessing steps used for
# training, printing the intermediate result of every step, then predict.

# Built once here rather than once per word inside the comprehension below.
english_stopwords = set(stopwords.words('english'))

text = "Worst food. Waste of money. Don't visit again"
print(text)
print('*'*50)
text = re.sub('[^a-zA-Z]',' ',text)  # replace non-letters with spaces
print(text)
print('*'*50)
text = text.lower()
print(text)
print('*'*50)
text = text.split()
print(text)
print('*'*50)
text = [ps.stem(w) for w in text if w not in english_stopwords]  # drop stop words, stem the rest
print(text)
print('*'*50)
text = ' '.join(text)
print(text)
print('*'*50)
# transform (not fit_transform): reuse the vocabulary cv learned from the training reviews
text = cv.transform([text]).toarray()
print(text)
print('*'*50)
pred = model.predict(text)
print(pred)
print('*'*50)
# predict returns a 2-D array with one row per input; pull the single
# probability out as a scalar before comparing it with the 0.5 threshold.
if pred[0][0] > 0.5:
  print('Positive')
else:
  print('Negative')



Worst food. Waste of money. Don't visit again
**************************************************
Worst food  Waste of money  Don t visit again
**************************************************
worst food  waste of money  don t visit again
**************************************************
['worst', 'food', 'waste', 'of', 'money', 'don', 't', 'visit', 'again']
**************************************************
['worst', 'food', 'wast', 'money', 'visit']
**************************************************
worst food wast money visit
**************************************************
[[0 0 0 ... 0 0 0]]
**************************************************
[[1.0693399e-07]]
**************************************************
Negative
