## NLP on Restaurant reviews

![Restaurant review](restaurant.jpg)

In [29]:
import numpy as np
import pandas as pd

dataset=pd.read_csv("Restaurant_reviews.tsv",sep="\t",quoting=3)
dataset.head(30)

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1
5,Now I am getting angry and I want my damn pho.,0
6,Honeslty it didn't taste THAT fresh.),0
7,The potatoes were like rubber and you could te...,0
8,The fries were great too.,1
9,A great touch.,1


In [30]:
import re
import nltk

## Download stopwords

In [2]:
nltk.download("stopwords")

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Shahid\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [3]:
review=re.sub("[^a-zA-Z]"," ",dataset["Review"][0])
review

'Wow    Loved this place '

In [4]:
review=review.split()
review

['Wow', 'Loved', 'this', 'place']

## remove unwanted words using nltk

In [32]:
from nltk.corpus import stopwords
review=[word for word in review if word not in set(stopwords.words('english'))]
review

['wow', 'loved', 'place']

In [33]:
from nltk.stem.porter import PorterStemmer
ps=PorterStemmer()

### remove affixes in words

In [7]:
review=[ps.stem(word) for word in review if word not in set(stopwords.words('english'))]
review=' '.join(review)
review

'wow love place'

## working in entire Dataset

In [34]:
corpus=[]
for i in range(0,1000):
    review=re.sub("[^a-zA-Z]"," ",dataset["Review"][i])
    review=review.lower()
    review=review.split()
    review=[ps.stem(word) for word in review if word not in set(stopwords.words('english'))]
    corpus.append(' '.join(review))

In [35]:
corpus

['wow love place',
 'crust good',
 'tasti textur nasti',
 'stop late may bank holiday rick steve recommend love',
 'select menu great price',
 'get angri want damn pho',
 'honeslti tast fresh',
 'potato like rubber could tell made ahead time kept warmer',
 'fri great',
 'great touch',
 'servic prompt',
 'would go back',
 'cashier care ever say still end wayyy overpr',
 'tri cape cod ravoli chicken cranberri mmmm',
 'disgust pretti sure human hair',
 'shock sign indic cash',
 'highli recommend',
 'waitress littl slow servic',
 'place worth time let alon vega',
 'like',
 'burritto blah',
 'food amaz',
 'servic also cute',
 'could care less interior beauti',
 'perform',
 'right red velvet cake ohhh stuff good',
 'never brought salad ask',
 'hole wall great mexican street taco friendli staff',
 'took hour get food tabl restaur food luke warm sever run around like total overwhelm',
 'worst salmon sashimi',
 'also combo like burger fri beer decent deal',
 'like final blow',
 'found place acc

### creating a bag of words model

In [36]:
from sklearn.feature_extraction.text import CountVectorizer
cv=CountVectorizer(max_features=1500)
x=cv.fit_transform(corpus).toarray()
x.shape

(1000, 1500)

In [37]:
#dependent variable
y=dataset.iloc[:,1].values

In [None]:
# using RandomForest Classifier

In [36]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=0.2,random_state=0)

In [47]:
classifier=RandomForestClassifier(n_estimators=70)
classifier.fit(xtrain,ytrain)
ypred=classifier.predict(xtest)

In [48]:
from sklearn.metrics import confusion_matrix,accuracy_score
result=confusion_matrix(ytest,ypred)
print(accuracy_score(ytest,ypred))
print(result)

0.705
[[85 12]
 [47 56]]


## using hyperParameter Optimizer

In [1]:
import pandas as pd
import numpy as np
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
import re

In [2]:
dataset=pd.read_csv("Restaurant_Reviews.tsv",sep="\t",quoting=3)
dataset.shape

(1000, 2)

In [3]:
corpus=[]
ps=PorterStemmer()
for i in range(0,1000):
    review=re.sub("[^a-zA-Z]"," ",dataset["Review"][i])
    review=review.lower()
    review=review.split()
    review=[ps.stem(word) for word in review if word not in set(stopwords.words('english'))]
    corpus.append(' '.join(review))

In [4]:
from sklearn.feature_extraction.text import CountVectorizer
cv=CountVectorizer(max_features=1500)
x=cv.fit_transform(corpus).toarray()

In [5]:
x.shape

(1000, 1500)

In [6]:
y=dataset.iloc[:,1].values

In [38]:
n_estimators=[int(x) for x in np.linspace(start=100,stop=1200,num=12)]
max_features=['auto','sqrt']
max_depth=[int(y) for y in np.linspace(start=5,stop=30,num=6)]
min_sample_split=[2,5,10,15,20,100]
min_sample_leaf=[1,2,5,10]

r_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_sample_split,
               'min_samples_leaf': min_sample_leaf }

In [39]:
from sklearn.ensemble import RandomForestClassifier
classifier_rf=RandomForestClassifier()
from sklearn.model_selection import RandomizedSearchCV
regressor_rs=RandomizedSearchCV(estimator=classifier_rf,param_distributions=r_grid,scoring='neg_mean_squared_error',cv=5,n_iter=10,random_state=0,verbose=2,n_jobs=-1)
regressor_rs.fit(x,y)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:  1.7min finished


RandomizedSearchCV(cv=5, error_score=nan,
                   estimator=RandomForestClassifier(bootstrap=True,
                                                    ccp_alpha=0.0,
                                                    class_weight=None,
                                                    criterion='gini',
                                                    max_depth=None,
                                                    max_features='auto',
                                                    max_leaf_nodes=None,
                                                    max_samples=None,
                                                    min_impurity_decrease=0.0,
                                                    min_impurity_split=None,
                                                    min_samples_leaf=1,
                                                    min_samples_split=2,
                                                    min_weight_fraction_leaf=0.0,
               

In [40]:
from sklearn.model_selection import train_test_split
xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=0.2,random_state=0)

In [41]:
ypred=regressor_rs.predict(xtest)

In [42]:
from sklearn.metrics import confusion_matrix,accuracy_score
result=confusion_matrix(ytest,ypred)
print(accuracy_score(ytest,ypred))
print(result)

0.875
[[94  3]
 [22 81]]


## using naivebayes

In [46]:
import re
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

dataset=pd.read_csv("Restaurant_reviews.tsv",sep="\t",quoting=3)
corpus=[]
ps=PorterStemmer()
for i in range(0,1000):
    review=re.sub("[^a-z A-Z]"," ",dataset["Review"][i])
    review=review.lower()
    review=review.split()
    review=[ps.stem(word) for word in review if word not in set(stopwords.words("english"))]
    corpus.append(" ".join(review))
corpus

['wow love place',
 'crust good',
 'tasti textur nasti',
 'stop late may bank holiday rick steve recommend love',
 'select menu great price',
 'get angri want damn pho',
 'honeslti tast fresh',
 'potato like rubber could tell made ahead time kept warmer',
 'fri great',
 'great touch',
 'servic prompt',
 'would go back',
 'cashier care ever say still end wayyy overpr',
 'tri cape cod ravoli chicken cranberri mmmm',
 'disgust pretti sure human hair',
 'shock sign indic cash',
 'highli recommend',
 'waitress littl slow servic',
 'place worth time let alon vega',
 'like',
 'burritto blah',
 'food amaz',
 'servic also cute',
 'could care less interior beauti',
 'perform',
 'right red velvet cake ohhh stuff good',
 'never brought salad ask',
 'hole wall great mexican street taco friendli staff',
 'took hour get food tabl restaur food luke warm sever run around like total overwhelm',
 'worst salmon sashimi',
 'also combo like burger fri beer decent deal',
 'like final blow',
 'found place acc

In [47]:
from sklearn.feature_extraction.text import CountVectorizer
cv=CountVectorizer(max_features=1500)
x=cv.fit_transform(corpus).toarray()
x.shape

(1000, 1500)

In [7]:
from sklearn.model_selection import train_test_split
xtrain,xtest,ytrain,ytest=train_test_split(x,dataset.iloc[:,-1],test_size=0.1,random_state=0)

In [43]:
from sklearn.naive_bayes import GaussianNB
classifier=GaussianNB()
classifier.fit(xtrain,ytrain)

GaussianNB(priors=None, var_smoothing=1e-09)

In [44]:
ypred=classifier.predict(xtest)

from sklearn.metrics import confusion_matrix,accuracy_score
print(confusion_matrix(ytest,ypred))
print("accuracy is : ",accuracy_score(ytest,ypred)*100)

[[55 42]
 [12 91]]
accuracy is :  73.0


In [32]:
x.shape

(1000, 1500)

# using ANN

In [45]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

classifier=Sequential()
classifier.add(Dense(units=750,kernel_initializer='uniform',activation='relu',input_dim=1500))
classifier.add(Dense(units=750,kernel_initializer='uniform',activation='relu'))
classifier.add(Dense(units=1,kernel_initializer='uniform',activation='sigmoid'))

In [46]:
classifier.compile(optimizer="adam",loss="binary_crossentropy",metrics=['accuracy'])

In [49]:
classifier.fit(xtrain,ytrain,batch_size=10,epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x1c10098e448>

In [50]:
ypred=classifier.predict(xtest)

from sklearn.metrics import confusion_matrix,accuracy_score
print(confusion_matrix(ytest,(ypred>0.5)))
print("accuracy is : ",accuracy_score(ytest,(ypred>0.5))*100)

[[ 97   0]
 [  0 103]]
accuracy is :  100.0


In [66]:
def feedback(review):
    review=re.sub("[^a-z A-Z]"," ",review)
    review=review.lower()
    review=review.split()
    review=[ps.stem(word) for word in review if word not in set(stopwords.words("english"))]
    review=" ".join(review)
    review=cv.transform([review]).toarray()
    return classifier.predict(review)
    

In [72]:
review=input("Feedback : ")
result=feedback(review)
if result<=0.5:
    print("Negative")
else:
    print("Positive")

Feedback : it was delicious
Positive


In [1]:
import nltk
nltk.download()

showing info https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml


True

In [2]:
paragraph = """I have three visions for India. In 3000 years of our history, people from all over 
               the world have come and invaded us, captured our lands, conquered our minds. 
               From Alexander onwards, the Greeks, the Turks, the Moguls, the Portuguese, the British,
               the French, the Dutch, all of them came and looted us, took over what was ours. 
               Yet we have not done this to any other nation. We have not conquered anyone. 
               We have not grabbed their land, their culture, 
               their history and tried to enforce our way of life on them. 
               Why? Because we respect the freedom of others.That is why my 
               first vision is that of freedom. I believe that India got its first vision of 
               this in 1857, when we started the War of Independence. It is this freedom that
               we must protect and nurture and build on. If we are not free, no one will respect us.
               My second vision for India’s development. For fifty years we have been a developing nation.
               It is time we see ourselves as a developed nation. We are among the top 5 nations of the world
               in terms of GDP. We have a 10 percent growth rate in most areas. Our poverty levels are falling.
               Our achievements are being globally recognised today. Yet we lack the self-confidence to
               see ourselves as a developed nation, self-reliant and self-assured. Isn’t this incorrect?
               I have a third vision. India must stand up to the world. Because I believe that unless India 
               stands up to the world, no one will respect us. Only strength respects strength. We must be 
               strong not only as a military power but also as an economic power. Both must go hand-in-hand. 
               My good fortune was to have worked with three great minds. Dr. Vikram Sarabhai of the Dept. of 
               space, Professor Satish Dhawan, who succeeded him and Dr. Brahm Prakash, father of nuclear material.
               I was lucky to have worked with all three of them closely and consider this the great opportunity of my life. 
               I see four milestones in my career"""

In [7]:
sentence=nltk.sent_tokenize(paragraph)

In [11]:
sentence[0]

'I have three visions for India.'

In [12]:
words=nltk.word_tokenize(paragraph)

In [15]:
words[1]

'have'

In [16]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

In [17]:
sentences=nltk.sent_tokenize(paragraph)
ps=PorterStemmer()
for i in range(len(sentences)):
    words=nltk.word_tokenize(sentences[i])
    words=[ps.stem(w) for w in words if w not in set(stopwords.words("english"))]
    sentences[i]=" ".join(words)

In [18]:
sentences

['I three vision india .',
 'In 3000 year histori , peopl world come invad us , captur land , conquer mind .',
 'from alexand onward , greek , turk , mogul , portugues , british , french , dutch , came loot us , took .',
 'yet done nation .',
 'We conquer anyon .',
 'We grab land , cultur , histori tri enforc way life .',
 'whi ?',
 'becaus respect freedom others.that first vision freedom .',
 'I believ india got first vision 1857 , start war independ .',
 'It freedom must protect nurtur build .',
 'If free , one respect us .',
 'My second vision india ’ develop .',
 'for fifti year develop nation .',
 'It time see develop nation .',
 'We among top 5 nation world term gdp .',
 'We 10 percent growth rate area .',
 'our poverti level fall .',
 'our achiev global recognis today .',
 'yet lack self-confid see develop nation , self-reli self-assur .',
 'isn ’ incorrect ?',
 'I third vision .',
 'india must stand world .',
 'becaus I believ unless india stand world , one respect us .',
 'onl

In [2]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import re

In [3]:
paragraph = """I have three visions for India. In 3000 years of our history, people from all over 
               the world have come and invaded us, captured our lands, conquered our minds. 
               From Alexander onwards, the Greeks, the Turks, the Moguls, the Portuguese, the British,
               the French, the Dutch, all of them came and looted us, took over what was ours. 
               Yet we have not done this to any other nation. We have not conquered anyone. 
               We have not grabbed their land, their culture, 
               their history and tried to enforce our way of life on them. 
               Why? Because we respect the freedom of others.That is why my 
               first vision is that of freedom. I believe that India got its first vision of 
               this in 1857, when we started the War of Independence. It is this freedom that
               we must protect and nurture and build on. If we are not free, no one will respect us.
               My second vision for India’s development. For fifty years we have been a developing nation.
               It is time we see ourselves as a developed nation. We are among the top 5 nations of the world
               in terms of GDP. We have a 10 percent growth rate in most areas. Our poverty levels are falling.
               Our achievements are being globally recognised today. Yet we lack the self-confidence to
               see ourselves as a developed nation, self-reliant and self-assured. Isn’t this incorrect?
               I have a third vision. India must stand up to the world. Because I believe that unless India 
               stands up to the world, no one will respect us. Only strength respects strength. We must be 
               strong not only as a military power but also as an economic power. Both must go hand-in-hand. 
               My good fortune was to have worked with three great minds. Dr. Vikram Sarabhai of the Dept. of 
               space, Professor Satish Dhawan, who succeeded him and Dr. Brahm Prakash, father of nuclear material.
               I was lucky to have worked with all three of them closely and consider this the great opportunity of my life. 
               I see four milestones in my career"""

In [18]:
sentences=nltk.sent_tokenize(paragraph)

for i in range(len(sentences)):
    review=re.sub("[^a-zA-Z,.]"," ",sentences[i])
    review=review.lower()
    review=review.split()
    sentences[i]=" ".join(review)

In [22]:
sentences

[['three', 'visions', 'india', '.'],
 ['years',
  'history',
  ',',
  'people',
  'world',
  'come',
  'invaded',
  'us',
  ',',
  'captured',
  'lands',
  ',',
  'conquered',
  'minds',
  '.'],
 ['alexander',
  'onwards',
  ',',
  'greeks',
  ',',
  'turks',
  ',',
  'moguls',
  ',',
  'portuguese',
  ',',
  'british',
  ',',
  'french',
  ',',
  'dutch',
  ',',
  'came',
  'looted',
  'us',
  ',',
  'took',
  '.'],
 ['yet', 'done', 'nation', '.'],
 ['conquered', 'anyone', '.'],
 ['grabbed',
  'land',
  ',',
  'culture',
  ',',
  'history',
  'tried',
  'enforce',
  'way',
  'life',
  '.'],
 [],
 ['respect', 'freedom', 'others.that', 'first', 'vision', 'freedom', '.'],
 ['believe',
  'india',
  'got',
  'first',
  'vision',
  ',',
  'started',
  'war',
  'independence',
  '.'],
 ['freedom', 'must', 'protect', 'nurture', 'build', '.'],
 ['free', ',', 'one', 'respect', 'us', '.'],
 ['second', 'vision', 'india', 'development', '.'],
 ['fifty', 'years', 'developing', 'nation', '.'],
 ['ti

In [20]:
sentences=[nltk.word_tokenize(word) for word in sentences]

In [21]:
for i in range(len(sentences)):
    sentences[i]=[word for word in sentences[i] if word not in set(stopwords.words("english"))]

In [23]:
from gensim.models import Word2Vec
model=Word2Vec(sentences,min_count=1)

In [24]:
words=model.wv.vocab

In [25]:
model.wv['war']

array([-3.8100462e-03, -3.0845234e-03,  9.3570235e-04,  1.2737379e-03,
       -4.0533845e-03,  1.6059059e-03, -4.0467759e-03, -2.3770980e-04,
        1.0840826e-03,  4.7318637e-03, -2.6343034e-03,  1.5425561e-03,
        4.6325428e-03, -3.6115965e-03,  1.4533793e-03, -1.1521612e-03,
       -4.6320449e-04,  9.9555030e-04, -4.2062341e-03, -3.9379974e-03,
        1.9505057e-04, -8.1896764e-04,  2.0280373e-04, -1.9731440e-03,
       -1.8548113e-03,  1.2761204e-03,  2.8169623e-03, -2.3872154e-03,
       -3.2487290e-04,  4.2594438e-03, -2.8418771e-03,  2.0576376e-03,
       -3.4394523e-03,  9.3026587e-04, -3.8727913e-03, -4.6790168e-03,
        2.3955980e-03,  1.1807533e-03, -5.0041652e-03, -3.2355124e-03,
       -2.5110785e-03, -1.4020343e-03, -1.7846043e-03, -3.5854157e-03,
       -2.0635054e-03, -3.0428830e-03, -1.0415142e-03, -4.9876249e-03,
       -8.2191062e-04, -2.5601939e-03,  2.2413002e-03,  3.5288755e-03,
        4.2098225e-03,  8.6495973e-04, -4.2858813e-03,  1.2016684e-03,
      

In [27]:
model.wv.most_similar('war')

[('succeeded', 0.285161554813385),
 ('greeks', 0.21379354596138),
 ('lands', 0.19154804944992065),
 ('self', 0.1893511563539505),
 ('recognised', 0.18688777089118958),
 ('minds', 0.1801367700099945),
 ('build', 0.16661223769187927),
 ('grabbed', 0.16048914194107056),
 ('also', 0.12853600084781647),
 ('freedom', 0.12660248577594757)]

In [14]:
file=open("HelloWorld.java","r+")

In [15]:
text=file.readlines()

In [16]:
text

['public class HelloWorld\n',
 '{\n',
 '    int x, p,q,hr;\n',
 '    public static void main(String []args)\n',
 '    {\n',
 '        int myNum = 5;\n',
 '    int myFloatNum = 5.99f;         \n',
 "    char myLetter = 'D';\n",
 '    boolean myBool = true;\n',
 '    String myText = "Hello";    \n',
 '    System.out.println(myNum);\n',
 '    System.out.println(myFloatNum);\n',
 '    System.out.println(myLetter);\n',
 '    System.out.println(myBool);\n',
 '    System.out.println(myText);\n',
 '    }\n',
 '    int i,q;\n',
 '}']

In [19]:
noOfLines=0
string=''''''
for content in text:
        noOfLines+=1
        string+=content

In [20]:
string

'public class HelloWorld\n{\n    int x, p,q,hr;\n    public static void main(String []args)\n    {\n        int myNum = 5;\n    int myFloatNum = 5.99f;         \n    char myLetter = \'D\';\n    boolean myBool = true;\n    String myText = "Hello";    \n    System.out.println(myNum);\n    System.out.println(myFloatNum);\n    System.out.println(myLetter);\n    System.out.println(myBool);\n    System.out.println(myText);\n    }\n    int i,q;\n}'

In [21]:
fptr=open("temp.txt","w+")
fptr.write(string)

424

In [1]:
import pandas as pd
import numpy as np
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from nltk.stem import WordNetLemmatizer
import re

In [2]:
dataset=pd.read_csv("Restaurant_reviews.tsv",sep="\t",quoting=3)
dataset.head()

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


In [3]:
x=dataset.iloc[:,:-1]
y=dataset.iloc[:,-1]
x.head()

Unnamed: 0,Review
0,Wow... Loved this place.
1,Crust is not good.
2,Not tasty and the texture was just nasty.
3,Stopped by during the late May bank holiday of...
4,The selection on the menu was great and so wer...


In [4]:
len(x)

1000

In [5]:
x.iloc[0]

Review    Wow... Loved this place.
Name: 0, dtype: object

In [4]:
corpus=[]
lm=WordNetLemmatizer()
for i in range(len(x)):
    review=re.sub("[^a-zA-Z]"," ",dataset["Review"][i])
    review=review.lower()
    review=review.split()
    review=[lm.lemmatize(word) for word in review if word not in stopwords.words("english")]
    review=" ".join(review)
    corpus.append(review)

In [5]:
corpus

['wow loved place',
 'crust good',
 'tasty texture nasty',
 'stopped late may bank holiday rick steve recommendation loved',
 'selection menu great price',
 'getting angry want damn pho',
 'honeslty taste fresh',
 'potato like rubber could tell made ahead time kept warmer',
 'fry great',
 'great touch',
 'service prompt',
 'would go back',
 'cashier care ever say still ended wayyy overpriced',
 'tried cape cod ravoli chicken cranberry mmmm',
 'disgusted pretty sure human hair',
 'shocked sign indicate cash',
 'highly recommended',
 'waitress little slow service',
 'place worth time let alone vega',
 'like',
 'burrittos blah',
 'food amazing',
 'service also cute',
 'could care le interior beautiful',
 'performed',
 'right red velvet cake ohhh stuff good',
 'never brought salad asked',
 'hole wall great mexican street taco friendly staff',
 'took hour get food table restaurant food luke warm sever running around like totally overwhelmed',
 'worst salmon sashimi',
 'also combo like burge

In [8]:
len(corpus)

1000

In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer
tfidf=TfidfVectorizer(max_features=1500)
x=tfidf.fit_transform(corpus).toarray()

In [10]:
x.shape

(1000, 1500)

In [7]:
from sklearn.model_selection import train_test_split
xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=0.2,random_state=0)

In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

classifier=Sequential()
classifier.add(Dense(units=750,kernel_initializer="he_uniform",activation="relu",input_dim=1500))
classifier.add(Dense(units=750,kernel_initializer="he_uniform",activation="relu"))
classifier.add(Dense(units=1,kernel_initializer="he_uniform",activation="sigmoid"))
classifier.compile(optimizer="adam",loss="binary_crossentropy",metrics=["accuracy"])

In [13]:
classifier.fit(xtrain,ytrain,batch_size=10,epochs=30,validation_data=(xtest,ytest))

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x22e93007848>

In [14]:
ypred=classifier.predict(xtest)

from sklearn.metrics import confusion_matrix,accuracy_score
print(confusion_matrix(ytest,(ypred>0.5)))
print("accuracy is : ",accuracy_score(ytest,(ypred>0.5))*100)

[[77 20]
 [34 69]]
accuracy is :  73.0


In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from kerastuner import RandomSearch
import tensorflow

def built_model(hp):
    classifier=Sequential()
    for i in range(hp.Int("num_layers",2,50)):
        classifier.add(Dense(units=hp.Int("units"+str(i),min_value=500,max_value=1500,step=32),
                             kernel_initializer="he_uniform",
                            activation="relu"))
    classifier.add(Dense(1,activation="sigmoid"))
    classifier.compile(optimizer=tensorflow.keras.optimizers.Adam(hp.Choice('Learning_rate',[1e-2, 1e-3])),
                      loss="binary_crossentropy",
                      metrics=["accuracy"])
    return classifier

In [10]:
model=RandomSearch(built_model,
                  objective="val_accuracy",
                  max_trials=10,
                  directory='nlp_model',
                project_name='restaurant')

INFO:tensorflow:Reloading Oracle from existing project nlp_model\restaurant\oracle.json
INFO:tensorflow:Reloading Tuner from nlp_model\restaurant\tuner0.json


In [11]:
model.search(xtrain,ytrain,epochs=20,validation_data=(xtest,ytest))

Trial 1 Complete [00h 05m 10s]
val_accuracy: 0.5149999856948853

Best val_accuracy So Far: 0.8050000071525574
Total elapsed time: 00h 05m 10s
INFO:tensorflow:Oracle triggered exit


In [12]:
best_hps=model.get_best_hyperparameters(num_trials=1)[0]

In [13]:
best_model=model.get_best_models(num_models=1)[0]

In [14]:
best_model.fit(xtrain,ytrain,batch_size=25,
            epochs=20,
            validation_data=(xtest,ytest))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1ee80509b48>

In [15]:
ypred=best_model.predict(xtest)

from sklearn.metrics import confusion_matrix,accuracy_score
print(confusion_matrix(ytest,(ypred>0.5)))
print("accuracy is : ",accuracy_score(ytest,(ypred>0.5))*100)

[[ 97   0]
 [103   0]]
accuracy is :  48.5


In [None]:
score = model.evaluate(input_test, target_test, verbose=0)
print(f'Test loss: {score[0]} / Test accuracy: {score[1]}')