# Amazon Food Reviews

In [1]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import tensorflow.keras.preprocessing.text as tfkpt
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
import csv
import json

In [2]:
food_df = pd.read_csv('./small_reviews.csv')

In [3]:
food_score = food_df[['Text','Score']].sort_values(by='Score')
food_score

Unnamed: 0,Text,Score
8582,"As coffee in general, this Wolfgang variety wa...",1
5024,"Looked like a good deal, so I ordered the grou...",1
2868,When I placed my order it was for Hartz Pigski...,1
5021,This product was a big disappointment. The pi...,1
2870,I emailed customer service and asked if the tr...,1
...,...,...
4115,This thing works instantly. You plug in your m...,5
4114,This people is a must have especially to take ...,5
4113,I use this with my Audio Technica condensor. W...,5
4108,This Icicle performs flawlessly with very good...,5


In [4]:
def create_sentiment(num):
    if num <= 2:
        return 0
    if num == 3:
        return 1
    else:
        return 2

In [5]:
food_score['sentiment'] = food_score['Score'].apply(create_sentiment)
food_score.sample(5)

Unnamed: 0,Text,Score,sentiment
9597,This is EXTREMELY better than Wilton!!! Buy t...,5,2
235,The taste of these white cheddar flat breads i...,2,0
2454,O. N. E. Coconut water is delicious and refres...,5,2
449,These chips have the right amount of crunch an...,5,2
2732,Popcorn has grear color (white) But taste is ...,2,0


In [6]:
def clean_commas(str):
    return str.replace(',', ';')

In [7]:
food_score['Text'] = food_score['Text'].apply(clean_commas)
food_score.sample(5)

Unnamed: 0,Text,Score,sentiment
5755,I got these 2 weeks ago; for the price i wasnt...,5,2
7072,After a lot of trial and error (with just wate...,5,2
7616,I ordered this and several other plants from t...,5,2
6486,very tasty and the perfect size snack. Reliab...,5,2
8897,t may be expresso grind; but if you are expect...,2,0


In [8]:
print("Num GPU's Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

Num GPU's Available:  0


In [9]:
text_df = pd.read_csv('./small_reviews.csv', encoding='latin-1')
text_training_df = text_df[['Score', 'Text']].dropna()
text_training_df.sample(5)

Unnamed: 0,Score,Text
9960,5,I add these Chia seeds to my Scottish oatmeal....
5832,5,Harney & Sons' Dragon Pearl Jasmine tea is one...
6344,1,I'm really disappointed with the changes in qu...
2881,5,one of the best choices so far. If you like b...
6425,5,My title was my reaction when I first opened t...


In [10]:
text_y = text_training_df.Score
print(text_y)

0       5
1       1
2       4
3       2
4       5
       ..
9994    5
9995    1
9996    5
9997    5
9998    5
Name: Score, Length: 9999, dtype: int64


In [11]:
text_x = text_training_df.Text
print(text_x)

0       I have bought several of the Vitality canned d...
1       Product arrived labeled as Jumbo Salted Peanut...
2       This is a confection that has been around a fe...
3       If you are looking for the secret ingredient i...
4       Great taffy at a great price.  There was a wid...
                              ...                        
9994    I switched from Similac Advanced to Organic wh...
9995    we switched from the advance similac to the or...
9996    Like the bad reviews say, the organic formula ...
9997    I wanted to solely breastfeed but was unable t...
9998    i love the fact that i can get this delieved t...
Name: Text, Length: 9999, dtype: object


In [12]:
text_training_df.describe()

Unnamed: 0,Score
count,9999.0
mean,4.134513
std,1.327238
min,1.0
25%,4.0
50%,5.0
75%,5.0
max,5.0


In [13]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(text_x)

In [14]:
text_x = tokenizer.texts_to_sequences(text_x)
text_x

19,
  426,
  211,
  6,
  113,
  4,
  37,
  1327,
  649,
  464],
 [2,
  1067,
  60,
  37,
  324,
  67,
  3957,
  3,
  411,
  5,
  91,
  6,
  2,
  20,
  1649,
  67,
  1,
  33,
  15,
  6,
  1014,
  58,
  5,
  31,
  4,
  186,
  87,
  55,
  2,
  3941,
  94,
  275,
  3473,
  139,
  1,
  178,
  154,
  3,
  734,
  351,
  6,
  127,
  81,
  70,
  1679,
  1,
  2851],
 [126,
  9,
  11,
  13,
  2119,
  11,
  67,
  110,
  201,
  121,
  53,
  2084,
  5,
  31,
  67,
  163,
  121,
  2059,
  49,
  9,
  64,
  141,
  121,
  20,
  67,
  751,
  618,
  103,
  135,
  3958,
  7,
  351,
  13,
  64,
  5196,
  2,
  126,
  331,
  1,
  64,
  26,
  3659,
  6437,
  66],
 [2,
  106,
  229,
  3957,
  64,
  12,
  4,
  253,
  1170,
  3,
  59,
  388,
  6,
  26,
  2,
  443,
  224,
  12,
  319,
  310,
  3,
  575,
  11,
  6,
  12,
  4,
  3372,
  1029,
  103,
  317,
  613,
  7,
  8933,
  2,
  810,
  1,
  1170,
  3,
  1291,
  85,
  23,
  122,
  5,
  86,
  1,
  64,
  2,
  20,
  771,
  8,
  20,
  8932,
  3957,
  1706,
  2,
  620

In [15]:
text_x = tokenizer.sequences_to_matrix(text_x)
text_x

array([[0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.],
       ...,
       [0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 1., 1., 1.]])

In [16]:
print(type(text_x))

<class 'numpy.ndarray'>


In [17]:
model = Sequential()
model.add(Dense(37, activation = 'relu'))
model.add(Dropout(0.2))
# model.add(Dense(419, activation = 'linear'))
# model.add(Dropout(0.3))
# model.add(Dense(281, activation = 'elu'))
# model.add(Dropout(0.4))
# model.add(Dense(167, activation = 'tanh'))
# model.add(Dropout(0.5))
# model.add(Dense(61, activation = 'softsign'))
# model.add(Dense(3, activation = 'softmax'))

In [18]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [19]:
text_y = keras.utils.to_categorical(text_y, 37)

In [20]:
model.fit(text_x, text_y,
batch_size=37,
epochs=7,
verbose=1,
validation_split=0.2,
shuffle=True)

Train on 7999 samples, validate on 2000 samples
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


<tensorflow.python.keras.callbacks.History at 0x7fc74c22d5c0>

In [21]:
food_model = model.to_json()
with open('food_model.json', 'w') as json_file:
    json_file.write(food_model)
model.save_weights('food_model.json.h5')