In [206]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

from tensorflow.python.client import device_lib

from sklearn.feature_extraction.text import CountVectorizer

In [207]:
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 15903353417472668729
 xla_global_id: -1,
 name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 6242172928
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 9368971401068029883
 physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 2080, pci bus id: 0000:01:00.0, compute capability: 7.5"
 xla_global_id: 416903419]

In [208]:
def plot_graphs(history, metric):
  plt.plot(history.history[metric])
  plt.plot(history.history['val_'+metric], '')
  plt.xlabel("Epochs")
  plt.ylabel(metric)
  plt.legend([metric, 'val_'+metric])


In [209]:
df = pd.read_csv('../Data/reviews_with_sentiment.csv')
df.head()

Unnamed: 0,overall,reviewText,asin,sentiment
0,5,I don't spend a lot on my flags because they r...,9539723809,0.9514
1,5,"Super fast processing and shipping, if you are...",9539723809,0.8748
2,5,Great product. I would recommend this product...,9539723809,0.8442
3,5,GREAT PRICE I LOVE MY STATE AND COUNTRY,9539723809,0.8519
4,5,Great display flag for the den.,9539723809,0.6249


In [210]:
train_text, test_text, train_y, test_y = train_test_split(df['reviewText'].values,df['overall'].values, test_size=0.25, random_state=1000)

In [211]:
train_y = to_categorical(train_y)
test_y = to_categorical(test_y)

train_y

array([[0., 0., 0., 0., 1., 0.],
       [0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0.],
       ...,
       [0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 1.]], dtype=float32)

In [212]:
from keras.preprocessing.text import Tokenizer

tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(train_text)

train_x = tokenizer.texts_to_sequences(train_text)
test_x = tokenizer.texts_to_sequences(test_text)

vocab_size = len(tokenizer.word_index) + 1

In [213]:
print(train_text[2])

print(train_x[2])

Arrived in a little box with three leaves. I repotted it and put in a sunny window do to wintertime here now. It has taken off producing a new leaf every week. Looks good and healthy. Happy with this purchase.
[273, 11, 5, 73, 215, 14, 292, 284, 3, 6, 2, 89, 11, 5, 2294, 675, 60, 4, 274, 91, 6, 59, 1299, 75, 3326, 5, 132, 831, 178, 314, 162, 30, 2, 766, 191, 14, 8, 226]


In [214]:
for word in ['testing', 'if', 'words', 'token']:
    print('{}: {}'.format(word, tokenizer.word_index[word]))

testing: 1794
if: 37
words: 2352
token: 37365


In [215]:
from keras.utils import pad_sequences

maxlen = 100

train_x = pad_sequences(train_x, padding='post', maxlen=maxlen)
test_x = pad_sequences(test_x, padding='post', maxlen=maxlen)

print(train_x[0, :])

[ 221   63  208   26  714   37   19  416  143   17   18  143    4  575
  868   11    5 2757  519    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0]


In [216]:
embedding_dim = 100

model = Sequential()
model.add(layers.Embedding(vocab_size, embedding_dim, input_length=maxlen))
model.add(layers.Conv1D(128, 5, activation='relu'))
model.add(layers.GlobalMaxPooling1D())
model.add(layers.Dense(10, activation='relu'))
model.add(layers.Dense(6, activation='sigmoid'))
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.summary()


Model: "sequential_15"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_6 (Embedding)     (None, 100, 100)          11027800  
                                                                 
 conv1d_5 (Conv1D)           (None, 96, 128)           64128     
                                                                 
 global_max_pooling1d_3 (Glo  (None, 128)              0         
 balMaxPooling1D)                                                
                                                                 
 dense_23 (Dense)            (None, 10)                1290      
                                                                 
 dense_24 (Dense)            (None, 6)                 66        
                                                                 
Total params: 11,093,284
Trainable params: 11,093,284
Non-trainable params: 0
_________________________________________

In [217]:
history = model.fit(train_x, train_y,
                    epochs=10,
                    validation_data=(test_x, test_y),
                    batch_size=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training Accuracy: 0.8718
Testing Accuracy:  0.7283


NameError: name 'plot_history' is not defined

In [218]:
model.save("../Model/model.h5")