In [None]:
from keras.models import Sequential
from keras.layers import Embedding, Dropout, Conv1D, MaxPool1D, LSTM, Dense, Activation
from keras.utils import pad_sequences, to_categorical
import numpy as np
import pandas as pd
from keras.preprocessing.text import Tokenizer
from sklearn.model_selection import train_test_split


In [2]:
f = open("Video_Games.json")

df = pd.read_json(f, lines=True)
df = df[['overall', 'reviewText']]
df = df.dropna()
df = df.reset_index(drop=True)

df = np.array(df)

In [3]:
(y_train, y_test, x_train, x_test) = train_test_split(df[:, 0], df[:, 1], test_size=0.2, random_state=42)

In [4]:
x_train

array(["I'm a sucker for all these LED items by Razer. This is a killer addition. I get a lot of dropped jaws when people see the game room thanks to this. You can sync up the mouse, keyboard and mouse pad to loop the same color at the same time.",
       "I'm not about to pay 60$ for a pc game no matter how well it may sell.",
       "With one quick dash for cookies, the cable connection for this controller went out faster than most people's lame joke when eating Taco Bell.  It is ideal for chill adult but not quite right for kiddos.",
       ...,
       "Seems to be good quality and for $5 you can't go wrong.  My ps4 controller thumbsticks were wearing out so I bought these to protect them.  Fits well and hasn't fallen off yet.",
       "While good ol' Crash Bandicoot has expanded his horizons on the Playstation 2 and Xbox, Vicarious Visions seems to have decided to hold back for the marsupial's then-latest Game Boy Advance title, Crash Purple. On the surface, it seems like it would 

In [5]:
x_test

array(['my grandaughter love it.',
       "Finally, someone listened.  What with all the 3D hoopla and the console wars, someone out there hasn't forgotten the pleasures of 2D gaming.  Thank you, Konami, for Contra: Shattered Soldier.\nAfter two horrendous outings on the original PlayStation, one of the most beloved franchises in gaming history has returned with a vengeance...and yes, it's in 2D!  Old-schoolers rejoice - you've now got a reason to bring those long-forgotten razor-sharp reflexes out of the closet.  Contra: SS takes a few liberties with the old formula, but the result is a gorgeous, difficult throwback that will bring back all those memories.  Contra is back in a big way.  If you played, and enjoyed, any of the previous Contra games, you'll fall for Shattered Soldier big time.  Everything's here, and then some.\nFor those too young to remember: prepare yourself for a gaming experience unlike any other.  Contra: Shattered Soldier is a side-scrolling action shooter in whic

In [6]:
# Tokenize our stuff
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(x_train)
x_train = tokenizer.texts_to_sequences(x_train)
x_test = tokenizer.texts_to_sequences(x_test)


In [7]:
# padding
x_train = pad_sequences(x_train, maxlen=100)
x_test = pad_sequences(x_test, maxlen=100)

In [8]:
# one-hot encoding
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

In [9]:
len(y_train)

2050907

In [10]:
# model, compile, and train

model = Sequential()
model.add(Embedding(len(y_train), 100))
model.add(Dropout(0.5))
model.add(Conv1D(64, 5, padding='valid', activation='relu', strides=1))
model.add(MaxPool1D(pool_size=4))
model.add(LSTM(55))
model.add(Dense(6))

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 100)         205090700 
                                                                 
 dropout (Dropout)           (None, None, 100)         0         
                                                                 
 conv1d (Conv1D)             (None, None, 64)          32064     
                                                                 
 max_pooling1d (MaxPooling1D  (None, None, 64)         0         
 )                                                               
                                                                 
 lstm (LSTM)                 (None, 55)                26400     
                                                                 
 dense (Dense)               (None, 6)                 336       
                                                        

In [None]:

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

history = model.fit(x_train, y_train, batch_size=100, epochs=10, validation_data=(x_test, y_test))


Epoch 1/10