In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tflearn

In [2]:
# Read the IGN review table from the working directory and preview its first rows.
df = pd.read_csv(filepath_or_buffer='ign.csv')
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,score_phrase,title,url,platform,score,genre,editors_choice,release_year,release_month,release_day
0,0,Amazing,LittleBigPlanet PS Vita,/games/littlebigplanet-vita/vita-98907,PlayStation Vita,9.0,Platformer,Y,2012,9,12
1,1,Amazing,LittleBigPlanet PS Vita -- Marvel Super Hero E...,/games/littlebigplanet-ps-vita-marvel-super-he...,PlayStation Vita,9.0,Platformer,Y,2012,9,12
2,2,Great,Splice: Tree of Life,/games/splice/ipad-141070,iPad,8.5,Puzzle,N,2012,9,12
3,3,Great,NHL 13,/games/nhl-13/xbox-360-128182,Xbox 360,8.5,Sports,N,2012,9,11
4,4,Great,NHL 13,/games/nhl-13/ps3-128181,PlayStation 3,8.5,Sports,N,2012,9,11


In [3]:
# List the distinct verbal ratings that appear in the score_phrase column.
df['score_phrase'].unique()

array(['Amazing', 'Great', 'Good', 'Awful', 'Okay', 'Mediocre', 'Bad',
       'Painful', 'Unbearable', 'Disaster', 'Masterpiece'], dtype=object)

In [4]:
# Keep only score_phrase, title and score, then keep the first row for each
# distinct title (drop_duplicates keeps the first occurrence by default).
#
# errors='ignore' makes the cell safe to re-run: once the columns have been
# removed, a second execution is a no-op instead of raising KeyError.
# Chaining drop_duplicates (instead of inplace=True) produces the same frame
# without mutating an existing object in place.
df = df.drop(
    ['url', 'platform', 'release_year', 'release_month', 'release_day',
     'editors_choice', 'genre', 'Unnamed: 0'],
    axis=1,
    errors='ignore',
).drop_duplicates('title')
df.head()

Unnamed: 0,score_phrase,title,score
0,Amazing,LittleBigPlanet PS Vita,9.0
1,Amazing,LittleBigPlanet PS Vita -- Marvel Super Hero E...,9.0
2,Great,Splice: Tree of Life,8.5
3,Great,NHL 13,8.5
5,Good,Total War Battles: Shogun,7.0


In [5]:
# Add two 0/1 indicator columns derived from the numeric score:
# `positive` flags scores strictly above 5, `negative` flags scores of 5 or less.
df.loc[:, 'positive'] = df['score'].gt(5.).astype(int)
df.loc[:, 'negative'] = df['score'].le(5.).astype(int)
df.head()

Unnamed: 0,score_phrase,title,score,positive,negative
0,Amazing,LittleBigPlanet PS Vita,9.0,1,0
1,Amazing,LittleBigPlanet PS Vita -- Marvel Super Hero E...,9.0,1,0
2,Great,Splice: Tree of Life,8.5,1,0
3,Great,NHL 13,8.5,1,0
5,Good,Total War Battles: Shogun,7.0,1,0


In [6]:
# Show the rows at positions 23 and 24 (one negative and one positive example).
df.iloc[23:25]

Unnamed: 0,score_phrase,title,score,positive,negative
32,Bad,Hell Yeah! Wrath of the Dead Rabbit,4.9,0,1
33,Amazing,Pokemon White Version 2,9.6,1,0


In [7]:
from tflearn.data_utils import to_categorical, pad_sequences
from collections import Counter

# Tally every space-separated token across all titles.
total_counts = Counter(
    token
    for title in df.title
    for token in title.split(" ")
)

print("total words in data set: ", len(total_counts))

# Vocabulary ordered from most to least frequent; equal counts keep the order
# in which the tokens were first seen.
vocab = [token for token, _ in total_counts.most_common()]
print(vocab[-1], ': ', total_counts[vocab[-1]])

# Token -> position in `vocab` (0 is the most frequent token).
word2idx = dict(zip(vocab, range(len(vocab))))

total words in data set:  9924
UpWords :  1


In [8]:
def text_to_vector(text, index=None, length=None):
    """Encode a title as a fixed-length sequence of vocabulary indices.

    The text is split on single spaces and the i-th token's index is written
    to position i of the result. Tokens missing from the index, and positions
    after the last token, are left at 0.

    Args:
        text: Title string to encode.
        index: Mapping from token to integer index. Defaults to the
            notebook-level ``word2idx``.
        length: Length of the returned vector. Defaults to ``len(vocab)``,
            which is the size the rest of the notebook expects.

    Returns:
        1-D ``np.int_`` array of ``length`` entries.

    NOTE(review): ``word2idx`` enumerates from 0, so the most frequent token
    shares the value 0 with padding and unknown tokens. Shifting indices by
    one would also require enlarging the embedding's ``input_dim``.
    """
    if index is None:
        index = word2idx
    if length is None:
        length = len(vocab)

    encoded = np.zeros(length, dtype=np.int_)
    # Truncate to `length` tokens so an over-long text cannot index past the
    # end of the vector (the original raised IndexError in that case).
    for position, word in enumerate(text.split(" ")[:length]):
        # Single dict lookup; unknown tokens fall back to 0.
        encoded[position] = index.get(word, 0)
    return encoded

In [9]:
import sys

# One encoded row per title, each row len(vocab) entries long.
word_vectors = np.zeros((len(df), len(vocab)), dtype=np.int_)
# Iterate the title column directly rather than df.iterrows() + text[1]:
# the positional lookup silently depended on `title` being the second column
# and built a full Series for every row.
for i, title in enumerate(df.title):
    word_vectors[i] = text_to_vector(title)
len(word_vectors)

12589

In [10]:
# Single-column frame holding the 0/1 `positive` target for every title.
labels = df[['positive']]
labels.shape[0]

12589

In [11]:
# Features and targets must line up row for row before splitting.
assert len(word_vectors) == len(labels)
n_records = len(labels)

# Shuffle the row positions with a locally seeded generator so the split is
# the same on every Restart & Run All, without touching NumPy's global RNG.
split_seed = 42
n_shuffle = np.arange(n_records)
np.random.RandomState(split_seed).shuffle(n_shuffle)

# NOTE: despite its name, test_fraction is the share of records that goes to
# the TRAINING set (the first 90% of the shuffled positions); the remaining
# 10% form the test set. The name is kept so existing references still work.
test_fraction = 0.9
split_at = int(n_records * test_fraction)

n_train_split, n_test_split = n_shuffle[:split_at], n_shuffle[split_at:]
# Targets are expanded to two columns by to_categorical(..., 2).
trainX, trainY = word_vectors[n_train_split,:], to_categorical(labels.values[n_train_split], 2)
testX, testY = word_vectors[n_test_split,:], to_categorical(labels.values[n_test_split], 2)

In [12]:
# Display the training targets produced by to_categorical(..., 2) in the split cell.
trainY

array([[ 1.,  0.],
       [ 0.,  1.],
       [ 0.,  1.],
       ..., 
       [ 0.,  1.],
       [ 1.,  0.],
       [ 0.,  1.]])

In [13]:
# Network building
def build_model():
    """Build the title classifier: embedding -> LSTM -> 2-way softmax.

    Reads the notebook-level ``word_vectors`` (for the input sequence length)
    and ``vocab`` (for the embedding size).

    Returns:
        A ``tflearn.DNN`` wrapping the network, configured with categorical
        cross-entropy loss and the Adam optimizer.
    """
    # This resets all parameters and variables, leave this here
    tf.reset_default_graph()

    # layers
    # Every input row is a fixed-length sequence of vocabulary indices.
    sequence_length = len(word_vectors[0])
    net = tflearn.input_data([None, sequence_length])
    # input_dim must cover every index the encoder can emit, i.e. the
    # vocabulary size. The original passed the sequence length, which only
    # matched because rows happen to be len(vocab) long.
    net = tflearn.embedding(net, input_dim=len(vocab), output_dim=128)
    net = tflearn.lstm(net, 128, dropout=0.8)
    net = tflearn.fully_connected(net, 2, activation='softmax')

    # training
    net = tflearn.regression(net, loss='categorical_crossentropy', optimizer='adam', learning_rate=0.001)

    model = tflearn.DNN(net, tensorboard_verbose=0)
    return model

In [14]:
# Construct the network; build_model() resets the default TensorFlow graph before adding layers.
model = build_model()

KeyboardInterrupt: 

In [15]:
# Train for 4 epochs in mini-batches of 32, holding out 10% of the training
# rows for validation and reporting the metric while fitting.
model.fit(
    trainX,
    trainY,
    n_epoch=4,
    batch_size=32,
    validation_set=0.1,
    show_metric=True,
)

NameError: name 'model' is not defined

In [None]:
# Threshold the first output column at 0.5 and compare it with the first
# column of the one-hot test targets to get the share of matching rows.
first_column_probs = np.array(model.predict(testX))[:, 0]
predictions = (first_column_probs >= 0.5).astype(np.int_)
test_accuracy = (predictions == testY[:, 0]).mean(axis=0)
print("Test accuracy: ", test_accuracy)

In [None]:
bad = "Double Dragon IV"  # defined but not scored below
mediocre = "Divide"
amazing = "Nioh"

vec = text_to_vector("Wrath of the Dead Rabbit")
print(vec)

# (title, encoded title, message template) for each spot check, in display order.
examples = [
    ("Wrath of the Dead Rabbit", vec,
     '{} should be negative:\nP(positive) = {:.3f} :'),
    (mediocre, text_to_vector(mediocre),
     '{} should be positive:\nP(positive) = {:.3f} :'),
    (amazing, text_to_vector(amazing),
     '{} should be positive:\nP(positive) = {:.3f} :'),
]

for title, encoded, template in examples:
    # Second output column is read as the probability of the positive class.
    positive_prob = model.predict([encoded])[0][1]
    verdict = 'Positive' if positive_prob > 0.5 else 'Negative'
    print(template.format(title, positive_prob), verdict)