# Sentimental Analysis for Game reviews

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.python.keras import models, layers, optimizers
from tensorflow.keras.preprocessing.text import Tokenizer, text_to_word_sequence
from tensorflow.keras.preprocessing.sequence import pad_sequences
import re

%matplotlib inline
import os

# Dataset review

In [2]:
train = pd.read_csv('train.csv')

In [3]:
test = pd.read_csv('test.csv')

In [5]:
game = pd.read_csv('game_overview.csv')

In [6]:
train.head()

Unnamed: 0,review_id,title,year,user_review,user_suggestion
0,1,Spooky's Jump Scare Mansion,2016.0,I'm scared and hearing creepy voices. So I'll...,1
1,2,Spooky's Jump Scare Mansion,2016.0,"Best game, more better than Sam Pepper's YouTu...",1
2,3,Spooky's Jump Scare Mansion,2016.0,"A littly iffy on the controls, but once you kn...",1
3,4,Spooky's Jump Scare Mansion,2015.0,"Great game, fun and colorful and all that.A si...",1
4,5,Spooky's Jump Scare Mansion,2015.0,Not many games have the cute tag right next to...,1


# Text Preprocessing

In [7]:
punc = re.compile(r'[\W]')
asci = re.compile(r'[^a-z0-1\s]')

In [8]:
def cleaning(message):
    clean_reviews = []
    for i in message:
        lower = i.lower()
        no_punc = punc.sub(r' ', lower)
        no_non_ascii = asci.sub(r'', no_punc)
        clean_reviews.append(no_non_ascii)
    return clean_reviews

In [9]:
train['clean_review'] = cleaning(train['user_review'])
test['clean_review'] = cleaning(test['user_review'])

Using Tokenizer

In [10]:
MAX_FEATURES = 90000
tokenizer = Tokenizer(num_words=MAX_FEATURES)
tokenizer.fit_on_texts(train['clean_review'])
train_texts = tokenizer.texts_to_sequences(train['clean_review'])
test_texts = tokenizer.texts_to_sequences(test['clean_review'])

# Padding Sequences

In [11]:
MAX_LENGTH = max(len(i) for i in train_texts)
train_texts = pad_sequences(train_texts, maxlen=MAX_LENGTH)
test_texts = pad_sequences(test_texts, maxlen=MAX_LENGTH)

# Model formation

In [118]:
def Model():
    sequences = layers.Input(shape=(2330,))
    embedded = layers.Embedding(MAX_FEATURES, 64)(sequences)
    x = layers.Conv1D(64, 3, activation='relu')(embedded)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPool1D(3)(x)
    x = layers.Conv1D(64, 5, activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPool1D(5)(x)
    x = layers.Conv1D(64, 5, activation='relu')(x)
    x = layers.GlobalMaxPool1D()(x)
    x = layers.Flatten()(x)
    x = layers.Dense(100, activation='relu')(x)
    predictions = layers.Dense(1, activation='sigmoid')(x)
    model = models.Model(inputs=sequences, outputs=predictions)
    model.compile(
        optimizer='rmsprop',
        loss='binary_crossentropy',
        metrics=['binary_accuracy']
    )
    return model
    
model1 = Model()

In [20]:
X_train = train_texts
Y_train = train['user_suggestion']
X_test = test_texts

# Fitting

In [116]:
fit = model1.fit(x_train1 , Y_train , epochs = 5 , verbose = 1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [119]:
temp = model.predict(x_test1)

In [120]:
Predictions = []
for i in temp:
    if i >=0.5:
        Predictions.append(1)
    else:
        Predictions.append(0)

In [24]:
ID = test['review_id']

In [121]:
Predictions = np.array(Predictions)

In [122]:
Solution = pd.DataFrame({'review_id':ID , 'user_suggestion':Predictions })

In [123]:
Solution.to_csv('Sol3.csv' , index = False)

Accuracy - 84.3 %

# Merging the game reviews

In [34]:
train1 = train.merge(game , how = 'left' , on = 'title')

In [35]:
train1.head()

Unnamed: 0,review_id,title,year,user_review,user_suggestion,clean_review,developer,publisher,tags,overview
0,1,Spooky's Jump Scare Mansion,2016.0,I'm scared and hearing creepy voices. So I'll...,1,i m scared and hearing creepy voices so i ll...,Lag Studios,Lag Studios,"['Horror', 'Free to Play', 'Cute', 'First-Pers...",Can you survive 1000 rooms of cute terror? Or ...
1,2,Spooky's Jump Scare Mansion,2016.0,"Best game, more better than Sam Pepper's YouTu...",1,best game more better than sam pepper s youtu...,Lag Studios,Lag Studios,"['Horror', 'Free to Play', 'Cute', 'First-Pers...",Can you survive 1000 rooms of cute terror? Or ...
2,3,Spooky's Jump Scare Mansion,2016.0,"A littly iffy on the controls, but once you kn...",1,a littly iffy on the controls but once you kn...,Lag Studios,Lag Studios,"['Horror', 'Free to Play', 'Cute', 'First-Pers...",Can you survive 1000 rooms of cute terror? Or ...
3,4,Spooky's Jump Scare Mansion,2015.0,"Great game, fun and colorful and all that.A si...",1,great game fun and colorful and all that a si...,Lag Studios,Lag Studios,"['Horror', 'Free to Play', 'Cute', 'First-Pers...",Can you survive 1000 rooms of cute terror? Or ...
4,5,Spooky's Jump Scare Mansion,2015.0,Not many games have the cute tag right next to...,1,not many games have the cute tag right next to...,Lag Studios,Lag Studios,"['Horror', 'Free to Play', 'Cute', 'First-Pers...",Can you survive 1000 rooms of cute terror? Or ...


In [36]:
train1.describe(include = 'all')

Unnamed: 0,review_id,title,year,user_review,user_suggestion,clean_review,developer,publisher,tags,overview
count,17494.0,17494,17316.0,17494,17494.0,17494,17494,17494,17494,17494.0
unique,,44,,17490,,17468,42,40,44,43.0
top,,Robocraft,,#NAME?,,name,Valve,Bethesda Softworks,"['Free to Play', 'Robots', 'Building', 'Multip...",
freq,,842,,5,,5,884,1012,842,965.0
mean,12423.216989,,2016.388427,,0.569795,,,,,
std,7653.36766,,1.390356,,0.495119,,,,,
min,1.0,,2011.0,,0.0,,,,,
25%,5769.25,,2015.0,,0.0,,,,,
50%,11756.5,,2017.0,,1.0,,,,,
75%,18926.75,,2018.0,,1.0,,,,,


In [39]:
train_texts[:5]

array([[    0,     0,     0, ...,     4,  1127,  1834],
       [    0,     0,     0, ...,  1210,     8,   285],
       [    0,     0,     0, ...,    36,  3282, 22277],
       [    0,     0,     0, ...,    38,  2509,  4505],
       [    0,     0,     0, ...,     1,     5,   977]])

# Cleaning , Tokenizing and Padding

In [41]:
train1['clean_tags'] = cleaning(train1['tags'])

In [42]:
test1 = test.merge(game , how = 'left' , on = 'title')
test1['clean_tags'] = cleaning(test1['tags'])

In [46]:
train1['clean_overview'] = cleaning(train1['overview'])
test1['clean_overview'] = cleaning(test1['overview'])

In [52]:
MAX_FEATURES_2 = 90000
tokenizer2 = Tokenizer(num_words=MAX_FEATURES)
tokenizer2.fit_on_texts(train1['clean_tags'])
train_tags = tokenizer2.texts_to_sequences(train1['clean_tags'])
test_tags = tokenizer2.texts_to_sequences(test1['clean_tags'])
tokenizer2.fit_on_texts(train1['clean_overview'])
train_over = tokenizer2.texts_to_sequences(train1['clean_overview'])
test_over = tokenizer2.texts_to_sequences(test1['clean_overview'])

In [56]:
MAX_LENGTH_2 = max(len(i) for i in train_tags)
train_tags = pad_sequences(train_tags, maxlen=MAX_LENGTH_2)
test_tags = pad_sequences(test_tags, maxlen=MAX_LENGTH_2)

In [57]:
MAX_LENGTH_3 = max(len(i) for i in train_over)
train_over = pad_sequences(train_over, maxlen=MAX_LENGTH_3)
test_over = pad_sequences(test_over, maxlen=MAX_LENGTH_3)

In [67]:
combine = np.concatenate((train_texts , train_tags , train_over) , axis=1)

In [68]:
np.shape(combine)

(17494, 2330)

In [69]:
combine_test = np.concatenate((test_texts , test_tags , test_over) , axis=1)

In [105]:
x_train1 = combine
x_test1 = combine_test

Accuracy - 87.6%

# Trying different other classifiers

In [95]:
from sklearn.ensemble import RandomForestClassifier

In [96]:
rfr = RandomForestClassifier(n_estimators = 100 , random_state = 1)

In [109]:
rfr.fit(X_train , Y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=1, verbose=0,
                       warm_start=False)

In [110]:
rfr.score(X_train , Y_train)

0.9995427003544072

In [111]:
Predictions = rfr.predict(X_test)