In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import re
from nltk.corpus import stopwords
from nltk.tokenize import RegexpTokenizer
from wordcloud import WordCloud, STOPWORDS
import spacy
from sklearn import preprocessing
from sklearn.feature_extraction.text import CountVectorizer
from sklearn import metrics
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [2]:
# Load the Steam review training set. The file is decoded as ISO-8859-1
# (Latin-1) instead of pandas' default UTF-8.
df = pd.read_csv(
    '../Datasets/train/steam_ds.csv',
    encoding="ISO-8859-1",
)
# Preview the first rows to check the columns that were loaded.
df.head()

Unnamed: 0,review_id,title,year,user_review,user_suggestion
0,1,Spooky's Jump Scare Mansion,2016.0,I'm scared and hearing creepy voices. So I'll...,Positive
1,2,Spooky's Jump Scare Mansion,2016.0,"Best game, more better than Sam Pepper's YouTu...",Positive
2,3,Spooky's Jump Scare Mansion,2016.0,"A littly iffy on the controls, but once you kn...",Positive
3,4,Spooky's Jump Scare Mansion,2015.0,"Great game, fun and colorful and all that.A si...",Positive
4,5,Spooky's Jump Scare Mansion,2015.0,Not many games have the cute tag right next to...,Positive


In [3]:
# Single VADER analyzer instance; the scoring cell below calls its
# polarity_scores method on every review.
sid = SentimentIntensityAnalyzer()

In [4]:
# Class balance of the ground-truth labels in 'user_suggestion'. Worth keeping
# in mind as a majority-class baseline when reading the accuracy at the end.
df['user_suggestion'].value_counts()

Positive    9968
Negative    7526
Name: user_suggestion, dtype: int64

In [5]:
# Run VADER over each review text. Every entry of 'scores' is the dict that
# polarity_scores returns for that review (it includes a 'compound' key,
# which the next cell extracts).
df['scores'] = df['user_review'].apply(sid.polarity_scores)
df.head()

Unnamed: 0,review_id,title,year,user_review,user_suggestion,scores
0,1,Spooky's Jump Scare Mansion,2016.0,I'm scared and hearing creepy voices. So I'll...,Positive,"{'neg': 0.121, 'neu': 0.774, 'pos': 0.106, 'co..."
1,2,Spooky's Jump Scare Mansion,2016.0,"Best game, more better than Sam Pepper's YouTu...",Positive,"{'neg': 0.062, 'neu': 0.795, 'pos': 0.143, 'co..."
2,3,Spooky's Jump Scare Mansion,2016.0,"A littly iffy on the controls, but once you kn...",Positive,"{'neg': 0.0, 'neu': 0.662, 'pos': 0.338, 'comp..."
3,4,Spooky's Jump Scare Mansion,2015.0,"Great game, fun and colorful and all that.A si...",Positive,"{'neg': 0.1, 'neu': 0.672, 'pos': 0.228, 'comp..."
4,5,Spooky's Jump Scare Mansion,2015.0,Not many games have the cute tag right next to...,Positive,"{'neg': 0.046, 'neu': 0.741, 'pos': 0.213, 'co..."


In [6]:
# Pull the 'compound' value out of each per-review score dict into its own
# numeric column, in the same row order as df.
df['compound'] = [review_scores['compound'] for review_scores in df['scores']]
df.head()

Unnamed: 0,review_id,title,year,user_review,user_suggestion,scores,compound
0,1,Spooky's Jump Scare Mansion,2016.0,I'm scared and hearing creepy voices. So I'll...,Positive,"{'neg': 0.121, 'neu': 0.774, 'pos': 0.106, 'co...",-0.7431
1,2,Spooky's Jump Scare Mansion,2016.0,"Best game, more better than Sam Pepper's YouTu...",Positive,"{'neg': 0.062, 'neu': 0.795, 'pos': 0.143, 'co...",0.6361
2,3,Spooky's Jump Scare Mansion,2016.0,"A littly iffy on the controls, but once you kn...",Positive,"{'neg': 0.0, 'neu': 0.662, 'pos': 0.338, 'comp...",0.9851
3,4,Spooky's Jump Scare Mansion,2015.0,"Great game, fun and colorful and all that.A si...",Positive,"{'neg': 0.1, 'neu': 0.672, 'pos': 0.228, 'comp...",0.8146
4,5,Spooky's Jump Scare Mansion,2015.0,Not many games have the cute tag right next to...,Positive,"{'neg': 0.046, 'neu': 0.741, 'pos': 0.213, 'co...",0.9107


In [7]:
# Turn the compound score into a binary label using 0 as the cut-off:
# compound >= 0 -> 'Positive', anything else -> 'Negative'.
# Note that a compound of exactly 0 lands on the Positive side.
is_non_negative = df['compound'] >= 0
df['comp_score'] = np.where(is_non_negative, 'Positive', 'Negative')

df.head()

Unnamed: 0,review_id,title,year,user_review,user_suggestion,scores,compound,comp_score
0,1,Spooky's Jump Scare Mansion,2016.0,I'm scared and hearing creepy voices. So I'll...,Positive,"{'neg': 0.121, 'neu': 0.774, 'pos': 0.106, 'co...",-0.7431,Negative
1,2,Spooky's Jump Scare Mansion,2016.0,"Best game, more better than Sam Pepper's YouTu...",Positive,"{'neg': 0.062, 'neu': 0.795, 'pos': 0.143, 'co...",0.6361,Positive
2,3,Spooky's Jump Scare Mansion,2016.0,"A littly iffy on the controls, but once you kn...",Positive,"{'neg': 0.0, 'neu': 0.662, 'pos': 0.338, 'comp...",0.9851,Positive
3,4,Spooky's Jump Scare Mansion,2015.0,"Great game, fun and colorful and all that.A si...",Positive,"{'neg': 0.1, 'neu': 0.672, 'pos': 0.228, 'comp...",0.8146,Positive
4,5,Spooky's Jump Scare Mansion,2015.0,Not many games have the cute tag right next to...,Positive,"{'neg': 0.046, 'neu': 0.741, 'pos': 0.213, 'co...",0.9107,Positive


In [8]:

# Fraction of reviews where the VADER-derived label matches the user's own
# recommendation, printed as a percentage with two decimals.
accuracy_score = metrics.accuracy_score(
    y_true=df['user_suggestion'],
    y_pred=df['comp_score'],
)
print('{:04.2f}%'.format(accuracy_score * 100))

69.31%
