# Building in user inputs...
I want to be able to have users feed in reviews of games that they like. 

In [1]:
import numpy as np
import pandas as pd
import re

from scipy import sparse
from sklearn.neighbors import NearestNeighbors

In [2]:
df = pd.read_csv('metacriticfortuning.csv', index_col='Unnamed: 0')

In [3]:
df[df['title'].str.contains('Counter-Strike: Global Offensive')]

Unnamed: 0,is_critic,reviewer_name,reviewer_score,url,title,norm_score,genre,reviewer_id
484,1,LEVEL (Czech Republic),90,counter-strike-global-offensive,Counter-Strike: Global Offensive (PC),9,"Action, Shooter, First-Person, Modern, Tactica...",1
2425,1,PC Gamer,84,counter-strike-global-offensive,Counter-Strike: Global Offensive (PC),8,"Action, Shooter, First-Person, Modern, Tactica...",4
7763,1,IGN,80,counter-strike-global-offensive,Counter-Strike: Global Offensive (PC),8,"Action, Shooter, First-Person, Modern, Tactica...",34
10351,1,GameSpy,80,counter-strike-global-offensive,Counter-Strike: Global Offensive (PC),8,"Action, Shooter, First-Person, Modern, Tactica...",36
11601,1,GameSpot,85,counter-strike-global-offensive,Counter-Strike: Global Offensive (PC),9,"Action, Shooter, First-Person, Modern, Tactica...",37
15767,1,Eurogamer,90,counter-strike-global-offensive,Counter-Strike: Global Offensive (PC),9,"Action, Shooter, First-Person, Modern, Tactica...",41
17119,0,sinadoom,8,counter-strike-global-offensive,Counter-Strike: Global Offensive (PC),8,"Action, Shooter, First-Person, Modern, Tactica...",43
19362,1,PC PowerPlay,80,counter-strike-global-offensive,Counter-Strike: Global Offensive (PC),8,"Action, Shooter, First-Person, Modern, Tactica...",57
21881,1,GameStar,84,counter-strike-global-offensive,Counter-Strike: Global Offensive (PC),8,"Action, Shooter, First-Person, Modern, Tactica...",61
23716,1,3DJuegos,77,counter-strike-global-offensive,Counter-Strike: Global Offensive (PC),8,"Action, Shooter, First-Person, Modern, Tactica...",63


In [4]:
df.shape

(221303, 8)

# User input will be added to the DF and then picked for recommendations...
Currently the input will be accepted into the dataframe and then ran through the same filters as before. We want to make sure we keep only relevant information, so technically it will be possible to throw errors if we wanted to.

In [5]:
# Get the user's name
name = str(raw_input("Please enter your name: "))

gametitle = [] # empty lists to hold the user's review titles and scores
gamescore = []

exit = False

# I will run this loop to collect games and reviews until the user is finished
while exit == False:
    
    results = []

    gamesearch = str(raw_input("Please enter a title to review: "))
    
    while len(results) > 1 or len(results) == 0:
        results = []

        for title in df['title'].unique():
            if re.search(gamesearch, title, re.IGNORECASE):
                results.append(title) #gather up the possible results...
        count = 0
        if len(results) > 1:
            for i in results:
                print "[" + str(count) + "]" + " | " + i
                count += 1
            gamenumber = int(raw_input("There are multiple results. Please chose a number: "))
            results = [results[gamenumber]]
        elif len(results) == 0:
            gamesearch = str(raw_input("You returned nothing, try again: "))
        else:
            break

    gametitle.append(results[0])
    print "You've selected: " + gametitle[-1]
    print
    
    # Get the user's score for the game
    loop = True
    while loop == True:
        score = int(raw_input("Please give the title a score from 0-10: "))
        if score >= 0 and score <=10:
            gamescore.append(score)
            loop = False
        else:
            loop = True
    
    # Ask if the users wants to stop adding games
    loop = True
    while loop == True:
        confirm = str(raw_input("Would you like to add another game? (y/n): "))
        if confirm == 'y' or confirm =='Y':
            exit = False
            loop = False
        elif confirm == 'n' or confirm =='N':
            exit = True
            loop = False
        else:
            loop = True

Please enter your name: Keith Strmiska
Please enter a title to review: Doom
[0] | Deponia Doomsday (PC)
[1] | Doom 3 BFG Edition (PC)
[2] | DOOM (PC)
[3] | Doom 3 (PC)
[4] | Doom 3: Resurrection of Evil (PC)
[5] | Doom II (PC)
[6] | Hearts of Iron II: Doomsday (PC)
[7] | Hector: Badge of Carnage - Episode 3: Beyond Reasonable Doom (PC)
There are multiple results. Please chose a number: 2
You've selected: DOOM (PC)

Please give the title a score from 0-10: 10
Would you like to add another game? (y/n): y
Please enter a title to review: call of duty
[0] | Call of Duty: Black Ops (PC)
[1] | Call of Duty: Modern Warfare 2 (PC)
[2] | Call of Duty: Modern Warfare 3 (PC)
[3] | Call of Duty: World at War (PC)
[4] | Call of Duty: Advanced Warfare (PC)
[5] | Call of Duty: Black Ops II (PC)
[6] | Call of Duty: Black Ops III (PC)
[7] | Call of Duty: Ghosts (PC)
[8] | Call of Duty: Infinite Warfare (PC)
[9] | Call of Duty (PC)
[10] | Call of Duty 2 (PC)
[11] | Call of Duty 4: Modern Warfare (PC)
[12

In [7]:
# The gathered information from the user as variables... (name, game titles, game scores)
print name
print gametitle
print gamescore

Keith Strmiska
['DOOM (PC)', 'Call of Duty: Modern Warfare 3 (PC)', 'League of Legends (PC)', 'Dota 2 (PC)', 'Heroes of the Storm (PC)']
[10, 7, 8, 8, 9]


In [8]:
# Build the variables into a dictionary 'entry' for each game
# Each dictionary is fed into the 'reviewedlist' as a list of dictionaries...

reviewedlist = []
for g,s in zip(gametitle, gamescore):
    entry = {}
    entry['is_critic'] = 0
    entry['reviewer_name'] = name
    entry['reviewer_score'] = s
    entry['url'] = 'user'
    entry['title'] = g
    entry['norm_score'] = s
    entry['genre'] = 'user'
    entry['reviewer_id'] = -1
    
    reviewedlist.append(entry)

In [9]:
# convert the list of dictionaries to a dataframe and append it to the primary dataframe

userdf = pd.DataFrame(reviewedlist)
df = df.append(userdf)
df.tail(10)

Unnamed: 0,genre,is_critic,norm_score,reviewer_id,reviewer_name,reviewer_score,title,url
221298,"Strategy, Tycoon, Management, Business / Tycoon",0,8,85366,K.Lawson,8,Zoo Tycoon 2 (PC),zoo-tycoon-2
221299,"Strategy, Tycoon, Management, Business / Tycoon",0,9,85367,TyR.,9,Zoo Tycoon 2 (PC),zoo-tycoon-2
221300,"Strategy, Tycoon, Management, Business / Tycoon",0,7,85368,BethS.,7,Zoo Tycoon 2 (PC),zoo-tycoon-2
221301,"Miscellaneous, Puzzle, Action, Action",0,8,85369,HenrikW.,8,Zuma's Revenge! (PC),zumas-revenge!
221302,"Miscellaneous, Puzzle, Action, Action",0,4,85370,conradhall,4,Zuma's Revenge! (PC),zumas-revenge!
0,user,0,10,-1,Keith Strmiska,10,DOOM (PC),user
1,user,0,7,-1,Keith Strmiska,7,Call of Duty: Modern Warfare 3 (PC),user
2,user,0,8,-1,Keith Strmiska,8,League of Legends (PC),user
3,user,0,8,-1,Keith Strmiska,8,Dota 2 (PC),user
4,user,0,9,-1,Keith Strmiska,9,Heroes of the Storm (PC),user


## Create a column called 'Avg Score' which is the average score of the user

In [10]:
avg_df = df.groupby(['reviewer_name']).median().reset_index()
avg_df = avg_df.drop(['is_critic', 'reviewer_score', 'reviewer_id'], axis=1)
avg_df = avg_df.rename(columns={'norm_score': 'avg_score'})
df = pd.merge(df, avg_df, on='reviewer_name')

## Filter out reviewers with a median score less than 3

In [11]:
df = df[df['avg_score'].values > 3]

## We only want to keep users with 4 or more reviews

In [12]:
df = df.groupby('reviewer_id').filter(lambda x: len(x) > 3)

In [13]:
#New Dataset has an average of almost 20 reviews per person!
print "average reviews per person:"
print np.mean(df.reviewer_id.value_counts())
print
#New Dataset has 121437 reviews...
print "length of dataframe:"
print len(df)
print

print "top reviewers:"
print df.reviewer_id.value_counts()[:10]

average reviews per person:
19.6813614263

length of dataframe:
121434

top reviewers:
37     2446
34     2417
4      2308
30     1365
87     1130
36     1109
1      1027
61     1025
40      965
188     954
Name: reviewer_id, dtype: int64


In [14]:
df.head()

Unnamed: 0,genre,is_critic,norm_score,reviewer_id,reviewer_name,reviewer_score,title,url,avg_score
0,"Action, Shooter, First-Person, Modern, Arcade",1,4,0,GamingXP,35,007 Legends (PC),007-legends,8.0
1,"Action, Shooter, First-Person, Modern, Arcade",1,4,0,GamingXP,35,007 Legends (PC),007-legends,8.0
2,"Action, Shooter, First-Person, Modern, Arcade",1,4,0,GamingXP,35,007 Legends (PC),007-legends,8.0
3,"Adventure, General, Point-and-Click",1,8,0,GamingXP,81,15 Days (PC),15-days,8.0
4,"Adventure, General, Point-and-Click",1,8,0,GamingXP,81,15 Days (PC),15-days,8.0


## Need to fix the index and reviewer_id since they are now missing values...
We also need to do this to set the new added user into the dataframe without any repeats happening

In [15]:
df = df.reset_index()
df = df.drop(['index'], axis=1)

In [16]:
df.head()

Unnamed: 0,genre,is_critic,norm_score,reviewer_id,reviewer_name,reviewer_score,title,url,avg_score
0,"Action, Shooter, First-Person, Modern, Arcade",1,4,0,GamingXP,35,007 Legends (PC),007-legends,8.0
1,"Action, Shooter, First-Person, Modern, Arcade",1,4,0,GamingXP,35,007 Legends (PC),007-legends,8.0
2,"Action, Shooter, First-Person, Modern, Arcade",1,4,0,GamingXP,35,007 Legends (PC),007-legends,8.0
3,"Adventure, General, Point-and-Click",1,8,0,GamingXP,81,15 Days (PC),15-days,8.0
4,"Adventure, General, Point-and-Click",1,8,0,GamingXP,81,15 Days (PC),15-days,8.0


### Building a new reviewer_id

In [17]:
#Build a df to make a new reviewer_id so we don't have any empty spaces...
revName = pd.DataFrame(df['reviewer_name'].unique())

revName.columns = ['reviewer_name']
revName['reviewer_id_fixt'] = range(len(revName))

In [18]:
revName.head()

Unnamed: 0,reviewer_name,reviewer_id_fixt
0,GamingXP,0
1,LEVEL (Czech Republic),1
2,PC Master (Greece),2
3,Riot Pixels,3
4,PC Gamer,4


In [19]:
revName.tail()

Unnamed: 0,reviewer_name,reviewer_id_fixt
6165,Andrea,6165
6166,skulltulainahat,6166
6167,mose,6167
6168,Dallaswithad,6168
6169,Keith Strmiska,6169


In [20]:
# Merge the new list to the DF, drop and rename the old reviewer id column.
df = pd.merge(df, revName, on='reviewer_name')
df = df.drop(['reviewer_id'], axis=1)
df = df.rename(columns={'reviewer_id_fixt': 'reviewer_id'})

In [21]:
df.head(10)

Unnamed: 0,genre,is_critic,norm_score,reviewer_name,reviewer_score,title,url,avg_score,reviewer_id
0,"Action, Shooter, First-Person, Modern, Arcade",1,4,GamingXP,35,007 Legends (PC),007-legends,8.0,0
1,"Action, Shooter, First-Person, Modern, Arcade",1,4,GamingXP,35,007 Legends (PC),007-legends,8.0,0
2,"Action, Shooter, First-Person, Modern, Arcade",1,4,GamingXP,35,007 Legends (PC),007-legends,8.0,0
3,"Adventure, General, Point-and-Click",1,8,GamingXP,81,15 Days (PC),15-days,8.0,0
4,"Adventure, General, Point-and-Click",1,8,GamingXP,81,15 Days (PC),15-days,8.0,0
5,"Adventure, General, Point-and-Click",1,8,GamingXP,81,15 Days (PC),15-days,8.0,0
6,"Strategy, Real-Time, General, Historic, Fantas...",1,7,GamingXP,72,A Game of Thrones: Genesis (PC),a-game-of-thrones-genesis,8.0,0
7,"Adventure, General, General",1,9,GamingXP,85,A New Beginning: Final Cut (PC),a-new-beginning-final-cut,8.0,0
8,"Action, Simulation, Flight, Combat",1,8,GamingXP,80,Ace Combat Assault Horizon: Enhanced Edition (PC),ace-combat-assault-horizon-enhanced-edition,8.0,0
9,"Strategy, Turn-Based, Wargame, General",1,8,GamingXP,75,Achtung Panzer: Kharkov 1943 (PC),achtung-panzer-kharkov-1943,8.0,0


## Looking up the recently added User
We'll use the variable we created earlier called 'name' to look up our recently added user and their reviews.

In [22]:
# Hello person!

df[df['reviewer_name'].str.contains(name)]

Unnamed: 0,genre,is_critic,norm_score,reviewer_name,reviewer_score,title,url,avg_score,reviewer_id
121429,user,0,10,Keith Strmiska,10,DOOM (PC),user,8.0,6169
121430,user,0,7,Keith Strmiska,7,Call of Duty: Modern Warfare 3 (PC),user,8.0,6169
121431,user,0,8,Keith Strmiska,8,League of Legends (PC),user,8.0,6169
121432,user,0,8,Keith Strmiska,8,Dota 2 (PC),user,8.0,6169
121433,user,0,9,Keith Strmiska,9,Heroes of the Storm (PC),user,8.0,6169


In [23]:
# Find the reviewer_id where the reviewer_name is == our name variable as make sure it is an int type.

nameIndex = int(df['reviewer_id'].where(df['reviewer_name'] == name).unique()[-1])

# Setting up a pivot table

In [24]:
pivot = df.pivot_table(index='reviewer_id', columns='title', values='norm_score')
pivot.shape

(6170, 4457)

In [25]:
pivot.fillna(0).values
pivot.shape

(6170, 4457)

In [26]:
pivot.head()

title,007 Legends (PC),007: NightFire (PC),007: Quantum of Solace (PC),0RBITALIS (PC),10 Second Ninja (PC),"10,000,000 (PC)",1001 Spikes (PC),12 is Better Than 6 (PC),140 (PC),15 Days (PC),...,Zoo Tycoon: Marine Mania (PC),Zuma's Revenge! (PC),continue?9876543210 (PC),iRacing (PC),imprint-X (PC),inMomentum (PC),kill.switch (PC),nail'd (PC),rymdkapsel (PC),theHunter: Call of the Wild (PC)
reviewer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,4.0,,,,,,,,,8.0,...,,6.0,,,,,,7.0,,
1,3.0,,6.0,,,,,,,6.0,...,,,,,,5.0,,6.0,8.0,
2,3.0,,,,,,,,,,...,,,,,,,,,,
3,3.0,,,,,5.0,,,,,...,,,,,,,,,,
4,2.0,6.0,,,,,,,,,...,,6.0,,,,,7.0,8.0,,6.0


# Building a sparse matrix

In [27]:
pivotsparse = sparse.csc_matrix(pivot.fillna(0).values)

In [28]:
pivot.fillna(0).shape

(6170, 4457)

## Running the Model

In [29]:
# I put the pivotsparse in NearestNeighbors and checked the for the closest neighbors around user_id (watevs).

X = pivotsparse
nbrs = NearestNeighbors(n_neighbors=7).fit(X) #6
distances, indices = nbrs.kneighbors(pivotsparse[nameIndex]) #We put latest users 'nameIndex' in the sparse index


# The most similar users in the dataframe
print distances
print "-"*20
print indices

[[  0.          18.          18.08314132  18.27566688  18.52025918
   18.70828693  18.70828693]]
--------------------
[[6169 3485 2153 4188 2574 5268 4801]]


## OUTPUT

In [30]:
# Set the reviewer and closest related reviewers to variables from the indicies of the kneighbors output
sample_reviewer = indices[0][0]
suggested_reviewers = indices[0][1:]

print sample_reviewer
print suggested_reviewers

6169
[3485 2153 4188 2574 5268 4801]


In [31]:
# Make a function to filter out titles that our user has played...

def user_suggestions(ur, sr):
    usrTitles = []
    for t in pivot.iloc[ur].dropna().index:
        usrTitles.append(t)
    sugTitles = []
    for rec in sr:
        for t in pivot.iloc[rec].dropna().index:
            sugTitles.append(t)

    recommendations = [x for x in sugTitles if x not in usrTitles]

    return recommendations 

In [32]:
print 'Hello ' + name + "."
print
print 'Here are your Reviews:'
print "-"*60
print pivot.iloc[sample_reviewer].dropna().sort_values(ascending=False)
print
print "*"*60
print "Here are a few recommendations from similar gamers!"
print "*"*60
print
for i in user_suggestions(sample_reviewer, suggested_reviewers):
    
    # Returning the titles that have a 50% average score or higher...
    if df['norm_score'].where(df['title'] == i).mean() >= 5.0:
        print i

Hello Keith Strmiska.

Here are your Reviews:
------------------------------------------------------------
title
DOOM (PC)                              10.0
Heroes of the Storm (PC)                9.0
League of Legends (PC)                  8.0
Dota 2 (PC)                             8.0
Call of Duty: Modern Warfare 3 (PC)     7.0
Name: 6169, dtype: float64

************************************************************
Here are a few recommendations from similar gamers!
************************************************************

Binary Domain (PC)
Crysis 2 (PC)
The Elder Scrolls V: Skyrim (PC)
Call of Duty: Advanced Warfare (PC)
Diablo III: Reaper of Souls (PC)
Fallout 4 (PC)
BioShock Infinite (PC)
Grand Theft Auto IV (PC)
Battlefield 3 (PC)
Rage (PC)
Fallout 4 (PC)
Hearts of Iron IV (PC)
The Elder Scrolls V: Skyrim (PC)
Deadfall Adventures (PC)
Narcosis (PC)
Thief (PC)
