# SECTION 1 - PRE PROCESSING

### Install Turicreate

In [0]:
!pip install turicreate

### Import necessary Libraries

In [0]:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import turicreate as tc # Turicreate library for recommendations

### Google Drive Pre Requisite

In [0]:
# Code to read csv file into Colaboratory:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials 

# Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)


In [0]:
link = 'https://drive.google.com/open?id=1dJ2MmNB9_UG3R5HZohSIqqu_dC0_N-S2'

fluff, id = link.split('=')

print (id) # Verify that you have everything after '='

### Read data from the file.

In [5]:
downloaded = drive.CreateFile({'id':id}) 
downloaded.GetContentFile("steam-200k.csv")  
df = pd.read_csv("steam-200k.csv", header=None, index_col=None, names=['UserID', 'Game', 'Action', 'Hours', 'Other'])
df.head()

Unnamed: 0,UserID,Game,Action,Hours,Other
0,151603712,The Elder Scrolls V Skyrim,purchase,1.0,0
1,151603712,The Elder Scrolls V Skyrim,play,273.0,0
2,151603712,Fallout 4,purchase,1.0,0
3,151603712,Fallout 4,play,87.0,0
4,151603712,Spore,purchase,1.0,0


### Check shape of the dataset

In [6]:
df.shape

(200000, 5)

### Check the datatypes of the columns

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200000 entries, 0 to 199999
Data columns (total 5 columns):
UserID    200000 non-null int64
Game      200000 non-null object
Action    200000 non-null object
Hours     200000 non-null float64
Other     200000 non-null int64
dtypes: float64(1), int64(2), object(2)
memory usage: 7.6+ MB


### Check the values in "Other" column

In [8]:
df["Other"].value_counts()

0    200000
Name: Other, dtype: int64

### Remove the column

In [0]:
df.drop(["Other"],axis=1,inplace = True)

### Statistical Information

In [10]:
df.loc[df["Action"]=="play","Hours"].describe()

count    70489.000000
mean        48.878063
std        229.335236
min          0.100000
25%          1.000000
50%          4.500000
75%         19.100000
max      11754.000000
Name: Hours, dtype: float64

# SECTION 2 - RANKING

### Dataset

In [11]:
df.head()

Unnamed: 0,UserID,Game,Action,Hours
0,151603712,The Elder Scrolls V Skyrim,purchase,1.0
1,151603712,The Elder Scrolls V Skyrim,play,273.0
2,151603712,Fallout 4,purchase,1.0
3,151603712,Fallout 4,play,87.0
4,151603712,Spore,purchase,1.0


### Groupby using Actions

In [12]:
df_group_values = df.groupby(["UserID","Action"]).size()
df_group_values.head(25)

UserID  Action  
5250    play          6
        purchase     21
76767   play         20
        purchase     36
86540   play         15
        purchase     82
103360  purchase     10
144736  play          1
        purchase      8
181212  play          2
        purchase     12
229911  play         18
        purchase     27
298950  play        175
        purchase    259
299153  purchase     14
381543  play          1
        purchase     10
547685  play          5
        purchase     25
554278  play          9
        purchase     28
561758  play         80
        purchase    148
577614  play          1
dtype: int64

### Describe the dataset

In [13]:
print("Number of unique games : {0}".format(len(df.Game.unique())))
print("Number of unique users : {0}".format(len(df.UserID.unique())))
print("Number of total purchases by users : {0}".format(len(df.loc[df['Action'] == 'purchase'])))
print("Number of total plays by users : {0}".format(len(df.loc[df['Action'] == 'play'])))

Number of unique games : 5155
Number of unique users : 12393
Number of total purchases by users : 129511
Number of total plays by users : 70489


### Create 2 new datasets 

In [0]:
df_purchase =  df.loc[df['Action'] == 'purchase', ['UserID','Game','Action','Hours']].rename(columns={'Hours': 'Purchased'})  # Purchased Games
df_play = df.loc[df['Action'] == 'play', ['UserID','Game','Hours']].rename(columns={'Hours': 'Played_hours'})                 # Played games by hour

In [15]:
print('shape of purchase dataset : ', df_purchase.shape[0])
print('shape of play dataset : ', df_play.shape[0])

shape of purchase dataset :  129511
shape of play dataset :  70489


### Merge datasets

In [0]:
total_dataset = pd.merge(df_purchase, df_play, left_on=['UserID','Game'], right_on=['UserID','Game'], how='left').fillna(0)

In [17]:
total_dataset.head(20)

Unnamed: 0,UserID,Game,Action,Purchased,Played_hours
0,151603712,The Elder Scrolls V Skyrim,purchase,1.0,273.0
1,151603712,Fallout 4,purchase,1.0,87.0
2,151603712,Spore,purchase,1.0,14.9
3,151603712,Fallout New Vegas,purchase,1.0,12.1
4,151603712,Left 4 Dead 2,purchase,1.0,8.9
5,151603712,HuniePop,purchase,1.0,8.5
6,151603712,Path of Exile,purchase,1.0,8.1
7,151603712,Poly Bridge,purchase,1.0,7.5
8,151603712,Left 4 Dead,purchase,1.0,3.3
9,151603712,Team Fortress 2,purchase,1.0,2.8


### Create a ranking system for games per user ID

In [0]:
#Ranking = lambda x: pd.qcut(x,10,labels=False,duplicates='drop')
#total_dataset['Ranking'] = total_dataset.groupby('UserID')['Played_hours'].apply(Ranking)

In [0]:
total_dataset['Ranking'] = total_dataset.groupby('UserID')['Played_hours'].transform(pd.cut, 10, labels=False,duplicates = "drop")+1.0

In [20]:
total_dataset.head(30)

Unnamed: 0,UserID,Game,Action,Purchased,Played_hours,Ranking
0,151603712,The Elder Scrolls V Skyrim,purchase,1.0,273.0,10.0
1,151603712,Fallout 4,purchase,1.0,87.0,4.0
2,151603712,Spore,purchase,1.0,14.9,1.0
3,151603712,Fallout New Vegas,purchase,1.0,12.1,1.0
4,151603712,Left 4 Dead 2,purchase,1.0,8.9,1.0
5,151603712,HuniePop,purchase,1.0,8.5,1.0
6,151603712,Path of Exile,purchase,1.0,8.1,1.0
7,151603712,Poly Bridge,purchase,1.0,7.5,1.0
8,151603712,Left 4 Dead,purchase,1.0,3.3,1.0
9,151603712,Team Fortress 2,purchase,1.0,2.8,1.0


In [21]:
total_dataset.loc[total_dataset["UserID"]==151603712].head(100)

Unnamed: 0,UserID,Game,Action,Purchased,Played_hours,Ranking
0,151603712,The Elder Scrolls V Skyrim,purchase,1.0,273.0,10.0
1,151603712,Fallout 4,purchase,1.0,87.0,4.0
2,151603712,Spore,purchase,1.0,14.9,1.0
3,151603712,Fallout New Vegas,purchase,1.0,12.1,1.0
4,151603712,Left 4 Dead 2,purchase,1.0,8.9,1.0
5,151603712,HuniePop,purchase,1.0,8.5,1.0
6,151603712,Path of Exile,purchase,1.0,8.1,1.0
7,151603712,Poly Bridge,purchase,1.0,7.5,1.0
8,151603712,Left 4 Dead,purchase,1.0,3.3,1.0
9,151603712,Team Fortress 2,purchase,1.0,2.8,1.0


### Create a clean dataset by dropping redundant columns

In [22]:
clean_df = total_dataset.drop(["Purchased","Action"],axis = 1)
clean_df.head(20)

Unnamed: 0,UserID,Game,Played_hours,Ranking
0,151603712,The Elder Scrolls V Skyrim,273.0,10.0
1,151603712,Fallout 4,87.0,4.0
2,151603712,Spore,14.9,1.0
3,151603712,Fallout New Vegas,12.1,1.0
4,151603712,Left 4 Dead 2,8.9,1.0
5,151603712,HuniePop,8.5,1.0
6,151603712,Path of Exile,8.1,1.0
7,151603712,Poly Bridge,7.5,1.0
8,151603712,Left 4 Dead,3.3,1.0
9,151603712,Team Fortress 2,2.8,1.0


### Display top 5 users based on number of games

In [23]:
clean_df["UserID"].value_counts()

62990992     1075
33865373      785
30246419      766
58345543      667
76892907      597
             ... 
149194171       1
207945140       1
130315685       1
282733934       1
214618086       1
Name: UserID, Length: 12393, dtype: int64

# SECTION 3 - RECOMMENDATION SYSTEM - FACTORIZATION RECOMMENDER

### Create a SFRame from the clean dataframe

In [0]:
dfS = tc.SFrame(clean_df)

### Factorization Recommender System


In [25]:
m = tc.factorization_recommender.create(dfS,
			                              user_id='UserID',
                                    item_id='Game',
                                    target='Ranking')

In [26]:
recommendations = m.recommend()

### USER DESCRIPTION AND RECOMMENDATIONS

###TOP USER 1

In [27]:
fr1 = m.recommend(users=['151603712'])
fr1.head(20)

UserID,Game,score,rank
151603712,Sid Meier's Civilization V ...,4.357136250900583,1
151603712,Counter-Strike Global Offensive ...,4.046951980757074,2
151603712,Call of Duty Modern Warfare 2 - Multiplayer ...,3.700310022997216,3
151603712,Grand Theft Auto V,3.682324232029275,4
151603712,Football Manager 2012,3.643090785908059,5
151603712,Half-Life 2 Lost Coast,3.56846676902707,6
151603712,Sniper Ghost Warrior,3.5076798512213982,7
151603712,Counter-Strike,3.463431233035878,8
151603712,Euro Truck Simulator 2,3.399292768406228,9
151603712,Counter-Strike Source,3.368751050400094,10


### Creating a dataframe

In [28]:
fact1 = pd.DataFrame(fr1)
fact1.drop(["UserID","rank"],axis = 1, inplace = True)
fact1.rename(columns={'Game': 'Game_FR', 'score': 'score_FR'}, inplace=True)
fact1.head(10)

Unnamed: 0,Game_FR,score_FR
0,Sid Meier's Civilization V,4.357136
1,Counter-Strike Global Offensive,4.046952
2,Call of Duty Modern Warfare 2 - Multiplayer,3.70031
3,Grand Theft Auto V,3.682324
4,Football Manager 2012,3.643091
5,Half-Life 2 Lost Coast,3.568467
6,Sniper Ghost Warrior,3.50768
7,Counter-Strike,3.463431
8,Euro Truck Simulator 2,3.399293
9,Counter-Strike Source,3.368751


### Creating a dataframe

In [29]:
clean_df.loc[clean_df["UserID"] == 151603712,["Game","Played_hours","Ranking"]]

Unnamed: 0,Game,Played_hours,Ranking
0,The Elder Scrolls V Skyrim,273.0,10.0
1,Fallout 4,87.0,4.0
2,Spore,14.9,1.0
3,Fallout New Vegas,12.1,1.0
4,Left 4 Dead 2,8.9,1.0
5,HuniePop,8.5,1.0
6,Path of Exile,8.1,1.0
7,Poly Bridge,7.5,1.0
8,Left 4 Dead,3.3,1.0
9,Team Fortress 2,2.8,1.0


###TOP USER 2

In [30]:
fr2 = m.recommend(users=['62990992'])
fr2

UserID,Game,score,rank
62990992,UberStrike,3.5863673141771115,1
62990992,Football Manager 2012,3.3785778409415044,2
62990992,Unturned,3.3452550186389245,3
62990992,Serious Sam HD The Second Encounter ...,3.24423803152378,4
62990992,Call of Duty Black Ops,3.207467438148812,5
62990992,BLOCKADE 3D,3.117440189661936,6
62990992,Football Manager 2013,3.060799171643809,7
62990992,Call of Duty Modern Warfare 2 ...,2.90209391848977,8
62990992,Lost Planet Extreme Condition ...,2.8566539859944142,9
62990992,F1 2012,2.819399186837033,10


### Creating a dataframe

In [31]:
fact2 = pd.DataFrame(fr2)
fact2.drop(["UserID","rank"],axis = 1, inplace = True)
fact2.rename(columns={'Game': 'Game_FR', 'score': 'score_FR'}, inplace=True)
fact2.head(10)

Unnamed: 0,Game_FR,score_FR
0,UberStrike,3.586367
1,Football Manager 2012,3.378578
2,Unturned,3.345255
3,Serious Sam HD The Second Encounter,3.244238
4,Call of Duty Black Ops,3.207467
5,BLOCKADE 3D,3.11744
6,Football Manager 2013,3.060799
7,Call of Duty Modern Warfare 2,2.902094
8,Lost Planet Extreme Condition,2.856654
9,F1 2012,2.819399


In [32]:
clean_df.loc[clean_df["UserID"] == 62990992,["Game","Played_hours","Ranking"]]

Unnamed: 0,Game,Played_hours,Ranking
77076,Counter-Strike Global Offensive,663.0,10.0
77077,Sid Meier's Civilization V,550.0,9.0
77078,Total War SHOGUN 2,212.0,4.0
77079,Total War ROME II - Emperor Edition,198.0,3.0
77080,Dungeon Defenders,195.0,3.0
...,...,...,...
78146,Xenophage,0.0,1.0
78147,Xpand Rally Xtreme,0.0,1.0
78148,Zen Bound 2,0.0,1.0
78149,Zombie Driver HD Apocalypse Pack,0.0,1.0


###TOP USER 3

In [33]:
fr3 = m.recommend(users=['33865373'])
fr3

UserID,Game,score,rank
33865373,Sid Meier's Civilization V ...,4.441703007983522,1
33865373,Counter-Strike Global Offensive ...,4.134892919527845,2
33865373,Counter-Strike Source,3.6458142189853935,3
33865373,Football Manager 2012,3.6027473373883523,4
33865373,Counter-Strike Nexon Zombies ...,3.501396329569177,5
33865373,UberStrike,3.43334716694291,6
33865373,Sniper Ghost Warrior,3.4325565634840287,7
33865373,Call of Duty Modern Warfare 2 - Multiplayer ...,3.373797358440713,8
33865373,Call of Duty Black Ops,3.236289420592145,9
33865373,Football Manager 2013,3.235436761247472,10


### Creating a dataframe

In [34]:
fact3 = pd.DataFrame(fr3)
fact3.drop(["UserID","rank"],axis = 1, inplace = True)
fact3.rename(columns={'Game': 'Game_FR', 'score': 'score_FR'}, inplace=True)
fact3.head(10)

Unnamed: 0,Game_FR,score_FR
0,Sid Meier's Civilization V,4.441703
1,Counter-Strike Global Offensive,4.134893
2,Counter-Strike Source,3.645814
3,Football Manager 2012,3.602747
4,Counter-Strike Nexon Zombies,3.501396
5,UberStrike,3.433347
6,Sniper Ghost Warrior,3.432557
7,Call of Duty Modern Warfare 2 - Multiplayer,3.373797
8,Call of Duty Black Ops,3.236289
9,Football Manager 2013,3.235437


In [35]:
clean_df.loc[clean_df["UserID"] == 33865373,["Game","Played_hours","Ranking"]]

Unnamed: 0,Game,Played_hours,Ranking
47739,The Elder Scrolls V Skyrim,345.0,10.0
47740,The Elder Scrolls IV Oblivion,239.0,7.0
47741,Fallout New Vegas,198.0,6.0
47742,Sid Meier's Civilization IV,135.0,4.0
47743,Sid Meier's Civilization IV,2.0,1.0
...,...,...,...
48519,Xenophage,0.0,1.0
48520,Xotic,0.0,1.0
48521,Zafehouse Diaries,0.0,1.0
48522,Zen Bound 2,0.0,1.0


###TOP USER 4

In [36]:
fr4 = m.recommend(users=['30246419'])
fr4

UserID,Game,score,rank
30246419,Counter-Strike Global Offensive ...,5.088228316473321,1
30246419,Dota 2,5.080670034008579,2
30246419,Grand Theft Auto V,4.513085575270013,3
30246419,Unturned,4.390760929750757,4
30246419,Call of Duty Modern Warfare 2 - Multiplayer ...,4.301605553793268,5
30246419,Football Manager 2012,4.2295291735404295,6
30246419,Counter-Strike Source,4.173588992523508,7
30246419,Counter-Strike Nexon Zombies ...,4.131647737192468,8
30246419,Call of Duty Black Ops,4.018088729547815,9
30246419,F1 2012,3.990187020468072,10


### Creating a dataframe

In [37]:
fact4 = pd.DataFrame(fr4)
fact4.drop(["UserID","rank"],axis = 1, inplace = True)
fact4.rename(columns={'Game': 'Game_FR', 'score': 'score_FR'}, inplace=True)
fact4.head(10)

Unnamed: 0,Game_FR,score_FR
0,Counter-Strike Global Offensive,5.088228
1,Dota 2,5.08067
2,Grand Theft Auto V,4.513086
3,Unturned,4.390761
4,Call of Duty Modern Warfare 2 - Multiplayer,4.301606
5,Football Manager 2012,4.229529
6,Counter-Strike Source,4.173589
7,Counter-Strike Nexon Zombies,4.131648
8,Call of Duty Black Ops,4.018089
9,F1 2012,3.990187


In [38]:
clean_df.loc[clean_df["UserID"] == 30246419,["Game","Played_hours","Ranking"]]

Unnamed: 0,Game,Played_hours,Ranking
62101,The Witcher 3 Wild Hunt,99.0,10.0
62102,Fallout 4,97.0,10.0
62103,Two Worlds II,61.0,7.0
62104,Test Drive Unlimited 2,57.0,6.0
62105,Middle-earth Shadow of Mordor,53.0,6.0
...,...,...,...
62862,Yury,0.0,1.0
62863,Z,0.0,1.0
62864,Zafehouse Diaries,0.0,1.0
62865,Zeno Clash 2,0.0,1.0


###TOP USER 5

In [39]:
fr5 = m.recommend(users=['58345543'])
fr5

UserID,Game,score,rank
58345543,Grand Theft Auto V,4.697976396846132,1
58345543,Football Manager 2012,4.267308340834932,2
58345543,Euro Truck Simulator 2,4.247729228305177,3
58345543,F1 2012,4.076017187403993,4
58345543,H1Z1,4.069415257739381,5
58345543,Sniper Ghost Warrior,4.047849045562105,6
58345543,Half-Life Blue Shift,4.043884502219514,7
58345543,Counter-Strike,3.9897270737760815,8
58345543,RACE 07,3.8844409896605767,9
58345543,Team Fortress Classic,3.876006589698152,10


### Creating a dataframe

In [40]:
fact5 = pd.DataFrame(fr5)
fact5.drop(["UserID","rank"],axis = 1, inplace = True)
fact5.rename(columns={'Game': 'Game_FR', 'score': 'score_FR'}, inplace=True)
fact5.head(10)

Unnamed: 0,Game_FR,score_FR
0,Grand Theft Auto V,4.697976
1,Football Manager 2012,4.267308
2,Euro Truck Simulator 2,4.247729
3,F1 2012,4.076017
4,H1Z1,4.069415
5,Sniper Ghost Warrior,4.047849
6,Half-Life Blue Shift,4.043885
7,Counter-Strike,3.989727
8,RACE 07,3.884441
9,Team Fortress Classic,3.876007


In [41]:
clean_df.loc[clean_df["UserID"] == 58345543,["Game","Played_hours","Ranking"]]

Unnamed: 0,Game,Played_hours,Ranking
115598,Spiral Knights,70.0,10.0
115599,Blacklight Retribution,64.0,10.0
115600,Dungeon Defenders,56.0,8.0
115601,Terraria,54.0,8.0
115602,Borderlands 2,53.0,8.0
...,...,...,...
116260,Wrath of Athena,0.0,1.0
116261,WTFast Gamers Private Network (GPN),0.0,1.0
116262,Xam,0.0,1.0
116263,You Have to Win the Game,0.0,1.0


In [42]:
clean_df.loc[clean_df["UserID"] == 58345543,["Game","Played_hours","Ranking"]]

Unnamed: 0,Game,Played_hours,Ranking
115598,Spiral Knights,70.0,10.0
115599,Blacklight Retribution,64.0,10.0
115600,Dungeon Defenders,56.0,8.0
115601,Terraria,54.0,8.0
115602,Borderlands 2,53.0,8.0
...,...,...,...
116260,Wrath of Athena,0.0,1.0
116261,WTFast Gamers Private Network (GPN),0.0,1.0
116262,Xam,0.0,1.0
116263,You Have to Win the Game,0.0,1.0


# SECTION 4 - RECOMMENDATION SYSTEM - RANKING FACTORIZATION RECOMMENDER

### Ranking Factorization Recommender System

In [43]:
n = tc.ranking_factorization_recommender.create(dfS,
			                              user_id='UserID',
                                    item_id='Game',
                                    target='Ranking')


In [44]:
recommendations = n.recommend()

### USER DESCRIPTION AND RECOMMENDATIONS

###TOP USER 1

In [45]:
rf1 = n.recommend(users=['151603712'])
rf1.head(20)

UserID,Game,score,rank
151603712,Counter-Strike Global Offensive ...,3.1671369238370217,1
151603712,Counter-Strike,3.095889852809266,2
151603712,Counter-Strike Source,3.059421749281243,3
151603712,Unturned,2.9935809461944856,4
151603712,Sid Meier's Civilization V ...,2.371591571214155,5
151603712,Terraria,2.125734196471528,6
151603712,Call of Duty Modern Warfare 2 - Multiplayer ...,2.026283563541726,7
151603712,Call of Duty Modern Warfare 2 ...,2.019285188483552,8
151603712,Grand Theft Auto V,1.9695560349696437,9
151603712,Call of Duty Black Ops,1.9694260670417107,10


###Creating a dataframe

In [46]:
ranfact1 = pd.DataFrame(rf1)
ranfact1.drop(["UserID","rank"],axis = 1, inplace = True)
ranfact1.rename(columns={'Game': 'Game_RFR', 'score': 'score_RFR'}, inplace=True)
ranfact1.head(10)

Unnamed: 0,Game_RFR,score_RFR
0,Counter-Strike Global Offensive,3.167137
1,Counter-Strike,3.09589
2,Counter-Strike Source,3.059422
3,Unturned,2.993581
4,Sid Meier's Civilization V,2.371592
5,Terraria,2.125734
6,Call of Duty Modern Warfare 2 - Multiplayer,2.026284
7,Call of Duty Modern Warfare 2,2.019285
8,Grand Theft Auto V,1.969556
9,Call of Duty Black Ops,1.969426


In [47]:
clean_df.loc[clean_df["UserID"] == 151603712,["Game","Played_hours","Ranking"]]

Unnamed: 0,Game,Played_hours,Ranking
0,The Elder Scrolls V Skyrim,273.0,10.0
1,Fallout 4,87.0,4.0
2,Spore,14.9,1.0
3,Fallout New Vegas,12.1,1.0
4,Left 4 Dead 2,8.9,1.0
5,HuniePop,8.5,1.0
6,Path of Exile,8.1,1.0
7,Poly Bridge,7.5,1.0
8,Left 4 Dead,3.3,1.0
9,Team Fortress 2,2.8,1.0


###TOP USER 2

In [48]:
rf2 = n.recommend(users=['62990992'])
rf2

UserID,Game,score,rank
62990992,Unturned,1.8416684581511773,1
62990992,Grand Theft Auto V,1.1955436481707848,2
62990992,Call of Duty Modern Warfare 2 - Multiplayer ...,1.0567457689517297,3
62990992,Call of Duty Modern Warfare 2 ...,0.9593829526179588,4
62990992,Call of Duty Black Ops,0.9483786536925592,5
62990992,Fallout New Vegas,0.9038690938227928,6
62990992,PAYDAY 2,0.8968136324160851,7
62990992,Fallout 4,0.8740738643878259,8
62990992,Rust,0.8151321722739495,9
62990992,Dark Souls Prepare to Die Edition ...,0.7517104311459817,10


###Creating a dataframe

In [49]:
ranfact2 = pd.DataFrame(rf2)
ranfact2.drop(["UserID","rank"],axis = 1, inplace = True)
ranfact2.rename(columns={'Game': 'Game_RFR', 'score': 'score_RFR'}, inplace=True)
ranfact2.head(10)

Unnamed: 0,Game_RFR,score_RFR
0,Unturned,1.841668
1,Grand Theft Auto V,1.195544
2,Call of Duty Modern Warfare 2 - Multiplayer,1.056746
3,Call of Duty Modern Warfare 2,0.959383
4,Call of Duty Black Ops,0.948379
5,Fallout New Vegas,0.903869
6,PAYDAY 2,0.896814
7,Fallout 4,0.874074
8,Rust,0.815132
9,Dark Souls Prepare to Die Edition,0.75171


In [50]:
clean_df.loc[clean_df["UserID"] == 62990992,["Game","Played_hours","Ranking"]]

Unnamed: 0,Game,Played_hours,Ranking
77076,Counter-Strike Global Offensive,663.0,10.0
77077,Sid Meier's Civilization V,550.0,9.0
77078,Total War SHOGUN 2,212.0,4.0
77079,Total War ROME II - Emperor Edition,198.0,3.0
77080,Dungeon Defenders,195.0,3.0
...,...,...,...
78146,Xenophage,0.0,1.0
78147,Xpand Rally Xtreme,0.0,1.0
78148,Zen Bound 2,0.0,1.0
78149,Zombie Driver HD Apocalypse Pack,0.0,1.0


###TOP USER 3

In [51]:
rf3 = n.recommend(users=['33865373'])
rf3

UserID,Game,score,rank
33865373,Counter-Strike Global Offensive ...,3.1979376687281884,1
33865373,Counter-Strike Source,2.678135813640908,2
33865373,Unturned,2.387506456541375,3
33865373,Sid Meier's Civilization V ...,1.8684693737500464,4
33865373,Call of Duty Modern Warfare 2 - Multiplayer ...,1.7586014880412375,5
33865373,Grand Theft Auto V,1.6474726809733666,6
33865373,Borderlands 2,1.5191771401637353,7
33865373,Call of Duty Black Ops,1.470338256525353,8
33865373,PAYDAY 2,1.456703187393502,9
33865373,The Witcher 2 Assassins of Kings Enhanced Edi ...,1.4284240319007195,10


###Creating a dataframe

In [52]:
ranfact3 = pd.DataFrame(rf3)
ranfact3.drop(["UserID","rank"],axis = 1, inplace = True)
ranfact3.rename(columns={'Game': 'Game_RFR', 'score': 'score_RFR'}, inplace=True)
ranfact3.head(10)

Unnamed: 0,Game_RFR,score_RFR
0,Counter-Strike Global Offensive,3.197938
1,Counter-Strike Source,2.678136
2,Unturned,2.387506
3,Sid Meier's Civilization V,1.868469
4,Call of Duty Modern Warfare 2 - Multiplayer,1.758601
5,Grand Theft Auto V,1.647473
6,Borderlands 2,1.519177
7,Call of Duty Black Ops,1.470338
8,PAYDAY 2,1.456703
9,The Witcher 2 Assassins of Kings Enhanced Edition,1.428424


In [53]:
clean_df.loc[clean_df["UserID"] == 33865373,["Game","Played_hours","Ranking"]]

Unnamed: 0,Game,Played_hours,Ranking
47739,The Elder Scrolls V Skyrim,345.0,10.0
47740,The Elder Scrolls IV Oblivion,239.0,7.0
47741,Fallout New Vegas,198.0,6.0
47742,Sid Meier's Civilization IV,135.0,4.0
47743,Sid Meier's Civilization IV,2.0,1.0
...,...,...,...
48519,Xenophage,0.0,1.0
48520,Xotic,0.0,1.0
48521,Zafehouse Diaries,0.0,1.0
48522,Zen Bound 2,0.0,1.0


###TOP USER 4

In [54]:
rf4 = n.recommend(users=['30246419'])
rf4

UserID,Game,score,rank
30246419,Dota 2,4.7573422952168745,1
30246419,Counter-Strike Global Offensive ...,4.111944915222482,2
30246419,Team Fortress 2,4.089925469326333,3
30246419,Counter-Strike,3.472716511654214,4
30246419,Counter-Strike Source,3.4205573512786187,5
30246419,Garry's Mod,3.069335163997964,6
30246419,Unturned,2.921819211410836,7
30246419,Call of Duty Modern Warfare 2 - Multiplayer ...,2.642764092850045,8
30246419,Grand Theft Auto V,2.4814943088763517,9
30246419,Empire Total War,2.285130859779672,10


###Creating a dataframe

In [55]:
ranfact4 = pd.DataFrame(rf4)
ranfact4.drop(["UserID","rank"],axis = 1, inplace = True)
ranfact4.rename(columns={'Game': 'Game_RFR', 'score': 'score_RFR'}, inplace=True)
ranfact4.head(10)

Unnamed: 0,Game_RFR,score_RFR
0,Dota 2,4.757342
1,Counter-Strike Global Offensive,4.111945
2,Team Fortress 2,4.089925
3,Counter-Strike,3.472717
4,Counter-Strike Source,3.420557
5,Garry's Mod,3.069335
6,Unturned,2.921819
7,Call of Duty Modern Warfare 2 - Multiplayer,2.642764
8,Grand Theft Auto V,2.481494
9,Empire Total War,2.285131


In [56]:
clean_df.loc[clean_df["UserID"] == 30246419,["Game","Played_hours","Ranking"]]

Unnamed: 0,Game,Played_hours,Ranking
62101,The Witcher 3 Wild Hunt,99.0,10.0
62102,Fallout 4,97.0,10.0
62103,Two Worlds II,61.0,7.0
62104,Test Drive Unlimited 2,57.0,6.0
62105,Middle-earth Shadow of Mordor,53.0,6.0
...,...,...,...
62862,Yury,0.0,1.0
62863,Z,0.0,1.0
62864,Zafehouse Diaries,0.0,1.0
62865,Zeno Clash 2,0.0,1.0


###TOP USER 5

In [57]:
rf5 = n.recommend(users=['58345543'])
rf5

UserID,Game,score,rank
58345543,Counter-Strike,3.885623441504792,1
58345543,Grand Theft Auto V,2.7967790229790963,2
58345543,Empire Total War,2.7773902876847543,3
58345543,Euro Truck Simulator 2,2.756396339701966,4
58345543,Total War SHOGUN 2,2.620008931922273,5
58345543,Call of Duty Black Ops II - Multiplayer ...,2.609438226985292,6
58345543,Counter-Strike Condition Zero ...,2.592512296008424,7
58345543,PAYDAY 2,2.58966456489499,8
58345543,Arma 2 Operation Arrowhead ...,2.5787628455632485,9
58345543,Call of Duty Modern Warfare 3 - Multiplayer ...,2.548212306069688,10


###Creating a dataframe


In [58]:
ranfact5 = pd.DataFrame(rf5)
ranfact5.drop(["UserID","rank"],axis = 1, inplace = True)
ranfact5.rename(columns={'Game': 'Game_RFR', 'score': 'score_RFR'}, inplace=True)
ranfact5.head(10)

Unnamed: 0,Game_RFR,score_RFR
0,Counter-Strike,3.885623
1,Grand Theft Auto V,2.796779
2,Empire Total War,2.77739
3,Euro Truck Simulator 2,2.756396
4,Total War SHOGUN 2,2.620009
5,Call of Duty Black Ops II - Multiplayer,2.609438
6,Counter-Strike Condition Zero,2.592512
7,PAYDAY 2,2.589665
8,Arma 2 Operation Arrowhead,2.578763
9,Call of Duty Modern Warfare 3 - Multiplayer,2.548212


In [59]:
clean_df.loc[clean_df["UserID"] == 58345543,["Game","Played_hours","Ranking"]]

Unnamed: 0,Game,Played_hours,Ranking
115598,Spiral Knights,70.0,10.0
115599,Blacklight Retribution,64.0,10.0
115600,Dungeon Defenders,56.0,8.0
115601,Terraria,54.0,8.0
115602,Borderlands 2,53.0,8.0
...,...,...,...
116260,Wrath of Athena,0.0,1.0
116261,WTFast Gamers Private Network (GPN),0.0,1.0
116262,Xam,0.0,1.0
116263,You Have to Win the Game,0.0,1.0


# SECTION 5 - RECOMMENDATION SYSTEM - ITEM SIMILARITY RECOMMENDER

### Item Similarity Recommender System

In [60]:
o = tc.item_similarity_recommender.create(dfS,
			                              user_id='UserID',
                                    item_id='Game',
                                    target='Ranking')

In [61]:
recommendations = o.recommend()

### USER DESCRIPTION AND RECOMMENDATIONS

###TOP 1 USER

In [62]:
isr1 = o.recommend(users=['151603712'])
isr1.head(20)

UserID,Game,score,rank
151603712,Borderlands 2,0.0884400218725204,1
151603712,The Witcher 2 Assassins of Kings Enhanced Edi ...,0.086573040485382,2
151603712,Portal 2,0.0854234710335731,3
151603712,Metro 2033,0.0831524074077606,4
151603712,Dishonored,0.0812200978398323,5
151603712,Batman Arkham City GOTY,0.0795793786644935,6
151603712,Company of Heroes (New Steam Version) ...,0.0741256177425384,7
151603712,Company of Heroes,0.0689126372337341,8
151603712,Batman Arkham Asylum GOTY Edition ...,0.0677080437541008,9
151603712,Deus Ex Human Revolution,0.0674901828169822,10


###Creating a dataframe

In [63]:
itsim1 = pd.DataFrame(isr1)
itsim1.drop(["UserID","rank"],axis = 1, inplace = True)
itsim1.rename(columns={'Game': 'Game_ISR', 'score': 'score_ISR'}, inplace=True)
itsim1.head(10)

Unnamed: 0,Game_ISR,score_ISR
0,Borderlands 2,0.08844
1,The Witcher 2 Assassins of Kings Enhanced Edition,0.086573
2,Portal 2,0.085423
3,Metro 2033,0.083152
4,Dishonored,0.08122
5,Batman Arkham City GOTY,0.079579
6,Company of Heroes (New Steam Version),0.074126
7,Company of Heroes,0.068913
8,Batman Arkham Asylum GOTY Edition,0.067708
9,Deus Ex Human Revolution,0.06749


In [64]:
clean_df.loc[clean_df["UserID"] == 151603712,["Game","Played_hours","Ranking"]]

Unnamed: 0,Game,Played_hours,Ranking
0,The Elder Scrolls V Skyrim,273.0,10.0
1,Fallout 4,87.0,4.0
2,Spore,14.9,1.0
3,Fallout New Vegas,12.1,1.0
4,Left 4 Dead 2,8.9,1.0
5,HuniePop,8.5,1.0
6,Path of Exile,8.1,1.0
7,Poly Bridge,7.5,1.0
8,Left 4 Dead,3.3,1.0
9,Team Fortress 2,2.8,1.0


###TOP 2 USER

In [65]:
isr2 = o.recommend(users=['62990992'])
isr2

UserID,Game,score,rank
62990992,Brtal Legend,0.0211463945077153,1
62990992,Overlord Raising Hell,0.021016922280583,2
62990992,Mark of the Ninja,0.0202628679378202,3
62990992,Bastion,0.0190528717157099,4
62990992,Darksiders,0.0184229549285624,5
62990992,Dishonored,0.0159593203875902,6
62990992,LIMBO,0.0143794516164265,7
62990992,FTL Faster Than Light,0.0143433055850896,8
62990992,Rise of the Argonauts,0.0138825669270776,9
62990992,The Walking Dead,0.0136507162998678,10


###Creating a dataframe

In [66]:
itsim2 = pd.DataFrame(isr2)
itsim2.drop(["UserID","rank"],axis = 1, inplace = True)
itsim2.rename(columns = {'Game': 'Game_ISR', 'score': 'score_ISR'},inplace = True)
itsim2.head(10)

Unnamed: 0,Game_ISR,score_ISR
0,Brtal Legend,0.021146
1,Overlord Raising Hell,0.021017
2,Mark of the Ninja,0.020263
3,Bastion,0.019053
4,Darksiders,0.018423
5,Dishonored,0.015959
6,LIMBO,0.014379
7,FTL Faster Than Light,0.014343
8,Rise of the Argonauts,0.013883
9,The Walking Dead,0.013651


In [67]:
clean_df.loc[clean_df["UserID"] == 62990992,["Game","Played_hours","Ranking"]]

Unnamed: 0,Game,Played_hours,Ranking
77076,Counter-Strike Global Offensive,663.0,10.0
77077,Sid Meier's Civilization V,550.0,9.0
77078,Total War SHOGUN 2,212.0,4.0
77079,Total War ROME II - Emperor Edition,198.0,3.0
77080,Dungeon Defenders,195.0,3.0
...,...,...,...
78146,Xenophage,0.0,1.0
78147,Xpand Rally Xtreme,0.0,1.0
78148,Zen Bound 2,0.0,1.0
78149,Zombie Driver HD Apocalypse Pack,0.0,1.0


###TOP 3 USER

In [68]:
isr3 = o.recommend(users=['33865373'])
isr3

UserID,Game,score,rank
33865373,GoD Factory Wingmen,0.0211968014074355,1
33865373,Guacamelee! Gold Edition,0.0208471214648374,2
33865373,The Swapper,0.0191651463662226,3
33865373,Titan Quest,0.0186004236186902,4
33865373,Gray Matter,0.017632924481151,5
33865373,Orcs Must Die! 2,0.0171985211446113,6
33865373,The Witcher 2 Assassins of Kings Enhanced Edi ...,0.0167787171823462,7
33865373,The Walking Dead,0.0165454957288565,8
33865373,Overlord Raising Hell,0.0158913284540176,9
33865373,Shank 2,0.0156992231447672,10


###Creating a dataframe

In [69]:
itsim3 = pd.DataFrame(isr3)
itsim3.drop(["UserID","rank"],axis = 1, inplace = True)
itsim3.rename(columns={'Game': 'Game_ISR', 'score': 'score_ISR'}, inplace=True)
itsim3.head(10)

Unnamed: 0,Game_ISR,score_ISR
0,GoD Factory Wingmen,0.021197
1,Guacamelee! Gold Edition,0.020847
2,The Swapper,0.019165
3,Titan Quest,0.0186
4,Gray Matter,0.017633
5,Orcs Must Die! 2,0.017199
6,The Witcher 2 Assassins of Kings Enhanced Edition,0.016779
7,The Walking Dead,0.016545
8,Overlord Raising Hell,0.015891
9,Shank 2,0.015699


In [70]:
clean_df.loc[clean_df["UserID"] == 33865373,["Game","Played_hours","Ranking"]]

Unnamed: 0,Game,Played_hours,Ranking
47739,The Elder Scrolls V Skyrim,345.0,10.0
47740,The Elder Scrolls IV Oblivion,239.0,7.0
47741,Fallout New Vegas,198.0,6.0
47742,Sid Meier's Civilization IV,135.0,4.0
47743,Sid Meier's Civilization IV,2.0,1.0
...,...,...,...
48519,Xenophage,0.0,1.0
48520,Xotic,0.0,1.0
48521,Zafehouse Diaries,0.0,1.0
48522,Zen Bound 2,0.0,1.0


###TOP 4 USER

In [71]:
isr4 = o.recommend(users=['30246419'])
isr4

UserID,Game,score,rank
30246419,Sine Mora,0.0230194090697448,1
30246419,Dishonored,0.0213521445692763,2
30246419,Dear Esther,0.0199621609548335,3
30246419,BioShock Infinite,0.0191236710045805,4
30246419,Gratuitous Space Battles,0.017178096868618,5
30246419,Osmos,0.0165144150907343,6
30246419,Intrusion 2,0.0164531229827086,7
30246419,Red Faction Guerrilla Steam Edition ...,0.0153989426703321,8
30246419,Proteus,0.0151942900047,9
30246419,Thomas Was Alone,0.0149014164179524,10


###Creating a dataframe

In [72]:
itsim4 = pd.DataFrame(isr4)
itsim4.drop(["UserID","rank"],axis = 1, inplace = True)
itsim4.rename(columns={'Game': 'Game_ISR', 'score': 'score_ISR'}, inplace=True)
itsim4.head(10)

Unnamed: 0,Game_ISR,score_ISR
0,Sine Mora,0.023019
1,Dishonored,0.021352
2,Dear Esther,0.019962
3,BioShock Infinite,0.019124
4,Gratuitous Space Battles,0.017178
5,Osmos,0.016514
6,Intrusion 2,0.016453
7,Red Faction Guerrilla Steam Edition,0.015399
8,Proteus,0.015194
9,Thomas Was Alone,0.014901


In [73]:
clean_df.loc[clean_df["UserID"] == 30246419,["Game","Played_hours","Ranking"]]

Unnamed: 0,Game,Played_hours,Ranking
62101,The Witcher 3 Wild Hunt,99.0,10.0
62102,Fallout 4,97.0,10.0
62103,Two Worlds II,61.0,7.0
62104,Test Drive Unlimited 2,57.0,6.0
62105,Middle-earth Shadow of Mordor,53.0,6.0
...,...,...,...
62862,Yury,0.0,1.0
62863,Z,0.0,1.0
62864,Zafehouse Diaries,0.0,1.0
62865,Zeno Clash 2,0.0,1.0


###TOP 5 USER

In [74]:
isr5 = o.recommend(users=['58345543'])
isr5

UserID,Game,score,rank
58345543,Combat Arms,0.0255608952439225,1
58345543,Dragon Nest Europe,0.0225918635770723,2
58345543,Global Agenda,0.020097782303979,3
58345543,Mark of the Ninja,0.0170792860311788,4
58345543,Batman Arkham City GOTY,0.0165116279511838,5
58345543,Batman Arkham Asylum GOTY Edition ...,0.0163011115055542,6
58345543,Frozen Synapse,0.0160368416044447,7
58345543,Dear Esther,0.0157095621656011,8
58345543,FTL Faster Than Light,0.0155915083648922,9
58345543,Dishonored,0.0152852153992867,10


###Creating a dataframe

In [75]:
itsim5 = pd.DataFrame(isr5)
itsim5.drop(["UserID","rank"],axis = 1, inplace = True)
itsim5.rename(columns={'Game': 'Game_ISR', 'score': 'score_ISR'},inplace = True)
itsim5.head(10)

Unnamed: 0,Game_ISR,score_ISR
0,Combat Arms,0.025561
1,Dragon Nest Europe,0.022592
2,Global Agenda,0.020098
3,Mark of the Ninja,0.017079
4,Batman Arkham City GOTY,0.016512
5,Batman Arkham Asylum GOTY Edition,0.016301
6,Frozen Synapse,0.016037
7,Dear Esther,0.01571
8,FTL Faster Than Light,0.015592
9,Dishonored,0.015285


In [76]:
clean_df.loc[clean_df["UserID"] == 58345543,["Game","Played_hours","Ranking"]]

Unnamed: 0,Game,Played_hours,Ranking
115598,Spiral Knights,70.0,10.0
115599,Blacklight Retribution,64.0,10.0
115600,Dungeon Defenders,56.0,8.0
115601,Terraria,54.0,8.0
115602,Borderlands 2,53.0,8.0
...,...,...,...
116260,Wrath of Athena,0.0,1.0
116261,WTFast Gamers Private Network (GPN),0.0,1.0
116262,Xam,0.0,1.0
116263,You Have to Win the Game,0.0,1.0


### TOP 1 USER TOTAL COMPARISON FOR RECOMMENDATIONS

In [77]:
Top1_comparison = pd.concat([fact1, ranfact1,itsim1], axis=1, sort=False)
Top1_comparison.head(10)

Unnamed: 0,Game_FR,score_FR,Game_RFR,score_RFR,Game_ISR,score_ISR
0,Sid Meier's Civilization V,4.357136,Counter-Strike Global Offensive,3.167137,Borderlands 2,0.08844
1,Counter-Strike Global Offensive,4.046952,Counter-Strike,3.09589,The Witcher 2 Assassins of Kings Enhanced Edition,0.086573
2,Call of Duty Modern Warfare 2 - Multiplayer,3.70031,Counter-Strike Source,3.059422,Portal 2,0.085423
3,Grand Theft Auto V,3.682324,Unturned,2.993581,Metro 2033,0.083152
4,Football Manager 2012,3.643091,Sid Meier's Civilization V,2.371592,Dishonored,0.08122
5,Half-Life 2 Lost Coast,3.568467,Terraria,2.125734,Batman Arkham City GOTY,0.079579
6,Sniper Ghost Warrior,3.50768,Call of Duty Modern Warfare 2 - Multiplayer,2.026284,Company of Heroes (New Steam Version),0.074126
7,Counter-Strike,3.463431,Call of Duty Modern Warfare 2,2.019285,Company of Heroes,0.068913
8,Euro Truck Simulator 2,3.399293,Grand Theft Auto V,1.969556,Batman Arkham Asylum GOTY Edition,0.067708
9,Counter-Strike Source,3.368751,Call of Duty Black Ops,1.969426,Deus Ex Human Revolution,0.06749


### TOP 2 USER TOTAL COMPARISON FOR RECOMMENDATIONS

In [78]:
Top2_comparison = pd.concat([fact2, ranfact2,itsim2], axis=1, sort=False)
Top2_comparison.head(10)

Unnamed: 0,Game_FR,score_FR,Game_RFR,score_RFR,Game_ISR,score_ISR
0,UberStrike,3.586367,Unturned,1.841668,Brtal Legend,0.021146
1,Football Manager 2012,3.378578,Grand Theft Auto V,1.195544,Overlord Raising Hell,0.021017
2,Unturned,3.345255,Call of Duty Modern Warfare 2 - Multiplayer,1.056746,Mark of the Ninja,0.020263
3,Serious Sam HD The Second Encounter,3.244238,Call of Duty Modern Warfare 2,0.959383,Bastion,0.019053
4,Call of Duty Black Ops,3.207467,Call of Duty Black Ops,0.948379,Darksiders,0.018423
5,BLOCKADE 3D,3.11744,Fallout New Vegas,0.903869,Dishonored,0.015959
6,Football Manager 2013,3.060799,PAYDAY 2,0.896814,LIMBO,0.014379
7,Call of Duty Modern Warfare 2,2.902094,Fallout 4,0.874074,FTL Faster Than Light,0.014343
8,Lost Planet Extreme Condition,2.856654,Rust,0.815132,Rise of the Argonauts,0.013883
9,F1 2012,2.819399,Dark Souls Prepare to Die Edition,0.75171,The Walking Dead,0.013651


### TOP 3 USER TOTAL COMPARISON FOR RECOMMENDATIONS

In [79]:
Top3_comparison = pd.concat([fact3, ranfact3,itsim3], axis=1, sort=False)
Top3_comparison.head(10)

Unnamed: 0,Game_FR,score_FR,Game_RFR,score_RFR,Game_ISR,score_ISR
0,Sid Meier's Civilization V,4.441703,Counter-Strike Global Offensive,3.197938,GoD Factory Wingmen,0.021197
1,Counter-Strike Global Offensive,4.134893,Counter-Strike Source,2.678136,Guacamelee! Gold Edition,0.020847
2,Counter-Strike Source,3.645814,Unturned,2.387506,The Swapper,0.019165
3,Football Manager 2012,3.602747,Sid Meier's Civilization V,1.868469,Titan Quest,0.0186
4,Counter-Strike Nexon Zombies,3.501396,Call of Duty Modern Warfare 2 - Multiplayer,1.758601,Gray Matter,0.017633
5,UberStrike,3.433347,Grand Theft Auto V,1.647473,Orcs Must Die! 2,0.017199
6,Sniper Ghost Warrior,3.432557,Borderlands 2,1.519177,The Witcher 2 Assassins of Kings Enhanced Edition,0.016779
7,Call of Duty Modern Warfare 2 - Multiplayer,3.373797,Call of Duty Black Ops,1.470338,The Walking Dead,0.016545
8,Call of Duty Black Ops,3.236289,PAYDAY 2,1.456703,Overlord Raising Hell,0.015891
9,Football Manager 2013,3.235437,The Witcher 2 Assassins of Kings Enhanced Edition,1.428424,Shank 2,0.015699


### TOP 4 USER TOTAL COMPARISON FOR RECOMMENDATIONS

In [80]:
Top4_comparison = pd.concat([fact4, ranfact4,itsim4], axis=1, sort=False)
Top4_comparison.head(10)

Unnamed: 0,Game_FR,score_FR,Game_RFR,score_RFR,Game_ISR,score_ISR
0,Counter-Strike Global Offensive,5.088228,Dota 2,4.757342,Sine Mora,0.023019
1,Dota 2,5.08067,Counter-Strike Global Offensive,4.111945,Dishonored,0.021352
2,Grand Theft Auto V,4.513086,Team Fortress 2,4.089925,Dear Esther,0.019962
3,Unturned,4.390761,Counter-Strike,3.472717,BioShock Infinite,0.019124
4,Call of Duty Modern Warfare 2 - Multiplayer,4.301606,Counter-Strike Source,3.420557,Gratuitous Space Battles,0.017178
5,Football Manager 2012,4.229529,Garry's Mod,3.069335,Osmos,0.016514
6,Counter-Strike Source,4.173589,Unturned,2.921819,Intrusion 2,0.016453
7,Counter-Strike Nexon Zombies,4.131648,Call of Duty Modern Warfare 2 - Multiplayer,2.642764,Red Faction Guerrilla Steam Edition,0.015399
8,Call of Duty Black Ops,4.018089,Grand Theft Auto V,2.481494,Proteus,0.015194
9,F1 2012,3.990187,Empire Total War,2.285131,Thomas Was Alone,0.014901


### TOP 5 USER TOTAL COMPARISON FOR RECOMMENDATIONS

In [81]:
Top5_comparison = pd.concat([fact5, ranfact5,itsim5], axis=1, sort=False)
Top5_comparison.head(10)

Unnamed: 0,Game_FR,score_FR,Game_RFR,score_RFR,Game_ISR,score_ISR
0,Grand Theft Auto V,4.697976,Counter-Strike,3.885623,Combat Arms,0.025561
1,Football Manager 2012,4.267308,Grand Theft Auto V,2.796779,Dragon Nest Europe,0.022592
2,Euro Truck Simulator 2,4.247729,Empire Total War,2.77739,Global Agenda,0.020098
3,F1 2012,4.076017,Euro Truck Simulator 2,2.756396,Mark of the Ninja,0.017079
4,H1Z1,4.069415,Total War SHOGUN 2,2.620009,Batman Arkham City GOTY,0.016512
5,Sniper Ghost Warrior,4.047849,Call of Duty Black Ops II - Multiplayer,2.609438,Batman Arkham Asylum GOTY Edition,0.016301
6,Half-Life Blue Shift,4.043885,Counter-Strike Condition Zero,2.592512,Frozen Synapse,0.016037
7,Counter-Strike,3.989727,PAYDAY 2,2.589665,Dear Esther,0.01571
8,RACE 07,3.884441,Arma 2 Operation Arrowhead,2.578763,FTL Faster Than Light,0.015592
9,Team Fortress Classic,3.876007,Call of Duty Modern Warfare 3 - Multiplayer,2.548212,Dishonored,0.015285


###Evaluation of Recommender Systems

### Split into Test and Train data

In [0]:
train_set, test_set = tc.recommender.util.random_split_by_user(dfS,"UserID","Game",item_test_proportion= 0.2)

### Model 1 : Factorization Recommender - Training

In [83]:
model1 = tc.factorization_recommender.create(train_set,
			                              user_id='UserID',
                                    item_id='Game',
                                    target='Ranking')

### Model 1 : Factorization Recommender - Testing

In [84]:
model1.evaluate(test_set)

  + 'the next major release. Any passed parameters are ignored.')



Precision and recall summary statistics by cutoff
+--------+----------------------+---------------------+
| cutoff |    mean_precision    |     mean_recall     |
+--------+----------------------+---------------------+
|   1    | 0.18633540372670812  | 0.13412867093488082 |
|   2    | 0.13768115942028983  | 0.16685040541938734 |
|   3    | 0.10973084886128363  | 0.18058323621920877 |
|   4    | 0.09316770186335403  |  0.2038856526893271 |
|   5    | 0.08405797101449272  |  0.2238409174039085 |
|   6    | 0.07556935817805378  | 0.23768107569282457 |
|   7    | 0.06802721088435379  | 0.24964745644802522 |
|   8    | 0.061853002070393355 | 0.25735608274115623 |
|   9    | 0.057510927076144455 |  0.2628852211895493 |
|   10   | 0.05403726708074535  |  0.2695297574879037 |
+--------+----------------------+---------------------+
[10 rows x 3 columns]


Overall RMSE: 4.499849176267877

Per User RMSE (best)
+-----------+----------------------+-------+
|   UserID  |         rmse         | count

{'precision_recall_by_user': Columns:
 	UserID	int
 	cutoff	int
 	precision	float
 	recall	float
 	count	int
 
 Rows: 8694
 
 Data:
 +----------+--------+-----------+--------+-------+
 |  UserID  | cutoff | precision | recall | count |
 +----------+--------+-----------+--------+-------+
 | 30695285 |   1    |    0.0    |  0.0   |   1   |
 | 30695285 |   2    |    0.0    |  0.0   |   1   |
 | 30695285 |   3    |    0.0    |  0.0   |   1   |
 | 30695285 |   4    |    0.0    |  0.0   |   1   |
 | 30695285 |   5    |    0.0    |  0.0   |   1   |
 | 30695285 |   6    |    0.0    |  0.0   |   1   |
 | 30695285 |   7    |    0.0    |  0.0   |   1   |
 | 30695285 |   8    |    0.0    |  0.0   |   1   |
 | 30695285 |   9    |    0.0    |  0.0   |   1   |
 | 30695285 |   10   |    0.0    |  0.0   |   1   |
 +----------+--------+-----------+--------+-------+
 [8694 rows x 5 columns]
 Note: Only the head of the SFrame is printed.
 You can use print_rows(num_rows=m, num_columns=n) to print more row

### Model 2 : Ranking Factorization Recommender - Training

In [85]:
model2 = tc.ranking_factorization_recommender.create(train_set,
			                              user_id='UserID',
                                    item_id='Game',
                                    target='Ranking')

### Model 2 : Ranking Factorization Recommender - Testing

In [86]:
model2.evaluate(test_set)


Precision and recall summary statistics by cutoff
+--------+---------------------+---------------------+
| cutoff |    mean_precision   |     mean_recall     |
+--------+---------------------+---------------------+
|   1    |  0.1884057971014493 | 0.13516386762225147 |
|   2    | 0.14078674948240166 | 0.16934692737760698 |
|   3    | 0.11111111111111112 |  0.1920241256234375 |
|   4    | 0.09368530020703936 |  0.2065021372900729 |
|   5    | 0.08364389233954449 |  0.2228368320409167 |
|   6    |  0.077639751552795  | 0.23943764304144982 |
|   7    | 0.06980183377698908 | 0.24971973150667934 |
|   8    | 0.06521739130434781 | 0.26135696771997663 |
|   9    | 0.06165171382562681 | 0.27120620437740245 |
|   10   | 0.05838509316770183 |  0.277552136727247  |
+--------+---------------------+---------------------+
[10 rows x 3 columns]


Overall RMSE: 8.479656776691641

Per User RMSE (best)
+-----------+-----------------------+-------+
|   UserID  |          rmse         | count |
+--------

  + 'the next major release. Any passed parameters are ignored.')


{'precision_recall_by_user': Columns:
 	UserID	int
 	cutoff	int
 	precision	float
 	recall	float
 	count	int
 
 Rows: 8694
 
 Data:
 +----------+--------+-----------+--------+-------+
 |  UserID  | cutoff | precision | recall | count |
 +----------+--------+-----------+--------+-------+
 | 30695285 |   1    |    0.0    |  0.0   |   1   |
 | 30695285 |   2    |    0.0    |  0.0   |   1   |
 | 30695285 |   3    |    0.0    |  0.0   |   1   |
 | 30695285 |   4    |    0.0    |  0.0   |   1   |
 | 30695285 |   5    |    0.0    |  0.0   |   1   |
 | 30695285 |   6    |    0.0    |  0.0   |   1   |
 | 30695285 |   7    |    0.0    |  0.0   |   1   |
 | 30695285 |   8    |    0.0    |  0.0   |   1   |
 | 30695285 |   9    |    0.0    |  0.0   |   1   |
 | 30695285 |   10   |    0.0    |  0.0   |   1   |
 +----------+--------+-----------+--------+-------+
 [8694 rows x 5 columns]
 Note: Only the head of the SFrame is printed.
 You can use print_rows(num_rows=m, num_columns=n) to print more row

### Model 3 : Item Similarity Recommender - Training

In [87]:
model3 = tc.item_similarity_recommender.create(train_set,
			                              user_id='UserID',
                                    item_id='Game',
                                    target='Ranking')

### Model 3 : Item Similarity Recommender - Testing

In [88]:
model3.evaluate(test_set)


Precision and recall summary statistics by cutoff
+--------+---------------------+---------------------+
| cutoff |    mean_precision   |     mean_recall     |
+--------+---------------------+---------------------+
|   1    | 0.32919254658385094 |   0.16071200411688  |
|   2    |  0.2474120082815734 | 0.21565839388833424 |
|   3    | 0.20220841959972394 | 0.24857016964943604 |
|   4    | 0.17184265010351968 | 0.26279689669636974 |
|   5    | 0.14989648033126296 | 0.27316891737481314 |
|   6    |  0.1342305037957212 |  0.2852597561901469 |
|   7    | 0.12333629103815437 | 0.29389099423380977 |
|   8    | 0.11413043478260866 |  0.3001590985093651 |
|   9    | 0.10559006211180126 |  0.3026206755612717 |
|   10   | 0.09896480331262938 | 0.30812000311546095 |
+--------+---------------------+---------------------+
[10 rows x 3 columns]


Overall RMSE: 3.0048352519037644

Per User RMSE (best)
+-----------+----------------------+-------+
|   UserID  |         rmse         | count |
+---------

  + 'the next major release. Any passed parameters are ignored.')


{'precision_recall_by_user': Columns:
 	UserID	int
 	cutoff	int
 	precision	float
 	recall	float
 	count	int
 
 Rows: 8694
 
 Data:
 +----------+--------+---------------------+--------+-------+
 |  UserID  | cutoff |      precision      | recall | count |
 +----------+--------+---------------------+--------+-------+
 | 30695285 |   1    |         1.0         |  1.0   |   1   |
 | 30695285 |   2    |         0.5         |  1.0   |   1   |
 | 30695285 |   3    |  0.3333333333333333 |  1.0   |   1   |
 | 30695285 |   4    |         0.25        |  1.0   |   1   |
 | 30695285 |   5    |         0.2         |  1.0   |   1   |
 | 30695285 |   6    | 0.16666666666666666 |  1.0   |   1   |
 | 30695285 |   7    | 0.14285714285714285 |  1.0   |   1   |
 | 30695285 |   8    |        0.125        |  1.0   |   1   |
 | 30695285 |   9    |  0.1111111111111111 |  1.0   |   1   |
 | 30695285 |   10   |         0.1         |  1.0   |   1   |
 +----------+--------+---------------------+--------+-------+


### Dataset Reduction

### Removing all the users with less than 5 games played

In [89]:
counts = clean_df['UserID'].value_counts()

cleanest_df = clean_df[~clean_df['UserID'].isin(counts[counts < 5].index)]
cleanest_df["UserID"].value_counts()

62990992     1075
33865373      785
30246419      766
58345543      667
76892907      597
             ... 
253814117       5
299991818       5
164961511       5
82438562        5
136891549       5
Name: UserID, Length: 3761, dtype: int64

### Comparison with original clean dataset

In [90]:
clean_df["UserID"].value_counts()

62990992     1075
33865373      785
30246419      766
58345543      667
76892907      597
             ... 
149194171       1
207945140       1
130315685       1
282733934       1
214618086       1
Name: UserID, Length: 12393, dtype: int64

### Convert the dataframe into SFrame

In [0]:
dfC = tc.SFrame(cleanest_df)

### Create the Training dataset and Testing dataset

In [0]:
train_set2, test_set2 = tc.recommender.util.random_split_by_user(dfC,"UserID","Game",item_test_proportion= 0.2)

### Model 1 : Factorization Recommender - Training

In [93]:
model1C = tc.factorization_recommender.create(train_set2,
			                              user_id='UserID',
                                    item_id='Game',
                                    target='Ranking')

### Model 1 : Factorization Recommender - Testing

In [99]:
model1C.evaluate(test_set2)


Precision and recall summary statistics by cutoff
+--------+----------------------+---------------------+
| cutoff |    mean_precision    |     mean_recall     |
+--------+----------------------+---------------------+
|   1    |  0.0487264673311185  |  0.0173474558923125 |
|   2    | 0.047619047619047616 | 0.02376847026118361 |
|   3    | 0.05241786637135473  | 0.04012994138337609 |
|   4    | 0.058970099667774084 | 0.06540515419523148 |
|   5    | 0.05891472868217055  | 0.08996028431485315 |
|   6    | 0.05518641565153195  | 0.10282932912055046 |
|   7    | 0.050150292675209616 | 0.10775660220529724 |
|   8    | 0.04595791805094129  | 0.11184977900887712 |
|   9    | 0.04318936877076411  | 0.11697474250943082 |
|   10   | 0.04053156146179402  | 0.11870193722894065 |
+--------+----------------------+---------------------+
[10 rows x 3 columns]


Overall RMSE: 4.1286615977438545

Per User RMSE (best)
+----------+----------------------+-------+
|  UserID  |         rmse         | count 

  + 'the next major release. Any passed parameters are ignored.')


{'precision_recall_by_user': Columns:
 	UserID	int
 	cutoff	int
 	precision	float
 	recall	float
 	count	int
 
 Rows: 16254
 
 Data:
 +----------+--------+-----------+--------+-------+
 |  UserID  | cutoff | precision | recall | count |
 +----------+--------+-----------+--------+-------+
 | 59945701 |   1    |    0.0    |  0.0   |   10  |
 | 59945701 |   2    |    0.0    |  0.0   |   10  |
 | 59945701 |   3    |    0.0    |  0.0   |   10  |
 | 59945701 |   4    |    0.0    |  0.0   |   10  |
 | 59945701 |   5    |    0.0    |  0.0   |   10  |
 | 59945701 |   6    |    0.0    |  0.0   |   10  |
 | 59945701 |   7    |    0.0    |  0.0   |   10  |
 | 59945701 |   8    |    0.0    |  0.0   |   10  |
 | 59945701 |   9    |    0.0    |  0.0   |   10  |
 | 59945701 |   10   |    0.0    |  0.0   |   10  |
 +----------+--------+-----------+--------+-------+
 [16254 rows x 5 columns]
 Note: Only the head of the SFrame is printed.
 You can use print_rows(num_rows=m, num_columns=n) to print more r

### Model 2 : Ranking Factorization Recommender - Training

In [100]:
model2C = tc.ranking_factorization_recommender.create(train_set2,
			                              user_id='UserID',
                                    item_id='Game',
                                    target='Ranking')

### Model 2 : Ranking Factorization Recommender - Testing

In [101]:
model2C.evaluate(test_set2)


Precision and recall summary statistics by cutoff
+--------+---------------------+----------------------+
| cutoff |    mean_precision   |     mean_recall      |
+--------+---------------------+----------------------+
|   1    | 0.05647840531561461 | 0.020404076274514273 |
|   2    | 0.08582502768549283 | 0.055954147129964375 |
|   3    | 0.09228497600590624 | 0.08466152707700993  |
|   4    | 0.08887043189368772 | 0.10308389366679085  |
|   5    | 0.08261351052048725 | 0.12062969722138327  |
|   6    | 0.07622739018087858 | 0.13139419307168243  |
|   7    | 0.07166587565258661 | 0.13976511227227756  |
|   8    | 0.06741417497231453 | 0.14856042770470781  |
|   9    | 0.06619908945490341 | 0.15811841337958524  |
|   10   | 0.06378737541528236 | 0.17006146260139873  |
+--------+---------------------+----------------------+
[10 rows x 3 columns]


Overall RMSE: 5.526610236208746

Per User RMSE (best)
+-----------+-----------------------+-------+
|   UserID  |          rmse         | cou

  + 'the next major release. Any passed parameters are ignored.')


{'precision_recall_by_user': Columns:
 	UserID	int
 	cutoff	int
 	precision	float
 	recall	float
 	count	int
 
 Rows: 16254
 
 Data:
 +----------+--------+-----------+--------+-------+
 |  UserID  | cutoff | precision | recall | count |
 +----------+--------+-----------+--------+-------+
 | 59945701 |   1    |    0.0    |  0.0   |   10  |
 | 59945701 |   2    |    0.0    |  0.0   |   10  |
 | 59945701 |   3    |    0.0    |  0.0   |   10  |
 | 59945701 |   4    |    0.0    |  0.0   |   10  |
 | 59945701 |   5    |    0.0    |  0.0   |   10  |
 | 59945701 |   6    |    0.0    |  0.0   |   10  |
 | 59945701 |   7    |    0.0    |  0.0   |   10  |
 | 59945701 |   8    |    0.0    |  0.0   |   10  |
 | 59945701 |   9    |    0.0    |  0.0   |   10  |
 | 59945701 |   10   |    0.0    |  0.0   |   10  |
 +----------+--------+-----------+--------+-------+
 [16254 rows x 5 columns]
 Note: Only the head of the SFrame is printed.
 You can use print_rows(num_rows=m, num_columns=n) to print more r

### Model 3 : Item Similarity Recommender - Training

In [95]:
model3C = tc.item_similarity_recommender.create(train_set2,
			                              user_id='UserID',
                                    item_id='Game',
                                    target='Ranking')

### Model 3 : Item Similarity Recommender - Testing

In [98]:
model3C.evaluate(test_set2)

  + 'the next major release. Any passed parameters are ignored.')



Precision and recall summary statistics by cutoff
+--------+---------------------+---------------------+
| cutoff |    mean_precision   |     mean_recall     |
+--------+---------------------+---------------------+
|   1    |  0.4695459579180509 | 0.17801340812324148 |
|   2    |  0.3704318936877078 | 0.25369770744080505 |
|   3    |  0.301218161683278  |  0.2886499908653039 |
|   4    | 0.25526024363233674 | 0.31338863532185574 |
|   5    | 0.22414174972314507 |  0.3333855016947847 |
|   6    |  0.2033960871170174 | 0.35095555007932827 |
|   7    | 0.18652112007593738 |  0.3681066895381865 |
|   8    |  0.1705426356589147 |  0.3777562956910776 |
|   9    | 0.15885320536483316 |  0.3889573115375226 |
|   10   | 0.14961240310077528 |  0.4012447681034705 |
+--------+---------------------+---------------------+
[10 rows x 3 columns]


Overall RMSE: 2.4146158152424086

Per User RMSE (best)
+----------+--------------------+-------+
|  UserID  |        rmse        | count |
+----------+----

{'precision_recall_by_user': Columns:
 	UserID	int
 	cutoff	int
 	precision	float
 	recall	float
 	count	int
 
 Rows: 16254
 
 Data:
 +----------+--------+---------------------+--------+-------+
 |  UserID  | cutoff |      precision      | recall | count |
 +----------+--------+---------------------+--------+-------+
 | 59945701 |   1    |         0.0         |  0.0   |   10  |
 | 59945701 |   2    |         0.5         |  0.1   |   10  |
 | 59945701 |   3    |  0.6666666666666666 |  0.2   |   10  |
 | 59945701 |   4    |         0.5         |  0.2   |   10  |
 | 59945701 |   5    |         0.4         |  0.2   |   10  |
 | 59945701 |   6    |  0.3333333333333333 |  0.2   |   10  |
 | 59945701 |   7    | 0.42857142857142855 |  0.3   |   10  |
 | 59945701 |   8    |        0.375        |  0.3   |   10  |
 | 59945701 |   9    |  0.3333333333333333 |  0.3   |   10  |
 | 59945701 |   10   |         0.3         |  0.3   |   10  |
 +----------+--------+---------------------+--------+-------+

### Compilation of Results

In [103]:
Data = {'Factorization Recommender RMSE':  ['4.499 ', '4.128'],
        'Ranking Factorization Recommender RMSE': ['8.479','5.526'],
        'Item Similarity RMSE':  ['3.004', '2.414'],
         'Actions':["Original","Cleaned"]}

tab = pd.DataFrame (Data, columns = ['Factorization Recommender RMSE','Ranking Factorization Recommender RMSE','Item Similarity RMSE','Actions'])
tab.set_index("Actions",inplace = True)
tab

Unnamed: 0_level_0,Factorization Recommender RMSE,Ranking Factorization Recommender RMSE,Item Similarity RMSE
Actions,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Original,4.499,8.479,3.004
Cleaned,4.128,5.526,2.414
