# Collaborative based filtering

Collaborative filtering based models finds similaries between items or users through ratings or items that other users have liked as well.

### Importing necessary packages


In [1]:
# installing the library suprise
!pip install scikit-surprise

Collecting scikit-surprise
[?25l  Downloading https://files.pythonhosted.org/packages/f5/da/b5700d96495fb4f092be497f02492768a3d96a3f4fa2ae7dea46d4081cfa/scikit-surprise-1.1.0.tar.gz (6.4MB)
[K     |████████████████████████████████| 6.5MB 2.5MB/s 
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.0-cp36-cp36m-linux_x86_64.whl size=1675410 sha256=ef1ff2db8938bbb157145f47cdc42b8b66ef53e9dd1cd3d4dac8b2388924df98
  Stored in directory: /root/.cache/pip/wheels/cc/fa/8c/16c93fccce688ae1bde7d979ff102f7bee980d9cfeb8641bcf
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.0


In [2]:
# importing necessary packages
import pandas as pd
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', 100)
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.preprocessing import StandardScaler 
from sklearn import preprocessing
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.model_selection import train_test_split

from surprise.model_selection import cross_validate
from surprise.prediction_algorithms import SVD
from surprise.prediction_algorithms import knns
from surprise import accuracy
from surprise.prediction_algorithms import KNNWithMeans, KNNBasic, KNNBaseline
from surprise.model_selection import GridSearchCV
from surprise.model_selection import train_test_split

from surprise import Reader, Dataset
import numpy as np

import nltk
from nltk.corpus import stopwords
from nltk import word_tokenize, FreqDist
nltk.download('punkt')
import re
import string
import os

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

  import pandas.util.testing as tm


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


### Importing and checking through the data sets

In [6]:
# unziping the folder containing the steam_rs.csv data
!unzip steam_rs.zip

Archive:  steam_rs.zip
  inflating: steam_rs.csv            


In [0]:
# importing the data set
steam = pd.read_csv('steam_rs.csv')

In [0]:
# removing release_date column
steam = steam.drop('release_date', axis = 1)

In [109]:
# displaying the data frame
steam.head()

Unnamed: 0,id,appid,name,purchase,hours_of_play,developer,publisher,positive,negative,english,platforms,required_age,categories,genres,steamspy_tags,achievements,average_playtime,median_playtime,owners,detailed_description,about_the_game,short_description,price,rank
0,151603712,570,Dota 2,purchase,0.0,Valve,Valve,1097301,194384,1,windows;mac;linux,0,Multi-player;Co-op;Steam Trading Cards;Steam W...,Action;Free to Play;Strategy,Free to Play;MOBA;Strategy,0,23944,801,100000000-200000000,<strong>The most-played game on Steam.</strong...,<strong>The most-played game on Steam.</strong...,"Every day, millions of players worldwide enter...",0.0,84.95113
1,151603712,570,Dota 2,play,0.5,Valve,Valve,1097301,194384,1,windows;mac;linux,0,Multi-player;Co-op;Steam Trading Cards;Steam W...,Action;Free to Play;Strategy,Free to Play;MOBA;Strategy,0,23944,801,100000000-200000000,<strong>The most-played game on Steam.</strong...,<strong>The most-played game on Steam.</strong...,"Every day, millions of players worldwide enter...",0.0,84.95113
2,187131847,570,Dota 2,purchase,0.0,Valve,Valve,1097301,194384,1,windows;mac;linux,0,Multi-player;Co-op;Steam Trading Cards;Steam W...,Action;Free to Play;Strategy,Free to Play;MOBA;Strategy,0,23944,801,100000000-200000000,<strong>The most-played game on Steam.</strong...,<strong>The most-played game on Steam.</strong...,"Every day, millions of players worldwide enter...",0.0,84.95113
3,187131847,570,Dota 2,play,2.3,Valve,Valve,1097301,194384,1,windows;mac;linux,0,Multi-player;Co-op;Steam Trading Cards;Steam W...,Action;Free to Play;Strategy,Free to Play;MOBA;Strategy,0,23944,801,100000000-200000000,<strong>The most-played game on Steam.</strong...,<strong>The most-played game on Steam.</strong...,"Every day, millions of players worldwide enter...",0.0,84.95113
4,176410694,570,Dota 2,purchase,0.0,Valve,Valve,1097301,194384,1,windows;mac;linux,0,Multi-player;Co-op;Steam Trading Cards;Steam W...,Action;Free to Play;Strategy,Free to Play;MOBA;Strategy,0,23944,801,100000000-200000000,<strong>The most-played game on Steam.</strong...,<strong>The most-played game on Steam.</strong...,"Every day, millions of players worldwide enter...",0.0,84.95113


In [0]:
# modifying the columns in the data frame
steam = steam[['id', 'appid', 'name', 'rank', 'genres', 'steamspy_tags', 'short_description', 'hours_of_play']]

Since Steam does not use ratings for games, i will base the recommendations on the hours of play for users.

In [0]:
# alterting the data frame further to show the id, game name and its hours of play
steam = steam[['id', 'name', 'hours_of_play']]

In [112]:
# displaying shape of data frame
steam.shape

(99632, 3)

In [0]:
# dropping the duplicate ids, and keeping the most recent ones as making sure it dosent remove all the unique games in the data frame
steam.drop_duplicates(subset = ['id', 'name'],
                     keep = 'last', inplace = True)

In [114]:
# displaying shape of data frame
steam.shape

(60446, 3)

In [0]:
# transforming the current data set into something that is compatible with surpirse 
reader = Reader()
steam = Dataset.load_from_df(steam,reader)

### Train/ test split

The train and test data sets will contain randomly selected user ratings and items instead of the entire list of users and items. 80% of these ratings reside in the training set and the remaining 20% is in the test set.

In [0]:
# preforming train/test split
trainset, testset = train_test_split(steam, test_size=0.2)

In [117]:
# checking the number of items and users in the data set
print('Number of users: ', trainset.n_users, '\n')
print('Number of items: ', trainset.n_items, '\n')

Number of users:  9192 

Number of items:  2190 



From this it can be seen that there is a fewer number of items rather than the number of users.

# Memory Based

For these memory based models I will be using KNNBasic which is a basic collaborative filtering algorithm. In addition to that i will also be using KNNBaseline which is a basic collaborative filtering algorithm taking into account a baseline rating. KNNWithMeans is basic collaborative filtering algorithm, taking into account the mean ratings of each user.

## Cosine similarity

## KNNBasic with cosine similarity (USER BASED)

In [0]:
#   cosine similarity
sim_cos = {'name':'cosine', 'user_based':True}

In [0]:
#   training the model with user_based = True
basic_user = knns.KNNBasic(sim_options=sim_cos)

In [120]:
#   fitting the model
simcos_cv_item = cross_validate(basic_user, steam, measures=['rmse', 'mae'],
                           cv = 3, return_train_measures=True, n_jobs= -1,
                           verbose = True)

Evaluating RMSE, MAE of algorithm KNNBasic on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    242.4288218.3260203.8858221.546915.8991 
MAE (testset)     36.6102 34.2526 33.8152 34.8927 1.2275  
RMSE (trainset)   211.2454223.9997230.7032221.98288.0706  
MAE (trainset)    33.7394 34.9100 35.1172 34.5889 0.6066  
Fit time          29.30   27.72   12.97   23.33   7.36    
Test time         6.60    6.43    3.31    5.45    1.51    


From the results this model appears overfit.

## KNNBasic with cosine similarity (ITEM BASED)


In [0]:
#   cosine similarity
sim_cos = {'name':'cosine', 'user_based':False}

In [0]:
#   training the model with user_based = True
basic_item = knns.KNNBasic(sim_options=sim_cos)

In [123]:
#   fitting the model
simcos_cv_item = cross_validate(basic_item, steam, measures=['rmse', 'mae'],
                           cv = 3, return_train_measures=True, n_jobs= -1,
                           verbose = True)

Evaluating RMSE, MAE of algorithm KNNBasic on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    227.3839207.4568231.1183221.986310.3864 
MAE (testset)     36.9183 34.3086 34.5877 35.2715 1.1700  
RMSE (trainset)   219.4847229.1260217.5205222.04385.0717  
MAE (trainset)    33.8067 35.0938 34.9424 34.6143 0.5744  
Fit time          1.80    2.39    1.45    1.88    0.39    
Test time         2.53    1.77    0.89    1.73    0.67    


Although like the previous model this model also appears quite overfit, there is a slight improvement in the RMSE and MAE scores.1


### Pearson similarity

## KNNBaseline with pearson similarity (USER BASED)

In [0]:
# person similarity
sim_pearson = {'name':'pearson', 'user_based':True}

In [0]:
#   training the model with user_based = True
knn_baseline_user = knns.KNNBaseline(sim_options=sim_pearson)


In [126]:
#   fitting the model
sim_pearson_cv_user = cross_validate(knn_baseline_user, steam, measures=['rmse', 'mae'],
                           cv = 3, return_train_measures=True, n_jobs= -1,
                           verbose = True)



Evaluating RMSE, MAE of algorithm KNNBaseline on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    199.7898226.8098238.0208221.540116.0464 
MAE (testset)     33.6201 36.9353 34.3185 34.9579 1.4270  
RMSE (trainset)   232.4640219.7170213.7066221.96257.8206  
MAE (trainset)    34.9067 33.2342 34.5595 34.2334 0.7207  
Fit time          32.18   33.62   17.56   27.79   7.26    
Test time         7.36    7.64    3.78    6.26    1.76    


From the RMSE and MAE scores obtained, there is evident improvement in this user based KNNBaseline model with pearson similarity. 

## KNNBaseline with pearson similarity (ITEM BASED)

In [0]:
# person similarity
sim_pearson = {'name':'pearson', 'user_based':False}

In [0]:
#   training the model with user_based = False
knn_baseline_user = knns.KNNBaseline(sim_options=sim_pearson)

In [129]:
#   fitting the model
sim_pearson_cv_user = cross_validate(knn_baseline_user, steam, measures=['rmse', 'mae'],
                           cv = 3, return_train_measures=True, n_jobs= -1,
                           verbose = True)

Evaluating RMSE, MAE of algorithm KNNBaseline on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    207.5102211.0380245.8289221.459017.2922 
MAE (testset)     33.4881 34.5468 37.1289 35.0546 1.5291  
RMSE (trainset)   229.0583227.4448209.2483221.91718.9824  
MAE (trainset)    35.0025 34.4459 33.1469 34.1985 0.7775  
Fit time          1.68    2.48    1.53    1.90    0.41    
Test time         2.66    1.99    1.33    1.99    0.55    


This models preformance was also decent

## KNNWithMeans with pearson similarity (USER BASED)

In [0]:
# person similarity
sim_pearson = {'name':'pearson', 'user_based':True}

In [0]:
#   training the model with user_based = True
knn_WithMeans_user = knns.KNNWithMeans(sim_options=sim_pearson)

In [132]:
#   fitting the model
sim_pearson_wm_cv_user = cross_validate(knn_WithMeans_user, steam, measures=['rmse', 'mae'],
                           cv = 3, return_train_measures=True, n_jobs= -1,
                           verbose = True)

Evaluating RMSE, MAE of algorithm KNNWithMeans on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    232.1031199.1552233.6758221.644715.9154 
MAE (testset)     35.4782 33.0398 35.9226 34.8135 1.2673  
RMSE (trainset)   216.9889232.7849216.1470221.97367.6525  
MAE (trainset)    33.8477 35.0340 33.6213 34.1677 0.6195  
Fit time          33.71   31.85   15.50   27.02   8.18    
Test time         6.94    6.92    3.55    5.80    1.59    


KNNWithMeans with pearson similarity (ITEM BASED)

In [0]:
# person similarity
sim_pearson = {'name':'pearson', 'user_based':False}

In [0]:
#   training the model with user_based = False
knn_WithMeans_item = knns.KNNWithMeans(sim_options=sim_pearson)

In [135]:
#   fitting the model
sim_pearson_wm_cv_item = cross_validate(knn_WithMeans_item, steam, measures=['rmse', 'mae'],
                           cv = 3, return_train_measures=True, n_jobs= -1,
                           verbose = True)

Evaluating RMSE, MAE of algorithm KNNWithMeans on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    213.2640226.9039225.9453222.03776.2163  
MAE (testset)     33.9982 35.7953 34.4630 34.7522 0.7616  
RMSE (trainset)   226.3961219.6672220.1621222.07513.0621  
MAE (trainset)    34.4204 33.5332 34.1773 34.0436 0.3743  
Fit time          1.61    2.03    1.48    1.71    0.23    
Test time         2.44    1.73    1.07    1.75    0.56    


# Model based (matrix factorization)

In [136]:
param_grid = {'n_factors':[20, 100],'n_epochs': [5, 10], 'lr_all': [0.002, 0.005],
               'reg_all': [0.4, 0.6]}
gs_model = GridSearchCV(SVD,param_grid=param_grid,n_jobs = -1,joblib_verbose=5)
gs_model.fit(steam)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:   18.0s
[Parallel(n_jobs=-1)]: Done  68 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done  80 out of  80 | elapsed:  2.3min finished


In [137]:
svd = SVD(n_factors=100, n_epochs=10, lr_all=0.005, reg_all=0.4)
svd.fit(trainset)
predictions = svd.test(testset)
print(accuracy.rmse(predictions))

RMSE: 228.5626
228.56259467383634


In [0]:
## Perform a gridsearch with SVD
params = {'n_factors': [20, 50, 100],
         'reg_all': [0.02, 0.05, 0.1]}
g_s_svd = GridSearchCV(SVD,param_grid=params,n_jobs=-1)
g_s_svd.fit(steam)

In [139]:
print(g_s_svd.best_score)
print(g_s_svd.best_params)

{'rmse': 220.99109435864926, 'mae': 35.7652323378286}
{'rmse': {'n_factors': 20, 'reg_all': 0.02}, 'mae': {'n_factors': 20, 'reg_all': 0.02}}


In [0]:
## Perform a gridsearch with SVD
params = {'n_factors': [20],
         'reg_all': [0.02]}
g_s_svd = GridSearchCV(SVD,param_grid=params,n_jobs=-1)
g_s_svd.fit(steam)

In [141]:
print(g_s_svd.best_score)
print(g_s_svd.best_params)

{'rmse': 221.6721178698086, 'mae': 35.76533137214132}
{'rmse': {'n_factors': 20, 'reg_all': 0.02}, 'mae': {'n_factors': 20, 'reg_all': 0.02}}


In [0]:
# person similarity
sim_pearson = {'name':'pearson', 'user_based':False}

In [0]:
#   training the model with user_based = False
knn_baseline_user = knns.KNNBaseline(sim_options=sim_pearson)

In [144]:
#   fitting the model
sim_pearson_cv_user = cross_validate(knn_baseline_user, steam, measures=['rmse', 'mae'],
                           cv = 3, return_train_measures=True, n_jobs= -1,
                           verbose = True)

Evaluating RMSE, MAE of algorithm KNNBaseline on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    220.5855220.1815225.5777222.11492.4541  
MAE (testset)     35.9312 33.5774 35.6132 35.0406 1.0428  
RMSE (trainset)   222.8671223.0652220.3537222.09531.2342  
MAE (trainset)    33.7595 34.9185 33.8926 34.1902 0.5179  
Fit time          1.89    2.44    1.50    1.94    0.38    
Test time         2.49    1.99    1.11    1.86    0.57    


In [145]:
for i in sim_pearson_cv_user.items():
    print(i)
print('-----------------------')
print(np.mean(sim_pearson_cv_user['test_rmse']))

('test_rmse', array([220.58549667, 220.18154675, 225.57766155]))
('train_rmse', array([222.86711883, 223.06520159, 220.3537227 ]))
('test_mae', array([35.93124502, 33.57737256, 35.61323592]))
('train_mae', array([33.75952043, 34.91854224, 33.89255863]))
('fit_time', (1.886951208114624, 2.435455083847046, 1.5027153491973877))
('test_time', (2.487823724746704, 1.9867215156555176, 1.111647367477417))
-----------------------
222.11490166014246


In [146]:
# cross validating with KNNBaseline
knn_baseline = KNNBaseline(sim_options={'name':'pearson', 'user_based':True})
cv_knn_baseline = cross_validate(knn_baseline, steam)

Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.


In [147]:
for i in cv_knn_baseline.items():
    print(i)

np.mean(cv_knn_baseline['test_rmse'])

('test_rmse', array([226.60715599, 232.46565032, 219.96518991, 199.37644504,
       230.56128705]))
('test_mae', array([35.31993904, 34.18757259, 34.35153825, 32.93349793, 37.72368989]))
('fit_time', (28.2661349773407, 26.4842426776886, 27.96272325515747, 24.16923761367798, 23.988571405410767))
('test_time', (3.586242437362671, 3.56370210647583, 3.595970630645752, 3.742413282394409, 3.6931352615356445))


221.79514566111152

# Making recommendations

In [0]:
df_games = pd.read_csv('steam_rs.csv')

In [238]:
df_games.head()

Unnamed: 0,id,appid,name,purchase,hours_of_play,developer,publisher,positive,negative,release_date,english,platforms,required_age,categories,genres,steamspy_tags,achievements,average_playtime,median_playtime,owners,detailed_description,about_the_game,short_description,price,rank
0,151603712,570,Dota 2,purchase,0.0,Valve,Valve,1097301,194384,2013-07-09,1,windows;mac;linux,0,Multi-player;Co-op;Steam Trading Cards;Steam W...,Action;Free to Play;Strategy,Free to Play;MOBA;Strategy,0,23944,801,100000000-200000000,<strong>The most-played game on Steam.</strong...,<strong>The most-played game on Steam.</strong...,"Every day, millions of players worldwide enter...",0.0,84.95113
1,151603712,570,Dota 2,play,0.5,Valve,Valve,1097301,194384,2013-07-09,1,windows;mac;linux,0,Multi-player;Co-op;Steam Trading Cards;Steam W...,Action;Free to Play;Strategy,Free to Play;MOBA;Strategy,0,23944,801,100000000-200000000,<strong>The most-played game on Steam.</strong...,<strong>The most-played game on Steam.</strong...,"Every day, millions of players worldwide enter...",0.0,84.95113
2,187131847,570,Dota 2,purchase,0.0,Valve,Valve,1097301,194384,2013-07-09,1,windows;mac;linux,0,Multi-player;Co-op;Steam Trading Cards;Steam W...,Action;Free to Play;Strategy,Free to Play;MOBA;Strategy,0,23944,801,100000000-200000000,<strong>The most-played game on Steam.</strong...,<strong>The most-played game on Steam.</strong...,"Every day, millions of players worldwide enter...",0.0,84.95113
3,187131847,570,Dota 2,play,2.3,Valve,Valve,1097301,194384,2013-07-09,1,windows;mac;linux,0,Multi-player;Co-op;Steam Trading Cards;Steam W...,Action;Free to Play;Strategy,Free to Play;MOBA;Strategy,0,23944,801,100000000-200000000,<strong>The most-played game on Steam.</strong...,<strong>The most-played game on Steam.</strong...,"Every day, millions of players worldwide enter...",0.0,84.95113
4,176410694,570,Dota 2,purchase,0.0,Valve,Valve,1097301,194384,2013-07-09,1,windows;mac;linux,0,Multi-player;Co-op;Steam Trading Cards;Steam W...,Action;Free to Play;Strategy,Free to Play;MOBA;Strategy,0,23944,801,100000000-200000000,<strong>The most-played game on Steam.</strong...,<strong>The most-played game on Steam.</strong...,"Every day, millions of players worldwide enter...",0.0,84.95113


In [0]:
# dropping the duplicate ids, and keeping the most recent ones as making sure it dosent remove all the unique games in the data frame
df_games.drop_duplicates(subset = ['id', 'name'],
                     keep = 'last', inplace = True)

In [0]:
# dropping the duplicate ids
df_games.drop_duplicates(subset = ['appid'],
                     keep = 'last', inplace = True)

In [0]:
df_games = df_games[['id', 'appid', 'name', 'purchase', 'developer', 'genres', 'rank']]

In [242]:
svd = SVD(n_factors= 20, reg_all=0.02)
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7feaa642ba90>

In [243]:
svd.predict(2, 4)

Prediction(uid=2, iid=4, r_ui=None, est=5, details={'was_impossible': False})

In [0]:
def game_rater(df_games, num, genre=None):
    userID = 1000
    rating_list = []
    while num > 0:
        if genre:
            game = df_games[df_games['genres'].str.contains(genre)].sample(1)
        else:
            game = df_games.sample(1)
        print(game)
        rating = input('How do you rate this game on a scale of 1-5, press n if you have not seen :\n')
        if rating == 'n':
            continue
        else:
            rating_one_game = {'id':userID,'gameId':game['appid'].values[0],'hours_of_play':rating}
            rating_list.append(rating_one_game) 
            num -= 1
    return rating_list      

In [245]:
user_rating = game_rater(df_games, 4, 'RPG')

              id   appid                       name  purchase                  developer  \
76123  193103684  206480  Dungeons & Dragons Online  purchase  Standing Stone Games, LLC   

                                                  genres       rank  
76123  Action;Adventure;Free to Play;Massively Multip...  71.021107  
How do you rate this game on a scale of 1-5, press n if you have not seen :
66
             id   appid     name purchase        developer                        genres  \
76252  60859695  290340  Armello     play  League of Geeks  Adventure;Indie;RPG;Strategy   

            rank  
76252  81.652078  
How do you rate this game on a scale of 1-5, press n if you have not seen :
3
             id   appid                  name  purchase                        developer  \
52565  73726855  200210  Realm of the Mad God  purchase  Wild Shadow Studios, Deca Games   

                                                  genres      rank  
52565  Action;Free to Play;Indie;Massivel

In [0]:
## add the new ratings to the original ratings DataFrame
new_ratings_df = df_games.append(user_rating,ignore_index=True)

In [0]:
new_ratings_df = new_ratings_df[['id', 'appid', 'hours_of_play']]


In [0]:
# transforming the current data set into something that is compatible with surpirse 
reader = Reader()
new_data = Dataset.load_from_df(new_ratings_df,reader)

In [249]:
# train a model using the new combined DataFrame
svd_ = SVD(n_factors= 20, reg_all=0.02)
svd_.fit(new_data.build_full_trainset())

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7feaa827a710>

In [0]:
# make predictions for the user
list_of_games = []
for m_id in df_games['appid'].unique():
    list_of_games.append( (m_id,svd_.predict(1000,m_id)[3]))

In [0]:
# order the predictions from highest to lowest rated
ranked_games = sorted(list_of_games, key=lambda x:x[1], reverse=True)

In [0]:
# return the top n recommendations
def recommended_games(user_ratings, df_games, n):
        for idx, rec in enumerate(user_ratings):
            title = df_games.loc[df_games['appid'] == int(rec[0])]['name']
            print('Recommendation # ', idx+1, ': ', title, '\n')
            n-= 1
            if n == 0:
                break

In [255]:
recommended_games(ranked_games, df_games, 5)

Recommendation #  1 :  9681    Dota 2
Name: name, dtype: object 

Recommendation #  2 :  14327    Team Fortress 2
Name: name, dtype: object 

Recommendation #  3 :  16959    Unturned
Name: name, dtype: object 

Recommendation #  4 :  18230    Warframe
Name: name, dtype: object 

Recommendation #  5 :  18246    Tom Clancy's Rainbow Six Siege
Name: name, dtype: object 

