In [41]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import difflib
from sklearn import linear_model
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import linear_kernel
from sklearn.feature_extraction.text import CountVectorizer
from surprise import Reader, Dataset
from surprise import SVD
from surprise import SVDpp
from surprise import accuracy
from surprise.model_selection import train_test_split

In [42]:
df = pd.read_excel(r'C:\4) Fourth Year\Capstone\Game analysis Dataset\Metacritic Data\Video_Game_Sales.xlsx')

In [43]:
df.head()

Unnamed: 0,Index,Name,User_Id,Platform,Year_of_Release,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales,Critic_Score,Critic_Count,User_Score,User_Count,Rating
0,0,Wii Sports,10240,Wii,2006,Sports,Nintendo,41.36,28.96,3.77,8.45,82.54,76,51,8.0,324,E
1,1,Super Mario Bros.,10258,NES,1985,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24,76,51,8.0,324,E
2,2,Mario Kart Wii,10268,Wii,2008,Racing,Nintendo,15.68,12.8,3.79,3.29,35.57,82,73,8.3,712,E
3,3,Wii Sports Resort,10253,Wii,2009,Sports,Nintendo,15.61,10.95,3.28,2.95,32.78,80,73,8.0,193,E
4,4,Pokemon Red/Pokemon Blue,10213,G,1996,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37,80,73,8.0,193,E


In [44]:
df['Platform']=df['Platform'].astype(str)
df['Genre']=df['Genre'].astype(str)
df['Publisher']=df['Publisher'].astype(str)
df['Rating']=df['Rating'].astype(str)

In [45]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17416 entries, 0 to 17415
Data columns (total 17 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Index            17416 non-null  int64  
 1   Name             17416 non-null  object 
 2   User_Id          17416 non-null  int64  
 3   Platform         17416 non-null  object 
 4   Year_of_Release  17416 non-null  int64  
 5   Genre            17416 non-null  object 
 6   Publisher        17416 non-null  object 
 7   NA_Sales         17416 non-null  float64
 8   EU_Sales         17416 non-null  float64
 9   JP_Sales         17416 non-null  float64
 10  Other_Sales      17416 non-null  float64
 11  Global_Sales     17416 non-null  float64
 12  Critic_Score     17416 non-null  int64  
 13  Critic_Count     17416 non-null  int64  
 14  User_Score       17416 non-null  float64
 15  User_Count       17416 non-null  int64  
 16  Rating           17416 non-null  object 
dtypes: float64(6

In [46]:
df['combined_features'] = df[['Platform', 'Publisher', 'Genre', 'Rating']].apply(lambda x: ' '.join(x), axis=1)

In [47]:
# Gettingcount of each word in the combined column
cv = CountVectorizer()
count_matrix = cv.fit_transform(df['combined_features'])

In [48]:
cosine_sim = cosine_similarity(count_matrix)

In [49]:
def recommend_by_content(title, cosine_sim = cosine_sim):
    recommended_games = []
    idx = df[df['Name'] == title].index[0]
    score_series = pd.Series(cosine_sim[idx]).sort_values(ascending = False)
    top_20_indexes = list(score_series.iloc[1:21].index)
    for i in top_20_indexes:
        recommended_games.append(df['Name'][i])
    return recommended_games

In [50]:
# Loading the dataset
reader = Reader()
data = Dataset.load_from_df(df[['User_Id', 'Name', 'User_Score']], reader)

In [51]:
svdpp = SVDpp()
trainset = data.build_full_trainset()
svdpp.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVDpp at 0x179530ec6d0>

In [52]:
def recommend_by_cf(user_id):
    recommended_games = []
    # Get the inner id of the user
    user_inner_id = svdpp.trainset.to_inner_uid(user_id)
    # Get the games that the user has rated
    user_games = df[df['User_Id'] == user_id]
    user_games_names = user_games['Name'].tolist()
    all_games = df['Name'].unique()
    # Get the games that the user has not played
    not_played_games = set(all_games) - set(user_games_names)
    for game in not_played_games:
        iid = svdpp.trainset.to_inner_iid(game)
        # Predict the rating for the given user and game
        user_rating = svdpp.predict(user_inner_id, iid)
        # Append the game to the recommended games list if the estimated rating is greater than or equal to 6
        if user_rating.est >= 6:
            recommended_games.append(game)
    return recommended_games

In [53]:
def recommend(user_id, title):
    recommended_games_content = recommend_by_content(title)
    recommended_games_cf = recommend_by_cf(user_id)
    final_recommendation = list(set(recommended_games_content + recommended_games_cf))
    
    # predict ratings for the recommended games using SVDpp
    predicted_ratings = {}
    for game in final_recommendation:
        iid = svdpp.trainset.to_inner_iid(game)
        user_rating = svdpp.predict(user_id, iid)
        predicted_ratings[game] = user_rating.est
    sorted_ratings = sorted(predicted_ratings.items(), key=lambda x: x[1], reverse=True)
    recommended_games_svdpp = [x[0] for x in sorted_ratings]
    
    # combine the three recommendation lists and remove duplicates
    final_recommendation = list(set(recommended_games_content + recommended_games_cf + recommended_games_svdpp))
    return final_recommendation[:20]

In [54]:
user_id = 10240
title = "Wii Sports"
output = recommend(user_id, title)
for i, game in enumerate(output, 1):
    print(f"{i}. {game}")

1. Active Health with Carol Vorderman
2. Punch-Out!!
3. Mario Strikers Charged
4. Mario Super Sluggers
5. Baseball
6. Remington Super Slam Hunting: North America
7. Mario Sports Mix
8. Wii Sports Resort
9. Mario Tennis
10. Cabelas Legendary Adventures
11. Tennis
12. New Play Control! Mario Power Tennis
13. Wii Fit Plus
14. Super Swing Golf
15. My Fitness Coach
16. Golf
17. Wii Fit


In [55]:
trainset, testset = train_test_split(data, test_size=0.2)

In [56]:
svdpp = SVDpp()

In [57]:
svdpp.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVDpp at 0x17958587d90>

In [58]:
predictions = svdpp.test(testset)

In [59]:
accuracy.rmse(predictions)

RMSE: 2.5361


2.536120212863274

In [60]:
accuracy.mae(predictions)

MAE:  2.2877


2.2877243012938226