# Recommendation System

In this file I'm going to create a recommendation system by using a ML model.

At the end, we will save the model for recommend future data.

# Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import scipy as sp
from sklearn.metrics.pairwise import cosine_similarity  
from sklearn.feature_extraction.text import TfidfVectorizer

In [5]:
Games = pd.read_csv('Clean_Data/Games.csv')
Games

Unnamed: 0,publisher,genres,app_name,title,release_date,tags,price,early_access,developer,release_year
0,No data,No data,No data,No data,No data,No data,0.00,No data,No data,No data
1,Kotoshiro,Action,Lost Summoner Kitty,Lost Summoner Kitty,2018-01-04 00:00:00,"['Strategy', 'Action', 'Indie', 'Casual', 'Sim...",4.99,0.0,Kotoshiro,2018.0
2,"Making Fun, Inc.",Free to Play,Ironbound,Ironbound,2018-01-04 00:00:00,"['Free to Play', 'Strategy', 'Indie', 'RPG', '...",0.00,0.0,Secret Level SRL,2018.0
3,Poolians.com,Casual,Real Pool 3D - Poolians,Real Pool 3D - Poolians,2017-07-24 00:00:00,"['Free to Play', 'Simulation', 'Sports', 'Casu...",0.00,0.0,Poolians.com,2017.0
4,彼岸领域,Action,弹炸人2222,弹炸人2222,2017-12-07 00:00:00,"['Action', 'Adventure', 'Casual']",0.99,0.0,彼岸领域,2017.0
...,...,...,...,...,...,...,...,...,...,...
32128,Ghost_RUS Games,Casual,Colony On Mars,Colony On Mars,2018-01-04 00:00:00,"['Strategy', 'Indie', 'Casual', 'Simulation']",1.99,0.0,"Nikita ""Ghost_RUS""",2018.0
32129,Sacada,Casual,LOGistICAL: South Africa,LOGistICAL: South Africa,2018-01-04 00:00:00,"['Strategy', 'Indie', 'Casual']",4.99,0.0,Sacada,2018.0
32130,Laush Studio,Indie,Russian Roads,Russian Roads,2018-01-04 00:00:00,"['Indie', 'Simulation', 'Racing']",1.99,0.0,Laush Dmitriy Sergeevich,2018.0
32131,SIXNAILS,Casual,EXIT 2 - Directions,EXIT 2 - Directions,2017-09-02 00:00:00,"['Indie', 'Casual', 'Puzzle', 'Singleplayer', ...",4.99,0.0,"xropi,stev3ns",2017.0


In [6]:
Games.columns

Index(['publisher', 'genres', 'app_name', 'title', 'release_date', 'tags',
       'price', 'early_access', 'developer', 'release_year'],
      dtype='object')

# Functions

In [7]:
def recommend_game(id):
    game =  pd.read_csv("Clean_Data/Games.csv")

    col_drop = ["release_date", "tags", "price", "early_access", "developer", "release_year"]
    game.drop(columns= col_drop, inplace=True )

    # We only choose 20% of data because it's a lot of information to process
    game = game.sample(frac=1/5, random_state=42).reset_index()

    # Let's generate a matrix with a text process 
    tfidf = TfidfVectorizer(stop_words="english")
    game["genres"] = game["genres"].fillna("")
    tfidf_matrix = tfidf.fit_transform(game["genres"])  # Vector TF-IDF

    
    # Training Cosine Similarity

    cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix) 

    idx = game[game["id"] == id].index[0]  # It finds the id of the game
    sim_scores = list(enumerate(cosine_sim [idx]))  # Evaluates similiraty
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)  
    sim_scores = sim_scores[1:6]  # Top 5 recommended games, including the analyzed game
    game_indices = [i[0] for i in sim_scores]  

    print(f"Game:  {game[game['id'] == id].iloc[0]}") # The selected game

    
    return game[["genres","title","id"]].iloc[game_indices] # We return the recommended games

# Let's try it out

In [12]:
Games['title']

0                         No data
1             Lost Summoner Kitty
2                       Ironbound
3         Real Pool 3D - Poolians
4                         弹炸人2222
                   ...           
32128              Colony On Mars
32129    LOGistICAL: South Africa
32130               Russian Roads
32131         EXIT 2 - Directions
32132                     No data
Name: title, Length: 32133, dtype: object

In [14]:
recommend_game("271100")

KeyError: 'id'