## Modelo de recomendación

In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import ast

In [2]:
games = pd.read_csv('games.csv', sep=',')

In [3]:
games = games.sample(frac=0.10) # Remove or comment out this code cell if you want to use the complete games dataframe.

In [4]:
games_model = games.copy()
games_model = games_model.drop(['release_date', 'app_name', 'publisher', 'developer', 'early_access'], axis=1)

In [5]:
games_model['genres'] = games_model['genres'].apply(ast.literal_eval)
games_model['tags'] = games_model['tags'].apply(ast.literal_eval)
games_model['specs'] = games_model['specs'].apply(ast.literal_eval)

In [6]:
games_model['genres'] = games_model['genres'].apply(lambda x: ', '.join(x))
games_model['tags'] = games_model['tags'].apply(lambda x: ', '.join(x))
games_model['specs'] = games_model['specs'].apply(lambda x: ', '.join(x))

In [7]:
games_model.head(1)

Unnamed: 0,genres,title,url,tags,reviews_url,specs,price,id
25344,"Indie, Simulation",Luna's Wandering Stars - Original Soundtrack,http://store.steampowered.com/app/337570/Lunas...,"Indie, Simulation",http://steamcommunity.com/app/337570/reviews/?...,"Single-player, Downloadable Content",4.99,337570.0


In [8]:
def clean_text(tags):
    result = str(tags).lower()
    return(result.replace(',',' '))

In [9]:
games_model['tags'] = games_model['tags'].apply(clean_text)
games_model['genres'] = games_model['genres'].apply(clean_text)
games_model['specs'] = games_model['specs'].apply(clean_text)

In [10]:
def clean_text2(specs):
    result = str(specs).lower()
    return(result.replace('-',''))

In [11]:
games_model['tags'] = games_model['tags'].apply(clean_text2)
games_model['genres'] = games_model['genres'].apply(clean_text2)
games_model['specs'] = games_model['specs'].apply(clean_text2)

In [12]:
games_model = games_model[['url', 'id', 'genres', 'tags', 'specs', 'price']]
games_model['id'] = games_model['id'].astype(int)
games_model.head(1)

Unnamed: 0,url,id,genres,tags,specs,price
25344,http://store.steampowered.com/app/337570/Lunas...,337570,indie simulation,indie simulation,singleplayer downloadable content,4.99


In [13]:
games_model.to_csv('./API_csv/ML_games_model.csv', index=False)

In [14]:
games_selected2 = games_model.drop(['url'],axis=1)

games_selected2 = games_selected2[games_selected2.columns[1:]].apply(
    lambda x: ' '.join(x.dropna().astype(str)),
    axis=1
)
print(games_selected2.shape)
print(games_selected2.head())

(3213,)
25344    indie  simulation indie  simulation singleplay...
29179    action  adventure  indie horror  indie  advent...
11456    no info available action  scifi  movie caption...
9737     simulation simulation singleplayer  downloadab...
25905    casual  indie spelling  typing  casual  indie ...
dtype: object


In [15]:
type(games_selected2)

pandas.core.series.Series

In [16]:
games_selected2.to_csv('./API_csv/ML_games_selected2.csv', index=False, header=False)

In [17]:
vectorizer = CountVectorizer()
vectorized = vectorizer.fit_transform(games_selected2)

In [18]:
vectorized

<3213x458 sparse matrix of type '<class 'numpy.int64'>'
	with 48220 stored elements in Compressed Sparse Row format>

In [19]:
similarities = cosine_similarity(vectorized)
print(similarities)

[[1.         0.39629696 0.08703883 ... 0.35355339 0.12309149 0.14291549]
 [0.39629696 1.         0.17739372 ... 0.76060873 0.62718151 0.23302069]
 [0.08703883 0.17739372 1.         ... 0.18463724 0.38569461 0.08956222]
 ...
 [0.35355339 0.76060873 0.18463724 ... 1.         0.73983004 0.24253563]
 [0.12309149 0.62718151 0.38569461 ... 0.73983004 1.         0.16888013]
 [0.14291549 0.23302069 0.08956222 ... 0.24253563 0.16888013 1.        ]]


In [20]:
games_vect = pd.DataFrame(similarities, columns=games_model['id'], index=games_model['id']).reset_index()
games_vect

id,id.1,337570,293180,669200,570481,373970,658271,636170,13250,444770,...,708800,678290,201277,436870,336810,40950,410290,564100,529760,562590
0,337570,1.000000,0.396297,0.087039,0.696311,0.284747,0.198680,0.321634,0.160817,0.333333,...,0.522233,0.339683,0.280056,0.252646,0.544331,0.301232,0.270501,0.353553,0.123091,0.142915
1,293180,0.396297,1.000000,0.177394,0.177394,0.419137,0.449921,0.546268,0.582686,0.566139,...,0.709575,0.615385,0.237826,0.371884,0.369800,0.321588,0.336910,0.760609,0.627182,0.233021
2,669200,0.087039,0.177394,1.000000,0.090909,0.049568,0.138343,0.055989,0.223957,0.058026,...,0.272727,0.177394,0.073127,0.131940,0.071067,0.044947,0.047088,0.184637,0.385695,0.089562
3,570481,0.696311,0.177394,0.090909,1.000000,0.099136,0.415029,0.223957,0.167968,0.232104,...,0.181818,0.118262,0.292509,0.175920,0.284268,0.314627,0.235441,0.307729,0.128565,0.029854
4,373970,0.284747,0.419137,0.049568,0.099136,1.000000,0.452589,0.641091,0.091584,0.569495,...,0.297409,0.483619,0.478471,0.503581,0.503740,0.343100,0.539171,0.570483,0.490700,0.244169
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3208,40950,0.301232,0.321588,0.044947,0.314627,0.343100,0.273594,0.276818,0.359864,0.286888,...,0.134840,0.204647,0.506171,0.217443,0.316228,1.000000,0.465620,0.273861,0.286039,0.250924
3209,410290,0.270501,0.336910,0.047088,0.235441,0.539171,0.358287,0.522013,0.203005,0.450835,...,0.282529,0.367538,0.568166,0.432825,0.515347,0.465620,1.000000,0.478183,0.266371,0.371124
3210,564100,0.353553,0.760609,0.184637,0.307729,0.570483,0.702439,0.720193,0.341144,0.746390,...,0.615457,0.680545,0.346552,0.476393,0.433013,0.273861,0.478183,1.000000,0.739830,0.242536
3211,529760,0.123091,0.627182,0.385695,0.128565,0.490700,0.586939,0.514675,0.356313,0.574427,...,0.385695,0.543557,0.361961,0.310985,0.251259,0.286039,0.266371,0.739830,1.000000,0.168880


In [21]:
games_u_p = games.drop(['publisher','genres','app_name','id','release_date','tags','reviews_url','specs', 'early_access', 'developer'], axis=1)
games_u_p = games_u_p.reset_index()
games_u_p.head(1)

Unnamed: 0,index,title,url,price
0,25344,Luna's Wandering Stars - Original Soundtrack,http://store.steampowered.com/app/337570/Lunas...,4.99


In [22]:
games_u_p.to_csv('./API_csv/ML_games_u_p.csv', index=False)

In [23]:
games_concat = pd.concat([games_u_p, games_vect], axis=1)

In [24]:
games_concat.head()

Unnamed: 0,index,title,url,price,id,337570,293180,669200,570481,373970,...,708800,678290,201277,436870,336810,40950,410290,564100,529760,562590
0,25344,Luna's Wandering Stars - Original Soundtrack,http://store.steampowered.com/app/337570/Lunas...,4.99,337570,1.0,0.396297,0.087039,0.696311,0.284747,...,0.522233,0.339683,0.280056,0.252646,0.544331,0.301232,0.270501,0.353553,0.123091,0.142915
1,29179,Overcast - Walden and the Werewolf,http://store.steampowered.com/app/293180/Overc...,0.99,293180,0.396297,1.0,0.177394,0.177394,0.419137,...,0.709575,0.615385,0.237826,0.371884,0.3698,0.321588,0.33691,0.760609,0.627182,0.233021
2,11456,Drone Wars,http://store.steampowered.com/app/669200/Drone...,4.99,669200,0.087039,0.177394,1.0,0.090909,0.049568,...,0.272727,0.177394,0.073127,0.13194,0.071067,0.044947,0.047088,0.184637,0.385695,0.089562
3,9737,Birthdays the Beginning / バースデイズ・ザ・ビギニング - Dig...,http://store.steampowered.com/app/570481/Birth...,4.99,570481,0.696311,0.177394,0.090909,1.0,0.099136,...,0.181818,0.118262,0.292509,0.17592,0.284268,0.314627,0.235441,0.307729,0.128565,0.029854
4,25905,Letter Quest: Grimm's Journey Remastered,http://store.steampowered.com/app/373970/Lette...,7.99,373970,0.284747,0.419137,0.049568,0.099136,1.0,...,0.297409,0.483619,0.478471,0.503581,0.50374,0.3431,0.539171,0.570483,0.4907,0.244169


In [29]:
games_concat.to_csv('./API_csv/ML_games_concat.csv', index = False) # Remove or comment out this code cell if the entire dataframe is being used.

In [26]:
def game_recommendation(id: int):
    try:
        print('Wait a minute and you will see our recommendations. Enjoy! ')
        recommendations = pd.DataFrame(games_concat.nlargest(6,id)['id'])
        recommendations = recommendations[recommendations['id']!=id]
        title = pd.DataFrame(games_concat.nlargest(6,id)['title'])
        price = pd.DataFrame(games_concat.nlargest(6,id)['price'])
        url = pd.DataFrame(games_concat.nlargest(6,id)['url'])
        result2 = pd.concat([recommendations,title, price[1:11],url[1:]], axis = 1)
        print(result2)
    except:
        print('Sorry, we can not find a suitable match. Try a different game! ')

In [31]:
game_recommendation(570481)

Wait a minute and you will see our recommendations. Enjoy! 
            id                                              title  price  \
448   433747.0  FSX: Steam Edition - Farm Strips Vol 1: South ...   9.99   
1669  542021.0                              EEP 13 Expert upgrade  19.99   
2428  622352.0        FSX Steam Edition: Toposim Caribbean Add-On   9.99   
2510  494521.0   FSX Steam Edition: McDonnell F3H-2 Demon™ Add-On   9.99   
53    494526.0  FSX Steam Edition: Night Environment: Florida ...  24.99   
3          NaN  Birthdays the Beginning / バースデイズ・ザ・ビギニング - Dig...    NaN   

                                                    url  
448   http://store.steampowered.com/app/433747/FSX_S...  
1669  http://store.steampowered.com/app/542021/EEP_1...  
2428  http://store.steampowered.com/app/622352/FSX_S...  
2510  http://store.steampowered.com/app/494521/FSX_S...  
53    http://store.steampowered.com/app/494526/FSX_S...  
3                                                   NaN  
