## Modelo de recomendación

In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import ast

In [2]:
games = pd.read_csv('games.csv', sep=',')

In [3]:
games = games.sample(frac=0.05) # Remove or comment out this code cell if you want to use the complete games dataframe.

In [4]:
games_model = games.copy()
games_model = games_model.drop(['release_date', 'app_name', 'publisher', 'developer', 'early_access'], axis=1)

In [5]:
games_model['genres'] = games_model['genres'].apply(ast.literal_eval)
games_model['tags'] = games_model['tags'].apply(ast.literal_eval)
games_model['specs'] = games_model['specs'].apply(ast.literal_eval)

In [6]:
games_model['genres'] = games_model['genres'].apply(lambda x: ', '.join(x))
games_model['tags'] = games_model['tags'].apply(lambda x: ', '.join(x))
games_model['specs'] = games_model['specs'].apply(lambda x: ', '.join(x))

In [7]:
games_model.head(1)

Unnamed: 0,genres,title,url,tags,reviews_url,specs,price,id
23921,"Action, Adventure","METAL GEAR ONLINE EXPANSION PACK ""CLOAKED IN S...",http://store.steampowered.com/app/436030/METAL...,"Action, Adventure",http://steamcommunity.com/app/436030/reviews/?...,"Single-player, Multi-player, Downloadable Cont...",3.99,436030.0


As genres, tags and specs columns are lists, their values are concatenated and transformed into a single string.

In [8]:
def clean_text(tags):
    result = str(tags).lower()
    return(result.replace(',',' '))

In [9]:
games_model['tags'] = games_model['tags'].apply(clean_text)
games_model['genres'] = games_model['genres'].apply(clean_text)
games_model['specs'] = games_model['specs'].apply(clean_text)

A function is created to remove commas.

In [10]:
def clean_text2(specs):
    result = str(specs).lower()
    return(result.replace('-',''))

In [11]:
games_model['tags'] = games_model['tags'].apply(clean_text2)
games_model['genres'] = games_model['genres'].apply(clean_text2)
games_model['specs'] = games_model['specs'].apply(clean_text2)

Another function is created to eliminate hyphens.

In [12]:
games_model = games_model[['url', 'id', 'genres', 'tags', 'specs', 'price']]
games_model['id'] = games_model['id'].astype(int)
games_model.head(1)

Unnamed: 0,url,id,genres,tags,specs,price
23921,http://store.steampowered.com/app/436030/METAL...,436030,action adventure,action adventure,singleplayer multiplayer downloadable conten...,3.99


In [14]:
games_selected2 = games_model.drop(['url'],axis=1)

games_selected2 = games_selected2[games_selected2.columns[1:]].apply(
    lambda x: ' '.join(x.dropna().astype(str)),
    axis=1
)
print(games_selected2.shape)
print(games_selected2.head())

(1607,)
23921    action  adventure action  adventure singleplay...
18641    action  adventure  casual  indie  rpg action  ...
3744     action  strategy  early access early access  s...
14549    action  casual casual  action  multiplayer  vr...
26837    action  free to play action  free to play sing...
dtype: object


The genres, tags and specs columns, after being cleaned, are concatenated into a single string.

In [16]:
vectorizer = CountVectorizer()
vectorized = vectorizer.fit_transform(games_selected2)

In [17]:
vectorized

<1607x422 sparse matrix of type '<class 'numpy.int64'>'
	with 23572 stored elements in Compressed Sparse Row format>

In [18]:
similarities = cosine_similarity(vectorized)
print(similarities)

[[1.         0.39223227 0.48280455 ... 0.12403473 0.32025631 0.69230769]
 [0.39223227 1.         0.24618298 ... 0.10540926 0.45927933 0.26148818]
 [0.48280455 0.24618298 1.         ... 0.42817442 0.22613351 0.31382296]
 ...
 [0.12403473 0.10540926 0.42817442 ... 1.         0.06454972 0.16537965]
 [0.32025631 0.45927933 0.22613351 ... 0.06454972 1.         0.12009612]
 [0.69230769 0.26148818 0.31382296 ... 0.16537965 0.12009612 1.        ]]


The previously created strings are vectorized and the cosine of similarity is applied.

In [19]:
games_vect = pd.DataFrame(similarities, columns=games_model['id'], index=games_model['id']).reset_index()
games_vect

id,id.1,436030,594270,305510,660860,362590,758620,48150,241364,588880,...,384020,352900,404010,739460,549940,657361,492942,25910,337430,294938
0,436030,1.000000,0.392232,0.482805,0.330623,0.710772,0.266469,0.121046,0.627646,0.477567,...,0.665750,0.543493,0.294174,0.261488,0.631798,0.733799,0.692308,0.124035,0.320256,0.692308
1,594270,0.392232,1.000000,0.246183,0.374634,0.268462,0.566139,0.462910,0.123091,0.468293,...,0.424334,0.317543,0.333333,0.666667,0.671156,0.534522,0.261488,0.105409,0.459279,0.261488
2,305510,0.482805,0.246183,1.000000,0.415029,0.421329,0.083624,0.113961,0.590909,0.380443,...,0.541313,0.554322,0.492366,0.246183,0.421329,0.427669,0.313823,0.428174,0.226134,0.313823
3,660860,0.330623,0.374634,0.415029,1.000000,0.339441,0.254514,0.346844,0.138343,0.315789,...,0.476910,0.421776,0.280976,0.187317,0.150863,0.350438,0.220416,0.118470,0.286770,0.220416
4,362590,0.710772,0.268462,0.421329,0.339441,1.000000,0.091192,0.124274,0.495682,0.452589,...,0.590301,0.464991,0.302020,0.134231,0.405405,0.681621,0.552823,0.127343,0.164399,0.552823
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1602,657361,0.733799,0.534522,0.427669,0.350438,0.681621,0.121046,0.164957,0.427669,0.750939,...,0.618590,0.586353,0.356348,0.356348,0.538122,1.000000,0.559085,0.112687,0.436436,0.559085
1603,492942,0.692308,0.261488,0.313823,0.220416,0.552823,0.266469,0.363137,0.675926,0.367359,...,0.484182,0.430265,0.130744,0.261488,0.631798,0.559085,1.000000,0.165380,0.120096,1.000000
1604,25910,0.124035,0.105409,0.428174,0.118470,0.127343,0.143223,0.195180,0.622799,0.118470,...,0.097590,0.219089,0.632456,0.421637,0.339581,0.112687,0.165380,1.000000,0.064550,0.165380
1605,337430,0.320256,0.459279,0.226134,0.286770,0.164399,0.346688,0.094491,0.037689,0.401478,...,0.283473,0.318198,0.051031,0.459279,0.369898,0.436436,0.120096,0.064550,1.000000,0.120096


The similarity values are used to create a new dataframe with the id of each set as rows and columns.

In [20]:
games_u_p = games.drop(['publisher','genres','app_name','id','release_date','tags','reviews_url','specs', 'early_access', 'developer'], axis=1)
games_u_p = games_u_p.reset_index()
games_u_p.head(1)

Unnamed: 0,index,title,url,price
0,23921,"METAL GEAR ONLINE EXPANSION PACK ""CLOAKED IN S...",http://store.steampowered.com/app/436030/METAL...,3.99


In [22]:
games_concat = pd.concat([games_u_p, games_vect], axis=1)

In [23]:
games_concat.head()

Unnamed: 0,index,title,url,price,id,436030,594270,305510,660860,362590,...,384020,352900,404010,739460,549940,657361,492942,25910,337430,294938
0,23921,"METAL GEAR ONLINE EXPANSION PACK ""CLOAKED IN S...",http://store.steampowered.com/app/436030/METAL...,3.99,436030,1.0,0.392232,0.482805,0.330623,0.710772,...,0.66575,0.543493,0.294174,0.261488,0.631798,0.733799,0.692308,0.124035,0.320256,0.692308
1,18641,A Step Into Darkness - Original Soundtrack,http://store.steampowered.com/app/594270/A_Ste...,4.99,594270,0.392232,1.0,0.246183,0.374634,0.268462,...,0.424334,0.317543,0.333333,0.666667,0.671156,0.534522,0.261488,0.105409,0.459279,0.261488
2,3744,Servo,http://store.steampowered.com/app/305510/Servo/,Cooming Soon,305510,0.482805,0.246183,1.0,0.415029,0.421329,...,0.541313,0.554322,0.492366,0.246183,0.421329,0.427669,0.313823,0.428174,0.226134,0.313823
3,14549,Cube Monster,http://store.steampowered.com/app/660860/Cube_...,Cooming Soon,660860,0.330623,0.374634,0.415029,1.0,0.339441,...,0.47691,0.421776,0.280976,0.187317,0.150863,0.350438,0.220416,0.11847,0.28677,0.220416
4,26837,METAL SLUG DEFENSE - “Morden Army Pack” Vol.3,http://store.steampowered.com/app/362590/METAL...,4.99,362590,0.710772,0.268462,0.421329,0.339441,1.0,...,0.590301,0.464991,0.30202,0.134231,0.405405,0.681621,0.552823,0.127343,0.164399,0.552823


The title, url and price of each set is concatenated to the previous df.

In [24]:
games_concat.to_csv('./API_csv/ML_games_concat.csv', index = False) # Remove or comment out this code cell if the entire dataframe is being used.

This last dataframe is exported for API use.

In [25]:
def game_recommendation(id: int):
    try:
        print('Wait a minute and you will see our recommendations. Enjoy! ')
        recommendations = pd.DataFrame(games_concat.nlargest(6,id)['id'])
        recommendations = recommendations[recommendations['id']!=id]
        title = pd.DataFrame(games_concat.nlargest(6,id)['title'])
        price = pd.DataFrame(games_concat.nlargest(6,id)['price'])
        url = pd.DataFrame(games_concat.nlargest(6,id)['url'])
        result2 = pd.concat([recommendations,title, price[1:11],url[1:]], axis = 1)
        print(result2)
    except:
        print('Sorry, we can not find a suitable match. Try a different game! ')

In [26]:
game_recommendation(305510)

Wait a minute and you will see our recommendations. Enjoy! 
            id                                    title         price  \
1566  258050.0                           Survivor Squad          2.99   
626   288260.0  Conflicks - Revolutionary Space Battles          6.99   
930   387870.0                          Mold on Pizza 🍕          5.99   
1166  206500.0                           AirMech Strike  Free to Play   
380   453870.0                            Quantum Chess          9.99   
2          NaN                                    Servo           NaN   

                                                    url  
1566  http://store.steampowered.com/app/258050/Survi...  
626   http://store.steampowered.com/app/288260/Confl...  
930   http://store.steampowered.com/app/387870/Mold_...  
1166  http://store.steampowered.com/app/206500/AirMe...  
380   http://store.steampowered.com/app/453870/Quant...  
2                                                   NaN  


The function <code>game_recommendation</code> receives an id as int, and returns 5 game recommendations with their title, url and price.