## Modelo de recomendación

In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import ast

In [2]:
games = pd.read_csv('games.csv', sep=',')

In [4]:
games = games.sample(frac=0.05) # Remove or comment out this code cell if you want to use the complete games dataframe.

In [5]:
games_model = games.copy()
games_model = games_model.drop(['release_date', 'app_name', 'publisher', 'developer', 'early_access'], axis=1)

In [6]:
games_model['genres'] = games_model['genres'].apply(ast.literal_eval)
games_model['tags'] = games_model['tags'].apply(ast.literal_eval)
games_model['specs'] = games_model['specs'].apply(ast.literal_eval)

In [7]:
games_model['genres'] = games_model['genres'].apply(lambda x: ', '.join(x))
games_model['tags'] = games_model['tags'].apply(lambda x: ', '.join(x))
games_model['specs'] = games_model['specs'].apply(lambda x: ', '.join(x))

In [8]:
games_model.head(1)

Unnamed: 0,genres,title,url,tags,reviews_url,specs,price,id
31107,Action,Saints Row: The Third Season Pass DLC Pack,http://store.steampowered.com/app/901805/Saint...,Action,http://steamcommunity.com/app/901805/reviews/?...,"Single-player, Co-op, Downloadable Content, Pa...",4.99,901805.0


In [9]:
def clean_text(tags):
    result = str(tags).lower()
    return(result.replace(',',' '))

In [10]:
games_model['tags'] = games_model['tags'].apply(clean_text)
games_model['genres'] = games_model['genres'].apply(clean_text)
games_model['specs'] = games_model['specs'].apply(clean_text)

In [11]:
def clean_text2(specs):
    result = str(specs).lower()
    return(result.replace('-',''))

In [12]:
games_model['tags'] = games_model['tags'].apply(clean_text2)
games_model['genres'] = games_model['genres'].apply(clean_text2)
games_model['specs'] = games_model['specs'].apply(clean_text2)

In [13]:
games_model = games_model[['url', 'id', 'genres', 'tags', 'specs', 'price']]
games_model['id'] = games_model['id'].astype(int)
games_model.head(1)

Unnamed: 0,url,id,genres,tags,specs,price
31107,http://store.steampowered.com/app/901805/Saint...,901805,action,action,singleplayer coop downloadable content part...,4.99


In [14]:
games_model.to_csv('./API_csv/ML_games_model.csv', index=False)

In [15]:
games_selected2 = games_model.drop(['url'],axis=1)

games_selected2 = games_selected2[games_selected2.columns[1:]].apply(
    lambda x: ' '.join(x.dropna().astype(str)),
    axis=1
)
print(games_selected2.shape)
print(games_selected2.head())

(1607,)
31107    action action singleplayer  coop  downloadable...
21418    action  adventure  indie  rpg rpg  action  dun...
19245    action  adventure  indie  rpg  simulation  ear...
23603    casual  indie  strategy strategy  casual  indi...
1158     rpg rpg  fantasy  open world  dragons  third p...
dtype: object


In [16]:
type(games_selected2)

pandas.core.series.Series

In [17]:
games_selected2.to_csv('./API_csv/ML_games_selected2.csv', index=False, header=False)

In [18]:
vectorizer = CountVectorizer()
vectorized = vectorizer.fit_transform(games_selected2)

In [19]:
vectorized

<1607x431 sparse matrix of type '<class 'numpy.int64'>'
	with 23743 stored elements in Compressed Sparse Row format>

In [20]:
similarities = cosine_similarity(vectorized)
print(similarities)

[[1.         0.48304589 0.3678836  ... 0.21821789 0.06900656 0.38575837]
 [0.48304589 1.         0.48868782 ... 0.28987545 0.33333333 0.2422407 ]
 [0.3678836  0.48868782 1.         ... 0.2575611  0.29617444 0.48014298]
 ...
 [0.21821789 0.28987545 0.2575611  ... 1.         0.47434165 0.1767767 ]
 [0.06900656 0.33333333 0.29617444 ... 0.47434165 1.         0.186339  ]
 [0.38575837 0.2422407  0.48014298 ... 0.1767767  0.186339   1.        ]]


In [21]:
games_vect = pd.DataFrame(similarities, columns=games_model['id'], index=games_model['id']).reset_index()
games_vect

id,id.1,901805,492500,526160,455940,219780,752450,619870,237550,396060,...,482990,620270,393021,511950,360422,582380,436603,413840,698770,325300
0,901805,1.000000,0.483046,0.367884,0.278639,0.299572,0.350857,0.222375,0.303046,0.200446,...,0.418718,0.248146,0.429198,0.069007,0.192006,0.321429,0.507093,0.218218,0.069007,0.385758
1,492500,0.483046,1.000000,0.488688,0.430706,0.516811,0.282466,0.393863,0.731925,0.193649,...,0.606780,0.311651,0.473879,0.033333,0.417365,0.207020,0.449073,0.289875,0.333333,0.242241
2,526160,0.367884,0.488688,1.000000,0.358774,0.385727,0.401565,0.318142,0.476910,0.143385,...,0.419330,0.532518,0.342105,0.088852,0.412043,0.413869,0.326464,0.257561,0.296174,0.480143
3,455940,0.278639,0.430706,0.358774,1.000000,0.100167,0.243320,0.693978,0.472866,0.260643,...,0.689656,0.348481,0.813220,0.053838,0.337053,0.055728,0.560473,0.255377,0.269191,0.300965
4,219780,0.299572,0.516811,0.385727,0.100167,1.000000,0.070071,0.133235,0.423659,0.080064,...,0.167248,0.208145,0.110208,0.082690,0.316359,0.278174,0.075955,0.163430,0.165380,0.161788
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1602,582380,0.321429,0.207020,0.413869,0.055728,0.278174,0.643237,0.074125,0.176777,0.100223,...,0.116311,0.595550,0.122628,0.034503,0.528017,1.000000,0.147902,0.109109,0.034503,0.347183
1603,436603,0.507093,0.449073,0.326464,0.560473,0.075955,0.507392,0.482382,0.388449,0.197642,...,0.688102,0.088083,0.689202,0.040825,0.085194,0.147902,1.000000,0.129099,0.040825,0.251040
1604,413840,0.218218,0.289875,0.257561,0.255377,0.163430,0.119098,0.339683,0.424334,0.255155,...,0.355335,0.265334,0.374634,0.052705,0.256632,0.109109,0.129099,1.000000,0.474342,0.176777
1605,698770,0.069007,0.333333,0.296174,0.269191,0.165380,0.188311,0.358057,0.487950,0.193649,...,0.404520,0.191785,0.296174,0.000000,0.185496,0.034503,0.040825,0.474342,1.000000,0.186339


In [22]:
games_u_p = games.drop(['publisher','genres','app_name','id','release_date','tags','reviews_url','specs', 'early_access', 'developer'], axis=1)
games_u_p = games_u_p.reset_index()
games_u_p.head(1)

Unnamed: 0,index,title,url,price
0,31107,Saints Row: The Third Season Pass DLC Pack,http://store.steampowered.com/app/901805/Saint...,4.99


In [23]:
games_u_p.to_csv('./API_csv/ML_games_u_p.csv', index=False)

In [24]:
games_concat = pd.concat([games_u_p, games_vect], axis=1)

In [25]:
games_concat.head()

Unnamed: 0,index,title,url,price,id,901805,492500,526160,455940,219780,...,482990,620270,393021,511950,360422,582380,436603,413840,698770,325300
0,31107,Saints Row: The Third Season Pass DLC Pack,http://store.steampowered.com/app/901805/Saint...,4.99,901805,1.0,0.483046,0.367884,0.278639,0.299572,...,0.418718,0.248146,0.429198,0.069007,0.192006,0.321429,0.507093,0.218218,0.069007,0.385758
1,21418,Heroes of the Monkey Tavern,http://store.steampowered.com/app/492500/Heroe...,9.99,492500,0.483046,1.0,0.488688,0.430706,0.516811,...,0.60678,0.311651,0.473879,0.033333,0.417365,0.20702,0.449073,0.289875,0.333333,0.242241
2,19245,The Wild Eight,http://store.steampowered.com/app/526160/The_W...,19.99,526160,0.367884,0.488688,1.0,0.358774,0.385727,...,0.41933,0.532518,0.342105,0.088852,0.412043,0.413869,0.326464,0.257561,0.296174,0.480143
3,23603,Odd||Even,http://store.steampowered.com/app/455940/OddEven/,4.99,455940,0.278639,0.430706,0.358774,1.0,0.100167,...,0.689656,0.348481,0.81322,0.053838,0.337053,0.055728,0.560473,0.255377,0.269191,0.300965
4,1158,Divinity II: Developer's Cut,http://store.steampowered.com/app/219780/Divin...,19.99,219780,0.299572,0.516811,0.385727,0.100167,1.0,...,0.167248,0.208145,0.110208,0.08269,0.316359,0.278174,0.075955,0.16343,0.16538,0.161788


In [26]:
games_concat.to_csv('./API_csv/ML_games_concat.csv', index = False) # Remove or comment out this code cell if the entire dataframe is being used.

In [27]:
def game_recommendation(id: int):
    try:
        print('Wait a minute and you will see our recommendations. Enjoy! ')
        recommendations = pd.DataFrame(games_concat.nlargest(6,id)['id'])
        recommendations = recommendations[recommendations['id']!=id]
        title = pd.DataFrame(games_concat.nlargest(6,id)['title'])
        price = pd.DataFrame(games_concat.nlargest(6,id)['price'])
        url = pd.DataFrame(games_concat.nlargest(6,id)['url'])
        result2 = pd.concat([recommendations,title, price[1:11],url[1:]], axis = 1)
        print(result2)
    except:
        print('Sorry, we can not find a suitable match. Try a different game! ')

In [29]:
game_recommendation(455940)

Wait a minute and you will see our recommendations. Enjoy! 
            id                       title price  \
529   476240.0                     KNIGHTS  0.99   
1543  412520.0              Evil Defenders  9.99   
1442  429060.0       Zombie Wars: Invasion  2.99   
429   721380.0  LOGistICAL: USA - New York  9.99   
1331  485740.0              Color Syndrome  0.99   
3          NaN                   Odd||Even   NaN   

                                                    url  
529   http://store.steampowered.com/app/476240/KNIGHTS/  
1543  http://store.steampowered.com/app/412520/Evil_...  
1442  http://store.steampowered.com/app/429060/Zombi...  
429   http://store.steampowered.com/app/721380/LOGis...  
1331  http://store.steampowered.com/app/485740/Color...  
3                                                   NaN  
