In [194]:
import pandas as pd
import numpy as np

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

## Analizamos el DataFrame **`steam_games`**

Importamos el archivo ***steam_games.parquet***

In [211]:
# Load the Steam games table from the local Parquet file (path is relative to the
# notebook's working directory).
steam_games = pd.read_parquet('Dataset/steam_games.parquet')
# Show the frame as the cell output; pandas abbreviates the middle rows of long frames.
steam_games

Unnamed: 0,item_id,title,genres,tags,specs,release_date
0,761140,Lost Summoner Kitty,"[Action, Casual, Indie, Simulation, Strategy]","[Strategy, Action, Indie, Casual, Simulation]",[Single-player],2018-01-04
1,643980,Ironbound,"[Free to Play, Indie, RPG, Strategy]","[Free to Play, Strategy, Indie, RPG, Card Game...","[Single-player, Multi-player, Online Multi-Pla...",2018-01-04
2,670290,Real Pool 3D - Poolians,"[Casual, Free to Play, Indie, Simulation, Sports]","[Free to Play, Simulation, Sports, Casual, Ind...","[Single-player, Multi-player, Online Multi-Pla...",2017-07-24
3,767400,弹炸人2222,"[Action, Adventure, Casual]","[Action, Adventure, Casual]",[Single-player],2017-12-07
4,772540,Battle Royale Trainer,"[Action, Adventure, Simulation]","[Action, Adventure, Simulation, FPS, Shooter, ...","[Single-player, Steam Achievements]",2018-01-04
...,...,...,...,...,...,...
28233,745400,Kebab it Up!,"[Action, Adventure, Casual, Indie]","[Action, Indie, Casual, Violent, Adventure]","[Single-player, Steam Achievements, Steam Cloud]",2018-01-04
28234,773640,Colony On Mars,"[Casual, Indie, Simulation, Strategy]","[Strategy, Indie, Casual, Simulation]","[Single-player, Steam Achievements]",2018-01-04
28235,733530,LOGistICAL: South Africa,"[Casual, Indie, Strategy]","[Strategy, Indie, Casual]","[Single-player, Steam Achievements, Steam Clou...",2018-01-04
28236,610660,Russian Roads,"[Indie, Racing, Simulation]","[Indie, Simulation, Racing]","[Single-player, Steam Achievements, Steam Trad...",2018-01-04


Convertimos las columnas **genres**, **tags** y **specs** a cadenas de texto para manejarlas con NLP

In [212]:
def _lista_a_texto(valor):
    """Return `valor` as a single ', '-separated string.

    Values that are already strings are returned unchanged, which makes the
    conversion below idempotent: joining a string a second time would iterate
    over its characters and produce "A, c, t, i, o, n"-style garbage.
    """
    return valor if isinstance(valor, str) else ", ".join(valor)


# Turn the list-like columns into plain text so they can be tokenised later.
# Thanks to the guard in _lista_a_texto this cell can safely be re-run.
steam_games['genres'] = steam_games['genres'].apply(_lista_a_texto)
steam_games['tags'] = steam_games['tags'].apply(_lista_a_texto)
steam_games['specs'] = steam_games['specs'].apply(_lista_a_texto)

Concatenamos las tres columnas anteriores en una columna **description**

In [213]:
# Build a single text field per game: genres, then tags, then specs, each group
# separated by ', ' so the whole string can later be split on that separator.
steam_games['description'] = steam_games['genres'].str.cat(
    [steam_games['tags'], steam_games['specs']],
    sep=', ',
)
# Preview the first rows to check the new column.
steam_games.head()

Unnamed: 0,item_id,title,genres,tags,specs,release_date,description
0,761140,Lost Summoner Kitty,"Action, Casual, Indie, Simulation, Strategy","Strategy, Action, Indie, Casual, Simulation",Single-player,2018-01-04,"Action, Casual, Indie, Simulation, Strategy, S..."
1,643980,Ironbound,"Free to Play, Indie, RPG, Strategy","Free to Play, Strategy, Indie, RPG, Card Game,...","Single-player, Multi-player, Online Multi-Play...",2018-01-04,"Free to Play, Indie, RPG, Strategy, Free to Pl..."
2,670290,Real Pool 3D - Poolians,"Casual, Free to Play, Indie, Simulation, Sports","Free to Play, Simulation, Sports, Casual, Indi...","Single-player, Multi-player, Online Multi-Play...",2017-07-24,"Casual, Free to Play, Indie, Simulation, Sport..."
3,767400,弹炸人2222,"Action, Adventure, Casual","Action, Adventure, Casual",Single-player,2017-12-07,"Action, Adventure, Casual, Action, Adventure, ..."
4,772540,Battle Royale Trainer,"Action, Adventure, Simulation","Action, Adventure, Simulation, FPS, Shooter, T...","Single-player, Steam Achievements",2018-01-04,"Action, Adventure, Simulation, Action, Adventu..."


Eliminamos las columnas **genres**, **tags**, **specs** y **release_date**

In [326]:
# Remove the columns that were folded into `description` (plus release_date),
# which the rest of the notebook does not read.
# errors='ignore' makes the cell idempotent: running it again once the columns are
# gone is a no-op instead of raising KeyError. Reassigning (rather than
# inplace=True) keeps the step an explicit "new frame from old frame" operation.
steam_games = steam_games.drop(
    columns=['genres', 'tags', 'specs', 'release_date'],
    errors='ignore',
)
steam_games

Unnamed: 0,item_id,title,description
0,761140,Lost Summoner Kitty,"Action, Casual, Indie, Simulation, Strategy, S..."
1,643980,Ironbound,"Free to Play, Indie, RPG, Strategy, Free to Pl..."
2,670290,Real Pool 3D - Poolians,"Casual, Free to Play, Indie, Simulation, Sport..."
3,767400,弹炸人2222,"Action, Adventure, Casual, Action, Adventure, ..."
4,772540,Battle Royale Trainer,"Action, Adventure, Simulation, Action, Adventu..."
...,...,...,...
28233,745400,Kebab it Up!,"Action, Adventure, Casual, Indie, Action, Indi..."
28234,773640,Colony On Mars,"Casual, Indie, Simulation, Strategy, Strategy,..."
28235,733530,LOGistICAL: South Africa,"Casual, Indie, Strategy, Strategy, Indie, Casu..."
28236,610660,Russian Roads,"Indie, Racing, Simulation, Indie, Simulation, ..."


Guardamos el DataFrame en el archivo `steam_games_ml.parquet`, que se utilizará para desplegar la API

In [None]:
# Persist the current frame to Parquet in the working directory; index=False keeps
# the DataFrame index out of the written file.
steam_games.to_parquet('steam_games_ml.parquet', index=False)

### Modelo 1

Utilizamos la clase `CountVectorizer` de scikit-learn para convertir las descripciones en representaciones numéricas (una matriz de conteo de términos).

In [349]:
def _separar_terminos(descripcion):
    """Split a description string into its ', '-separated terms."""
    return descripcion.split(', ')


# Vectorizer that uses the splitter above as tokenizer, so multi-word terms such as
# "Free to Play" remain a single token instead of being broken into words.
# - token_pattern=None: the default regex is never used once a tokenizer is given,
#   and leaving it set makes scikit-learn emit a UserWarning on fit.
# - A named function is used instead of a lambda because, unlike a lambda, it can be
#   pickled by the standard pickle module if the fitted vectorizer must be saved.
vector = CountVectorizer(tokenizer=_separar_terminos, token_pattern=None)

# Fit on the descriptions and build the count matrix: one row per game, one column
# per distinct term, each cell holding how many times the term occurs in that game's
# description (a term present in both genres and tags is counted twice).
matriz_descripcion = vector.fit_transform(steam_games['description'])

In [350]:
def recomendacion_juego(id_producto: int):
    '''
    Recommend the 5 games most similar to the given one.

    Similarity is the cosine similarity between the game's row in
    `matriz_descripcion` and every other row of that matrix.

    Parameters
    ----------
    id_producto : int
        Value to look up in the `item_id` column of `steam_games`.

    Returns
    -------
    list of str
        Titles (`title` column) of up to 5 most similar games, most similar
        first; the queried game itself is never included.
    str
        The message 'El ID no existe, intente con otro' when `id_producto`
        is not present in `steam_games['item_id']`.
    '''
    # Work with row POSITIONS, not index labels: the rows of `matriz_descripcion`
    # are positional, so using labels breaks as soon as the DataFrame index is not
    # a clean 0..n-1 range (e.g. after filtering rows).
    coincidencias = np.flatnonzero(steam_games['item_id'].to_numpy() == id_producto)
    if coincidencias.size == 0:
        return 'El ID no existe, intente con otro'
    posicion = int(coincidencias[0])

    # Similarity of the queried game against every game, flattened to 1-D.
    similitudes = np.asarray(
        cosine_similarity(matriz_descripcion[posicion], matriz_descripcion)
    ).ravel()

    # Sort from most to least similar. A stable sort keeps tied games in their
    # original row order, so the result is deterministic.
    orden = np.argsort(-similitudes, kind='stable')

    # Drop the queried game explicitly instead of assuming it sorts first: another
    # game with an identical description also scores 1.0 and may come before it.
    orden = orden[orden != posicion][:5]

    return steam_games['title'].iloc[orden].tolist()

In [352]:
# Example call: item_id 745400 is "Kebab it Up!" in the table displayed earlier.
recomendacion_juego(745400)

['Dark Snow',
 'M1: A Death in the Desert',
 'The Moon Night',
 'Swingin Swiggins',
 'Cyborg Arena']

In [None]:
def recomendación_usuario(id_usuario: int):
    '''
    Se ingresa el id de un usuario (user_id) y retorna una lista con 5 juegos recomendados para dicho usuario (title).

    '''