#### En este notebook aplicaremos un modelo de recomendación basado en la similitud del coseno. Esta función está asociada a un endpoint de nuestra API, pero, debido a las limitaciones de memoria del servidor, realizaremos este proceso localmente y crearemos un dataset específico para realizar las consultas desde nuestro endpoint.

In [1]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the full games table from the processed parquet file.
games_parquet_path = 'processed_data/games.parquet'
df_games_complete = pd.read_parquet(games_parquet_path)

In [3]:
# Display the loaded games table to check its columns and contents.
df_games_complete

Unnamed: 0,genres,app_name,tags,specs,id,developer,release_year
0,"[Action, Casual, Indie, Simulation, Strategy]",Lost Summoner Kitty,"[Strategy, Action, Indie, Casual, Simulation]",[Single-player],761140.0,Kotoshiro,2018.0
1,"[Free to Play, Indie, RPG, Strategy]",Ironbound,"[Free to Play, Strategy, Indie, RPG, Card Game...","[Single-player, Multi-player, Online Multi-Pla...",643980.0,Secret Level SRL,2018.0
2,"[Casual, Free to Play, Indie, Simulation, Sports]",Real Pool 3D - Poolians,"[Free to Play, Simulation, Sports, Casual, Ind...","[Single-player, Multi-player, Online Multi-Pla...",670290.0,Poolians.com,2017.0
3,"[Action, Adventure, Casual]",弹炸人2222,"[Action, Adventure, Casual]",[Single-player],767400.0,彼岸领域,2017.0
4,"[Casual, Sports, Action, Indie]",Log Challenge,"[Action, Indie, Casual, Sports]","[Single-player, Full controller support, HTC V...",773570.0,,
...,...,...,...,...,...,...,...
32127,"[Casual, Indie, Simulation, Strategy]",Colony On Mars,"[Strategy, Indie, Casual, Simulation]","[Single-player, Steam Achievements]",773640.0,"Nikita ""Ghost_RUS""",2018.0
32128,"[Casual, Indie, Strategy]",LOGistICAL: South Africa,"[Strategy, Indie, Casual]","[Single-player, Steam Achievements, Steam Clou...",733530.0,Sacada,2018.0
32129,"[Indie, Racing, Simulation]",Russian Roads,"[Indie, Simulation, Racing]","[Single-player, Steam Achievements, Steam Trad...",610660.0,Laush Dmitriy Sergeevich,2018.0
32130,"[Casual, Indie]",EXIT 2 - Directions,"[Indie, Casual, Puzzle, Singleplayer, Atmosphe...","[Single-player, Steam Achievements, Steam Cloud]",658870.0,"xropi,stev3ns",2017.0


In [4]:
# Keep only the columns used to build the recommendations. The explicit .copy()
# makes df_games an independent frame, so the column assignments and drops in
# later cells act on df_games itself rather than on a slice of df_games_complete.
df_games = df_games_complete[['id', 'genres', 'app_name', 'specs']].copy()

In [5]:
# Display the reduced frame (id, genres, app_name, specs).
df_games

Unnamed: 0,id,genres,app_name,specs
0,761140.0,"[Action, Casual, Indie, Simulation, Strategy]",Lost Summoner Kitty,[Single-player]
1,643980.0,"[Free to Play, Indie, RPG, Strategy]",Ironbound,"[Single-player, Multi-player, Online Multi-Pla..."
2,670290.0,"[Casual, Free to Play, Indie, Simulation, Sports]",Real Pool 3D - Poolians,"[Single-player, Multi-player, Online Multi-Pla..."
3,767400.0,"[Action, Adventure, Casual]",弹炸人2222,[Single-player]
4,773570.0,"[Casual, Sports, Action, Indie]",Log Challenge,"[Single-player, Full controller support, HTC V..."
...,...,...,...,...
32127,773640.0,"[Casual, Indie, Simulation, Strategy]",Colony On Mars,"[Single-player, Steam Achievements]"
32128,733530.0,"[Casual, Indie, Strategy]",LOGistICAL: South Africa,"[Single-player, Steam Achievements, Steam Clou..."
32129,610660.0,"[Indie, Racing, Simulation]",Russian Roads,"[Single-player, Steam Achievements, Steam Trad..."
32130,658870.0,"[Casual, Indie]",EXIT 2 - Directions,"[Single-player, Steam Achievements, Steam Cloud]"


In [None]:
def _join_tokens(tokens):
    """Return the items of a list-like joined by single spaces ('' stays '')."""
    return ' '.join(map(str, tokens))


# Replace missing genre/spec entries with '' so that joining them yields an
# empty string. The filled columns are assigned back explicitly instead of
# calling fillna(inplace=True) on a column selection, which is chained
# assignment and may only modify a temporary object.
genres_filled = df_games['genres'].fillna('')
specs_filled = df_games['specs'].fillna('')

# Space-separated text versions of the genre lists and of the spec lists.
genres_text = genres_filled.apply(_join_tokens)
specs_text = specs_filled.apply(_join_tokens)

# assign() returns a new frame, so this works whether or not df_games is a
# slice of another frame.
df_games = df_games.assign(
    genres=genres_filled,
    specs=specs_filled,
    genres_str=genres_text,
    specs_str=specs_text,
    # Single text field combining genres and specs.
    genres_specs_str=genres_text + ' ' + specs_text,
)

In [8]:
# Display the frame after adding the joined text columns.
df_games

Unnamed: 0,id,genres,app_name,specs,genres_str,specs_str,genres_specs_str
0,761140.0,"[Action, Casual, Indie, Simulation, Strategy]",Lost Summoner Kitty,[Single-player],Action Casual Indie Simulation Strategy,Single-player,Action Casual Indie Simulation Strategy Single...
1,643980.0,"[Free to Play, Indie, RPG, Strategy]",Ironbound,"[Single-player, Multi-player, Online Multi-Pla...",Free to Play Indie RPG Strategy,Single-player Multi-player Online Multi-Player...,Free to Play Indie RPG Strategy Single-player ...
2,670290.0,"[Casual, Free to Play, Indie, Simulation, Sports]",Real Pool 3D - Poolians,"[Single-player, Multi-player, Online Multi-Pla...",Casual Free to Play Indie Simulation Sports,Single-player Multi-player Online Multi-Player...,Casual Free to Play Indie Simulation Sports Si...
3,767400.0,"[Action, Adventure, Casual]",弹炸人2222,[Single-player],Action Adventure Casual,Single-player,Action Adventure Casual Single-player
4,773570.0,"[Casual, Sports, Action, Indie]",Log Challenge,"[Single-player, Full controller support, HTC V...",Casual Sports Action Indie,Single-player Full controller support HTC Vive...,Casual Sports Action Indie Single-player Full ...
...,...,...,...,...,...,...,...
32127,773640.0,"[Casual, Indie, Simulation, Strategy]",Colony On Mars,"[Single-player, Steam Achievements]",Casual Indie Simulation Strategy,Single-player Steam Achievements,Casual Indie Simulation Strategy Single-player...
32128,733530.0,"[Casual, Indie, Strategy]",LOGistICAL: South Africa,"[Single-player, Steam Achievements, Steam Clou...",Casual Indie Strategy,Single-player Steam Achievements Steam Cloud S...,Casual Indie Strategy Single-player Steam Achi...
32129,610660.0,"[Indie, Racing, Simulation]",Russian Roads,"[Single-player, Steam Achievements, Steam Trad...",Indie Racing Simulation,Single-player Steam Achievements Steam Trading...,Indie Racing Simulation Single-player Steam Ac...
32130,658870.0,"[Casual, Indie]",EXIT 2 - Directions,"[Single-player, Steam Achievements, Steam Cloud]",Casual Indie,Single-player Steam Achievements Steam Cloud,Casual Indie Single-player Steam Achievements ...


In [9]:
# Drop the columns that are no longer needed. Reassigning the result (instead of
# inplace=True) avoids mutating a frame that may be a slice of df_games_complete.
df_games = df_games.drop(columns=['genres', 'specs', 'genres_str', 'specs_str'])
df_games.head()

Unnamed: 0,id,app_name,genres_specs_str
0,761140.0,Lost Summoner Kitty,Action Casual Indie Simulation Strategy Single...
1,643980.0,Ironbound,Free to Play Indie RPG Strategy Single-player ...
2,670290.0,Real Pool 3D - Poolians,Casual Free to Play Indie Simulation Sports Si...
3,767400.0,弹炸人2222,Action Adventure Casual Single-player
4,773570.0,Log Challenge,Casual Sports Action Indie Single-player Full ...


In [10]:
# Create the CountVectorizer; binary=True asks for presence/absence features
# instead of raw term counts.
# NOTE(review): with the default tokenizer, multi-word values such as
# "Free to Play" are presumably split into separate word tokens - confirm this
# is the intended feature granularity.
vectorizer = CountVectorizer(binary=True)
vectorizer

In [11]:
# Build the document-term matrix from the combined genres/specs text and
# convert it to a dense NumPy array (one row per game).
term_counts = vectorizer.fit_transform(df_games['genres_specs_str'])
genres_matrix = term_counts.toarray()
genres_matrix

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 1, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [12]:
# Pairwise cosine similarity between every pair of games. With a single
# argument the matrix is compared against itself. The result has one row and
# one column per game, so its memory footprint grows quadratically.
cosine_similarities = cosine_similarity(genres_matrix)

  ret = a @ b


In [13]:
# Display the pairwise similarity matrix.
cosine_similarities

array([[1.        , 0.3380617 , 0.48795004, ..., 0.50395263, 0.57142857,
        0.41239305],
       [0.3380617 , 1.        , 0.6350853 , ..., 0.52174919, 0.42257713,
        0.19518001],
       [0.48795004, 0.6350853 , 1.        , ..., 0.34426519, 0.39036003,
        0.28171808],
       ...,
       [0.50395263, 0.52174919, 0.34426519, ..., 1.        , 0.62994079,
        0.36369648],
       [0.57142857, 0.42257713, 0.39036003, ..., 0.62994079, 1.        ,
        0.32991444],
       [0.41239305, 0.19518001, 0.28171808, ..., 0.36369648, 0.32991444,
        1.        ]])

### Se crea la función que devuelve las 5 recomendaciones de juegos basándose en la similitud del coseno

In [14]:
def recommend(product_id, n=5, games=None, similarities=None):
    """Return the names of the games most similar to ``product_id``.

    Args:
        product_id: Value to look up in the ``id`` column.
        n: Maximum number of recommendations to return (default 5).
        games: DataFrame with ``id`` and ``app_name`` columns whose row order
            matches ``similarities``. Defaults to the notebook-level
            ``df_games``.
        similarities: Square NumPy array in which ``similarities[i][j]`` is the
            similarity between rows ``i`` and ``j`` of ``games``. Defaults to
            the notebook-level ``cosine_similarities``.

    Returns:
        A list with up to ``n`` ``app_name`` values ordered from most to least
        similar (ties keep row order), or the string
        "Juego no encontrado en la base de datos" when ``product_id`` does not
        appear in ``games['id']``.
    """
    if games is None:
        games = df_games
    if similarities is None:
        similarities = cosine_similarities

    # Look the game up by row position, not by index label: the similarity
    # matrix is positional, so a label only works with a default RangeIndex.
    matches = (games['id'] == product_id).to_numpy().nonzero()[0]
    if len(matches) == 0:
        return "Juego no encontrado en la base de datos"
    position = matches[0]  # first occurrence if the id is duplicated

    # Similarity of the requested game against every game (itself included).
    scores = similarities[position]

    # Stable descending order: equal scores keep their original row order.
    ranking = (-scores).argsort(kind='stable')

    # Remove the requested game by position instead of assuming it sorts first.
    # Another game with an identical feature vector also scores 1.0 and may
    # precede it, which would otherwise recommend the game to itself.
    ranking = ranking[ranking != position][:n]

    return games['app_name'].iloc[ranking].tolist()

### Dado que las limitaciones de memoria del sitio utilizado para hacer el deploy no me permiten realizar este proceso, se realiza en local y se crea una nueva columna con las recomendaciones.

In [15]:
# Run recommend() for every game id and store the resulting lists in a new
# 'recommended_5' column.
recommendations = df_games['id'].apply(recommend)
df_games['recommended_5'] = recommendations

## df_games['recommended_5']

In [16]:
# Display the generated recommendation lists.
df_games['recommended_5']


0        [Wooden Battles, Bubonic: Outbreak, YOU ARE KI...
1        [Duelyst, Animation Throwdown: The Quest for C...
2        [Snooker-online multiplayer snooker game!, Her...
3        [Biozone, Luxor: 5th Passage, CHASER, Turtle O...
4        [Wingless, Caretaker Retribution, Fall Fear Fl...
                               ...                        
32127    [Trivia Vault: Science & History Trivia, Trivi...
32128    [LOGistICAL: Italy, LOGistICAL: USA - Oregon, ...
32129    [Deserter Simulator, Sky Valley, Tapocalypse, ...
32130    [EGO PROTOCOL, Didgery, Adjacency, Heroic Dung...
32131    [God Of Arrows VR, Final Fleet, VR Monster Awa...
Name: recommended_5, Length: 32132, dtype: object

In [17]:
# Show the first rows of the frame, including the new recommendations column.
df_games.head()

Unnamed: 0,id,app_name,genres_specs_str,recommended_5
0,761140.0,Lost Summoner Kitty,Action Casual Indie Simulation Strategy Single...,"[Wooden Battles, Bubonic: Outbreak, YOU ARE KI..."
1,643980.0,Ironbound,Free to Play Indie RPG Strategy Single-player ...,"[Duelyst, Animation Throwdown: The Quest for C..."
2,670290.0,Real Pool 3D - Poolians,Casual Free to Play Indie Simulation Sports Si...,"[Snooker-online multiplayer snooker game!, Her..."
3,767400.0,弹炸人2222,Action Adventure Casual Single-player,"[Biozone, Luxor: 5th Passage, CHASER, Turtle O..."
4,773570.0,Log Challenge,Casual Sports Action Indie Single-player Full ...,"[Wingless, Caretaker Retribution, Fall Fear Fl..."


### Se guarda el dataframe con la nueva columna generada para utilizar en el endpoint de recomendacion

### Se eliminan las columnas que ya no son necesarias y se guarda el df con las recomendaciones.


In [18]:
# The combined text column is not part of the saved output, so drop it.
# Reassigning the result (instead of inplace=True) avoids mutating a frame
# that may be a slice of another frame.
df_games = df_games.drop(columns=['genres_specs_str'])

### Se guarda en parquet games_recommendations.parquet

In [19]:
# Save the recommendations frame as parquet for the API's recommendation endpoint.
recommendations_path = 'API/games_recommendations.parquet'
df_games.to_parquet(recommendations_path)