# Machine Learning
- Este jupyter notebook será utilizado para crear un modelo de recomendación consumible por la API


# Funcion Recomendacion item-item

In [42]:
#Importamos las librerias que vamos a usar
import pandas as pd
import numpy as np

import scipy as sp
from sklearn.metrics.pairwise import cosine_similarity
import operator

import fastparquet as fp 
import pyarrow as pa
import pyarrow.parquet as pq

In [43]:
# Creamos Dummies para despues crear la matriz similitud del coseno 
games_ML = pd.read_parquet('src/games_ML.parquet')
games_ML.head(3)

Unnamed: 0,publisher,title,id,genre
0,Kotoshiro,Lost Summoner Kitty,761140,Action
0,Kotoshiro,Lost Summoner Kitty,761140,Casual
0,Kotoshiro,Lost Summoner Kitty,761140,Indie


In [49]:
#Creacíon de dummies
modelo_item= pd.get_dummies(games_ML, columns=["genre"], prefix="")

modelo_item= modelo_item.groupby(["id","title"]).sum().reset_index()

modelo_item

Unnamed: 0,id,title,publisher,_Accounting,_Action,_Adventure,_Animation &amp; Modeling,_Audio Production,_Casual,_Design &amp; Illustration,...,_Photo Editing,_RPG,_Racing,_Simulation,_Software Training,_Sports,_Strategy,_Utilities,_Video Production,_Web Publishing
0,10,Counter-Strike,Valve,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,20,Team Fortress Classic,Valve,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,30,Day of Defeat,Valve,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,40,Deathmatch Classic,Valve,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,50,Half-Life: Opposing Force,Valve,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28728,2028055,Tom Clancy's Ghost Recon Future Soldier - Seas...,Ubisoft,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
28729,2028056,Worms Revolution Season Pass,Team17 Digital Ltd,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
28730,2028062,Call of Duty®: Black Ops II Season Pass,Activision,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
28731,2028103,Assassin’s Creed® III Season Pass,UbisoftUbisoft,0,1,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [50]:
modelo_item = modelo_item.drop(columns='publisher')

In [51]:
modelo_item.head()

Unnamed: 0,id,title,_Accounting,_Action,_Adventure,_Animation &amp; Modeling,_Audio Production,_Casual,_Design &amp; Illustration,_Early Access,...,_Photo Editing,_RPG,_Racing,_Simulation,_Software Training,_Sports,_Strategy,_Utilities,_Video Production,_Web Publishing
0,10,Counter-Strike,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,20,Team Fortress Classic,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,30,Day of Defeat,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,40,Deathmatch Classic,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,50,Half-Life: Opposing Force,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [70]:
#Cuento filas
cant_filas= len(modelo_item)

#Calculo la mitad
mitad_filas= cant_filas // 10
 
#Selecciono la mitad superior
modelo_render= modelo_item.iloc[:mitad_filas]

In [74]:
modelo_render.shape

(2873, 24)

In [76]:
modelo_render.to_parquet("src/modelo_render.parquet")

# Calcular la similitud del coseno 


In [72]:
similitudes = cosine_similarity(modelo_item.iloc[:,3:])

In [73]:
similitudes.shape

(28733, 28733)

In [79]:
def recommend_games(id: int):
    '''
    Esta función recomienda 5 juegos a partir del juego ingresado.

    Args:
        game_id (int): ID único del videojuego al cual se le harán las recomendaciones.
    '''
    # Verifica si el juego con game_id existe en df_games
    game = modelo_render[modelo_render['id'] == id]

    if game.empty:
        return("El juego '{id}' no posee registros.")
    
    # Obtiene el índice del juego dado
    idx = game.index[0]

    # Toma una muestra aleatoria del DataFrame df_games
    sample_size = 2000  # Define el tamaño de la muestra (ajusta según sea necesario)
    df_sample = modelo_render.sample(n=sample_size, random_state=42)  # Ajusta la semilla aleatoria según sea necesario

    # Calcula la similitud de contenido solo para el juego dado y la muestra
    sim_scores = cosine_similarity([modelo_render.iloc[idx, 3:]], df_sample.iloc[:, 3:])

    # Obtiene las puntuaciones de similitud del juego dado con otros juegos
    sim_scores = sim_scores[0]

    # Ordena los juegos por similitud en orden descendente
    similar_games = [(i, sim_scores[i]) for i in range(len(sim_scores)) if i != idx]
    similar_games = sorted(similar_games, key=lambda x: x[1], reverse=True)

    # Obtiene los 5 juegos más similares
    similar_game_indices = [i[0] for i in similar_games[:5]]

    # Lista de juegos similares (solo nombres)
    similar_game_names = df_sample['title'].iloc[similar_game_indices].tolist()

    return {"similar_games": similar_game_names}

In [80]:
recommend_games(10)

{'similar_games': ["Mafia II DLC: Joe's Adventure",
  'Singularity™',
  'Gotham City Impostors Free to Play: Premium Card Pack 1',
  'Call of Duty: World at War',
  'Saints Row: The Third - Nyte Blayde Pack']}