In [9]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats 
from ydata_profiling import ProfileReport
from collections import Counter
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy
import heapq


In [10]:
# Read the three reduced parquet datasets (reviews, game catalogue, user items)
# in one pass; the order of the paths matches the order of the target names.
dfreviews, dfsteam, dfitems = (
    pd.read_parquet(parquet_path)
    for parquet_path in (
        'User_reviews_reducido_32.parquet',
        'Output_steam_games_reducido_32.parquet',
        'User_items_reducido_32.parquet',
    )
)

In [11]:
# Attach the game catalogue columns to every review; the inner join keeps only
# reviews whose Item_Id is present in dfsteam.
dfreviews = pd.merge(
    dfreviews,
    dfsteam,
    how='inner',
    on='Item_Id',
)

In [12]:
# Keep only the columns used by the recommender and the exploratory cells below.
dfreviews = dfreviews[
    [
        "User_Id",
        "Item_Id",
        "Recommend",
        "Sentiment Analysis",
        "App_name",
    ]
]

In [13]:
# Cast the Recommend flag to an integer; it is later fed to the recommender as
# a rating on a 0-1 scale.
dfreviews = dfreviews.astype({'Recommend': int})

# Show the resulting DataFrame
dfreviews

Unnamed: 0,User_Id,Item_Id,Recommend,Sentiment Analysis,App_name
0,76561197970982479,1250,1,2,Killing Floor
1,death-hunter,1250,1,2,Killing Floor
2,DJKamBer,1250,1,0,Killing Floor
3,diego9031,1250,1,2,Killing Floor
4,76561198081962345,1250,1,2,Killing Floor
...,...,...,...,...,...
41797,laislabonita75,367780,1,2,Aero's Quest
41798,evilindiegaming,367780,1,2,Aero's Quest
41799,laislabonita75,305920,0,0,Another Perspective
41800,Gamer0009,306040,1,2,The Howler


In [14]:
# Discard every row that has a missing value in any of the kept columns.
dfreviews = dfreviews.dropna()

In [15]:
# Count how many reviews fall in each Recommend class (0 vs 1).
dfreviews.loc[:, "Recommend"].value_counts()

Recommend
1    37960
0     3842
Name: count, dtype: int64

In [16]:
# Persist the cleaned review table so the model input can be reloaded later.
dfreviews.to_parquet(path='ML_reducido.parquet')

In [17]:


def train_and_predict(user_id, n=5, exclude_reviewed=True, random_state=42):
    """Recommend the ``n`` best-scoring games for ``user_id`` with an SVD model.

    The model is trained on the module-level ``dfreviews`` frame, using the
    binary ``Recommend`` column as the rating (scale 0-1).

    Parameters
    ----------
    user_id
        Raw user identifier as it appears in ``dfreviews['User_Id']``. A user
        that is absent from the data is still accepted; Surprise then falls
        back to a prediction that does not use user-specific factors.
    n : int, default 5
        Number of titles to return.
    exclude_reviewed : bool, default True
        When True, games the user has already reviewed are not recommended
        back to them.
    random_state : int or None, default 42
        Seed forwarded to ``SVD`` so repeated calls give the same ranking.
        Pass ``None`` for a non-deterministic fit.

    Returns
    -------
    list of str
        ``App_name`` of the recommended games, best first. May contain fewer
        than ``n`` entries when there are not enough candidate items.

    Notes
    -----
    The model is re-fitted on every call; cache it outside this function if
    it is called repeatedly.
    """
    df = dfreviews

    # Ratings are the 0/1 Recommend flag.
    reader = Reader(rating_scale=(0, 1))
    data = Dataset.load_from_df(df[['User_Id', 'Item_Id', 'Recommend']], reader)

    # No evaluation is done here, so train on every review instead of
    # throwing away a random 20% hold-out that is never used.
    trainset = data.build_full_trainset()
    model = SVD(random_state=random_state)
    model.fit(trainset)

    # Resolve Item_Id -> first App_name once instead of scanning the whole
    # frame for every recommended item.
    item_names = df.drop_duplicates('Item_Id').set_index('Item_Id')['App_name']

    candidates = df['Item_Id'].unique()
    if exclude_reviewed:
        reviewed = set(df.loc[df['User_Id'] == user_id, 'Item_Id'])
        candidates = [item_id for item_id in candidates if item_id not in reviewed]

    # Negated estimates turn "largest score" into "smallest tuple"; ties are
    # broken by the smaller Item_Id, exactly as in a min-heap of these tuples.
    scored = (
        (-model.predict(user_id, item_id).est, item_id)
        for item_id in candidates
    )
    top_n = heapq.nsmallest(n, scored)

    return [item_names.loc[item_id] for _, item_id in top_n]



In [20]:
# Example: top recommendations for one user present in the review data.
train_and_predict(user_id="Gamer0009")

['Day of Defeat',
 'Ricochet',
 'Half-Life',
 'Portal',
 'Half-Life 2: Episode Two']