# Imports

In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Files

In [2]:
df = pd.read_pickle("finaljuegos.pkl", compression="gzip")


In [3]:
df

Unnamed: 0,index,name,released,background_image,rating,playtime,Android,Apple II,Game Boy,Game Boy Advance,...,GOG,Google Play,Nintendo Store,PlayStation Store,Steam,Xbox 360 Store,Xbox Store,Multiplayer,Singleplayer,tag_names
0,0,PAYDAY 2,2013-08-13,https://media.rawg.io/media/games/73e/73eecb89...,3.51,9,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,"['Singleplayer', 'Steam Achievements', 'Multip..."
1,1,Horizon Zero Dawn,2017-02-28,https://media.rawg.io/media/games/b7d/b7d3f171...,4.31,10,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,"['Singleplayer', 'Atmospheric', 'Great Soundtr..."
2,2,Grand Theft Auto IV,2008-04-29,https://media.rawg.io/media/games/4a0/4a0a1316...,4.26,10,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,"['Singleplayer', 'Multiplayer', 'Atmospheric',..."
3,3,Rocket League,2015-07-07,https://media.rawg.io/media/games/8cc/8cce7c0e...,3.95,20,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,"['Singleplayer', 'Steam Achievements', 'Multip..."
4,4,Dota 2,2013-07-09,https://media.rawg.io/media/games/6fc/6fcf4cd3...,3.05,14,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,"['Multiplayer', 'steamtradingcards', 'RPG', 'C..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61033,61033,Sweet Desire 3,2021-12-31,https://media.rawg.io/media/screenshots/599/59...,0.00,0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,"['Singleplayer', 'Story Rich', '2D', 'ActionAd..."
61034,61034,Icing,2021-12-03,https://media.rawg.io/media/games/822/82228d8e...,0.00,0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,"['Singleplayer', 'Story Rich', '2D', 'Anime', ..."
61035,61035,Touhou Choushinsei,2021-12-05,https://media.rawg.io/media/screenshots/158/15...,0.00,0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,"['Singleplayer', 'Open World', 'FirstPerson', ..."
61037,61037,Sacrosanct (itch),2021-11-29,https://media.rawg.io/media/screenshots/de4/de...,0.00,0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"['Third Person', 'Vampire', 'vampirejam', 'dar..."


In [4]:
print(df["name"].isnull().sum())  # Debería imprimir 0 si no hay valores nulos
print(df["name"].apply(type).value_counts())  # Debería imprimir solo <class 'str'>

1
<class 'str'>      60246
<class 'float'>        1
Name: name, dtype: int64


In [8]:
df = df.dropna(subset=["name"])

In [9]:
df["name"] = df["name"].astype(str)

In [10]:
df["tag_names"] = df["tag_names"].apply(lambda x: [tag for tag in x if tag.strip()])


# Preprocesamiento



In [11]:
tfidf_name = TfidfVectorizer()
tfidf_tags = TfidfVectorizer()

In [12]:
tfidf_name_matrix = tfidf_name.fit_transform(df["name"])
tfidf_tags_matrix = tfidf_tags.fit_transform(df["tag_names"].apply(lambda x: " ".join(x)))


ValueError: empty vocabulary; perhaps the documents only contain stop words

In [None]:
df["tfidf_name"] = list(tfidf_name_matrix.toarray())
df["tfidf_tags"] = list(tfidf_tags_matrix.toarray())


In [None]:
columns_to_exclude = ["name", "released", "background_image", "rating", "playtime", "tag_names"]
columns_to_use = [col for col in df.columns if col not in columns_to_exclude]


In [None]:
similarity_matrix = cosine_similarity(df[columns_to_use])

# Recomendador

In [None]:
def get_recommendations(title, rating_threshold=7.5):
    while True:
        # Buscar el índice del juego que se desea recomendar
        try:
            idx = df[df['name'].str.lower() == title.lower()].index[0]
        except IndexError:
            title = input("No se encontró el juego. Introduce otro o escribe 'cancelar' para salir: ")
            if title.lower() == "cancelar":
                return None
            continue
        
        # Obtener las puntuaciones de similitud del juego
        sim_scores = list(enumerate(cosine_sim[idx]))

        # Eliminar el propio juego de la lista
        sim_scores = [x for x in sim_scores if x[0] != idx]

        # Filtrar por rating y obtener los 3 juegos más similares
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
        sim_scores = [(i, score) for i, score in sim_scores if df.iloc[i]['rating'] >= rating_threshold][:3]

        # Obtener los índices de los juegos recomendados
        game_indices = [i for i, _ in sim_scores]

        # Devolver los nombres, puntuaciones de rating y enlaces de imagen de los juegos recomendados
        return df[['name', 'rating', 'background_image']].iloc[game_indices]

In [None]:
get_recommendations()