En este archivo voy a explicar cómo desarrollé y creé el primer sistema de recomendación item-item.
Este sistema de recomendación funciona de la siguiente manera:

![Sistema de recomendación item based](../img/item-based.jpg)

De forma muy breve y resumida, este algoritmo recomienda otros items (en nuestro caso juegos) basado en las categorías de los mismos. 

Se me pide lo siguiente:

![Consigna sistema de recomendación](../img/sistema_recomendacion_consigna_item_item.png)

Importación de las librerías necesarias para trabajar.

In [4]:
# Pandas para leer el archivo
import pandas as pd

# Scikit-learn para el sistema de recomendación
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [None]:
# Lectura y vistazo al archivo de juegos con el que voy a desarrollar el sistema de recomendación
juegos_steam = pd.read_parquet("../Datasets/steam_games_complete.parquet")
juegos_steam.head(1)

Unnamed: 0,item_id,item_name,developer,genres,tags,specs,release_date,price
88310,761140,Lost Summoner Kitty,Kotoshiro,"[Action, Casual, Indie, Simulation, Strategy]","[Strategy, Action, Indie, Casual, Simulation]",[Single-player],2018-01-04,4.99


In [4]:
juegos_steam = juegos_steam[["item_id","item_name","genres","tags","specs"]]

In [5]:
juegos_steam.head(3)

Unnamed: 0,item_id,item_name,genres,tags,specs
88310,761140,Lost Summoner Kitty,"[Action, Casual, Indie, Simulation, Strategy]","[Strategy, Action, Indie, Casual, Simulation]",[Single-player]
88311,643980,Ironbound,"[Free to Play, Indie, RPG, Strategy]","[Free to Play, Strategy, Indie, RPG, Card Game...","[Single-player, Multi-player, Online Multi-Pla..."
88312,670290,Real Pool 3D - Poolians,"[Casual, Free to Play, Indie, Simulation, Sports]","[Free to Play, Simulation, Sports, Casual, Ind...","[Single-player, Multi-player, Online Multi-Pla..."


In [6]:
# Lo que hago en este bloque de código es pasar las columnas genres, tags y specs a un diccionario que tiene cómo clave el indice del dataframe y cómo valor una lista que contiene los atributos de las listas contenidas en esas 3 columnas.
diccionario_categorico = {}

for index, row in juegos_steam.iterrows():
    lista_categorica = []
    for i in row["genres"]:
        lista_categorica.append(i)
    for i in row["tags"]:
        lista_categorica.append(i)
    for i in row["specs"]:
        lista_categorica.append(i)

    diccionario_categorico[index] = lista_categorica
    # if index == 88311:
    #     break



In [10]:
diccionario_categorico  #{88310: ['Action',
#   'Casual',
#   'Indie',
#   'Simulation',
#   'Strategy',
#   'Strategy',
#   'Action',
#   'Indie',
#   'Casual',
#   'Simulation',
#   'Single-player'], ... }

{88310: ['Action',
  'Casual',
  'Indie',
  'Simulation',
  'Strategy',
  'Strategy',
  'Action',
  'Indie',
  'Casual',
  'Simulation',
  'Single-player'],
 88311: ['Free to Play',
  'Indie',
  'RPG',
  'Strategy',
  'Free to Play',
  'Strategy',
  'Indie',
  'RPG',
  'Card Game',
  'Trading Card Game',
  'Turn-Based',
  'Fantasy',
  'Tactical',
  'Dark Fantasy',
  'Board Game',
  'PvP',
  '2D',
  'Competitive',
  'Replay Value',
  'Character Customization',
  'Female Protagonist',
  'Difficult',
  'Design & Illustration',
  'Single-player',
  'Multi-player',
  'Online Multi-Player',
  'Cross-Platform Multiplayer',
  'Steam Achievements',
  'Steam Trading Cards',
  'In-App Purchases'],
 88312: ['Casual',
  'Free to Play',
  'Indie',
  'Simulation',
  'Sports',
  'Free to Play',
  'Simulation',
  'Sports',
  'Casual',
  'Indie',
  'Multiplayer',
  'Single-player',
  'Multi-player',
  'Online Multi-Player',
  'In-App Purchases',
  'Stats'],
 88313: ['Action',
  'Adventure',
  'Casual',


In [11]:
# Lo que hago con este bucle es interar todo el diccionario previamente creado
for i in diccionario_categorico:
    # En cada iteración, cambia los valores: "diccionario_categorico[i]" para dejar unicamente los datos dentro de cada lista que no se repiten con el método "dict.fromkeys" y lo deja en formato lista con el método list
    diccionario_categorico[i] = list(dict.fromkeys(diccionario_categorico[i]))
    print(diccionario_categorico[i])



['Action', 'Casual', 'Indie', 'Simulation', 'Strategy', 'Single-player']
['Free to Play', 'Indie', 'RPG', 'Strategy', 'Card Game', 'Trading Card Game', 'Turn-Based', 'Fantasy', 'Tactical', 'Dark Fantasy', 'Board Game', 'PvP', '2D', 'Competitive', 'Replay Value', 'Character Customization', 'Female Protagonist', 'Difficult', 'Design & Illustration', 'Single-player', 'Multi-player', 'Online Multi-Player', 'Cross-Platform Multiplayer', 'Steam Achievements', 'Steam Trading Cards', 'In-App Purchases']
['Casual', 'Free to Play', 'Indie', 'Simulation', 'Sports', 'Multiplayer', 'Single-player', 'Multi-player', 'Online Multi-Player', 'In-App Purchases', 'Stats']
['Action', 'Adventure', 'Casual', 'Single-player']
['Action', 'Adventure', 'Simulation', 'FPS', 'Shooter', 'Third-Person Shooter', 'Sniper', 'Third Person', 'Single-player', 'Steam Achievements']
['Free to Play', 'Indie', 'Simulation', 'Sports', 'Single-player', 'Multi-player', 'Online Multi-Player', 'Cross-Platform Multiplayer', 'Downlo

In [19]:
# Antes la salida era: {88310: ['Action',
#   'Casual',
#   'Indie',
#   'Simulation',
#   'Strategy',
#   'Strategy',
#   'Action',
#   'Indie',
#   'Casual',
#   'Simulation',
#   'Single-player'], ... }
diccionario_categorico # y ahora es {88310: ['Action',
#   'Casual',
#   'Indie',
#   'Simulation',
#   'Strategy',
#   'Single-player'], ... } Se aplicaron los cambios en cada clave del diccionario

{88310: ['Action',
  'Casual',
  'Indie',
  'Simulation',
  'Strategy',
  'Single-player'],
 88311: ['Free to Play',
  'Indie',
  'RPG',
  'Strategy',
  'Card Game',
  'Trading Card Game',
  'Turn-Based',
  'Fantasy',
  'Tactical',
  'Dark Fantasy',
  'Board Game',
  'PvP',
  '2D',
  'Competitive',
  'Replay Value',
  'Character Customization',
  'Female Protagonist',
  'Difficult',
  'Design & Illustration',
  'Single-player',
  'Multi-player',
  'Online Multi-Player',
  'Cross-Platform Multiplayer',
  'Steam Achievements',
  'Steam Trading Cards',
  'In-App Purchases'],
 88312: ['Casual',
  'Free to Play',
  'Indie',
  'Simulation',
  'Sports',
  'Multiplayer',
  'Single-player',
  'Multi-player',
  'Online Multi-Player',
  'In-App Purchases',
  'Stats'],
 88313: ['Action', 'Adventure', 'Casual', 'Single-player'],
 88315: ['Action',
  'Adventure',
  'Simulation',
  'FPS',
  'Shooter',
  'Third-Person Shooter',
  'Sniper',
  'Third Person',
  'Single-player',
  'Steam Achievements'],


In [14]:
# Agrego la columna "categorical" que va a ser la combinación de "genres", "tags", "specs" con sus valores únicos y lo muetro en pandalla
juegos_steam['categorical'] = juegos_steam.index.map(diccionario_categorico)
juegos_steam.head(1)

Unnamed: 0,item_id,item_name,genres,tags,specs,categorical
88310,761140,Lost Summoner Kitty,"[Action, Casual, Indie, Simulation, Strategy]","[Strategy, Action, Indie, Casual, Simulation]",[Single-player],"[Action, Casual, Indie, Simulation, Strategy, ..."


In [15]:
juegos_steam["categorical"]

88310     [Action, Casual, Indie, Simulation, Strategy, ...
88311     [Free to Play, Indie, RPG, Strategy, Card Game...
88312     [Casual, Free to Play, Indie, Simulation, Spor...
88313            [Action, Adventure, Casual, Single-player]
88315     [Action, Adventure, Simulation, FPS, Shooter, ...
                                ...                        
120439    [Action, Adventure, Casual, Indie, Violent, Si...
120440    [Casual, Indie, Simulation, Strategy, Single-p...
120441    [Casual, Indie, Strategy, Single-player, Steam...
120442    [Indie, Racing, Simulation, Single-player, Ste...
120443    [Casual, Indie, Puzzle, Singleplayer, Atmosphe...
Name: categorical, Length: 22530, dtype: object

In [16]:
# Para el sistema de recomendación, las únicas 3 columnas que necesito son las que dejo en el Dataframe
juegos_steam = juegos_steam[["item_id","item_name","categorical"]]


In [17]:
juegos_steam.head(1)

Unnamed: 0,item_id,item_name,categorical
88310,761140,Lost Summoner Kitty,"[Action, Casual, Indie, Simulation, Strategy, ..."


In [18]:
# Paso las listas de cada fila de la columna categorical a un string
juegos_steam["categorical"] = juegos_steam["categorical"].apply(lambda x: " ".join(x))
juegos_steam["categorical"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  juegos_steam["categorical"] = juegos_steam["categorical"].apply(lambda x: " ".join(x))


88310     Action Casual Indie Simulation Strategy Single...
88311     Free to Play Indie RPG Strategy Card Game Trad...
88312     Casual Free to Play Indie Simulation Sports Mu...
88313                 Action Adventure Casual Single-player
88315     Action Adventure Simulation FPS Shooter Third-...
                                ...                        
120439    Action Adventure Casual Indie Violent Single-p...
120440    Casual Indie Simulation Strategy Single-player...
120441    Casual Indie Strategy Single-player Steam Achi...
120442    Indie Racing Simulation Single-player Steam Ac...
120443    Casual Indie Puzzle Singleplayer Atmospheric R...
Name: categorical, Length: 22530, dtype: object

In [19]:
juegos_steam.head(1)

Unnamed: 0,item_id,item_name,categorical
88310,761140,Lost Summoner Kitty,Action Casual Indie Simulation Strategy Single...


In [21]:
# Cambio el indice, reiniciandolo, para que no me traiga problemas al momento de implementar el sistema de recomendación
juegos_steam.reset_index(drop=True, inplace=True)
juegos_steam.head(1)

Unnamed: 0,item_id,item_name,categorical
0,761140,Lost Summoner Kitty,Action Casual Indie Simulation Strategy Single...


In [22]:
# Exporto el dataframe cómo parquet para poder usarlo en el archivo main.py
juegos_steam.to_parquet("id_name_categorical_of_games.parquet",compression="snappy")

In [35]:
cv = CountVectorizer(max_features=15, stop_words='english')

In [31]:
juegos_steam["categorical"]
vector = cv.fit_transform(juegos_steam["categorical"]).toarray()


In [33]:
vector.shape

(22530, 15)

In [76]:
similitud = cosine_similarity(vector)


array([1.        , 0.5       , 0.64549722, ..., 0.40089186, 0.47434165,
       0.5       ])

In [112]:
cv = CountVectorizer(max_features=15, stop_words='english')
vector = cv.fit_transform(juegos_steam["categorical"]).toarray()
similitud = cosine_similarity(vector)
# Función que toma cómo argumento el ID de un juego y recomienda 5 similares
def recomendacion_juego(id_juego):

    # Busca en 
    indice_juego = juegos_steam[juegos_steam["item_id"] == id_juego].index[0]

    distancias = sorted(list(enumerate(similitud[indice_juego])), reverse=True, key=lambda x: x[1])
    
    juegos_recomendados = []
    detalles_juego = []
    salida = {}

    for i in distancias[1:6]:
        juegos_recomendados.append(juegos_steam.iloc[i[0]].item_name)
        detalles_juego.append(juegos_steam.iloc[i[0]].categorical)
    
    for i,j in enumerate(juegos_recomendados):
        salida[j] = detalles_juego[i]

    return salida
    

        
    

In [113]:
recomendacion_juego("761140")

{'Fallen Mage': 'Action Indie RPG Single-player',
 'Souland': 'Action Indie Strategy Early Access Pixel Graphics Single-player',
 'The Ship: Single Player': 'Action Indie RPG Singleplayer FPS Single-player',
 'Hammerfight': 'Action Indie Physics Mouse only 2D Singleplayer Experimental Multiple Endings Difficult Fighting Local Multiplayer Steampunk Single-player',
 'Aztaka': 'Action RPG Indie Metroidvania Platformer Singleplayer Side Scroller 2D Mythology Single-player'}

In [82]:
juegos_steam[juegos_steam["item_id"] == '761140']

Unnamed: 0,item_id,item_name,categorical
0,761140,Lost Summoner Kitty,Action Casual Indie Simulation Strategy Single...


In [83]:
juegos_steam[juegos_steam["item_name"] == 'Fallen Mage']

Unnamed: 0,item_id,item_name,categorical
20,752360,Fallen Mage,Action Indie RPG Single-player


In [84]:
juegos_steam[juegos_steam["item_name"] == 'Souland']

Unnamed: 0,item_id,item_name,categorical
26,759920,Souland,Action Indie Strategy Early Access Pixel Graph...


In [58]:
juegos_steam["item_id"] == '761140'

88310      True
88311     False
88312     False
88313     False
88315     False
          ...  
120439    False
120440    False
120441    False
120442    False
120443    False
Name: item_id, Length: 22530, dtype: bool