# LightFM & ScaNN Game Recommendation System Overview

### 1. Load needed data

In [1]:
# --=[ Load dependencies ]=--

import numpy as np
import pandas as pd
import scann
from lightfm import LightFM
from scipy.sparse import coo_matrix, save_npz, load_npz
from sklearn.feature_extraction.text import CountVectorizer
from scipy.sparse import hstack, identity

2025-01-11 15:28:08.899439: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-11 15:28:08.900605: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-11 15:28:08.925356: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-11 15:28:08.925897: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# --=[ Load data ]=--

# Tabular sources: one CSV per entity, plus line-delimited JSON holding the
# per-game description and tags.
users = pd.read_csv("./data/users.csv")
games = pd.read_csv("./data/games.csv")
recommendations = pd.read_csv("./data/recommendations.csv")
games_metadata = pd.read_json("./data/games_metadata.json", lines=True)

# Pre-built sparse user/game interaction matrix, stored in scipy's .npz format.
USERSxGAMES = load_npz("./data/rating_matrix_sparse.npz")

# Game features are not loaded from disk; they are derived from
# `games_metadata` in the preprocessing cell.


In [3]:
# --=[ Data visualization ]=--
# Preview the first five metadata rows (app_id, description, tags).
games_metadata.head(n=5)

Unnamed: 0,app_id,description,tags
0,13500,Enter the dark underworld of Prince of Persia ...,"[Action, Adventure, Parkour, Third Person, Gre..."
1,22364,,[Action]
2,113020,Monaco: What's Yours Is Mine is a single playe...,"[Co-op, Stealth, Indie, Heist, Local Co-Op, St..."
3,226560,Escape Dead Island is a Survival-Mystery adven...,"[Zombies, Adventure, Survival, Action, Third P..."
4,249050,Dungeon of the Endless is a Rogue-Like Dungeon...,"[Roguelike, Strategy, Tower Defense, Pixel Gra..."


In [4]:
# --=[ Mappers ]=--

# Distinct raw IDs taken from the users and games tables.
unique_user_ids = users['user_id'].unique()
unique_game_ids = games['app_id'].unique()

# Forward lookups: raw ID -> 0-based position in the arrays above.
user_id_to_index = dict(zip(unique_user_ids, range(len(unique_user_ids))))
game_id_to_index = dict(zip(unique_game_ids, range(len(unique_game_ids))))

# Reverse lookups: 0-based position -> raw ID.
index_to_user_id = dict(enumerate(unique_user_ids))
index_to_game_id = dict(enumerate(unique_game_ids))

In [5]:
# --=[ Preprocess data ]=--

# Collapse each game's list of tags into a single comma-separated string.
# The isinstance guard keeps this cell idempotent: on a re-run the column
# already holds strings, and ','.join() on a string would insert a comma
# between every character and silently corrupt the tag vocabulary.
games_metadata['tags'] = games_metadata['tags'].apply(
    lambda tags: ','.join(tags) if isinstance(tags, (list, tuple)) else tags
)

features = games_metadata['tags']

# Tags can contain spaces (e.g. "Third Person"), so tokenise on commas only;
# token_pattern=None because a custom tokenizer replaces the regex pattern.
vectorizer = CountVectorizer(tokenizer=lambda x: x.split(','), token_pattern=None)

# Sparse count matrix: one row per games_metadata row, one column per token.
game_features_sparse = vectorizer.fit_transform(features)
# Identity block: gives every game its own indicator feature next to its tags.
game_identity_matrix = identity(len(games), format='csr')

# NOTE(review): hstack needs both blocks to have the same number of rows, and
# row i of each block should describe the same game. This assumes `games` and
# `games_metadata` list the same app_ids in the same order -- TODO confirm.
final_game_features = hstack([game_identity_matrix, game_features_sparse])

In [None]:
# Train a WARP-loss LightFM model on the interaction matrix, using the
# identity + tag matrix as item features.
# random_state pins the random initialisation and sampling so that
# Restart & Run All starts from the same state every time.
# NOTE(review): with num_threads > 1 the parallel updates may still make runs
# differ slightly -- set num_threads=1 if exact reproducibility is required.
model = LightFM(loss='warp', random_state=42)
model.fit(USERSxGAMES, epochs=5, item_features=final_game_features, num_threads=10)

<lightfm.lightfm.LightFM at 0x7fc9a13ebe20>

In [7]:
def game_id_to_name(game_id):
    """Return the title of the game whose ``app_id`` equals ``game_id``.

    The lookup is done against the module-level ``games`` DataFrame. If
    several rows share the same ``app_id``, the title of the first one is
    returned.

    Args:
        game_id: Value to match against the ``app_id`` column of ``games``.

    Returns:
        The ``title`` value of the first matching row.

    Raises:
        IndexError: If no row in ``games`` has this ``app_id``. The message
            names the missing id instead of numpy's generic out-of-bounds
            error.
    """
    titles = games.loc[games['app_id'] == game_id, 'title']
    if titles.empty:
        raise IndexError(f"No game with app_id {game_id!r} found in `games`")
    return titles.values[0]

### 2. Create Users and Games embeddings

### 3. Create Recommender

### 4. Evaluate the model