# LightFM & ScaNN  |  Game Recommendation System

In [1]:
# -----------------=[ Load Dependencies ]=----------------

import numpy as np
import pandas as pd
import scann
from lightfm import LightFM
from lightfm.data import Dataset
from tqdm import tqdm
from scipy.sparse import load_npz
import pickle
from sklearn.preprocessing import MultiLabelBinarizer

2025-01-20 23:43:28.943500: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-20 23:43:28.944734: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-20 23:43:28.969484: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-20 23:43:28.970018: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## 1. Data preprocessing

In [2]:
# -----------------=[ Data reading ]=----------------

# Raw tables. games_metadata.json is read as JSON Lines (one object per line).
users = pd.read_csv('./data/users.csv')
games = pd.read_csv('./data/games.csv')
recommendations = pd.read_csv('./data/recommendations.csv')
gamesMetadata = pd.read_json('./data/games_metadata.json', lines=True)

# Interaction matrix the model is trained on, converted to CSR because the
# code below slices single rows out of it (matrix[user_index]).
# NOTE(review): presumably rows are user indices and columns are game indices
# in the same order as the mappers built below -- confirm against the script
# that produced the .npz files.
interactions = load_npz('./data/train_and_test.npz').tocsr()

# Test users with 40% of their history (this is the matrix used for testing)
rest_test = load_npz('./data/rest_test.npz').tocsr()

# Test users with 100% of their history (used for getting user indices)
test_matrix = load_npz('./data/test_matrix.npz').tocsr()

In [3]:
# -------------------=[ Mappers ]=-------------------

userIds = users['user_id'].unique()
gameIds = games['app_id'].unique()

# Raw id -> contiguous index (the id's position in the unique-id arrays above).
mapUserId = {user_id: idx for idx, user_id in enumerate(userIds)}
mapGameId = {game_id: idx for idx, game_id in enumerate(gameIds)}
# Contiguous index -> raw id (inverse of the two maps above).
mapUserIndex = {idx: user_id for user_id, idx in mapUserId.items()}
mapGameIndex = {idx: game_id for game_id, idx in mapGameId.items()}


def mapToTitle(game_id):
  """Return the title of the first row in ``games`` whose ``app_id`` is ``game_id``.

  Args:
    game_id: raw ``app_id`` value to look up.

  Returns:
    The ``title`` value of the first matching row.

  Raises:
    IndexError: if no row of ``games`` has that ``app_id``.
  """
  # Boolean-mask scan over the whole table on every call; fine for a handful
  # of lookups, slow if called in bulk.
  return games[games['app_id'] == game_id]['title'].values[0]

In [4]:
# -------------------=[ Game Features ]=-------------------

# The binarizer is fitted to obtain the tag vocabulary (mlb.classes_).
mlb = MultiLabelBinarizer()
feature_matrix = mlb.fit_transform(gamesMetadata['tags'])

# Dense one-hot table of tags per game. Not needed by LightFM itself; kept
# because other cells may reference it.
feature_matrix_df = pd.DataFrame(feature_matrix, columns=mlb.classes_)

dataset = Dataset()

# Dataset.fit takes an iterable of feature *names*. Passing the DataFrame only
# worked because iterating a DataFrame yields its column labels; hand over the
# tag vocabulary explicitly instead.
dataset.fit(
  items=gameIds,
  users=userIds,
  item_features=mlb.classes_
)

# One (app_id, [tags...]) pair per game. This produces the same entries as
# emitting one (app_id, [tag]) pair per tag, without the per-row Series that
# iterrows() builds.
item_features = dataset.build_item_features(
    zip(gamesMetadata['app_id'], gamesMetadata['tags'])
)

## 2. Model training

In [4]:
# -------------------=[ Training helpers ]=-------------------

def fit(model, loss, epochs=6, inner_epochs=50, num_threads=20):
  """Continue training ``model`` on ``interactions``, checkpointing after every round.

  Args:
    model: object exposing ``fit_partial`` (a LightFM model in this notebook).
    loss: label used only to build the checkpoint file name.
    epochs: number of outer rounds; a checkpoint is written after each one.
    inner_epochs: ``epochs`` value passed to each ``fit_partial`` call.
    num_threads: ``num_threads`` value passed to each ``fit_partial`` call.
  """
  import os  # local import: only this helper needs it

  checkpoint_path = f'./data/model/lightfm_{loss}.pkl'
  # Create the target directory up front so a missing folder cannot make the
  # first checkpoint write fail after a full training round has already run.
  os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

  for _ in tqdm(range(1, epochs + 1)):
    model.fit_partial(interactions, epochs=inner_epochs, num_threads=num_threads)

    # Overwrite the checkpoint every round so an interrupted run keeps the
    # progress made so far.
    with open(checkpoint_path, 'wb') as f:
      pickle.dump(model, f)

def loadModel(loss) -> LightFM:
  """Unpickle and return the model checkpoint stored for the given loss label.

  Args:
    loss: label that selects the file ``./data/model/lightfm_<loss>.pkl``.

  Returns:
    Whatever object was pickled into that file.
  """
  checkpoint_path = f'./data/model/lightfm_{loss}.pkl'
  # NOTE: unpickling can execute arbitrary code -- only load checkpoints that
  # were produced locally by this notebook.
  with open(checkpoint_path, 'rb') as checkpoint:
    return pickle.load(checkpoint)

In [5]:
# ----------------=[ Model training ]=---------------

# Resume from the saved 'bpr' checkpoint and run 12 more training rounds;
# fit() writes the model back to the same checkpoint file after each round.
model = loadModel('bpr')
fit(model, loss='bpr', epochs=12)

100%|██████████| 12/12 [1:20:21<00:00, 401.78s/it]


In [5]:
def listUserLikedGames(user_id, matrix):
  """Return the game ids whose entry in the user's row of ``matrix`` equals 1.

  Args:
    user_id: raw user id; translated to a row index through ``mapUserId``.
    matrix: sparse matrix supporting row indexing and ``toarray()``.

  Returns:
    List of raw game ids in ascending column order.
  """
  row_index = mapUserId[user_id]
  # Densify just this one row, then keep the columns holding exactly 1.
  dense_row = matrix[row_index].toarray()[0]
  return [mapGameIndex[column] for column, value in enumerate(dense_row) if value == 1]

In [11]:
# -----------------=[ Prediction ]=------------------

def embedUser(user_id):
  """Embed a user as the mean of the item embeddings of the games they liked.

  Liked games are read from ``interactions`` via ``listUserLikedGames``.

  Args:
    user_id: raw user id.

  Returns:
    1-D array with one entry per embedding dimension of ``model.item_embeddings``.
    A user with no liked games gets an all-zero vector.
  """
  liked_game_ids = listUserLikedGames(user_id, interactions)
  item_embeddings = model.item_embeddings

  if len(liked_game_ids) == 0:
    # Take width and dtype from the model instead of a hard-coded 64, so the
    # cold-start vector always matches what the non-empty branch returns.
    return np.zeros(item_embeddings.shape[1], dtype=item_embeddings.dtype)

  game_indices = [mapGameId[game_id] for game_id in liked_game_ids]
  return np.mean(item_embeddings[game_indices], axis=0)



def recommend(user_id, k):
  """Return game ids for ``user_id``, ordered from highest to lowest score.

  A ScaNN dot-product searcher over ``model.item_embeddings`` is built on every
  call, then queried with the vector produced by ``embedUser``.

  Args:
    user_id: raw user id.
    k: number of neighbours requested from ScaNN.

  Returns:
    List of raw game ids for the neighbours ScaNN returned.
  """
  index_builder = scann.scann_ops_pybind.builder(model.item_embeddings, k, "dot_product")
  searcher = index_builder.score_ah(1).build()

  neighbour_indices, neighbour_scores = searcher.search(embedUser(user_id))

  # Negate the scores so argsort's ascending order puts the best match first.
  ranking = np.argsort(-neighbour_scores)
  return [mapGameIndex[neighbour_indices[position]] for position in ranking]

# -----------------=[ For Fun ]=------------------

def similarGames(game_id, k):
  """Return the game ids ScaNN finds closest to ``game_id`` by dot product.

  The searcher is rebuilt over ``model.item_embeddings`` on every call. Results
  are returned in the order ScaNN reports them; scores are discarded.
  NOTE(review): the query game is itself part of the index, so it may appear
  in the result -- confirm whether callers want it filtered out.

  Args:
    game_id: raw game id; translated to a row of ``model.item_embeddings``
      through ``mapGameId``.
    k: number of neighbours requested from ScaNN.
  """
  searcher = (
    scann.scann_ops_pybind
    .builder(model.item_embeddings, k, "dot_product")
    .score_ah(1)
    .build()
  )

  query_vector = model.item_embeddings[mapGameId[game_id]]
  neighbour_indices, _ = searcher.search(query_vector)

  return [mapGameIndex[neighbour] for neighbour in neighbour_indices]

## 4. Extras

In [None]:
# ----------------=[ Testing Games ]=------------------

game_title = 'ELDEN RING'

# Resolve the title to its app_id (first matching row of the games table).
game_id = games.loc[games['title'] == game_title, 'app_id'].values[0]

# Ask for 10 neighbours of that game and print them by title.
predicted2 = similarGames(game_id, 10)

print([mapToTitle(similar_id) for similar_id in predicted2])

In [None]:
# Request 5 recommendations for user 1723232. As the last expression of the
# cell, the returned list is what the notebook would display.
recommend(1723232, 5)