# LightFM Game Recommendation System

## 1. Load dependencies

In [1]:
# -----------------=[ Load Dependencies ]=----------------

import numpy as np
import pandas as pd
import scann
from lightfm import LightFM
from lightfm.data import Dataset
from tqdm import tqdm
from scipy.sparse import load_npz
import pickle
from sklearn.preprocessing import MultiLabelBinarizer
from lightfm.evaluation import precision_at_k, recall_at_k
from lightfm.cross_validation import random_train_test_split

2025-01-25 01:04:00.954902: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-25 01:04:00.956424: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-25 01:04:00.982877: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-25 01:04:00.983426: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## 2. Load preprocessed data
We have original data:
1. games.csv
2. users.csv
3. recommendations.csv
4. games_metadata.json

...

Then we preprocessed the original data by getting rid of games with less than 15 reviews, in order to decrease the sparisty and maintain only valuable information.

Preprocessed data:
1. rating_matrix_sparse.npz
2. test_matrix.npz
3. train_matrix.npz
4. train_and_test.npz


Now let's jump into the code...

In [2]:
# -----------------=[ Data reading ]=----------------

users = pd.read_csv('./data/users.csv')
games = pd.read_csv('./data/games.csv')
recommendations = pd.read_csv('./data/recommendations.csv')
gamesMetadata = pd.read_json('./data/games_metadata.json', lines=True)

interactions = load_npz('./data/train_and_test.npz').tocsr()

# Test users with 40% of history (This is used for testing)
rest_test = load_npz('./data/rest_test.npz').tocsr()

# Test users with 100% history (Used for getting user indicies)
test_matrix = load_npz('./data/test_matrix.npz').tocsr()

## 3. Mappers
Then we create mappers:   
index -> app_id / user_id  
app_id / user_id -> index

This is mostly used for creating sparse matrices due to the fact that app_id or user_id can be very high and we want to keep everything in order. So we just use their indexes as cooridinates in sparse matrices.

In [3]:
# -------------------=[ Mappers ]=-------------------

userIds = users['user_id'].unique()
gameIds = games['app_id'].unique()

mapUserId = {user_id: idx for idx, user_id in enumerate(userIds)}
mapGameId = {game_id: idx for idx, game_id in enumerate(gameIds)}
mapUserIndex = {idx: user_id for user_id, idx in mapUserId.items()}
mapGameIndex = {idx: game_id for game_id, idx in mapGameId.items()}

mapToTitle = lambda game_id: games[games['app_id'] == game_id]['title'].values[0]

## 4. Game features
We use game features as additional way to keep similar games closely together. This is widely used in LightFM model

In [4]:
# -------------------=[ Game Features ]=-------------------

mlb = MultiLabelBinarizer()
feature_matrix = mlb.fit_transform(gamesMetadata['tags'])

feature_matrix_df = pd.DataFrame(feature_matrix, columns=mlb.classes_)

dataset = Dataset()

dataset.fit(
  items=gameIds,
  users=userIds,
  item_features=feature_matrix_df
)

item_features = dataset.build_item_features(
    (row['app_id'], row['tags']) for _, row in gamesMetadata.iterrows()
)

## 5. Model training and fine tuning
With these functions we train our LightFM model. We also can tweak a bunch of parameters in order to squeeze out better metrics. More about them later on.

In [4]:
# -------------------=[ Model training ]=-------------------
def fit(model, name, epochs=100):
  for epoch in range(1, epochs + 1):
    model.fit_partial(interactions, epochs=1, num_threads=15)

    val_recall = recall_at_k(
      model,
      rest_test,
      k=20,
      num_threads=15
    ).mean()

    print(f"Epoch {epoch}: Value of Recall@20 = {val_recall:.4f}")

    # with open(f'./data/model/lightfm_{name}.pkl', 'wb') as f:
    #   pickle.dump(model, f)


def loadModel(name) -> LightFM:
  with open(f'./data/model/lightfm_{name}.pkl', 'rb') as f:
    model = pickle.load(f)
    return model

In [4]:
from hyperopt import hp
from hyperopt import STATUS_OK

space = {
    'no_components': hp.choice('no_components', [32, 64, 100, 128]),
    'loss': hp.choice('loss', ['warp', 'warp-kos', 'bpr']),
    'learning_rate': hp.loguniform('learning_rate', np.log(1e-4), np.log(0.1)),
    'k': hp.choice('k', [10, 15, 20]),
    'user_alpha': hp.loguniform('user_alpha', np.log(1e-6), np.log(1e-3)),
    'item_alpha': hp.loguniform('item_alpha', np.log(1e-6), np.log(1e-3)),
}


In [6]:
def objective(params):
    # Initialize model with sampled hyperparameters
    model = LightFM(
        no_components=params['no_components'],
        loss=params['loss'],
        k=params['k'],
        user_alpha=params['user_alpha'],
        item_alpha=params['item_alpha'],
        learning_rate=params['learning_rate'],
        random_state=42
    )
    
    # Train for fewer epochs during hyperparameter search (e.g., 30)
    for _ in tqdm(range(30)):
        model.fit_partial(interactions, num_threads=20)
    
    # Calculate validation recall
    val_recall = recall_at_k(model, rest_test, k=20, num_threads=20).mean()
    
    # Hyperopt minimizes the loss, so return negative recall
    return {
        'loss': -val_recall,
        'status': STATUS_OK,
        'params': params,
    }

In [None]:
from hyperopt import fmin, tpe, Trials

trials = Trials()  # Track results
best_params = fmin(
    fn=objective,      # Objective function
    space=space,       # Search space
    algo=tpe.suggest,  # Optimization algorithm (Tree-structured Parzen Estimator)
    max_evals=50,      # Number of trials (increase for better results)
    trials=trials,     # Store results
    verbose=True,      # Show progress
)

print("Best hyperparameters:", best_params)

  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]

  0%|          | 0/30 [00:00<?, ?it/s]
  3%|3         | 1/30 [00:12<06:08, 12.71s/it]
  7%|6         | 2/30 [00:21<04:54, 10.51s/it]
 10%|#         | 3/30 [00:30<04:21,  9.69s/it]
 13%|#3        | 4/30 [00:39<04:03,  9.37s/it]
 17%|#6        | 5/30 [00:48<03:52,  9.29s/it]
 20%|##        | 6/30 [00:57<03:41,  9.25s/it]
 23%|##3       | 7/30 [01:06<03:33,  9.27s/it]
 27%|##6       | 8/30 [01:15<03:22,  9.22s/it]
 30%|###       | 9/30 [01:25<03:14,  9.24s/it]
 33%|###3      | 10/30 [01:34<03:05,  9.30s/it]
 37%|###6      | 11/30 [01:44<02:57,  9.33s/it]
 40%|####      | 12/30 [01:53<02:47,  9.31s/it]
 43%|####3     | 13/30 [02:03<02:40,  9.42s/it]
 47%|####6     | 14/30 [02:12<02:31,  9.48s/it]
 50%|#####     | 15/30 [02:22<02:23,  9.59s/it]
 53%|#####3    | 16/30 [02:32<02:16,  9.77s/it]
 57%|#####6    | 17/30 [02:42<02:05,  9.63s/it]
 60%|######    | 18/30 [02:51<01:53,  9.45s/it]
 63%|######3   | 19/30 [03:00<01:44,  9.51s/it]
 67%|######6   | 20/30 [03:10<01:35,  9.54s/it]
 70%|#####

  2%|▏         | 1/50 [04:44<3:52:15, 284.40s/trial, best loss: -0.13266423240325953]

  0%|          | 0/30 [00:00<?, ?it/s]
  3%|3         | 1/30 [00:11<05:39, 11.72s/it]
  7%|6         | 2/30 [00:20<04:36,  9.86s/it]
 10%|#         | 3/30 [00:28<04:09,  9.25s/it]
 13%|#3        | 4/30 [00:37<03:52,  8.95s/it]
 17%|#6        | 5/30 [00:45<03:41,  8.86s/it]
 20%|##        | 6/30 [00:54<03:29,  8.75s/it]
 23%|##3       | 7/30 [01:03<03:19,  8.68s/it]
 27%|##6       | 8/30 [01:11<03:10,  8.67s/it]
 30%|###       | 9/30 [01:20<03:01,  8.64s/it]
 33%|###3      | 10/30 [01:28<02:52,  8.61s/it]
 37%|###6      | 11/30 [01:37<02:43,  8.58s/it]
 40%|####      | 12/30 [01:45<02:34,  8.57s/it]
 43%|####3     | 13/30 [01:54<02:25,  8.56s/it]
 47%|####6     | 14/30 [02:02<02:16,  8.55s/it]
 50%|#####     | 15/30 [02:11<02:08,  8.55s/it]
 53%|#####3    | 16/30 [02:20<01:59,  8.54s/it]
 57%|#####6    | 17/30 [02:28<01:51,  8.55s/it]
 60%|######    | 18/30 [02:37<01:42,  8.57s/it]
 63%|######3   | 19/30 [02:45<01:34,  8.57s/it]
 67%|######6   | 20/30 [02:54<01:25,  8.56s/it]
 70%|#####

  4%|▍         | 2/50 [09:04<3:36:09, 270.20s/trial, best loss: -0.13266423240325953]

  0%|          | 0/30 [00:00<?, ?it/s]


In [None]:
final_model = LightFM(
    no_components=best_params['no_components'],
    loss=best_params['loss'],
    k=best_params['k'],
    user_alpha=best_params['user_alpha'],
    item_alpha=best_params['item_alpha'],
    learning_rate=best_params['learning_rate'],
    random_state=42
)

# Train longer (e.g., 100 epochs)
fit(final_model, name='tuned_model', epochs=100)

In [5]:
model = LightFM(
  no_components=64,
  loss='warp',
  k=20,
  learning_rate=0.01,
  random_state=42
)

fit(model, '3', 300)

Epoch 1: Value of Recall@20 = 0.0833
Epoch 2: Value of Recall@20 = 0.0920
Epoch 3: Value of Recall@20 = 0.0997
Epoch 4: Value of Recall@20 = 0.1042
Epoch 5: Value of Recall@20 = 0.1074
Epoch 6: Value of Recall@20 = 0.1081
Epoch 7: Value of Recall@20 = 0.1115
Epoch 8: Value of Recall@20 = 0.1149
Epoch 9: Value of Recall@20 = 0.1162
Epoch 10: Value of Recall@20 = 0.1170
Epoch 11: Value of Recall@20 = 0.1182
Epoch 12: Value of Recall@20 = 0.1189
Epoch 13: Value of Recall@20 = 0.1197
Epoch 14: Value of Recall@20 = 0.1203
Epoch 15: Value of Recall@20 = 0.1228
Epoch 16: Value of Recall@20 = 0.1245
Epoch 17: Value of Recall@20 = 0.1257
Epoch 18: Value of Recall@20 = 0.1261
Epoch 19: Value of Recall@20 = 0.1278
Epoch 20: Value of Recall@20 = 0.1284
Epoch 21: Value of Recall@20 = 0.1293
Epoch 22: Value of Recall@20 = 0.1296
Epoch 23: Value of Recall@20 = 0.1292
Epoch 24: Value of Recall@20 = 0.1299
Epoch 25: Value of Recall@20 = 0.1309
Epoch 26: Value of Recall@20 = 0.1317
Epoch 27: Value of Re

KeyboardInterrupt: 

## 6. Model prediction / recommendation

In [5]:
model = loadModel('64')

In [6]:
# Calculate popularity as interaction counts (train set only)
train_popularity = np.array(interactions.sum(axis=0)).flatten()

# Apply Laplace smoothing to avoid zero-division errors
train_popularity += 1

# Normalize to [0,1] using log scaling (handles long-tail distribution)
log_popularity = np.log(train_popularity)
popularity_weights = (log_popularity - log_popularity.min()) / (log_popularity.max() - log_popularity.min())

In [7]:
def recommendD(user_id, k, alpha=0.8):
  # Get items already interacted with IN TRAINING DATA
  _, known_items = interactions[user_id].nonzero()
  
  # Get all possible candidate items
  all_items = np.arange(interactions.shape[1])
  candidate_items = np.setdiff1d(all_items, known_items)
  
  # Score only unseen items
  scores = model.predict(
      user_ids=np.full(len(candidate_items), user_id),
      item_ids=candidate_items,
      num_threads=20
  )
  
  # Get popularity scores for candidates
  pop_scores = popularity_weights[candidate_items]
  
  # Blend scores
  combined_scores = alpha * scores + (1 - alpha) * pop_scores
  
  # Get top-k items
  top_k_indices = np.argsort(-scores)[:k]
  return candidate_items[top_k_indices]

In [11]:
# -----------------=[ Prediction ]=------------------

def listUserLikedGames(user_id, matrix):
  user_index = mapUserId[user_id]
  user_ratings = matrix[user_index].toarray()[0]

  games = []

  for idx, rating in enumerate(user_ratings):
    if rating == 1:
      games.append(mapGameIndex[idx])

  return games


def embed_user(user_id):
  user_games = listUserLikedGames(user_id, interactions)

  if len(user_games) == 0:
    return np.zeros(64)
  
  game_indices = [mapGameId[game_id] for game_id in user_games]
  game_embeddings = model.item_embeddings[game_indices]

  user_embedding = np.mean(game_embeddings, axis=0)

  
  return user_embedding

searcher = scann.scann_ops_pybind.builder(model.item_embeddings, 20, "dot_product").score_ah(6, hash_type="lut256", training_iterations=11).build()
def recommend2(user_id, k):
    user_embedding = embed_user(user_id)
    indices, scores = searcher.search(user_embedding)

    sorted_indices = np.argsort(-scores)
    sorted_item_indices = [indices[i] for i in sorted_indices]

    return sorted_item_indices

2025-01-24 16:50:48.275478: I scann/base/single_machine_factory_scann.cc:153] Single-machine AH training with dataset size = 50872, 20 thread(s).


## 7. Various metrics testing

In [8]:
def ndcg_k(test_interactions, train_interactions, k=20):
  test_users = np.unique(test_interactions.nonzero()[0])
  ndcg_scores = []

  for user_id in tqdm(test_users):
    true_positives = test_interactions[user_id].indices
    train_positives = train_interactions[user_id].indices
    true_positives = np.setdiff1d(true_positives, train_positives)

    recommended_items = recommend(user_id, k)

    relevance = np.isin(recommended_items, true_positives).astype(float)
    
    dcg = 0.0
    for pos, rel in enumerate(relevance):
      dcg += rel / np.log2(pos + 2)
    
    ideal_relevance = np.zeros_like(relevance)
    ideal_relevance[:min(k, len(true_positives))] = 1.0
    idcg = 0.0
    for pos, rel in enumerate(ideal_relevance):
      idcg += rel / np.log2(pos + 2)
    
    ndcg = (dcg / idcg) if idcg > 0 else 0.0
    ndcg_scores.append(ndcg)

  return np.mean(ndcg_scores) if ndcg_scores else 0.0

In [9]:
def hitrate_k(test_interactions, train_interactions, k=20):
  test_users = np.unique(test_interactions.nonzero()[0])
  hits = 0

  for user_id in tqdm(test_users):
    true_positives = test_interactions[user_id].indices
    train_positives = train_interactions[user_id].indices
    true_positives = np.setdiff1d(true_positives, train_positives)
    
    recommended_items = recommend(user_id, k)

    if len(np.intersect1d(recommended_items, true_positives)) > 0:
      hits += 1

  return hits / len(test_users) if len(test_users) > 0 else 0.0

In [10]:
import recmetrics

def test_metrics(test_interactions, train_interactions, k):
  test_users = np.unique(test_interactions.nonzero()[0])
  y_true = []
  y_pred = []

  for user_id in test_users:
    true_positives = test_interactions[user_id].indices
    train_positives = train_interactions[user_id].indices
    
    y_true.append(np.setdiff1d(true_positives, train_positives))
    y_pred.append(recommend(user_id, k))

  recall = recmetrics.recommender_recall(y_pred, y_true)
  hitrate = hitrate_k(test_interactions, train_interactions, k)
  precision = recmetrics.recommender_precision(y_pred, y_true)  
  ndcg = ndcg_k(test_interactions, train_interactions, k)

  return recall, hitrate, precision, ndcg


recall, hitrate, precision, ndcg = test_metrics(rest_test, interactions, 20)


print(f'Test RECALL@{20}: {recall}')
print(f'Test HITRATE@{20}: {hitrate}')
print(f'Test PRECISION@{20}: {precision}')
print(f'Test NDCG@{20}: {ndcg}')

100%|██████████| 1932/1932 [00:21<00:00, 88.60it/s]
100%|██████████| 1932/1932 [00:21<00:00, 87.86it/s]

Test RECALL@20: 0.1473103519668737
Test HITRATE@20: 0.5419254658385093
Test PRECISION@20: 0.05372670807453417
Test NDCG@20: 0.10876538702622637





In [14]:
model = loadModel('64')

In [9]:
import sys
sys.path.append("../")  # Replace with your actual path

from metrics import *

class modelL:
  def __init__(self, model):
    self.model = model
  
  def recommend(self, user_id, k):
    return recommendD(user_id, k, 0.8)

print(test_metrics(modelL(model), 20))

100%|██████████| 1932/1932 [00:19<00:00, 97.98it/s]
100%|██████████| 1932/1932 [00:20<00:00, 94.72it/s]

(0.1473103519668737, 0.5419254658385093, 0.05372670807453417, 0.10876538702622637)



