In [1]:
import numpy as np
import pandas as pd
import scann
from lightfm import LightFM
from tqdm import tqdm
from scipy.sparse import load_npz

2025-01-14 10:16:34.822859: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-14 10:16:34.923581: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-14 10:16:35.238932: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-14 10:16:35.241538: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# -----------------=[ Data reading ]=----------------

# Base data
users = pd.read_csv('./data/users.csv')
games = pd.read_csv('./data/games.csv')
recommendations = pd.read_csv('./data/recommendations.csv')
games_metadata = pd.read_json('./data/games_metadata.json', lines=True)

# User and Games interaction matrix.
# Converted to CSR right at load time: later code slices single rows
# (interactions[user_index]), which must not depend on a separate
# conversion cell having been executed first.
interactions = load_npz('./data/rating_matrix_sparse.npz').tocsr()

In [3]:
# -------------------=[ Mappers ]=-------------------

# Extract unique user and game IDs
unique_user_ids = users['user_id'].unique()
unique_game_ids = games['app_id'].unique()

# Create mappers (ID -> position and position -> ID).
# NOTE(review): positions are presumably meant to line up with the rows/columns
# of the interaction matrix and the embedding tables — confirm the matrix was
# built with this same ordering.
user_id_to_index = {user_id: idx for idx, user_id in enumerate(unique_user_ids)}
game_id_to_index = {game_id: idx for idx, game_id in enumerate(unique_game_ids)}
index_to_user_id = dict(enumerate(unique_user_ids))
index_to_game_id = dict(enumerate(unique_game_ids))

# app_id -> title table, built once so each lookup is a dict access instead of
# a scan over the whole `games` frame. drop_duplicates keeps the first row per
# app_id, matching the previous "first match" behaviour.
_title_by_game_id = games.drop_duplicates('app_id').set_index('app_id')['title'].to_dict()


def game_id_to_title(game_id):
  '''
  Returns the title of the game with the given app_id.

  Args:
    game_id (int): The game's app_id as found in `games['app_id']`.

  Returns:
    The value of the `title` column for the first row with that app_id.

  Raises:
    KeyError: If no game has this app_id.
  '''
  return _title_by_game_id[game_id]

In [4]:
# ------------=[ Training & persistence helpers ]=------------

def fit(model, interaction_matrix, epochs=10, num_threads=20, model_dir='./data/model'):
  '''
  Trains `model` one epoch at a time and saves its weights after every epoch.

  Args:
    model: Object exposing `fit_partial(...)` plus the `item_embeddings`,
      `user_embeddings`, `item_biases` and `user_biases` attributes.
    interaction_matrix: Interactions passed unchanged to `model.fit_partial`.
    epochs (int): Number of single-epoch `fit_partial` calls to make.
    num_threads (int): Thread count forwarded to `fit_partial`.
    model_dir (str): Directory receiving the four `.npy` files. The default
      matches the paths that `loadModel` reads from.

  Returns:
    None. The model is updated in place and the weights are written to disk.
  '''
  import os  # local import keeps this cell self-contained

  # Create the target directory before training so a missing folder cannot
  # make np.save fail after an epoch of work has already been done.
  os.makedirs(model_dir, exist_ok=True)

  for _ in tqdm(range(1, epochs + 1)):
    model.fit_partial(interaction_matrix, epochs=1, num_threads=num_threads)

    # Checkpoint after each epoch; every save overwrites the previous one.
    np.save(os.path.join(model_dir, 'item_embeddings.npy'), model.item_embeddings)
    np.save(os.path.join(model_dir, 'user_embeddings.npy'), model.user_embeddings)
    np.save(os.path.join(model_dir, 'item_biases.npy'), model.item_biases)
    np.save(os.path.join(model_dir, 'user_biases.npy'), model.user_biases)

def loadModel() -> LightFM:
  '''
  Rebuilds a LightFM model from the weight files under ./data/model.

  Only the embeddings and biases are restored.
  NOTE(review): optimizer state is not saved or restored here — confirm that
  `fit_partial` can resume from a model restored this way before relying on it.

  Returns:
    LightFM: A model (adagrad schedule, WARP loss) whose `item_embeddings`,
      `user_embeddings`, `item_biases` and `user_biases` hold the loaded arrays.

  Raises:
    ValueError: If the loaded arrays are inconsistent with each other.
  '''
  item_embeddings = np.load('./data/model/item_embeddings.npy')
  user_embeddings = np.load('./data/model/user_embeddings.npy')

  item_biases = np.load('./data/model/item_biases.npy')
  user_biases = np.load('./data/model/user_biases.npy')

  # The four files are written separately; refuse to build a model from a set
  # that does not fit together.
  if item_embeddings.shape[1] != user_embeddings.shape[1]:
    raise ValueError(
      f'Embedding width mismatch: items {item_embeddings.shape[1]} vs users {user_embeddings.shape[1]}'
    )
  if len(item_biases) != len(item_embeddings) or len(user_biases) != len(user_embeddings):
    raise ValueError('Bias vectors do not match the number of embedding rows')

  # Keep the model's declared dimensionality in sync with the loaded weights
  # instead of silently using the constructor default.
  model = LightFM(
    no_components=item_embeddings.shape[1],
    learning_schedule='adagrad',
    loss='warp',
  )

  model.item_embeddings = item_embeddings
  model.user_embeddings = user_embeddings
  model.item_biases = item_biases
  model.user_biases = user_biases

  return model

In [5]:
# ----------------=[ Model training ]=---------------

# Restore the previously saved embeddings and biases from disk rather than
# retraining from scratch.
model = loadModel()
# Uncomment to train for one more epoch (fit() also re-saves the weights).
# NOTE(review): loadModel restores only embeddings and biases — confirm that
# training can resume from that state before enabling this.
#fit(model, interactions, epochs=1)

In [6]:
# -----------------=[ Prediction ]=------------------

def recommend_with_user(user_id, searcher, model):
  '''
  Returns game IDs retrieved by `searcher` for the given user's embedding.

  Args:
    user_id: A key of `user_id_to_index`.
    searcher: Object whose `search(vector)` returns a pair; the first element
      is treated as item positions, the second is ignored.
    model: Object exposing `user_embeddings`, indexable by user position.

  Returns:
    list: Game IDs, in the order the searcher returned them.
  '''
  query_vector = model.user_embeddings[user_id_to_index[user_id]]
  neighbour_indices, _ = searcher.search(query_vector)

  return list(map(index_to_game_id.__getitem__, neighbour_indices))


def recommend_with_game(game_id, searcher, model):
  '''
  Returns game IDs retrieved by `searcher` for the given game's embedding.

  Args:
    game_id: A key of `game_id_to_index`.
    searcher: Object whose `search(vector)` returns a pair; the first element
      is treated as item positions, the second is ignored.
    model: Object exposing `item_embeddings`, indexable by item position.

  Returns:
    list: Game IDs, in the order the searcher returned them. Nothing is
      filtered out, so the query game itself may be among them.
  '''
  query_vector = model.item_embeddings[game_id_to_index[game_id]]
  neighbour_indices, _ = searcher.search(query_vector)

  return list(map(index_to_game_id.__getitem__, neighbour_indices))

In [7]:
# Convert to CSR so a single user's row can be sliced out by position
# (list_user_liked_games does interactions[user_index]).
interactions = interactions.tocsr()

In [8]:
def list_user_liked_games(user_id):
  '''
  Lists the games whose interaction value for this user is exactly 1.

  Args:
    user_id: A key of `user_id_to_index`.

  Returns:
    list: Game IDs in ascending item-position order.
  '''
  # Densify only this user's row of the global `interactions` matrix.
  ratings_row = interactions[user_id_to_index[user_id]].toarray()[0]

  return [
    index_to_game_id[position]
    for position, rating in enumerate(ratings_row)
    if rating == 1
  ]

In [9]:
def test_recommend(user_id, top_k=5):
  '''
  Counts how many of the model's top-scored games the user actually liked.

  Every item is scored with the global `model` as
  dot(item_embedding, user_embedding) + user_bias + item_bias, and the
  `top_k` highest-scoring items are compared with `list_user_liked_games`.

  Args:
    user_id (int): The user's ID (a key of `user_id_to_index`).
    top_k (int): How many top-scored games to compare. Defaults to 5.

  Returns:
    int: Number of games that are both in the top `top_k` and in the user's
      liked games.
  '''
  liked_game_ids = set(list_user_liked_games(user_id))  # Actual liked games
  user_index = user_id_to_index[user_id]

  # Extract user embedding and bias
  user_embedding = model.user_embeddings[user_index]
  user_bias = model.user_biases[user_index]

  # Compute scores for all items. The user bias is the same for every item,
  # so it shifts the scores without changing their ranking.
  scores = (
    np.dot(model.item_embeddings, user_embedding)
    + user_bias
    + model.item_biases
  )

  # Highest scores first; keep only the top_k item positions.
  top_indices = np.argsort(-scores)[:top_k]
  recommended_game_ids = {index_to_game_id[idx] for idx in top_indices}

  return len(recommended_game_ids & liked_game_ids)

In [18]:
# ----------------=[ ScaNN ]=---------------

# Build a dot-product searcher over the item embeddings.
# The positional arguments are unchanged: 20 goes to builder() (presumably the
# number of neighbours returned per query — TODO confirm against the ScaNN API)
# and 2 goes to score_ah().
searcher = (
  scann.scann_ops_pybind
  .builder(model.item_embeddings, 20, "dot_product")
  .score_ah(2)
  .build()
)

2025-01-14 10:20:23.914643: I scann/base/single_machine_factory_scann.cc:153] Single-machine AH training with dataset size = 50872, 20 thread(s).


In [11]:
# ----------------=[ Testing Users ]=------------------

# Spot-check one user: searcher-based recommendations vs. games they liked.
user_id = 7423292

# Both helpers already return fresh lists, so no extra copy is needed.
predicted = recommend_with_user(user_id, searcher, model)
real = list_user_liked_games(user_id)

print(f'Predicted: {list(map(game_id_to_title, predicted))}')
print(f'Real: {list(map(game_id_to_title, real))}')

# Number of recommended games the user had in fact liked.
print(f'Intersection: {len(set(predicted).intersection(set(real)))}')

Predicted: ['Under Lock', 'Deadly Flare', 'Jaws Of Extinction™', 'Remnants', 'MetaPhysical', 'Chicken Feet', 'SOMNI', 'Sinister Night', 'Marauders', 'Sign of Silence']
Real: ['Sinister Night', 'Breakwaters', 'The Callisto Protocol™', 'Deadly Flare', 'Marauders', 'Jaws Of Extinction™', 'Hood: Outlaws & Legends', 'Sign of Silence']
Intersection: 5


In [14]:
def weight(game_id):
  '''Placeholder that is not implemented yet; returns None for any game_id.'''
  return None

In [19]:
# ----------------=[ Testing Games ]=------------------

# Spot-check item-to-item retrieval for a single title.
game_title = 'Half-Life 2'

# Resolve the title to an app_id, taking the first row with that exact title.
game_id = games.loc[games['title'] == game_title, 'app_id'].values[0]

predicted = recommend_with_game(game_id, searcher, model)

print([game_id_to_title(predicted_id) for predicted_id in predicted])

['Half-Life 2: Episode Two', 'Half-Life: Opposing Force', 'Half-Life 2', 'Half-Life 2: Episode One', 'Half-Life: Blue Shift', 'Half-Life 2: Lost Coast', 'Half-Life', 'Black Mesa', 'Half-Life: Source', 'Half-Life 2: DownFall', 'Half-Life 2: Deathmatch', 'Half-Life: Caged', 'MINERVA', 'Half-Life 2: Year Long Alarm', 'Prospekt', 'Transmissions: Element 120', 'Amalgam', "Half-Life 2: Genry's Great Escape From City 13", 'Team Fortress Classic', 'Deathmatch Classic']
