In [1]:
import numpy as np
import pandas as pd
import scann
from lightfm import LightFM
from scipy.sparse import coo_matrix, save_npz, load_npz

2025-01-11 14:16:26.286585: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-11 14:16:26.287768: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-11 14:16:26.311566: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-11 14:16:26.312425: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
# -----------------=[ Data reading ]=----------------

usersM = pd.read_csv('./data/users.csv')
gamesM = pd.read_csv('./data/games.csv')
recommendationsM = pd.read_csv('./data/recommendations.csv')
gamesFeaturesM = pd.read_json('./data/games_metadata.json', lines=True)


In [5]:
gamesFeaturesM.head()

Unnamed: 0,app_id,description,tags
0,13500,Enter the dark underworld of Prince of Persia ...,"[Action, Adventure, Parkour, Third Person, Gre..."
1,22364,,[Action]
2,113020,Monaco: What's Yours Is Mine is a single playe...,"[Co-op, Stealth, Indie, Heist, Local Co-Op, St..."
3,226560,Escape Dead Island is a Survival-Mystery adven...,"[Zombies, Adventure, Survival, Action, Third P..."
4,249050,Dungeon of the Endless is a Rogue-Like Dungeon...,"[Roguelike, Strategy, Tower Defense, Pixel Gra..."


In [4]:
gamesM.head()

Unnamed: 0,app_id,title,date_release,win,mac,linux,rating,positive_ratio,user_reviews,price_final,price_original,discount,steam_deck
0,13500,Prince of Persia: Warrior Within™,2008-11-21,True,False,False,Very Positive,84,2199,9.99,9.99,0.0,True
1,22364,BRINK: Agents of Change,2011-08-03,True,False,False,Positive,85,21,2.99,2.99,0.0,True
2,113020,Monaco: What's Yours Is Mine,2013-04-24,True,True,True,Very Positive,92,3722,14.99,14.99,0.0,True
3,226560,Escape Dead Island,2014-11-18,True,False,False,Mixed,61,873,14.99,14.99,0.0,True
4,249050,Dungeon of the ENDLESS™,2014-10-27,True,True,False,Very Positive,88,8784,11.99,11.99,0.0,True


In [5]:
recommendationsM.head()

Unnamed: 0,app_id,helpful,funny,date,is_recommended,hours,user_id,review_id
0,975370,0,0,2022-12-12,True,36.3,51580,0
1,304390,4,0,2017-02-17,False,11.5,2586,1
2,1085660,2,0,2019-11-17,True,336.5,253880,2
3,703080,0,0,2022-09-23,True,27.4,259432,3
4,526870,0,0,2021-01-10,True,7.9,23869,4


In [6]:
# gamesMetadataM.head()

In [7]:
# -------------------=[ Mappers ]=-------------------

usersV = usersM['user_id'].unique()
gamesV = gamesM['app_id'].unique()

uID_to_idx = {userId: idx for idx, userId in enumerate(usersV)}
gID_to_idx = {gameId: idx for idx, gameId in enumerate(gamesV)}
idx_to_uID = {idx: userId for idx, userId in enumerate(usersV)}
idx_to_gID = {idx: gameId for idx, gameId in enumerate(gamesV)}

In [8]:
# -------------=[ User x Game Matrix ]=---------------

USERxGAME = load_npz('./data/rating_matrix_sparse.npz').tocsr()

In [9]:
# -------------=[ Game feature Matrix ]=-------------
# TODO

In [10]:
def fit(model, userxgame, epochs=10):
  for epoch in range(epochs):
    model.fit_partial(userxgame, epochs=1, num_threads=4)
    itemEmbeddings = model.item_embeddings
    userEmbeddings = model.user_embeddings
    itemBiases = model.item_biases
    userBiases = model.user_biases

    print(f'Epoch {epoch} completed!')
    
    np.save('./data/model/item_embeddings.npy', itemEmbeddings)
    np.save('./data/model/user_embeddings.npy', userEmbeddings)
    np.save('./data/model/item_biases.npy', itemBiases)
    np.save('./data/model/user_biases.npy', userBiases)

def loadModel():
  itemEmbeddings = np.load('./data/model/item_embeddings.npy')
  userEmbeddings = np.load('./data/model/user_embeddings.npy')
  itemBiases = np.load('./data/model/item_biases.npy')
  userBiases = np.load('./data/model/user_biases.npy')

  model = LightFM(learning_schedule='adagrad', loss='warp')

  model.item_embeddings = itemEmbeddings
  model.user_embeddings = userEmbeddings
  model.item_biases = itemBiases
  model.user_biases = userBiases

  return model

In [24]:
# ----------------=[ Model training ]=---------------

model = loadModel()
model = LightFM(learning_schedule='adagrad', loss='warp')
#fit(model, USERxGAME, epochs=10)

In [17]:
# -----------------=[ Prediction ]=------------------

def datasetPredict(userId, searcher, model):
  userIndex = uID_to_idx[userId]
  userVector = model.user_embeddings[userIndex]
  indecies, scores = searcher.search(userVector)

  return [idx_to_gID[idx] for idx in indecies]

In [18]:
def listUserGames(userId):
  userIndex = uID_to_idx[userId]
  userVector = USERxGAME[userIndex].toarray()[0]
  print(userVector.sum())
  games = []

  for idx, rating in enumerate(userVector):
    if rating == 1:
      games.append(idx_to_gID[idx])

  return games

In [19]:
def listUserRatings(userId):
  userIndex = uID_to_idx[userId]
  userVector = USERxGAME[userIndex].toarray()[0]
  
  return userVector

In [20]:
# ----------------=[ ScaNN ]=---------------

searcher = scann.scann_ops_pybind.builder(model.item_embeddings, 10, "dot_product").score_ah(2).build()

2025-01-11 13:18:26.420902: I scann/base/single_machine_factory_scann.cc:153] Single-machine AH training with dataset size = 50872, 20 thread(s).


In [21]:
def gID_to_name(gameId):
  return gamesM[gamesM['app_id'] == gameId]['title'].values[0]

In [22]:
# ------------------=[ Metrics ]=--------------------

def confusionMatrix(searcher, model):
  confusion = np.zeros((2,2))

  for userId in usersV:
    prediction = datasetPredict(userId, searcher, model)

In [26]:
# ------------------=[ Testing ]=--------------------

predicted = list(datasetPredict(4820647, searcher, model))
real = list(listUserGames(4820647))

fiveClosest = list(searcher.search(model.user_embeddings[uID_to_idx[4820647]])[0])

print(f'Predicted: {list(map(gID_to_name, predicted))}')
print(f'Real: {list(map(gID_to_name, real))}')
print(f'Intersection: {len(set(predicted).intersection(set(real)))}')
print(f'Intersection2: {len(set(fiveClosest).intersection(set(real)))}')

4
Predicted: ['Warframe', "Tom Clancy's Rainbow Six® Siege", 'Paladins®', 'Dota Underlords', 'Watch_Dogs® 2', 'Planet Zoo', 'Mirror 2: Project X', 'Scrap Mechanic', 'Cultivation Tales', 'Grand Theft Auto V']
Real: ['Dota Underlords', 'Screeps: World', 'Slay the Spire', 'Papers Please']
Intersection: 1
Intersection2: 0
