# GAME RECOMMENDATION SYSTEM

## PREPROCESSING

In [None]:
import os
import json
import pandas as pd
import numpy as np
from google.colab import drive, userdata, files

from sklearn.preprocessing import MultiLabelBinarizer, MinMaxScaler
from scipy.sparse import coo_matrix, csr_matrix
!pip install lightfm
from lightfm import LightFM
from lightfm.cross_validation import random_train_test_split
from lightfm.evaluation import precision_at_k

Collecting lightfm
  Downloading lightfm-1.17.tar.gz (316 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/316.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m112.6/316.4 kB[0m [31m6.5 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m307.2/316.4 kB[0m [31m6.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m316.4/316.4 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: lightfm
  Building wheel for lightfm (setup.py) ... [?25l[?25hdone
  Created wheel for lightfm: filename=lightfm-1.17-cp311-cp311-linux_x86_64.whl size=831160 sha256=dfb45b38a4809a28b2b02f948a8ba76abfac4164cb82abaecb90a2cec43f77ff
  Stored in directory: /root/.cache/pip/wheels/b9/0d/8a/0729d2e6e3ca2a898ba55201f905da7db

In [None]:
# Mount Google Drive so the project folder is reachable from the Colab VM.
drive.mount('/content/drive')
drive_base_path = '/content/drive/My Drive/503Nproj/game-rec'
file_path = os.path.join(drive_base_path, 'synthetic_training_data.json')

# JSON is UTF-8 by specification; state the encoding explicitly instead of
# relying on the platform's locale default.
with open(file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

Mounted at /content/drive


In [None]:
# Parallel accumulators for the interaction matrix: position k across the
# three lists describes one (player, game, hours) record.
player_ids, game_ids, hours_played = [], [], []

# Per-game metadata, keyed by game id.
game_features = dict()

In [None]:
# data processing
# Walk the nested {player: {"games": {game_id: {...}}}} records once.
for player_key, profile in data.items():
    for gid, info in profile["games"].items():
        # interaction matrix: one (player, game, hours) triple per entry
        player_ids.append(player_key)
        game_ids.append(gid)
        hours_played.append(info["hours"])

        # collection of game features: metadata is captured only the first
        # time a game id is encountered
        if gid in game_features:
            continue
        game_features[gid] = dict(
            rating_ratio=info["rating_ratio"],
            price=float(info["price"]),
            genre=info["genre"],
            tags=info["tags"],
        )

In [None]:
# creation of interaction matrix
# Index ids in first-seen order. dict.fromkeys de-duplicates while keeping
# insertion order, so:
#   * the id -> index mapping is identical on every run (iterating a set of
#     strings is not stable across kernel restarts), and
#   * column j of the matrix is the j-th game added to `game_features`, i.e.
#     row j of the feature table built from that dict. LightFM pairs
#     item_features rows with interaction-matrix columns purely by position,
#     so the two orderings must agree.
player_idx = {player: i for i, player in enumerate(dict.fromkeys(player_ids))}
game_idx = {game: i for i, game in enumerate(dict.fromkeys(game_ids))}
rows = [player_idx[p] for p in player_ids]
cols = [game_idx[g] for g in game_ids]
# Pass the hours directly rather than rebinding `data`: that name still holds
# the raw JSON records iterated by the processing loop, and overwriting it
# made that cell fail when re-run.
interaction_matrix = coo_matrix(
    (hours_played, (rows, cols)),
    shape=(len(player_idx), len(game_idx)),
).tocsr()

In [None]:
# game features ->> DataFrame
# One row per game id (the dict keys become the index); the columns come from
# the per-game dicts collected above: rating_ratio, price, genre, tags.
game_df = pd.DataFrame.from_dict(game_features, orient='index')

In [None]:
# check extracted genres & tags
# Flatten every game's label lists into de-duplicated vocabularies.
all_genres = {label for entry in game_features.values() for label in entry["genre"]}
all_tags = {label for entry in game_features.values() for label in entry["tags"]}

genre_total = len(all_genres)
tag_total = len(all_tags)
print("Expected number of unique genres:", genre_total)  # should be 12
print("Expected number of unique tags:", tag_total)      # should be 242

Expected number of unique genres: 12
Expected number of unique tags: 242


In [None]:
# one-hot encoding of genres & tags
# Each binarizer learns its own label vocabulary and emits one 0/1 column per
# label. Both frames reuse game_df's index so they can be joined back onto it.
mlb_genre = MultiLabelBinarizer()
genre_matrix = mlb_genre.fit_transform(game_df['genre'])
genre_columns = [f"genre_{col}" for col in mlb_genre.classes_]
genre_df = pd.DataFrame(genre_matrix, columns=genre_columns, index=game_df.index)

mlb_tags = MultiLabelBinarizer()
tags_matrix = mlb_tags.fit_transform(game_df['tags'])
tags_columns = [f"tag_{col}" for col in mlb_tags.classes_]
tags_df = pd.DataFrame(tags_matrix, columns=tags_columns, index=game_df.index)

In [None]:
# sanity check: vocabulary sizes learned by the fitted binarizers
for caption, binarizer in (
    ("Genres found by MultiLabelBinarizer:", mlb_genre),  # should be 12
    ("Tags found by MultiLabelBinarizer:", mlb_tags),     # should be 242
):
    print(caption, len(binarizer.classes_))

Genres found by MultiLabelBinarizer: 12
Tags found by MultiLabelBinarizer: 242


In [None]:
# Remove any one-hot columns left over from a previous execution of this cell
# so it can be re-run without join() raising on overlapping column names.
# On a fresh run nothing matches and nothing is dropped.
encoded_columns = [*genre_df.columns, *tags_df.columns]
game_df = game_df.drop(columns=encoded_columns, errors='ignore').join(genre_df).join(tags_df)

# On a fresh run: 4 original columns (rating_ratio, price, genre, tags)
# + 12 genre indicators + 242 tag indicators. The raw genre/tags list columns
# are still present here; they are only dropped in a later step.
print("Game features shape after merge:", game_df.shape)  # should be (99, 258)


Game features shape after merge: (99, 258)


In [None]:
# drop original lists
# Reassign instead of mutating in place, and tolerate the columns already
# being gone, so re-running this cell does not raise KeyError.
game_df = game_df.drop(columns=['genre', 'tags'], errors='ignore')

# price & rating ratio normalization
# Min-max scale the two numeric columns to [0, 1], the same range as the 0/1
# indicator columns.
scaler = MinMaxScaler()
game_df[['rating_ratio', 'price']] = scaler.fit_transform(game_df[['rating_ratio', 'price']])

print("Interaction matrix shape:", interaction_matrix.shape)
print("Game features shape:", game_df.shape)

Interaction matrix shape: (50000, 99)
Game features shape: (99, 256)


In [None]:
# CSR copy of the feature table for LightFM; row order follows game_df.index.
game_features_sparse = csr_matrix(game_df.to_numpy())

In [None]:
# train-test split (80-20)
# Seed both the split and the model so the metrics reported below can be
# reproduced. NOTE(review): with num_threads > 1 LightFM's parallel updates
# may still cause small run-to-run differences - confirm if exact
# reproducibility is required.
SEED = 42
train, test = random_train_test_split(interaction_matrix, test_percentage=0.2, random_state=SEED)

# training over 10 epochs
model = LightFM(loss='warp', random_state=SEED)
model.fit(train, epochs=10, num_threads=4, item_features=game_features_sparse)

<lightfm.lightfm.LightFM at 0x799bf5289a90>

In [None]:
# model evaluation
train_precision = precision_at_k(model, train, k=5, item_features=game_features_sparse).mean()
# Pass the training interactions when scoring the held-out set so each
# player's already-known positives are left out of the ranking. Otherwise
# those games occupy the top-k slots and the test precision is understated.
test_precision = precision_at_k(
    model,
    test,
    train_interactions=train,
    k=5,
    item_features=game_features_sparse,
).mean()
print(f"Train precision at k=5: {train_precision:.4f}")
print(f"Test precision at k=5: {test_precision:.4f}")

Train precision at k=5: 0.3638
Test precision at k=5: 0.0213


In [None]:
# recommend games
def recommend_games(player_id, model, interaction_matrix, game_features_sparse, game_df, top_n=5):
    """Return the ids of the highest-scoring games for one player.

    Args:
        player_id: Integer user index, forwarded unchanged to ``model.predict``.
        model: Fitted model exposing
            ``predict(user_ids, item_ids, item_features=...)``.
        interaction_matrix: Player x game matrix; only its column count is
            read, to enumerate the item indices to score.
        game_features_sparse: Item-feature matrix forwarded to
            ``model.predict``.
        game_df: Frame whose index holds the game ids. Position ``i`` of the
            index is taken to be item ``i`` of the model, so its order must
            match the item order used for training.
        top_n: Maximum number of games to return. ``None`` returns every game;
            zero or a negative value returns an empty list.

    Returns:
        List of game ids, best first. Shorter than ``top_n`` when fewer games
        exist.
    """
    # A negative slice bound would silently drop items from the end of the
    # ranking instead of limiting it, so treat non-positive requests as "none".
    if top_n is not None and top_n <= 0:
        return []

    item_ids = np.arange(interaction_matrix.shape[1])
    scores = model.predict(player_id, item_ids, item_features=game_features_sparse)
    top_games = np.argsort(-scores)[:top_n]  # get top "N" game indices

    # Index the labels in one vectorised lookup rather than materialising the
    # whole index as a list for every recommended item.
    return game_df.index[top_games].tolist()

# example: player 0. This is the integer user index handed to model.predict,
# not a raw player key from the JSON.
player_to_recommend = 0
recommended_games = recommend_games(
    player_id=player_to_recommend,
    model=model,
    interaction_matrix=interaction_matrix,
    game_features_sparse=game_features_sparse,
    game_df=game_df,
)
print("Recommended game IDs:", recommended_games)




Recommended game IDs: ['359550', '477160', '570', '8930', '227940']


In [None]:
# Final shape check: matrix columns and feature rows should both equal the
# number of games.
for caption, shape in (
    ("Interaction matrix Shape:", interaction_matrix.shape),
    ("Game features Shape:", game_df.shape),
):
    print(caption, shape)

Interaction matrix Shape: (50000, 99)
Game features Shape: (99, 256)
