In [1]:
# Import libraries

import warnings
from resources import *
import datetime
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from keras import metrics, losses, optimizers
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from keras import regularizers

warnings.filterwarnings("ignore")



In [2]:
!python resources.py



In [3]:
# # Load user items data
# recdata = pd.read_csv('recdata.csv', index_col=0)
# recdata = recdata.rename(columns={'variable': 'id', 'value': 'owned'})
# recdata.head()

## Preprocessing

In [4]:
# Use create_interaction_matrix function
data = create_interaction_matrix(
    df=pd.read_csv('recdata.csv', index_col=0).rename(columns={'variable': 'id', 'value': 'owned'}),
    user_col="uid", item_col="id", rating_col="owned"
)
data.head()

id,10,20,30,40,50,60,70,80,130,220,...,526790,527340,527440,527510,527520,527810,527890,527900,528660,530720
uid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Dictionaries

In [5]:
# Create user dictionary using helper function
# user_dict = create_user_dict(interactions=interactions)

# # Load games data
# gamesdf = pd.read_csv('gamesdata.csv', index_col=0)

# # Create game dictionary using helper function
# games_dict = create_item_dict(df=gamesdf, id_col='id', name_col='title')

## Train TF model

In [6]:
# Get recs functions

def get_game_title(game_id, game_data):
    title = None
    if game_id in game_data.index:
        title = game_data.loc[game_id, 'title']
    return title

def get_recommendations(model, data, user_id, n, scaler, pca, item_titles):
    # Get the index of the user in the data
    user_index = data.index.get_loc(user_id)

    # Get the user's interactions with the games
    interactions = data.iloc[user_index].values

    # Scale and reduce the interactions using the same scaler and PCA used for training
    interactions_scaled = scaler.transform([interactions])
    interactions_reduced = pca.transform(interactions_scaled)

    # Generate the input features for the model
    input_features = interactions_reduced.reshape(1, -1)

    # Make predictions using the model
    predictions = model.predict(input_features)[0]

    # Get the indices of recommendations
    top_indices = predictions.argsort()[::-1]


    # Print the user's games and the top N recommendations
    print("User's games:")
    user_games = data.iloc[user_index][data.iloc[user_index] != 0].index.tolist()

    for game_id in user_games:
        game_title = item_titles.get(game_id)
        if game_title is not None:
            print("- {}".format(game_title))


    # Get the titles of the top N recommendations
    game_titles = []
    for i in top_indices:
        game_id = data.columns[i]
        game_title = item_titles.get(game_id)
        if game_title is not None and game_id not in user_games:
            game_titles.append(game_title)    

    print("Top recommendations:")
    for i, game_title in enumerate(game_titles):
        print("{}. {}".format(i + 1, game_title))



In [14]:
def recall_at_k(y_true, y_pred, k=5):
    # Sort the predicted scores in descending order and get the top k indices
    _, indices = tf.math.top_k(y_pred, k=k)
    
    # Convert the indices to a boolean tensor indicating whether each game was in the top k
    top_k = tf.reduce_any(tf.equal(indices, tf.expand_dims(y_true, axis=-1)), axis=-1)
    
    # Compute the recall by taking the mean of the boolean tensor
    recall = tf.reduce_mean(tf.cast(top_k, dtype=tf.float32))
    
    return recall


def precision_at_k(y_true, y_pred, k=5):
    # Get top-k predicted items
    y_pred_top_k = tf.math.top_k(y_pred, k=k).indices
    # Convert predictions to binary 0/1 values
    y_pred_binary = tf.cast(tf.greater(y_pred, 0.5), tf.int32)
    # Calculate true positives by taking element-wise multiplication of y_true and y_pred_binary
    true_positives = tf.reduce_sum(tf.cast(tf.multiply(y_true, y_pred_binary), tf.float32), axis=1)
    # Calculate precision at k as the ratio of true positives to k
    precision_at_k = tf.reduce_mean(tf.divide(true_positives, k))
    return precision_at_k


In [8]:
# Prepare data & create model
train_data, test_data = train_test_split(
    data, test_size=0.15, random_state=42
)

# Create sparse matrices for evaluation
train_sparse = sparse.csr_matrix(train_data.values)

# Add X users to Test so that the number of rows in Train match Test
N = train_data.shape[0]                      # Rows in Train set
n, m = train_data.shape                       # Rows & columns in Test set
z = np.zeros([(N - n), m])              # Create the necessary rows of zeros with m columns


test = np.vstack((test_data, z))        # Vertically stack Test on top of the blank users
test_sparse = sparse.csr_matrix(test)   # Convert back to sparse

print(train_sparse.get_shape())

(22281, 8657)


In [9]:
# Define the model architecture
model = Sequential([
    Dense(128, activation='relu', input_shape=(data.shape[1],)),
    Dense(64, activation='relu',kernel_regularizer=regularizers.l2(0.01)),
    Dense(data.shape[1], activation='sigmoid')
])


In [12]:
# Split train_data into input and target
train_input = train_data.values
train_target = train_sparse.todense()

# Split test_data into input and target
test_input = test_data.values
test_target = test_sparse.todense()


In [15]:
# Train model
from keras.metrics import Precision, Recall, AUC

# Log tensorboard
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)


metr = (
    precision_at_k,
    recall_at_k
)
loss = (losses.BinaryCrossentropy())
optimizer = optimizers.Adam(learning_rate=0.005)

# Compile the model
model.compile(optimizer=optimizer, loss=loss, metrics=metr)

# Train the model on the training data
model.fit(
    train_input,
    train_target,
    epochs=400,
    batch_size=16,
    validation_data=(test_input, test_target),
    callbacks=[tensorboard_callback],
)


Epoch 1/400


TypeError: in user code:

    File "e:\DS-projects\venv38-64\lib\site-packages\keras\engine\training.py", line 1249, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\al\AppData\Local\Temp\ipykernel_16244\3869483884.py", line 20, in precision_at_k  *
        true_positives = tf.reduce_sum(tf.cast(tf.multiply(y_true, y_pred_binary), tf.float32), axis=1)

    TypeError: Input 'y' of 'Mul' Op has type int32 that does not match type float32 of argument 'x'.


In [None]:
# Fit the model on the training data
history = model.fit(train_input, train_target, epochs=10, batch_size=32, validation_data=(test_input, test_target))

# Evaluate the model on the test data
test_loss, test_precision, test_recall = model.evaluate(test_input, test_target)

print(f"Test loss: {test_loss:.4f}")
print(f"Test precision at k=10: {test_precision:.4f}")
print(f"Test recall at k=10: {test_recall:.4f}")


In [61]:
# Save model
pickle.dump(model, open('model.pkl', 'wb'))

Keras weights file (<HDF5 file "variables.h5" (mode r+)>) saving:
...layers
......dense
.........vars
............0
............1
......dense_1
.........vars
............0
............1
......dense_2
.........vars
............0
............1
...metrics
......auc
.........vars
............0
............1
............2
............3
......mean
.........vars
............0
............1
......precision
.........vars
............0
............1
......recall
.........vars
............0
............1
...vars
Keras model archive saving:
File Name                                             Modified             Size
metadata.json                                  2023-03-14 16:44:10           64
config.json                                    2023-03-14 16:44:10         2955
variables.h5                                   2023-03-14 16:44:10       100312


### Load model

In [62]:
# Load model
pickled_model = pickle.load(open('model.pkl', 'rb'))

Keras model archive loading:
File Name                                             Modified             Size
metadata.json                                  2023-03-14 16:44:10           64
config.json                                    2023-03-14 16:44:10         2955
variables.h5                                   2023-03-14 16:44:10       100312
Keras weights file (<HDF5 file "variables.h5" (mode r)>) loading:
...layers
......dense
.........vars
............0
............1
......dense_1
.........vars
............0
............1
......dense_2
.........vars
............0
............1
...metrics
......auc
.........vars
............0
............1
............2
............3
......mean
.........vars
............0
............1
......precision
.........vars
............0
............1
......recall
.........vars
............0
............1
...vars


### Get recs

In [94]:
user_id = 10
num_recommendations = 5
recommendations = get_recommendations(
    model=model,
    data=data,
    user_id=user_id,
    n=num_recommendations,
    scaler=scaler,
    pca=pca,
    item_titles=games_dict
)


User's games:
- Counter-Strike
- Team Fortress Classic
- Half-Life: Source
- Alien Swarm
- Mare Nostrum
- Iron Warriors: T - 72 Tank Command
- Quake IV
- QUAKE
- QUAKE II
- QUAKE II Mission Pack: The Reckoning
- X: Tension
- 688(I) Hunter/Killer
- Xpand Rally
- Bejeweled 2 Deluxe
- Zuma Deluxe
- Bejeweled Deluxe
- Escape Rosecliff Island
- Zuma's Revenge!
- Garry's Mod
- Silverfall
- Company of Heroes - Legacy Edition
- Indiana Jones® and the Fate of Atlantis™
- Shank
- Vegas: Make It Big™
- Bloodline Champions
- Commandos 2: Men of Courage
- Deus Ex: Game of the Year Edition
- Jade Empire™: Special Edition
- Sid Meier's Railroads!
- Railroad Tycoon II Platinum
- Sam &amp; Max 106: Bright Side of the Moon
- Sam &amp; Max 202: Moai Better Blues
- Sam &amp; Max 204: Chariots of the Dogs
- Telltale Texas Hold ‘Em
- RACE 07: Andy Priaulx Crowne Plaza Raceway (Free DLC)
- Civilization IV: Beyond the Sword
- Freedom Force vs. the Third Reich
- Champions Online
- Haunted House™
- Call of Duty