NEURAL COLLABORATIVE FILTERING

In [3]:
import pandas as pd
import tensorflow as tf 
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate, Dot, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.activations import relu, sigmoid

In [4]:
# Read the training interactions (columns: app_id, hours, user_id) from CSV
df = pd.read_csv(filepath_or_buffer='datasets/training_set_recommendations.csv')

# Bare trailing expression so the notebook renders the frame as this cell's output
df

Unnamed: 0,app_id,hours,user_id
0,975370,0.036362,51580
1,304390,0.011520,2586
2,1085660,0.337073,253880
3,703080,0.027447,259432
4,526870,0.007913,23869
...,...,...,...
15998,260230,0.007012,9417270
15999,329430,0.002003,2465811
16000,633230,0.038065,1893954
16001,341940,0.001002,2465811


In [5]:
# Define the NeuMF Model with Functional API
def build_neumf_model(num_users, num_items, latent_dim_gmf, latent_dim_mlp, mlp_layers, dropout_rate_gmf, dropout_rate_mlp):
    """Build an (uncompiled) NeuMF model combining a GMF branch and an MLP branch.

    Args:
        num_users: Size of the user embedding vocabularies (``input_dim``).
        num_items: Size of the item embedding vocabularies (``input_dim``).
        latent_dim_gmf: Embedding width used by the GMF branch.
        latent_dim_mlp: Embedding width used by the MLP branch (per entity).
        mlp_layers: Sequence of layer sizes. Only ``mlp_layers[1:]`` create
            ``Dense`` layers; ``mlp_layers[0]`` is not used to build a layer.
        dropout_rate_gmf: Dropout rate applied to the flattened GMF vector.
        dropout_rate_mlp: Dropout rate applied once, after the last MLP layer.

    Returns:
        A ``Model`` taking ``[user_input, item_input]`` (each of shape ``(1,)``,
        dtype int32) and producing a single sigmoid-activated output.
    """
    # Inputs
    user_input = Input(shape=(1,), dtype='int32', name='user_input')
    item_input = Input(shape=(1,), dtype='int32', name='item_input')

    # GMF Embeddings
    user_embedding_gmf = Embedding(input_dim=num_users, output_dim=latent_dim_gmf, name='gmf_user_embedding')(user_input)
    item_embedding_gmf = Embedding(input_dim=num_items, output_dim=latent_dim_gmf, name='gmf_item_embedding')(item_input)
    # Use the Multiply layer rather than a raw tf op: Keras 3 refuses to pass
    # symbolic Keras tensors to TensorFlow functions such as tf.multiply.
    gmf_vector = tf.keras.layers.Multiply()([user_embedding_gmf, item_embedding_gmf])
    gmf_vector = Flatten()(gmf_vector)
    gmf_vector = Dropout(dropout_rate_gmf)(gmf_vector)

    # MLP Embeddings
    user_embedding_mlp = Embedding(input_dim=num_users, output_dim=latent_dim_mlp, name='mlp_user_embedding')(user_input)
    item_embedding_mlp = Embedding(input_dim=num_items, output_dim=latent_dim_mlp, name='mlp_item_embedding')(item_input)
    mlp_vector = Concatenate()([user_embedding_mlp, item_embedding_mlp])
    mlp_vector = Flatten()(mlp_vector)

    # MLP Layers: one ReLU Dense layer per size after the first entry
    for out_size in mlp_layers[1:]:
        mlp_vector = Dense(out_size, activation='relu')(mlp_vector)
    # Dropout is applied a single time, on the output of the final Dense layer
    mlp_vector = Dropout(dropout_rate_mlp)(mlp_vector)

    # Combine GMF and MLP vectors
    combined_vector = Concatenate()([gmf_vector, mlp_vector])
    # Sigmoid is applied here, so this is a value in (0, 1), not a raw logit
    prediction = Dense(1, activation='sigmoid')(combined_vector)

    # Define the model
    model = Model(inputs=[user_input, item_input], outputs=prediction)
    return model

In [6]:
# Hyperparameters
embedding_dim_gmf = 10
embedding_dim_mlp = 20
dropout_rate_gmf = 0.2
dropout_rate_mlp = 0.2

# Layer sizes for the MLP branch
mlp_layers = [embedding_dim_mlp * 2, 128, 64]

# Overwrite the id columns with integer codes from pd.factorize; the
# *_mapping objects hold the original id for each code.
df['user_id'], user_id_mapping = pd.factorize(df['user_id'])
df['app_id'], app_id_mapping = pd.factorize(df['app_id'])
n_users = df['user_id'].nunique()   # 2000
n_items = df['app_id'].nunique()    # 3723

# Build the NeuMF model (arguments in the order of the function signature)
model = build_neumf_model(
    n_users,
    n_items,
    embedding_dim_gmf,
    embedding_dim_mlp,
    mlp_layers,
    dropout_rate_gmf,
    dropout_rate_mlp,
)

In [7]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [8]:
import numpy as np

# Pull the model inputs and the raw target out of the frame as NumPy arrays
y_train = np.array(df['hours'])
item_indices = np.array(df['app_id'])
user_indices = np.array(df['user_id'])
# Binarize the target: 1 where hours is strictly greater than 0.5, otherwise 0
y_train_binary = np.greater(y_train, 0.5).astype(int)

In [9]:
# Fit on the (user, item) index pairs against the binarized target;
# validation_split=0.1 holds out 10% of the samples for validation.
history = model.fit(
    x=[user_indices, item_indices],
    y=y_train_binary,
    batch_size=64,
    epochs=10,
    validation_split=0.1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [10]:
# Score the model on the same arrays that were passed to fit(), so these
# numbers are not a held-out estimate.
loss, accuracy = model.evaluate(x=[user_indices, item_indices], y=y_train_binary)
print(f'Loss: {loss}', f'Accuracy: {accuracy}', sep='\n')

Loss: 0.030765559524297714
Accuracy: 0.990939199924469


In [11]:
def recommend_items_for_user(model, user_id, all_item_ids, df, num_recommendations=10):
    """Return the highest-scoring items the user has not interacted with.

    Args:
        model: Object exposing ``predict([user_array, item_array])`` that
            returns one score per (user, item) pair.
        user_id: User identifier, compared against ``df['user_id']`` and fed to
            the model as-is, so it must use the same encoding as that column.
        all_item_ids: 1-D sequence/array of candidate item ids (same encoding
            as ``df['app_id']``).
        df: DataFrame with ``user_id`` and ``app_id`` columns listing known
            interactions.
        num_recommendations: Maximum number of items to return.

    Returns:
        1-D array of item ids ordered from highest to lowest predicted score.
        Empty when ``num_recommendations`` is not positive or when the user
        has already interacted with every candidate.
    """
    candidate_items = np.asarray(all_item_ids)

    # Get all item_ids user hasn't interacted with yet (vectorised membership
    # test; keeps the candidates in their original order)
    interacted_items = df.loc[df['user_id'] == user_id, 'app_id'].to_numpy()
    item_array = candidate_items[~np.isin(candidate_items, interacted_items)]

    # Nothing to score: avoid calling predict() on an empty batch, and avoid
    # the `[-0:]` slice below, which would select every item instead of none.
    if num_recommendations <= 0 or item_array.size == 0:
        return item_array[:0]

    user_array = np.full(item_array.shape[0], user_id)

    # Predict the scores for this user and the remaining items
    predictions = model.predict([user_array, item_array])
    scores = np.asarray(predictions).reshape(-1)

    # Sort ascending, keep the last N, then reverse for best-first order
    top_indices = scores.argsort()[-num_recommendations:][::-1]
    return item_array[top_indices]

# Candidate pool: every distinct value currently in df['app_id']
all_item_ids = pd.unique(df['app_id'])

# Example: Recommend items for a specific user
user_id = 4  # USER INPUT: Replace with the user_id you want recommendations for
top_recommendations = recommend_items_for_user(
    model,
    user_id,
    all_item_ids,
    df,
    num_recommendations=10,
)

print(f"Top recommended items for user {user_id}: {top_recommendations}")



Top recommended items for user 4: [  77 2711  159 1215 1394   47  307 1332  121   65]
