In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf

# Load the processed attendance table (path is relative to the notebook).
data = pd.read_csv('../data/event_attendees_processed.csv')

# Distinct user and event identifiers present in the table.
users = data['user'].unique()
events = data['event'].unique()

# Give every distinct identifier a contiguous integer position so it can be
# used as a row index into an embedding table.
user_to_idx = dict(zip(users, range(len(users))))
event_to_idx = dict(zip(events, range(len(events))))

# Attach the integer positions as new columns next to the raw identifiers.
data['user_idx'] = data['user'].map(user_to_idx)
data['event_idx'] = data['event'].map(event_to_idx)

# Hold out 20% of the rows for validation; the fixed seed keeps the split
# reproducible across runs.
train_data, val_data = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)

In [4]:

class NCF(tf.keras.Model):
    """Neural collaborative filtering model that scores (user, event) pairs.

    Each user index and event index is looked up in its own embedding table.
    The two embeddings are concatenated along the feature axis and fed to an
    MLP whose final single sigmoid unit produces one score between 0 and 1
    per pair.

    Args:
        num_users: Number of rows in the user embedding table.
        num_events: Number of rows in the event embedding table.
        embedding_dim: Width of each embedding vector (default 96).
    """

    def __init__(self, num_users, num_events, embedding_dim=96):
        super().__init__()
        self.user_embed = tf.keras.layers.Embedding(num_users, embedding_dim)
        self.event_embed = tf.keras.layers.Embedding(num_events, embedding_dim)
        self.mlp = tf.keras.Sequential([
            tf.keras.layers.Dense(128, activation='swish'),
            tf.keras.layers.Dense(256, activation='swish'),
            tf.keras.layers.Dense(128, activation='swish'),
            tf.keras.layers.Dense(1, activation='sigmoid')
        ])

    def call(self, inputs):
        """Score a batch of (user, event) index pairs.

        Args:
            inputs: A pair ``(user_idx, event_idx)`` of integer index tensors
                with matching shapes.

        Returns:
            The MLP output for the concatenated embeddings; the last
            dimension has size 1.
        """
        user_idx, event_idx = inputs
        user_emb = self.user_embed(user_idx)
        event_emb = self.event_embed(event_idx)
        # Join on the last (feature) axis. For rank-1 index tensors this is
        # the same as axis=1, but it also stays correct when the indices
        # carry extra dimensions such as (batch, 1), where axis=1 would
        # stack the two embeddings instead of widening the feature vector.
        concat_emb = tf.concat([user_emb, event_emb], axis=-1)
        return self.mlp(concat_emb)


# Embedding table sizes follow the number of distinct ids found in the data.
num_users, num_events = len(users), len(events)
model = NCF(num_users, num_events)

# Optimise mean squared error with Adam; RMSE and accuracy are tracked as
# additional metrics.
# NOTE(review): 'accuracy' next to an MSE loss is only informative if
# weighted_score is a 0/1 label -- confirm against the dataset.
model.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=[tf.keras.metrics.RootMeanSquaredError(), 'accuracy'],
)


2024-04-03 20:31:42.691464: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-04-03 20:31:42.691629: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-04-03 20:31:42.691703: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

In [5]:
# Model inputs: the integer user/event positions of the training rows.
train_user_idx, train_event_idx = (
    tf.constant(train_data[column].values, dtype=tf.int32)
    for column in ('user_idx', 'event_idx')
)
# Regression target for each training row.
train_target = tf.constant(
    train_data['weighted_score'].values,
    dtype=tf.float32,
)

In [7]:
# Train for 10 passes over the training tensors in mini-batches of 512.
# Left as a bare expression so the returned History object is displayed.
model.fit(
    x=[train_user_idx, train_event_idx],
    y=train_target,
    batch_size=512,
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7f5fb3507220>

In [None]:
# Build the validation tensors the same way as the training ones: int32
# user/event positions as inputs and the float32 weighted_score as target.
val_user_idx = tf.constant(val_data['user_idx'].values, dtype=tf.int32)
val_event_idx = tf.constant(val_data['event_idx'].values, dtype=tf.int32)
val_target = tf.constant(val_data['weighted_score'].values, dtype=tf.float32)

# With the metrics the model was compiled with, evaluate() returns a list:
# [loss (MSE), root mean squared error, accuracy].
val_loss = model.evaluate([val_user_idx, val_event_idx], val_target)

# Print the result explicitly; an assignment alone displays nothing, so the
# recorded "Validation Loss: [...]" output could not be reproduced otherwise.
print("Validation Loss:", val_loss)

Validation Loss: [0.13287292420864105, 0.3645173907279968, 0.503816545009613]
