In [177]:
# !pip install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0  --extra-index-url https://download.pytorch.org/whl/cu126
import numpy as np 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from tqdm.rich import tqdm
import warnings
warnings.filterwarnings("ignore")

# Load Dataset

In [179]:
movies_df = pd.read_csv("./movies.dat", 
                        header = None, 
                        sep='::', 
                        names=['item', 'title', 'genre'],
                        engine='python',
                        encoding='latin-1')
movies_df.head()

Unnamed: 0,item,title,genre
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


In [180]:
movies_df['year'] = movies_df['title'].apply(lambda x: int(x[-5: -1]))
movies_df['name'] = movies_df['title'].apply(lambda x: x[: -7])
movies_df.drop(columns=['title'], inplace=True)
movies_df.head()

Unnamed: 0,item,genre,year,name
0,1,Animation|Children's|Comedy,1995,Toy Story
1,2,Adventure|Children's|Fantasy,1995,Jumanji
2,3,Comedy|Romance,1995,Grumpier Old Men
3,4,Comedy|Drama,1995,Waiting to Exhale
4,5,Comedy,1995,Father of the Bride Part II


In [181]:
genres_raw  = movies_df['genre'].str.split('|', expand=True)
list_genres = []
for i in range(6):
    list_genres += genres_raw[i].tolist()
set_genres = set(list_genres)
set_genres.remove(None)
set_genres

{'Action',
 'Adventure',
 'Animation',
 "Children's",
 'Comedy',
 'Crime',
 'Documentary',
 'Drama',
 'Fantasy',
 'Film-Noir',
 'Horror',
 'Musical',
 'Mystery',
 'Romance',
 'Sci-Fi',
 'Thriller',
 'War',
 'Western'}

In [182]:
for genre in set_genres:
    movies_df[genre] = movies_df['genre'].apply(lambda x: 1 if genre in x else 0)
movies_df.drop(columns=['genre'], inplace=True)
movies_df.head()

Unnamed: 0,item,year,name,Adventure,Romance,Action,Documentary,Fantasy,Horror,Musical,...,Thriller,Drama,Sci-Fi,Comedy,War,Crime,Mystery,Western,Children's,Film-Noir
0,1,1995,Toy Story,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,1,0
1,2,1995,Jumanji,1,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,1,0
2,3,1995,Grumpier Old Men,0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
3,4,1995,Waiting to Exhale,0,0,0,0,0,0,0,...,0,1,0,1,0,0,0,0,0,0
4,5,1995,Father of the Bride Part II,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0


In [183]:
ratings_df = pd.read_csv("./ratings.dat", 
                        header = None, 
                        sep='::', 
                        names=['user', 'item', 'label', 'timestamp'],
                        engine='python',
                        encoding='latin-1')
ratings_df.head()

Unnamed: 0,user,item,label,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [184]:
users_df = pd.read_csv("./users.dat", 
                       sep='::', 
                       header=None, 
                       names=['user', 'gender', 'age', 'occupation', 'zipcode'], 
                       engine='python', 
                       encoding='latin-1')
users_df.head()

Unnamed: 0,user,gender,age,occupation,zipcode
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,2460
4,5,M,25,20,55455


In [185]:
colab_df = pd.merge(ratings_df, users_df, on='user', how='inner')
colab_df

Unnamed: 0,user,item,label,timestamp,gender,age,occupation,zipcode
0,1,1193,5,978300760,F,1,10,48067
1,1,661,3,978302109,F,1,10,48067
2,1,914,3,978301968,F,1,10,48067
3,1,3408,4,978300275,F,1,10,48067
4,1,2355,5,978824291,F,1,10,48067
...,...,...,...,...,...,...,...,...
1000204,6040,1091,1,956716541,M,25,6,11106
1000205,6040,1094,5,956704887,M,25,6,11106
1000206,6040,562,5,956704746,M,25,6,11106
1000207,6040,1096,4,956715648,M,25,6,11106


In [186]:
merged_df = pd.merge(colab_df, movies_df, on='item', how='inner')
merged_df 

Unnamed: 0,user,item,label,timestamp,gender,age,occupation,zipcode,year,name,...,Thriller,Drama,Sci-Fi,Comedy,War,Crime,Mystery,Western,Children's,Film-Noir
0,1,1193,5,978300760,F,1,10,48067,1975,One Flew Over the Cuckoo's Nest,...,0,1,0,0,0,0,0,0,0,0
1,2,1193,5,978298413,M,56,16,70072,1975,One Flew Over the Cuckoo's Nest,...,0,1,0,0,0,0,0,0,0,0
2,12,1193,4,978220179,M,25,12,32793,1975,One Flew Over the Cuckoo's Nest,...,0,1,0,0,0,0,0,0,0,0
3,15,1193,4,978199279,M,25,7,22903,1975,One Flew Over the Cuckoo's Nest,...,0,1,0,0,0,0,0,0,0,0
4,17,1193,5,978158471,M,50,1,95350,1975,One Flew Over the Cuckoo's Nest,...,0,1,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1000204,5949,2198,5,958846401,M,18,17,47901,1998,Modulations,...,0,0,0,0,0,0,0,0,0,0
1000205,5675,2703,3,976029116,M,35,14,30030,1998,Broken Vessels,...,0,1,0,0,0,0,0,0,0,0
1000206,5780,2845,1,958153068,M,18,17,92886,1999,White Boys,...,0,1,0,0,0,0,0,0,0,0
1000207,5851,3607,5,957756608,F,18,20,55410,1973,One Little Indian,...,0,1,0,1,0,0,0,1,0,0


In [187]:
merged_df.columns

Index(['user', 'item', 'label', 'timestamp', 'gender', 'age', 'occupation',
       'zipcode', 'year', 'name', 'Adventure', 'Romance', 'Action',
       'Documentary', 'Fantasy', 'Horror', 'Musical', 'Animation', 'Thriller',
       'Drama', 'Sci-Fi', 'Comedy', 'War', 'Crime', 'Mystery', 'Western',
       'Children's', 'Film-Noir'],
      dtype='object')

In [188]:
list(set_genres)

['Adventure',
 'Romance',
 'Action',
 'Documentary',
 'Fantasy',
 'Horror',
 'Musical',
 'Animation',
 'Thriller',
 'Drama',
 'Sci-Fi',
 'Comedy',
 'War',
 'Crime',
 'Mystery',
 'Western',
 "Children's",
 'Film-Noir']

In [189]:
# import tensorflow as tf

# class DeepFM(tf.keras.Model):
#     """
#     DeepFM: Combines a Factorization Machine (FM) component for low-order
#     feature interactions with a deep neural network for high-order interactions.
#     """
#     def __init__(
#         self,
#         field_dims,
#         embed_dim=16,
#         mlp_dims=(128, 64),
#         dropout_rate=0.2,
#         **kwargs
#     ):
#         super(DeepFM, self).__init__(**kwargs)
#         self.num_fields = len(field_dims)
#         self.embed_dim = embed_dim

#         # Embeddings for each categorical field
#         self.embeddings = [
#             tf.keras.layers.Embedding(input_dim=dim, output_dim=embed_dim)
#             for dim in field_dims
#         ]

#         # First-order linear term
#         self.linear = tf.keras.layers.Dense(1)

#         # MLP for high-order interactions
#         mlp_layers = []
#         input_dim = self.num_fields * embed_dim
#         for dim in mlp_dims:
#             mlp_layers.append(tf.keras.layers.Dense(dim, activation='relu'))
#             mlp_layers.append(tf.keras.layers.Dropout(dropout_rate))
#             input_dim = dim
#         self.mlp = tf.keras.Sequential(mlp_layers)
#         self.fc = tf.keras.layers.Dense(1)

#     def call(self, inputs):
#         # inputs: Tensor of shape (batch_size, num_fields), dtype int32
#         # Embedding lookup
#         embed_list = [
#             self.embeddings[i](inputs[:, i]) for i in range(self.num_fields)
#         ]  # list of (batch, embed_dim)
#         embed_stack = tf.stack(embed_list, axis=1)  # (batch, num_fields, embed_dim)

#         # Linear term (first-order)
#         flat = tf.reshape(embed_stack, [-1, self.num_fields * self.embed_dim])
#         linear_out = self.linear(flat)  # (batch, 1)

#         # FM second-order term
#         summed = tf.reduce_sum(embed_stack, axis=1)             # (batch, embed_dim)
#         summed_sq = tf.square(summed)                           # (batch, embed_dim)
#         sq_embed = tf.square(embed_stack)                       # (batch, num_fields, embed_dim)
#         sq_sum = tf.reduce_sum(sq_embed, axis=1)                # (batch, embed_dim)
#         second_order = 0.5 * tf.reduce_sum(summed_sq - sq_sum, axis=1, keepdims=True)

#         # Deep component
#         deep_out = self.mlp(flat)
#         deep_out = self.fc(deep_out)  # (batch, 1)

#         # Final output
#         logits = linear_out + second_order + deep_out
#         return tf.nn.sigmoid(logits)

# # Example usage:
# field_dims = [1000, 500, 100]  # cardinalities of each categorical feature
# model = DeepFM(field_dims, embed_dim=8, mlp_dims=[64, 32], dropout_rate=0.1)
# model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['AUC'])

# # Dummy data
# X = train_data.drop('rating', axis=1)
# y = train_data['rating']
# model.fit(train_data, y, epochs=5, batch_size=256)


In [190]:
merged_df['label'].max()

5

In [191]:
merged_df.head()

Unnamed: 0,user,item,label,timestamp,gender,age,occupation,zipcode,year,name,...,Thriller,Drama,Sci-Fi,Comedy,War,Crime,Mystery,Western,Children's,Film-Noir
0,1,1193,5,978300760,F,1,10,48067,1975,One Flew Over the Cuckoo's Nest,...,0,1,0,0,0,0,0,0,0,0
1,2,1193,5,978298413,M,56,16,70072,1975,One Flew Over the Cuckoo's Nest,...,0,1,0,0,0,0,0,0,0,0
2,12,1193,4,978220179,M,25,12,32793,1975,One Flew Over the Cuckoo's Nest,...,0,1,0,0,0,0,0,0,0,0
3,15,1193,4,978199279,M,25,7,22903,1975,One Flew Over the Cuckoo's Nest,...,0,1,0,0,0,0,0,0,0,0
4,17,1193,5,978158471,M,50,1,95350,1975,One Flew Over the Cuckoo's Nest,...,0,1,0,0,0,0,0,0,0,0


In [192]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import roc_auc_score
from sklearn.metrics import mean_squared_error, mean_absolute_error

# 1. Assume merged_df is your full DataFrame with interactions
#    and a binary 'label' column (0/1) or rating column.
#    Also define your feature columns:
user_col    = ["user"]
item_col    = ["item"]
sparse_cols = ["gender", "occupation", "zipcode"]  # example sparse
dense_cols  = ["age", "year"]                     # example dense
label_col   = "label"  # or 'rating' if doing regression

# 2. Encode categorical fields to integer indices
for col in user_col + item_col + sparse_cols:
    le = LabelEncoder()
    merged_df[col] = le.fit_transform(merged_df[col].astype(str))

# 3. Build field dimensions and data arrays
cat_cols   = user_col + item_col + sparse_cols
field_dims = [merged_df[c].nunique() for c in cat_cols]
dense_dim  = len(dense_cols)

X_cat = merged_df[cat_cols].values.astype(np.int64)   # (n_samples, n_cat_fields)
X_den = merged_df[dense_cols].values.astype(np.float32)
y      = merged_df[label_col].values.astype(np.int64)

# 4. Train/Validation split (e.g. 80/20)
from sklearn.model_selection import train_test_split
Xc_train, Xc_val, Xd_train, Xd_val, y_train, y_val = train_test_split(
    X_cat, X_den, y, test_size=0.2, random_state=42
)

# 5. PyTorch Dataset and DataLoader
data_train = torch.utils.data.TensorDataset(
    torch.from_numpy(Xc_train), torch.from_numpy(Xd_train), torch.from_numpy(y_train)
)
data_val = torch.utils.data.TensorDataset(
    torch.from_numpy(Xc_val),   torch.from_numpy(Xd_val),   torch.from_numpy(y_val)
)
train_loader = DataLoader(data_train, batch_size=256, shuffle=True, num_workers=4, pin_memory=True, )
val_loader   = DataLoader(data_val,   batch_size=256, num_workers=4, pin_memory=True,)



# Model Definition

In [194]:
# 6. DeepFM model definition
class DeepFM(nn.Module):
    def __init__(self, field_dims, dense_dim, embed_dim=16, mlp_dims=[128, 64], dropout=0.2):
        super().__init__()
        self.num_fields = len(field_dims)
        self.embed_dim  = embed_dim
        # Embedding layers for FM 1st & 2nd order\         
        self.linear_emb = nn.ModuleList([nn.Embedding(fd, 1) for fd in field_dims])
        self.embeddings = nn.ModuleList([nn.Embedding(fd, embed_dim) for fd in field_dims])
        # Linear for dense features
        self.dense_lin = nn.Linear(dense_dim, 1)
        # MLP for deep part
        input_dim = self.num_fields * embed_dim + dense_dim
        layers = []
        for dim in mlp_dims:
            layers += [nn.Linear(input_dim, dim), nn.ReLU(), nn.Dropout(dropout)]
            input_dim = dim
        self.mlp = nn.Sequential(*layers)
        n_classes = 5
        self.fc = nn.Linear(input_dim, 1)

    def forward(self, x_cat, x_den):
        # Linear term: sum of linear embeddings + dense linear
        lin_terms = [emb(x_cat[:, i]).squeeze(1) for i, emb in enumerate(self.linear_emb)]
        lin_cat = torch.stack(lin_terms, dim=1).sum(dim=1, keepdim=True)
        lin_den = self.dense_lin(x_den)
        linear = lin_cat + lin_den

        # FM 2nd-order term
        embeds = [emb(x_cat[:, i]) for i, emb in enumerate(self.embeddings)]
        stack = torch.stack(embeds, dim=1)  # (batch, num_fields, embed_dim)
        sum_emb = stack.sum(dim=1)
        sum_emb_sq = sum_emb * sum_emb
        sq_emb = stack * stack
        sq_sum = sq_emb.sum(dim=1)
        fm = 0.5 * (sum_emb_sq - sq_sum).sum(dim=1, keepdim=True)

        # Deep part
        deep_in = torch.cat([stack.view(x_cat.size(0), -1), x_den], dim=1)
        deep_h = self.mlp(deep_in)
        deep_o = self.fc(deep_h)

        # Output
        out = linear + fm + deep_o
        return (linear + fm + deep_o).squeeze(1)




In [195]:
class cDeepFM(nn.Module):
    def __init__(
        self,
        field_dims,
        dense_dim,
        embed_dim=16,
        mlp_dims=[128, 64],
        dropout=0.2,
        compression_dim=32,
    ):
        super().__init__()
        self.num_fields = len(field_dims)
        self.embed_dim  = embed_dim
        # FM embeddings
        self.linear_emb = nn.ModuleList([nn.Embedding(fd, 1) for fd in field_dims])
        self.embeddings = nn.ModuleList([nn.Embedding(fd, embed_dim) for fd in field_dims])
        # Dense linear
        self.dense_lin = nn.Linear(dense_dim, 1)
        # MLP for deep features
        input_dim = self.num_fields * embed_dim + dense_dim
        layers = []
        for dim in mlp_dims:
            layers += [nn.Linear(input_dim, dim), nn.ReLU(), nn.Dropout(dropout)]
            input_dim = dim
        self.mlp = nn.Sequential(*layers)
        # Compression: combine linear, fm, and deep into a smaller vector
        last_dim = mlp_dims[-1] if isinstance(mlp_dims, (list, tuple)) else mlp_dims
        comp_in = 2 + last_dim  # linear scalar + fm scalar + deep vector length
        self.compression = nn.Sequential(
            nn.Linear(comp_in, compression_dim),
            nn.ReLU()
        )
        # Final output layer
        n_classes = 6
        self.fc = nn.Linear(compression_dim, n_classes)
        # self.fc = nn.Linear(compression_dim, 1)

    def forward(self, x_cat, x_den):
        # 1) Linear term
        lin_sparse = [emb(x_cat[:, i]).squeeze(1) for i, emb in enumerate(self.linear_emb)]
        lin_cat = torch.stack(lin_sparse, dim=1).sum(dim=1, keepdim=True)
        lin_den = self.dense_lin(x_den)
        linear = lin_cat + lin_den

        # 2) FM 2nd-order term
        embeds = [emb(x_cat[:, i]) for i, emb in enumerate(self.embeddings)]
        stack = torch.stack(embeds, dim=1)
        sum_emb = stack.sum(dim=1)
        sum_sq  = sum_emb * sum_emb
        sq_sum   = (stack * stack).sum(dim=1)
        fm = 0.5 * (sum_sq - sq_sum).sum(dim=1, keepdim=True)

        # 3) Deep part
        deep_in = torch.cat([stack.view(x_cat.size(0), -1), x_den], dim=1)
        deep_h  = self.mlp(deep_in)

        # 4) Compression + output
        concat_out = torch.cat([linear, fm, deep_h], dim=1)
        comp = self.compression(concat_out)
        out = self.fc(comp)
        return out




# Training

In [204]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [208]:
# 7. Training loop
import os
# os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
from sklearn.metrics import roc_auc_score, accuracy_score
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model = DeepFM(field_dims, dense_dim, embed_dim=16, mlp_dims=[128,64], dropout=0.2).to(device)

# optimizer = optim.Adam(model.parameters(), lr=1e-3)
# criterion = nn.BCEWithLogitsLoss()

model_list = [DeepFM, cDeepFM]
model_list = [ DeepFM]
for model_class in model_list:
    print("Starting", model_class.__name__)
    # 1) Instantiate model and optimizer *inside* the loop
    if model_class is DeepFM:
        model = DeepFM(field_dims, dense_dim,
                       embed_dim=16, mlp_dims=[128,64], dropout=0.2)
    else:
        model = cDeepFM(field_dims, dense_dim,
                        embed_dim=16, mlp_dims=[128,64],
                        dropout=0.2, compression_dim=32)
    model.to(device)

    # Regression loss & optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    for epoch in range(1, 6):  # e.g. 5 epochs
        # — Training —
        model.train()
        total_loss = 0.0
        for xc, xd, yb in train_loader:
            xc = xc.to(device)
            xd = xd.to(device)
            yb = yb.to(device)  # floats 1–5

            optimizer.zero_grad()
            preds = model(xc, xd)  # (batch,)
            loss  = criterion(preds, yb)
            loss.backward()
            optimizer.step()

            total_loss += loss.detach().item() * yb.size(0)

        train_mse = total_loss / len(train_loader.dataset)

        # # — Validation —
        # model.eval()
        # all_preds, all_y = [], []
        # with torch.no_grad():
        #     for xc, xd, yb in val_loader:
        #         xc = xc.to(device, non_blocking=True)
        #         xd = xd.to(device, non_blocking=True)
        #         yb = yb.to(device, non_blocking=True)

        #         preds = model(xc, xd)
        #         all_preds.append(preds.cpu().numpy())
        #         all_y.append(yb.cpu().numpy())

        # all_preds = np.concatenate(all_preds)
        # all_y     = np.concatenate(all_y)

        # # regression metrics
        # rmse = np.sqrt(mean_squared_error(all_y, all_preds))
        # mae  = mean_absolute_error(all_y, all_preds)

        # print(
        #     f"Epoch {epoch}: "
        #     f"train_mse={train_mse:.4f}, "
        #     f"val_rmse={rmse:.4f}, "
        #     f"val_mae={mae:.4f}"
        # )

Starting DeepFM


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
