In [6]:
from typing import Tuple, Callable, List

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import root_mean_squared_error
import os
import wandb

from helper import read_data_df, read_data_matrix, evaluate, make_submission

#Seed need to be set for all experiments
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

In [7]:
def training_df_to_tensor() -> torch.Tensor:
    train_df = read_data_df()[0]
    raw_matrix = read_data_matrix(train_df) 
    raw_matrix = np.nan_to_num(raw_matrix, nan=0.0)
    mat = torch.from_numpy(raw_matrix).float()
    return mat 

In [8]:
#class HybridLoss(nn.Module):
    #def __init__(self, max_rating, bpr_weight=0.5):
        #super().__init__()
        #self.max_rating = max_rating
        #self.bpr_weight = bpr_weight

    #def forward(self, preds, ratings, 
                #implicit_pos_preds=None, implicit_neg_preds=None):
        ## 1) explicit normalized‐CE
        #norm_targets = ratings.float() / self.max_rating
        #loss_explicit = F.binary_cross_entropy(preds, norm_targets)

        ## 2) implicit BPR (if we have pos/neg pairs)
        #if implicit_pos_preds is not None:
            ## BPR: −log σ(ŷ_pos − ŷ_neg)
            #diff = implicit_pos_preds - implicit_neg_preds
            #loss_implicit = -torch.log(torch.sigmoid(diff) + 1e-8).mean()
        #else:
            #loss_implicit = 0.0

        #return (1 - self.bpr_weight)*loss_explicit + self.bpr_weight*loss_implicit


class DMFModel(nn.Module):
    """
    Deep Matrix Factorization model with separate MLPs for users and items.
    """
    def __init__(
        self,
        num_user_inputs: int,
        num_item_inputs: int,
        hidden_dims: List[int],
        embedding_dim: int
    ):
        super().__init__()
        # build a little helper to avoid repetition
        def build_mlp(dims: List[int]) -> nn.Sequential:
            layers = []
            for i in range(len(dims) - 1):
                layers.append(nn.Linear(dims[i], dims[i+1]))
                # add ReLU after every layer except the last
                if i < len(dims) - 2:
                    layers.append(nn.ReLU())
            return nn.Sequential(*layers)

        # user‐network: input is a row of length P
        user_dims = [num_user_inputs] + hidden_dims + [embedding_dim]
        self.user_net = build_mlp(user_dims)

        # item‐network: input is a column of length S
        item_dims = [num_item_inputs] + hidden_dims + [embedding_dim]
        self.item_net = build_mlp(item_dims)

    def forward(self, row: torch.Tensor, col: torch.Tensor) -> torch.Tensor:
        # row: (batch,) or (batch, P)
        # col: (batch,) or (batch, S)
        u = self.user_net(row)
        v = self.item_net(col)
        # cosine similarity
        sim = F.cosine_similarity(u, v, dim=-1)
        return sim.clamp(min=1e-6)


class InteractionDataset(Dataset):
    """
    Dataset yielding (i, j, label) triplets for positive and sampled negative interactions.
    """
    def __init__(
        self,
        matrix: torch.Tensor,
        neg_ratio: int = 1
    ):
        # matrix: S x P
        pos = (matrix > 0).nonzero(as_tuple=False)
        neg = (matrix == 0).nonzero(as_tuple=False)
        pos_idx = pos.tolist()
        # sample negatives
        num_neg = len(pos_idx) * neg_ratio
        neg_idx = neg[torch.randperm(len(neg))[:num_neg]].tolist()
        self.interactions = [(i,j, matrix[i,j].item()) for i,j in pos_idx]
        self.interactions += [(i,j, 0.0) for i,j in neg_idx]

    def __len__(self) -> int:
        return len(self.interactions)

    def __getitem__(self, idx: int):
        i, j, r = self.interactions[idx]
        return i, j, r


def train_dmf(
    memorize_model: bool,
    matrix: torch.Tensor,
    hidden_dims: List[int],
    embedding_dim: int,
    use_memorized_model: bool = True,
    neg_ratio: int = 7,
    batch_size: int = 256,
    lr: float = 1e-4,
    epochs: int = 10,
    device: torch.device = torch.device('cpu')
) -> DMFModel:
    """
    Train the DMF model using normalized cross-entropy loss (nce).
    """
    S, P = matrix.size()
    model = DMFModel(
        num_user_inputs = P,
        num_item_inputs = S,
        hidden_dims      = hidden_dims,
        embedding_dim    = embedding_dim
    ).to(device)

    MODEL_PATH = "dmf_weights.pth"
    if os.path.exists(MODEL_PATH) and use_memorized_model:
        model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
        print(" Loaded existing model")
        return model

    wandb.watch(model, log="all", log_freq=100)
    

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    dataset = InteractionDataset(matrix, neg_ratio)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    print(f"Batches per epoch: {len(loader)}")

    max_rating = matrix.max().item()      # e.g. 5.0
    for epoch in range(1, epochs+1):
        running_loss = 0.0
        current_number = 0
        for batch_idx, (i_idxs, j_idxs, ratings) in enumerate(loader, start=1):
            current_number += 1
            print("current number", current_number)
            # gather inputs
            rows = matrix[i_idxs].to(device)
            cols = matrix[:, j_idxs].t().to(device)
            # forward
            # forward
            preds = model(rows, cols)
            # normalize ratings to [0,1] *in float32*
            labels    = ratings.float().to(device)  # force float32
            norm_r    = labels / max_rating
            loss      = F.binary_cross_entropy(preds, norm_r)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * ratings.size(0)

            if batch_idx % 50 == 0:
                wandb.log({
                    "train/batch_loss": loss.item(),
                    "epoch": epoch,
                    "batch_idx": batch_idx
                })

        epoch_loss = running_loss / len(dataset)
        print(f"Epoch {epoch}/{epochs}, Loss: {epoch_loss:.4f}")
        # log epoch-level loss
        wandb.log({
            "train/epoch_loss": epoch_loss,
            "epoch": epoch
        })


    torch.save(model.state_dict(), MODEL_PATH)
    print(f"💾 Model saved to {MODEL_PATH}")

    return model




In [9]:
train_df, valid_df = read_data_df()
train_matrix = read_data_matrix(train_df)
train_matrix = np.nan_to_num(train_matrix, nan=0.0)
train_tensor = torch.from_numpy(train_matrix).float()


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
max_rating = train_matrix.max().item()


wandb.init(
    entity= "louis-barinka-eth-z-rich", 
    project="cil_dmf",
    config={
        "hidden_dims": [128, 128],
        "embedding_dim": 16,
        "neg_ratio": 1,
        "batch_size": 256,
        "lr": 1e-3,
        "epochs": 1
    }
)

config = wandb.config


model = train_dmf(
    matrix         = train_tensor,
    hidden_dims    = config.hidden_dims,
    embedding_dim  = config.embedding_dim,
    neg_ratio      = config.neg_ratio,
    batch_size     = config.batch_size,
    lr             = config.lr,
    epochs         = config.epochs,
    device         = device,
    memorize_model = False,
    use_memorized_model = False 
)


def pred_fn(sids: np.ndarray, pids: np.ndarray) -> np.ndarray:
    # convert to torch
    s = torch.from_numpy(sids).long().to(device)
    p = torch.from_numpy(pids).long().to(device)

    # grab the full row & column vectors for each pair
    # row vectors: train_mat[s] has shape (batch, P)
    rows = train_tensor[s]                      # (N, P)
    # col vectors: train_mat[:, p] is (S, N) so we transpose
    cols = train_tensor[:, p].t().to(device)               # (N, S)

    # forward through your model
    with torch.no_grad():
        preds_norm = model(rows, cols).cpu().numpy()  # in [0,1]

    # rescale back to rating space
    return preds_norm * train_matrix.max().item()


rmse = evaluate(valid_df, pred_fn)
print(f"Validation RMSE: {rmse:.4f}")
wandb.log({"val/rmse": rmse})



[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


Batches per epoch: 6611
current number 1
current number 2
current number 3
current number 4
current number 5
current number 6
current number 7
current number 8
current number 9
current number 10
current number 11
current number 12
current number 13
current number 14
current number 15
current number 16
current number 17
current number 18
current number 19
current number 20
current number 21
current number 22
current number 23
current number 24
current number 25
current number 26
current number 27
current number 28
current number 29
current number 30
current number 31
current number 32
current number 33
current number 34
current number 35
current number 36
current number 37
current number 38
current number 39
current number 40
current number 41
current number 42
current number 43
current number 44
current number 45
current number 46
current number 47
current number 48
current number 49
current number 50
current number 51
current number 52
current number 53
current number 54
current numbe

TODO:
- Extract columns and rows into a dataloader
- create the model