In [14]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset

In [None]:
class CoffeeDataset(Dataset):
    def __init__(self, interactions, recipes, users):
        
        self.user_map = users.set_index('user_id')[['taste_pref_bitterness', 'taste_pref_sweetness', 'taste_pref_acidity', 'taste_pref_body']].T.to_dict('list')
        self.recipe_map = recipes.set_index('recipe_id')[['taste_bitterness', 'taste_sweetness', 'taste_acidity', 'taste_body']].T.to_dict('list')
        
        valid_interactions = []
        for _, row in interactions.iterrows():
            if row['user_id'] in self.user_map and row['recipe_id'] in self.recipe_map:
                valid_interactions.append(row)
        
        self.data = pd.DataFrame(valid_interactions)
        self.u_ids = self.data['user_id'].values
        self.r_ids = self.data['recipe_id'].values
        
        self.targets = (self.data['rating'].values / 5.0).astype(np.float32)
        self.raw_ratings = self.data['rating'].values.astype(np.float32)

    def __len__(self): return len(self.targets)

    def __getitem__(self, idx):
        u_feat = np.array(self.user_map[self.u_ids[idx]], dtype=np.float32)
        r_feat = np.array(self.recipe_map[self.r_ids[idx]], dtype=np.float32)
        return u_feat, r_feat, self.targets[idx], self.raw_ratings[idx]

In [16]:
class TwoTowerModel(nn.Module):
    def __init__(self, user_dim=4, item_dim=4, embedding_dim=32): 
        super(TwoTowerModel, self).__init__()
        
        self.user_mlp = nn.Sequential(
            nn.Linear(user_dim, 64),
            nn.ReLU(),
            nn.Linear(64, embedding_dim)  
        )
        
        self.item_mlp = nn.Sequential(
            nn.Linear(item_dim, 64),
            nn.ReLU(),
            nn.Linear(64, embedding_dim)
        )

    def forward(self, user_features, item_features):
        user_embedding = self.user_mlp(user_features)
        item_embedding = self.item_mlp(item_features)
        
        score = (user_embedding * item_embedding).sum(dim=1)
        
        return torch.sigmoid(score)


In [17]:
def evaluate_ndcg(model, val_df, recipes, users, k=5):
    model.eval()
    user_ndcgs = []
    
    u_map = users.set_index('user_id')[['taste_pref_bitterness', 'taste_pref_sweetness', 'taste_pref_acidity', 'taste_pref_body']].T.to_dict('list')
    r_map = recipes.set_index('recipe_id')[['taste_bitterness', 'taste_sweetness', 'taste_acidity', 'taste_body']].T.to_dict('list')
    
    valid_val_df = val_df[val_df['user_id'].isin(u_map.keys()) & val_df['recipe_id'].isin(r_map.keys())]
    grouped = valid_val_df.groupby('user_id')
    
    with torch.no_grad():
        for user_id, group in grouped:
            if len(group) < 2: continue
            
            u_feat = torch.tensor([u_map[user_id]] * len(group), dtype=torch.float32)
            r_feat = torch.tensor([r_map[r] for r in group['recipe_id'].values], dtype=torch.float32)
            
            true_ratings = torch.tensor(group['rating'].values, dtype=torch.float32)
            
            preds = model(u_feat, r_feat)
            
            _, indices = torch.sort(preds, descending=True)
            relevance_at_k = true_ratings[indices[:k]]
            
            ideal_relevance, _ = torch.sort(true_ratings, descending=True)
            ideal_relevance = ideal_relevance[:k]
            
            discounts = torch.log2(torch.arange(2, len(relevance_at_k) + 2).float())
            dcg = torch.sum(relevance_at_k / discounts)
            idcg = torch.sum(ideal_relevance / discounts)
            
            ndcg = (dcg / idcg) if idcg > 0 else torch.tensor(0.0)
            user_ndcgs.append(ndcg.item())
            
    return np.mean(user_ndcgs) if user_ndcgs else 0.0

In [18]:

users_df = pd.read_csv('../student_data/users.csv').fillna(0)
recipes_df = pd.read_csv('../student_data/recipes.csv').fillna(0)
interactions_df = pd.read_csv('../student_data/interactions_train.csv').fillna(2.5)
val_csv = pd.read_csv('../student_data/interactions_val.csv').fillna(2.5)

In [19]:
train_df, internal_val = train_test_split(interactions_df, test_size=0.1, random_state=42)
    
train_dataset = CoffeeDataset(train_df, recipes_df, users_df)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

In [20]:
model = TwoTowerModel(user_dim=4, item_dim=4, embedding_dim=32)
    
criterion = nn.MSELoss() 
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [21]:
print(f"Training MLP on {len(train_df)} samples...")

for epoch in range(20):
    model.train()
    total_loss = 0
    
    for u_feat, r_feat, target, _ in train_loader:
        optimizer.zero_grad()
        logits = model(u_feat, r_feat)
        loss = criterion(logits, target)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()   
    
    val_ndcg = evaluate_ndcg(model, internal_val, recipes_df, users_df)
    print(f"Epoch {epoch+1} | Loss: {total_loss/len(train_loader):.4f} | Train-Val NDCG: {val_ndcg:.4f}")

Training MLP on 68305 samples...
Epoch 1 | Loss: 0.0342 | Train-Val NDCG: 0.9039
Epoch 2 | Loss: 0.0337 | Train-Val NDCG: 0.9043
Epoch 3 | Loss: 0.0335 | Train-Val NDCG: 0.9062
Epoch 4 | Loss: 0.0333 | Train-Val NDCG: 0.9061
Epoch 5 | Loss: 0.0332 | Train-Val NDCG: 0.9058
Epoch 6 | Loss: 0.0331 | Train-Val NDCG: 0.9051
Epoch 7 | Loss: 0.0331 | Train-Val NDCG: 0.9094
Epoch 8 | Loss: 0.0330 | Train-Val NDCG: 0.9076
Epoch 9 | Loss: 0.0330 | Train-Val NDCG: 0.9109
Epoch 10 | Loss: 0.0329 | Train-Val NDCG: 0.9079
Epoch 11 | Loss: 0.0329 | Train-Val NDCG: 0.9082
Epoch 12 | Loss: 0.0329 | Train-Val NDCG: 0.9077
Epoch 13 | Loss: 0.0328 | Train-Val NDCG: 0.9089
Epoch 14 | Loss: 0.0328 | Train-Val NDCG: 0.9082
Epoch 15 | Loss: 0.0327 | Train-Val NDCG: 0.9084
Epoch 16 | Loss: 0.0327 | Train-Val NDCG: 0.9103
Epoch 17 | Loss: 0.0327 | Train-Val NDCG: 0.9095
Epoch 18 | Loss: 0.0327 | Train-Val NDCG: 0.9101
Epoch 19 | Loss: 0.0326 | Train-Val NDCG: 0.9104
Epoch 20 | Loss: 0.0326 | Train-Val NDCG: 0.9

In [29]:
torch.save(model.state_dict(), '../model_weights.pth')

In [22]:
final_ndcg = evaluate_ndcg(model, val_csv, recipes_df, users_df)
print(f"FINAL NDCG (Provided Val Set): {final_ndcg:.4f}")

FINAL NDCG (Provided Val Set): 0.8173


In [27]:
val_cold_csv = pd.read_csv('../student_data/interactions_val_cold.csv').fillna(0)

final_ndcg = evaluate_ndcg(model, val_cold_csv, recipes_df, users_df)
print(f"FINAL NDCG (Provided Val Cold Set): {final_ndcg:.4f}")

FINAL NDCG (Provided Val Cold Set): 0.6107
