In [0]:
# ==============================================================================
# 1. IMPORT LIBRATIES
# ==============================================================================

import numpy as np
import pandas as pd
from pyspark.sql import DataFrame
from pyspark.sql import SparkSession, functions as F
from pyspark.sql.functions import lit
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import mlflow
from mlflow.models.signature import ModelSignature, Schema, ColSpec
import pickle

In [0]:
# ==============================================================================
# 2. CONFIGURATION AND UTILS
# ==============================================================================

pg_host = dbutils.secrets.get(scope="postgres-secrets", key="host")
pg_port = dbutils.secrets.get(scope="postgres-secrets", key="port")
pg_database = dbutils.secrets.get(scope="postgres-secrets", key="database")
pg_username = dbutils.secrets.get(scope="postgres-secrets", key="username")
pg_password = dbutils.secrets.get(scope="postgres-secrets", key="password")

jdbc_url = f"jdbc:postgresql://{pg_host}:{pg_port}/{pg_database}"
CONNECTION_PROPERTIES = {
    "user": pg_username,
    "password": pg_password,
    "driver": "org.postgresql.Driver"
}

# --- Target Table Name ---
INPUT_TABLE_NAME = "retail_feature_engg_done"
OUTPUT_TABLE_NAME = "two_tower_trained_model_metadata"

#Embedding dimensions
embedding_dim = 64


In [0]:
# ==============================================================================
# 3. READ AND CONVERT TO PANDAS
# ==============================================================================

def read_and_convert_data(table_name: str, spark: SparkSession) -> pd.DataFrame:
    """
    Reads data from PostgreSQL using PySpark and converts it to a 
    single Pandas DataFrame for in-memory processing/training.
    """
    print(f"Reading data from PostgreSQL table: {table_name}...")
    
    # 1. Read using Spark
    spark_df = spark.read.jdbc(url=jdbc_url, table=table_name, properties=CONNECTION_PROPERTIES)
    
    # 2. Convert to Pandas - NECESSARY STEP FOR YOUR PYTORCH/PANDAS CODE
    # WARNING: This step can fail if the data is too large for a single node's memory.
    df_pandas = spark_df.toPandas()
    
    print(f"Read and converted {len(df_pandas):,} records to Pandas.")
    return df_pandas

In [0]:
# ==============================================================================
# 4. MODEl/FEATURE CONFIGURATION
# ==============================================================================

# User Numerical Features
USER_NUM_BASE = [
    'total_payment_value_mean', 'total_payment_value_sum',
    'item_price_mean', 'item_price_median', 'item_price_max', 'item_price_sum',
    'days_since_last_purchase', 
    'order_id_nunique', 'product_category_nunique', 'product_id_nunique',
    'product_weight_kg_mean'
]

# Items Numerical Features
ITEMS_NUM_BASE = [
    'item_price_mean', 'item_price_median', 'item_price_max', 'item_price_min', 
    'item_price_std', 'item_price_sum', 
    'product_weight_kg_mean',
    'unique_orders', 'unique_customers', 'order_count',
    'days_since_last_sale']

# User Categorical Features
USER_CAT_BASE = [
    'customer_key_int' 
]

# Items Categorical Features
ITEMS_CAT_BASE = [
    'product_id_int', 
    'product_category_mode_int', 
    'seller_id_mode_int'
]


def prepare_two_tower_data(df: pd.DataFrame):
    """
    Performs all Pandas feature engineering, scaling, and prepares PyTorch DataLoaders.
    """
    # 1. Initial Cleaning and Date Conversion
    df['purchase_datetime'] = pd.to_datetime(df['purchase_datetime'])

    # 2. User Features Aggregation and Flattening
    user_features = df.groupby("customer_key").agg({
        "total_payment_value":["mean","sum"],
        "review_score":"mean",
        "order_id":"nunique",
        "product_category":"nunique",
        "product_id":"nunique",
        "product_weight_kg":"mean",
        "purchase_time_segment_late_night":"mean",
        "purchase_time_segment_morning":"mean",
        "purchase_time_segment_afternoon":"mean",
        "item_price":["mean", "median", "max", "sum"],
        "purchase_time_segment_evening":"mean"}).reset_index()
    
    user_features.columns = [f"{col[0]}_{col[1]}" if isinstance(col,tuple) and col[1] else col[0] for col in user_features.columns]
    
    # 3. User Recency
    reference_date_user_feat_recency = df['purchase_datetime'].max()
    last_purchase = df.groupby("customer_key")["purchase_datetime"].max().reset_index().rename(columns={"purchase_datetime":"last_purchase_datetime"})
    last_purchase["days_since_last_purchase"] = ((reference_date_user_feat_recency - last_purchase["last_purchase_datetime"]).dt.days)
    user_features = user_features.merge(last_purchase[["customer_key","days_since_last_purchase"]], on="customer_key", how="left")
    
    # 4. Item Features Aggregation
    items_features = df.groupby("product_id").agg(
        product_category_mode = ("product_category", lambda x: x.mode()[0]),
        seller_id_mode = ("seller_id", lambda x: x.mode()[0]),
        item_price_mean = ("item_price", "mean"),
        item_price_median = ("item_price", "median"),
        item_price_max = ("item_price", "max"),
        item_price_min = ("item_price", "min"),
        item_price_std = ("item_price", "std"),
        item_price_sum = ("item_price", "sum"),
        product_weight_kg_mean = ("product_weight_kg", "mean"),
        review_score_mean = ("review_score", "mean"),
        unique_orders = ("order_id", "nunique"),
        unique_customers = ("customer_key", "nunique"),
        order_count = ("order_id", "size"),
        last_purchase = ("purchase_datetime", "max")
    ).reset_index()

    # 5. Item Recency
    reference_date_items_feat_recency = df['purchase_datetime'].max()
    last_sale = df.groupby("product_id")["purchase_datetime"].max().reset_index().rename(columns={"purchase_datetime":"last_sale_datetime"})
    last_sale["days_since_last_sale"] = ((reference_date_items_feat_recency - last_sale["last_sale_datetime"]).dt.days)
    items_features = items_features.merge(last_sale[["product_id","days_since_last_sale"]], on="product_id", how="left")
    
    # 6. Categorical Indexing - DO THIS BEFORE THE TRAIN/TEST SPLIT
    user_features["customer_key_int"] = user_features["customer_key"].astype('category').cat.codes
    items_features["product_id_int"] = items_features["product_id"].astype('category').cat.codes
    items_features["product_category_mode_int"] = items_features["product_category_mode"].astype('category').cat.codes
    items_features["seller_id_mode_int"] = items_features["seller_id_mode"].astype('category').cat.codes
    
    # 7. Scaling
    from sklearn.preprocessing import StandardScaler
    scaler = StandardScaler()
    user_features[USER_NUM_BASE] = scaler.fit_transform(user_features[USER_NUM_BASE])
    
    scaler_items = StandardScaler()
    items_features[ITEMS_NUM_BASE] = scaler_items.fit_transform(items_features[ITEMS_NUM_BASE]) 
    
    # 8. Final Merge 
    user_cat_cols = USER_CAT_BASE
    items_cat_cols = ITEMS_CAT_BASE

    user_cols_to_merge_base = ['customer_key'] + user_cat_cols + USER_NUM_BASE
    items_cols_to_merge_base = ['product_id'] + items_cat_cols + ITEMS_NUM_BASE 

    user_features_to_add = user_features[user_cols_to_merge_base].copy()
    user_num_rename_map = {col: f"{col}_user" for col in USER_NUM_BASE}
    user_features_to_add.rename(columns=user_num_rename_map, inplace=True)

    items_features_to_add = items_features[items_cols_to_merge_base].copy()
    item_num_rename_map = {col: f"{col}_items" for col in ITEMS_NUM_BASE} 
    items_features_to_add.rename(columns=item_num_rename_map, inplace=True)

    interaction_data = df[['customer_key', 'product_id']].copy()
    interaction_data.drop_duplicates(inplace=True)

    interaction_data = interaction_data.merge(
        user_features_to_add, 
        on="customer_key", 
        how="left"
    ) 

    interaction_data = interaction_data.merge(
        items_features_to_add, 
        on="product_id", 
        how="left"
    )
    
    user_num_cols_final = list(user_num_rename_map.values())
    item_num_cols_final = list(item_num_rename_map.values())
    
    interaction_data = interaction_data.drop(columns = ['customer_key', 'product_id']) 
    interaction_data['target'] = 1.0

    # 9. Final Cleanup
    interaction_data['item_price_std_items'] = interaction_data['item_price_std_items'].fillna(
        interaction_data['item_price_std_items'].median()
    )
    
    # CRITICAL: Calculate cat_dims on the FULL interaction_data BEFORE split
    cat_dims = {col: interaction_data[col].nunique() for col in user_cat_cols + items_cat_cols}
    user_cat_dims_map = {col : cat_dims[col] for col in user_cat_cols}
    items_cat_dims_map = {col : cat_dims[col] for col in items_cat_cols}
    
    # Now split
    from torch.utils.data import DataLoader
    from sklearn.model_selection import train_test_split
    
    df_train, df_test = train_test_split(interaction_data, test_size = 0.2, random_state = 142)

    # Create DataLoaders
    train_loader = DataLoader(
        TwoTowerDataset(
            df_train, 
            user_num_cols=user_num_cols_final, 
            items_num_cols=item_num_cols_final, 
            user_cat_cols=user_cat_cols, 
            items_cat_cols=items_cat_cols
        ), 
        batch_size=256, 
        shuffle=True
    )
    test_loader = DataLoader(
        TwoTowerDataset(
            df_test, 
            user_num_cols=user_num_cols_final, 
            items_num_cols=item_num_cols_final, 
            user_cat_cols=user_cat_cols, 
            items_cat_cols=items_cat_cols
        ), 
        batch_size=256, 
        shuffle=True
    )
    
    print(f"Final training samples: {len(df_train):,}, Test samples: {len(df_test):,}")
    
    return train_loader, test_loader, user_cat_dims_map, items_cat_dims_map, scaler, scaler_items

In [0]:
# ==============================================================================
# 5. CLASS TWO TOWER DATASET
# ==============================================================================
import torch
from torch.utils.data import Dataset # Ensure Dataset is imported

class TwoTowerDataset(Dataset):
    """ Custom Dataset to handle user and item features separately. """
    
    def __init__(
        self, 
        df, 
        user_num_cols, 
        items_num_cols, 
        user_cat_cols, 
        items_cat_cols
    ):
        # 1. Store the column lists as instance attributes
        self.user_num_cols = user_num_cols
        self.items_num_cols = items_num_cols
        self.user_cat_cols = user_cat_cols
        self.items_cat_cols = items_cat_cols

        # 2. Convert to PyTorch Tensors
        self.user_num = torch.tensor(df[self.user_num_cols].values, dtype=torch.float32)
        self.items_num = torch.tensor(df[self.items_num_cols].values, dtype=torch.float32)
        self.user_cat = torch.tensor(df[self.user_cat_cols].values, dtype=torch.long)
        self.items_cat = torch.tensor(df[self.items_cat_cols].values, dtype=torch.long)
        self.target = torch.tensor(df['target'].values, dtype=torch.float32).unsqueeze(1)

    def __len__(self):
        return len(self.target)

    def __getitem__(self, idx):
        return (
            (self.user_num[idx], self.user_cat[idx]),
            (self.items_num[idx], self.items_cat[idx]),
            self.target[idx]
        )

In [0]:
# ==============================================================================
# 6. CLASS TOWER
# ==============================================================================

class Tower(nn.Module):
    """ A single Tower network, used for both User and Item feature vectors. """
    def __init__(self, num_dim, cat_dims_map, output_dim=embedding_dim):
        super().__init__()
        self.cat_embeddings = nn.ModuleList()
        self.total_cat_emb_dim = 0
        
        for name, num_embeddings in cat_dims_map.items():
            emb_dim = max(10, min(50, (num_embeddings // 2) + 1))
            self.cat_embeddings.append(nn.Embedding(num_embeddings, emb_dim))
            self.total_cat_emb_dim += emb_dim
            
        input_dim = num_dim + self.total_cat_emb_dim
        
        self.mlp = nn.Sequential(
            nn.Linear(input_dim, input_dim * 2),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(input_dim * 2, input_dim),
            nn.ReLU(),
            nn.Linear(input_dim, output_dim)
        )

    def forward(self, num_features, cat_features):
        cat_emb_outputs = []
        for i, embedding_layer in enumerate(self.cat_embeddings):
            if cat_features.dim() == 1:
                cat_input = cat_features
            else:
                cat_input = cat_features[:, i]
            cat_emb_outputs.append(embedding_layer(cat_input))
            
        if cat_emb_outputs:
            cat_features_combined = torch.cat(cat_emb_outputs, dim=1)
            combined_features = torch.cat([num_features, cat_features_combined], dim=1)
        else:
            combined_features = num_features

        embedding = self.mlp(combined_features)
        return embedding




In [0]:
# ==============================================================================
# 7. CLASS TWO TOWER MODEL
# ==============================================================================
import torch.nn as nn
import torch.nn.functional as F

class TwoTowerModel(nn.Module):
    """ The main Two-Tower Model. """

    
    def __init__(
        self, 
        user_cat_dims_map, 
        items_cat_dims_map,
        user_num_count,      
        item_num_count,      
        user_cat_cols_list,  
        items_cat_cols_list  
    ):
        super().__init__()
        
        # 1. User Tower Initialization
        self.user_tower = Tower(
            num_dim = user_num_count,
            cat_dims_map = {k : user_cat_dims_map[k] for k in user_cat_cols_list} 
        )
        
        # 2. Items Tower Initialization
        self.items_tower = Tower(
            num_dim = item_num_count,
            cat_dims_map = {k : items_cat_dims_map[k] for k in items_cat_cols_list}
        )
        
        # Ensuring the output embedding dimensions match
        assert self.user_tower.mlp[-1].out_features == self.items_tower.mlp[-1].out_features
        
    # The forward method
    def forward(self, user_inputs, items_inputs):
        user_num, user_cat = user_inputs
        items_num, items_cat = items_inputs

        user_embedding = self.user_tower(user_num, user_cat)
        items_embedding = self.items_tower(items_num, items_cat)

        user_embedding = F.normalize(user_embedding, p=2, dim=1)
        items_embedding = F.normalize(items_embedding, p=2, dim=1)

        score_matrix = torch.matmul(user_embedding, items_embedding.T)
        return score_matrix, user_embedding, items_embedding

In [0]:
# ==============================================================================
# 8. TRAIN TWO TOWER MODEL
# ==============================================================================

def train_model(model, train_loader, criterion, optimizer, epochs=5):
    """ Training loop with In-Batch Negative Sampling. """

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    print(f"\n--- Starting Training (In-Batch Negative Sampling) on {device} ---")

    epoch_losses = []

    for epoch in range(epochs):
        model.train()
        total_loss = 0.0

        for user_inputs, items_inputs, targets in train_loader:
            user_num, user_cat = user_inputs
            items_num, items_cat = items_inputs

            user_inputs_device  = (user_num.to(device), user_cat.to(device))
            items_inputs_device = (items_num.to(device), items_cat.to(device))

            score_matrix, _, _ = model(user_inputs_device, items_inputs_device)
            targets = torch.arange(score_matrix.shape[0]).to(device)

            optimizer.zero_grad()
            loss = criterion(score_matrix, targets)

            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        avg_loss = total_loss / len(train_loader)
        epoch_losses.append(avg_loss)
        print(f"Epoch {epoch+1}/{epochs}, Loss (CrossEntropy): {avg_loss:.4f}")

    print("--- Training Complete ---")
    return epoch_losses

In [0]:
# ==============================================================================
# 9. EVALUATE TWO TOWER MODEL
# ==============================================================================

def evaluate_model(model, test_loader, k_values=[1, 5, 10]):
    """ Evaluate the Two Tower model using standard recommendation metrics. """
    model.eval()
    device = next(model.parameters()).device
    
    all_hits = {k: [] for k in k_values}
    all_reciprocal_ranks = []
    all_ndcg = {k: [] for k in k_values}
    
    # (Implementation detail: The remaining evaluation logic is unchanged)
    with torch.no_grad():
        for user_inputs, item_inputs, _ in test_loader:
            user_num, user_cat = user_inputs
            items_num, items_cat = item_inputs
            
            user_num = user_num.to(device)
            user_cat = user_cat.to(device)
            items_num = items_num.to(device)
            items_cat = items_cat.to(device)
            
            user_emb = model.user_tower(user_num, user_cat)
            items_emb = model.items_tower(items_num, items_cat)
            
            user_emb = F.normalize(user_emb, p=2, dim=1)
            items_emb = F.normalize(items_emb, p=2, dim=1)
            
            score_matrix = torch.matmul(user_emb, items_emb.T)
            
            batch_size = score_matrix.shape[0]
            _, indices = torch.sort(score_matrix, dim=1, descending=True)
            
            for i in range(batch_size):
                ranking = indices[i].cpu().numpy()
                correct_item_rank = np.where(ranking == i)[0][0] + 1
                
                for k in k_values:
                    hit = 1 if correct_item_rank <= k else 0
                    all_hits[k].append(hit)
                
                all_reciprocal_ranks.append(1.0 / correct_item_rank)
                
                for k in k_values:
                    if correct_item_rank <= k:
                        dcg = 1.0 / np.log2(correct_item_rank + 1)
                        idcg = 1.0 / np.log2(2)
                        ndcg = dcg / idcg
                    else:
                        ndcg = 0.0
                    all_ndcg[k].append(ndcg)
    
    results = {}
    for k in k_values:
        results[f'HR@{k}'] = np.mean(all_hits[k])
        results[f'NDCG@{k}'] = np.mean(all_ndcg[k])
    results['MRR'] = np.mean(all_reciprocal_ranks)
    
    return results


In [0]:
# ==============================================================================
# 10. EXECUTION BLOCK WITH MULTI-SEED TRAINING
# ==============================================================================

import random
import numpy as np
import torch

def set_seed(seed=42):
    """Set seeds for reproducibility"""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Try multiple seeds and keep the best model
SEEDS_TO_TRY = [42, 123, 456, 789, 2024]
best_hr10 = 0
best_model_state = None
best_seed = None
all_results = []

print("=" * 70)
print("TRAINING WITH MULTIPLE RANDOM SEEDS")
print("=" * 70)

for seed_idx, seed in enumerate(SEEDS_TO_TRY):
    print(f"\n{'='*70}")
    print(f"SEED {seed_idx + 1}/{len(SEEDS_TO_TRY)}: {seed}")
    print(f"{'='*70}")
    
    set_seed(seed)
    
    with mlflow.start_run(run_name=f"Two-Tower-Seed-{seed}") as run:
        
        print(f"--- Starting Two-Tower Model Pipeline (Seed: {seed}) ---")
        
        # 1. Read Data (only once, outside the seed loop would be better)
        if seed_idx == 0:  # Read data only on first iteration
            raw_df_pandas = read_and_convert_data(INPUT_TABLE_NAME, spark)
            train_loader, test_loader, user_cat_dims_map, items_cat_dims_map, user_scaler, item_scaler = prepare_two_tower_data(raw_df_pandas)
            
            user_num_cols = train_loader.dataset.user_num_cols
            items_num_cols = train_loader.dataset.items_num_cols
            user_cat_cols = train_loader.dataset.user_cat_cols
            items_cat_cols = train_loader.dataset.items_cat_cols
            
            user_num_count = len(user_num_cols)
            items_num_count = len(items_num_cols)
        
        # Log parameters
        mlflow.log_params({
            "random_seed": seed,
            "epochs": 10,
            "batch_size": 256,
            "learning_rate": 1e-3,
            "embedding_dim": embedding_dim,
            "user_cat_count": len(user_cat_dims_map),
            "item_cat_count": len(items_cat_dims_map)
        })
        
        # 2. Initialize Model (fresh initialization with new seed)
        model = TwoTowerModel(
            user_cat_dims_map = user_cat_dims_map,
            items_cat_dims_map = items_cat_dims_map,
            user_num_count     = user_num_count,      
            item_num_count     = items_num_count,     
            user_cat_cols_list = user_cat_cols,       
            items_cat_cols_list= items_cat_cols       
        )
        
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=1e-3)
        
        # 3. Train
        epoch_losses = train_model(model, train_loader, criterion, optimizer, epochs=10)
        
        # Log losses
        for i, loss in enumerate(epoch_losses):
            mlflow.log_metric("train_loss", loss, step=i+1)
        
        # 4. Evaluate
        model.eval()
        metrics = evaluate_model(model, test_loader, k_values=[1, 5, 10, 20])
        mlflow.log_metrics(metrics)
        
        # Store results
        all_results.append({
            'seed': seed,
            'metrics': metrics,
            'model_state': model.state_dict().copy()
        })
        
        print(f"\n📊 Results for Seed {seed}:")
        print(f"  HR@1:  {metrics['HR@1']:.4f}")
        print(f"  HR@10: {metrics['HR@10']:.4f}")
        print(f"  MRR:   {metrics['MRR']:.4f}")
        
        # Track best model
        if metrics['HR@10'] > best_hr10:
            best_hr10 = metrics['HR@10']
            best_model_state = model.state_dict().copy()
            best_seed = seed
            print(f"  ✅ NEW BEST MODEL! (HR@10: {best_hr10:.4f})")

# ==============================================================================
# SUMMARY AND SAVE BEST MODEL
# ==============================================================================

print("\n" + "=" * 70)
print("MULTI-SEED TRAINING SUMMARY")
print("=" * 70)

# Calculate statistics
hr1_scores = [r['metrics']['HR@1'] for r in all_results]
hr10_scores = [r['metrics']['HR@10'] for r in all_results]
mrr_scores = [r['metrics']['MRR'] for r in all_results]

print(f"\n📈 Performance Statistics Across {len(SEEDS_TO_TRY)} Seeds:")
print(f"  HR@1:  {np.mean(hr1_scores):.4f} ± {np.std(hr1_scores):.4f}")
print(f"  HR@10: {np.mean(hr10_scores):.4f} ± {np.std(hr10_scores):.4f}")
print(f"  MRR:   {np.mean(mrr_scores):.4f} ± {np.std(mrr_scores):.4f}")

print(f"\n🏆 Best Model:")
print(f"  Seed:  {best_seed}")
print(f"  HR@10: {best_hr10:.4f}")

# Load best model and save it
model.load_state_dict(best_model_state)

# Save best model to MLflow
with mlflow.start_run(run_name=f"Two-Tower-BEST-Seed-{best_seed}") as best_run:
    
    mlflow.log_params({
        "random_seed": best_seed,
        "epochs": 10,
        "batch_size": 256,
        "learning_rate": 1e-3,
        "embedding_dim": embedding_dim,
        "user_cat_count": len(user_cat_dims_map),
        "item_cat_count": len(items_cat_dims_map),
        "selection_method": "best_of_5_seeds"
    })
    
    # Log best model's metrics
    best_metrics = [r['metrics'] for r in all_results if r['seed'] == best_seed][0]
    mlflow.log_metrics(best_metrics)
    
    # Create signature for best model
    try:
        sample_batch = next(iter(train_loader))
        user_inputs, items_inputs, _ = sample_batch
        user_num, user_cat = user_inputs
        items_num, items_cat = items_inputs
        
        input_example_np = {
            "user_num": user_num[:1].cpu().numpy(),
            "user_cat": user_cat[:1].cpu().numpy(),
            "items_num": items_num[:1].cpu().numpy(),
            "items_cat": items_cat[:1].cpu().numpy(),
        }
        
        device = next(model.parameters()).device
        user_inputs_device = (user_num[:1].to(device), user_cat[:1].to(device))
        items_inputs_device = (items_num[:1].to(device), items_cat[:1].to(device))
        
        with torch.no_grad():
            score_matrix_out, user_emb_out, items_emb_out = model(user_inputs_device, items_inputs_device)
        
        input_schema = Schema([
            ColSpec("double", "user_num"),
            ColSpec("long", "user_cat"),
            ColSpec("double", "items_num"),
            ColSpec("long", "items_cat"),
        ])
        
        output_schema = Schema([
            ColSpec("double", "score_matrix"),
            ColSpec("double", "user_embedding"),
            ColSpec("double", "item_embedding"),
        ])
        
        model_signature = ModelSignature(inputs=input_schema, outputs=output_schema)
        
    except Exception as e:
        print(f"ERROR: Failed to create signature: {e}")
        model_signature = None
        input_example_np = None
    
    # Log the best model
    mlflow.pytorch.log_model(
        pytorch_model=model, 
        artifact_path="two_tower_model",
        registered_model_name="Retail_TwoTower_Recommender",
        input_example=input_example_np,
        signature=model_signature
    )
    
    # Log scalers
    with open("user_scaler.pkl", "wb") as f:
        pickle.dump(user_scaler, f)
    with open("item_scaler.pkl", "wb") as f:
        pickle.dump(item_scaler, f)
    
    mlflow.log_artifact("user_scaler.pkl", "preprocessing")
    mlflow.log_artifact("item_scaler.pkl", "preprocessing")
    
    print(f"\n✅ Best model saved to MLflow (Run ID: {best_run.info.run_uuid})")

print("\n" + "=" * 70)

TRAINING WITH MULTIPLE RANDOM SEEDS

SEED 1/5: 42
--- Starting Two-Tower Model Pipeline (Seed: 42) ---
Reading data from PostgreSQL table: retail_feature_engg_done...
Read and converted 104,023 records to Pandas.
Final training samples: 79,783, Test samples: 19,946

--- Starting Training (In-Batch Negative Sampling) on cpu ---
Epoch 1/10, Loss (CrossEntropy): 5.0274
Epoch 2/10, Loss (CrossEntropy): 4.8525
Epoch 3/10, Loss (CrossEntropy): 4.8179
Epoch 4/10, Loss (CrossEntropy): 4.7939
Epoch 5/10, Loss (CrossEntropy): 4.7735
Epoch 6/10, Loss (CrossEntropy): 4.7604
Epoch 7/10, Loss (CrossEntropy): 4.7518
Epoch 8/10, Loss (CrossEntropy): 4.7461
Epoch 9/10, Loss (CrossEntropy): 4.7405
Epoch 10/10, Loss (CrossEntropy): 4.7316
--- Training Complete ---

📊 Results for Seed 42:
  HR@1:  0.3344
  HR@10: 0.8190
  MRR:   0.4960
  ✅ NEW BEST MODEL! (HR@10: 0.8190)
🏃 View run Two-Tower-Seed-42 at: https://dbc-9a4f847e-bb18.cloud.databricks.com/ml/experiments/1054156121569192/runs/8b8dbd0827ea44c3a60

  "inputs": {
    "user_num": [
      [
        -0.18218335509300232,
        -0.21573451161384583,
        -0.35019975900650024,
        -0.3498169183731079,
        -0.35586977005004883,
        -0.3592550456523895,
        -1.108251929283142,
        -0.1604270190000534,
        -0.14910228550434113,
        -0.21633760631084442,
        0.17039991915225983
      ]
    ],
    "user_cat": [
      [
        49783
      ]
    ],
    "items_num": [
      [
        -0.35160961747169495,
        -0.3513142466545105,
        -0.35710036754608154,
        -0.3465733230113983,
        -0.2786429226398468,
        0.2788151204586029,
        0.11060188710689545,
        1.049331545829773,
        1.055850863456726,
        1.0033655166625977,
        -1.2459255456924438
      ]
    ],
    "items_cat": [
      [
        15687,
        56,
        2456
      ]
    ]
  }
}. Alternatively, you can avoid passing input example and pass model signature instead when logging the model. To ensure the i


✅ Best model saved to MLflow (Run ID: ecf69201fee541be812a46a973bbb1b7)
🏃 View run Two-Tower-BEST-Seed-456 at: https://dbc-9a4f847e-bb18.cloud.databricks.com/ml/experiments/1054156121569192/runs/ecf69201fee541be812a46a973bbb1b7
🧪 View experiment at: https://dbc-9a4f847e-bb18.cloud.databricks.com/ml/experiments/1054156121569192

