# Experiment 113: Domain-Adversarial Training

## Goal
Learn solvent-invariant features that generalize better to unseen solvents.

## Key Insight
- ALL approaches fall on the SAME CV-LB line: LB = 4.09 × CV + 0.0546
- The intercept (0.0546) already exceeds the target (0.0347): since CV ≥ 0, every model on this line scores LB ≥ 0.0546, so the target is UNREACHABLE with current approaches
- We need to CHANGE THE RELATIONSHIP, not improve CV

## Domain-Adversarial Strategy
1. Feature extractor: MLP shared by the predictor and the discriminator; maps the input descriptors to a hidden representation
2. Predictor: Predicts yields from features
3. Domain discriminator: Predicts which solvent the sample came from (for mixtures, the label is solvent A only)
4. Training: Feature extractor is trained to FOOL the discriminator (gradient reversal)

## Why This Might Work
- Forces the model to learn features that don't depend on solvent identity
- These features should generalize better to unseen solvents
- Could reduce the extrapolation error (intercept)

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import sys
import os
import warnings
warnings.filterwarnings('ignore')

# Add data path
sys.path.append('/home/data/')

from utils import (
    INPUT_LABELS_FULL_SOLVENT, INPUT_LABELS_SINGLE_SOLVENT, 
    INPUT_LABELS_NUMERIC, INPUT_LABELS_SINGLE_FEATURES, 
    INPUT_LABELS_FULL_FEATURES,
    generate_leave_one_out_splits, generate_leave_one_ramp_out_splits
)

# Override load functions to use local paths
DATA_PATH = '/home/data/'
TARGET_LABELS = ['Product 2', 'Product 3', 'SM']

def load_data(name="full"):
    """Load the inputs and targets of one of the two yield datasets.

    Args:
        name: ``"full"`` reads ``catechol_full_data_yields.csv`` and keeps the
            ``INPUT_LABELS_FULL_SOLVENT`` columns as inputs;
            ``"single_solvent"`` reads ``catechol_single_solvent_yields.csv``
            and keeps the ``INPUT_LABELS_SINGLE_SOLVENT`` columns.

    Returns:
        Tuple ``(X, Y)``: the selected input columns and the ``TARGET_LABELS``
        columns of the same CSV, both as DataFrames.
    """
    assert name in ["full", "single_solvent"]
    wants_full = name == "full"
    if wants_full:
        csv_path = f'{DATA_PATH}catechol_full_data_yields.csv'
    else:
        csv_path = f'{DATA_PATH}catechol_single_solvent_yields.csv'
    table = pd.read_csv(csv_path)
    input_columns = INPUT_LABELS_FULL_SOLVENT if wants_full else INPUT_LABELS_SINGLE_SOLVENT
    return table[input_columns], table[TARGET_LABELS]

def load_features(name="spange_descriptors"):
    """Read the ``<name>_lookup.csv`` table from ``DATA_PATH``.

    Args:
        name: One of the lookup table names accepted by the assertion below.

    Returns:
        DataFrame indexed by the first CSV column.
    """
    assert name in ["spange_descriptors", "acs_pca_descriptors", "drfps_catechol", "fragprints", "smiles"]
    lookup_path = f'{DATA_PATH}{name}_lookup.csv'
    return pd.read_csv(lookup_path, index_col=0)

print("Imports successful")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

Imports successful
PyTorch version: 2.2.0+cu118
CUDA available: True


In [2]:
# Base classes and feature engineering
from abc import ABC, abstractmethod
from functools import reduce

torch.set_default_dtype(torch.double)

class SmilesFeaturizer(ABC):
    """Placeholder interface for featurizers; subclasses override both methods."""

    def __init__(self):
        raise NotImplementedError

    def featurize(self, X, Y):
        """Turn raw inputs into model features; must be overridden.

        The original signature omitted ``self``, so calling the method on an
        instance raised ``TypeError`` (wrong argument count) instead of the
        intended ``NotImplementedError``.
        """
        raise NotImplementedError

class BaseModel(ABC):
    """Minimal train/predict interface shared by the models in this notebook."""

    def __init__(self):
        pass

    def train_model(self, X_train, y_train):
        """Fit the model on training inputs and targets; must be overridden."""
        raise NotImplementedError

    def predict(self, X=None):
        """Return predictions for ``X``; must be overridden.

        ``X`` is optional only so that the old zero-argument call keeps
        working; implementations in this file take the inputs as ``X``.
        """
        raise NotImplementedError

_SOLVENT_TABLE_CACHE = None

def feature_priority(name: str) -> int:
    """Rank a feature column by the prefix that names its source table.

    A higher rank wins when two highly correlated columns compete; names
    without a known prefix rank 0.
    """
    ranked_prefixes = (
        ("spange_", 5),
        ("acs_", 4),
        ("drfps_", 3),
        ("frag_", 2),
        ("smiles_", 1),
    )
    for prefix, rank in ranked_prefixes:
        if name.startswith(prefix):
            return rank
    return 0

def filter_correlated_features(df, threshold=0.8):
    """Drop constant columns and one column of every highly correlated pair.

    Args:
        df: Feature table; only its numeric columns are analysed.
        threshold: A pair is considered redundant when its absolute
            correlation is strictly greater than this value.

    Returns:
        Tuple ``(filtered_df, dropped_columns)``. When ``df`` has no numeric
        columns it is returned unchanged together with an empty list.
    """
    numeric_part = df.select_dtypes(include=[np.number])
    if numeric_part.shape[1] == 0:
        return df, []

    # Zero-variance columns are removed up front and reported as dropped.
    spread = numeric_part.std(axis=0)
    constant_cols = spread[spread == 0].index.tolist()
    if constant_cols:
        numeric_part = numeric_part.drop(columns=constant_cols)

    abs_corr = numeric_part.corr().abs()
    strict_upper = np.triu(np.ones(abs_corr.shape), k=1).astype(bool)
    upper = abs_corr.where(strict_upper).fillna(0.0)
    names = upper.columns.tolist()

    # argwhere walks the matrix row by row, i.e. the same (i, j > i) order
    # as a nested loop over the upper triangle.
    redundant = np.argwhere(strict_upper & (upper.to_numpy() > threshold))

    dropped = set()
    for row, col in redundant:
        first, second = names[row], names[col]
        if first in dropped or second in dropped:
            continue
        rank_first = feature_priority(first)
        rank_second = feature_priority(second)
        if rank_first > rank_second:
            loser = second
        elif rank_second > rank_first:
            loser = first
        else:
            # Equal priority: drop whichever sits later in df... unless
            # `first` is the later one, in which case it is dropped.
            pos_first = df.columns.get_loc(first)
            pos_second = df.columns.get_loc(second)
            loser = first if pos_first > pos_second else second
        dropped.add(loser)

    all_to_drop = list(set(constant_cols).union(dropped))
    return df.drop(columns=all_to_drop, errors="ignore"), all_to_drop

def add_numeric_features(X_numeric, rt_mean=None):
    """Return a copy of ``X_numeric`` with derived process features appended.

    When both ``"Temperature"`` and ``"Residence Time"`` columns are present,
    ``Temperature`` is shifted by 273.15 and four columns are added:
    ``T_x_RT``, ``RT_log``, ``T_inv`` and ``RT_scaled``. Otherwise the copy is
    returned unchanged.

    Args:
        X_numeric: DataFrame of numeric inputs; it is not modified.
        rt_mean: Value that ``Residence Time`` is divided by to produce
            ``RT_scaled``. ``None`` (default) keeps the original behaviour of
            using the mean of the rows passed in, which makes ``RT_scaled``
            depend on the batch: the same row gets a different value in a
            training fold and in a test fold. Pass the training-fold mean to
            scale both consistently.

    Returns:
        New DataFrame with the columns described above.
    """
    X_num = X_numeric.copy()
    if not {"Temperature", "Residence Time"} <= set(X_num.columns):
        return X_num

    X_num["Temperature"] = X_num["Temperature"] + 273.15
    T = X_num["Temperature"]
    rt = X_num["Residence Time"]
    X_num["T_x_RT"] = T * rt
    # Small offset keeps the log finite for a zero residence time.
    X_num["RT_log"] = np.log(rt + 1e-6)
    X_num["T_inv"] = 1 / T
    X_num["RT_scaled"] = rt / (rt.mean() if rt_mean is None else rt_mean)
    return X_num

print("Feature engineering functions defined")

Feature engineering functions defined


In [3]:
def build_solvent_feature_table(threshold=0.90):
    """Build (once) the per-solvent descriptor table used by the featurizers.

    Loads the spange, ACS-PCA, drfps and fragprints lookup tables, prefixes
    their columns with the source name, outer-merges them on
    ``"SOLVENT NAME"`` and removes constant / highly correlated columns with
    ``filter_correlated_features``.

    The result is cached in the module-level ``_SOLVENT_TABLE_CACHE``. The
    threshold used to build it is remembered next to it, so a later call with
    a different ``threshold`` rebuilds the table instead of silently returning
    one filtered with another value.

    Args:
        threshold: Correlation threshold forwarded to
            ``filter_correlated_features``.

    Returns:
        DataFrame indexed by ``"SOLVENT NAME"``.
    """
    global _SOLVENT_TABLE_CACHE, _SOLVENT_TABLE_CACHE_THRESHOLD
    # Read through globals(): the threshold marker does not exist before the
    # first build. A cache with no recorded threshold is returned as-is.
    cached_threshold = globals().get("_SOLVENT_TABLE_CACHE_THRESHOLD")
    if _SOLVENT_TABLE_CACHE is not None and cached_threshold in (None, threshold):
        return _SOLVENT_TABLE_CACHE

    print(">>> Building solvent feature table...")
    # Insertion order is the merge order.
    prefixes = {
        "spange_descriptors": "spange",
        "acs_pca_descriptors": "acs",
        "drfps_catechol": "drfps",
        "fragprints": "frag",
    }
    fingerprint_sources = {"drfps_catechol", "fragprints"}

    dfs = []
    for src, prefix in prefixes.items():
        df_src = load_features(src).copy()
        if "SOLVENT NAME" not in df_src.columns:
            # Name the index first so the new column is called
            # "SOLVENT NAME" whatever the index was called in the CSV.
            df_src = df_src.rename_axis("SOLVENT NAME").reset_index()
        if src in fingerprint_sources:
            # Drop columns that are all 0, all 1, or that sum to exactly 1.
            df_src = df_src.loc[:, (df_src != 0).any(axis=0)]
            df_src = df_src.loc[:, (df_src != 1).any(axis=0)]
            bit_counts = df_src.drop(columns=["SOLVENT NAME"]).sum(axis=0)
            df_src = df_src.drop(columns=bit_counts[bit_counts == 1].index)
        renamed = {c: f"{prefix}_{c}" for c in df_src.columns if c != "SOLVENT NAME"}
        dfs.append(df_src.rename(columns=renamed))

    combined = reduce(lambda left, right: pd.merge(left, right, on="SOLVENT NAME", how="outer"), dfs)
    combined = combined.set_index("SOLVENT NAME")
    print(f"Combined feature table shape (before corr filter): {combined.shape}")
    combined, _ = filter_correlated_features(combined, threshold=threshold)
    print(f"Final solvent feature table shape: {combined.shape}")

    _SOLVENT_TABLE_CACHE = combined
    _SOLVENT_TABLE_CACHE_THRESHOLD = threshold
    return combined

print("build_solvent_feature_table defined")

build_solvent_feature_table defined


In [4]:
# Featurizers with WEIGHTED AVERAGE for mixtures
class PrecomputedFeaturizer:
    """Featurizer for single-solvent rows backed by the solvent feature table."""

    def __init__(self):
        self.featurizer = build_solvent_feature_table()
        # A one-row dummy frame is pushed through add_numeric_features only
        # to count how many numeric columns it produces.
        dummy_num = pd.DataFrame([[0] * len(INPUT_LABELS_NUMERIC)], columns=INPUT_LABELS_NUMERIC)
        numeric_dim = add_numeric_features(dummy_num).shape[1]
        self.feats_dim = numeric_dim + self.featurizer.shape[1]

    def featurize(self, X):
        """Concatenate engineered numeric features with the solvent descriptors.

        Args:
            X: DataFrame with the ``INPUT_LABELS_NUMERIC`` columns and a
                ``"SOLVENT NAME"`` column whose values index the solvent table.

        Returns:
            Double tensor with one row per row of ``X``.
        """
        X_numeric = add_numeric_features(X[INPUT_LABELS_NUMERIC].copy())
        X_solvent = self.featurizer.loc[X["SOLVENT NAME"]]
        X_out = np.concatenate([X_numeric.values, X_solvent.values], axis=1)
        return torch.tensor(X_out, dtype=torch.double)

    def get_solvent_ids(self, X):
        """Get solvent IDs for domain classification.

        IDs are positions in the alphabetically sorted index of the solvent
        table.

        Raises:
            KeyError: if a solvent in ``X`` is not in the solvent table. The
                previous fallback labelled such rows as class 0, i.e. as the
                first known solvent, which corrupts the domain target.
        """
        solvent_to_id = {
            name: idx for idx, name in enumerate(sorted(self.featurizer.index.tolist()))
        }
        solvent_names = X["SOLVENT NAME"].values
        unknown = sorted({str(s) for s in solvent_names if s not in solvent_to_id})
        if unknown:
            raise KeyError(f"Solvent(s) missing from the feature table: {unknown}")
        return torch.tensor([solvent_to_id[s] for s in solvent_names], dtype=torch.long)

class PrecomputedFeaturizerMixed:
    """Featurizer for solvent-mixture rows backed by the solvent feature table."""

    def __init__(self):
        self.featurizer = build_solvent_feature_table()
        # A one-row dummy frame is pushed through add_numeric_features only
        # to count how many numeric columns it produces.
        dummy_num = pd.DataFrame([[0] * len(INPUT_LABELS_NUMERIC)], columns=INPUT_LABELS_NUMERIC)
        numeric_dim = add_numeric_features(dummy_num).shape[1]
        self.feats_dim = numeric_dim + self.featurizer.shape[1]

    def featurize(self, X):
        """Concatenate numeric features with a blend of both solvents' descriptors.

        Args:
            X: DataFrame with the ``INPUT_LABELS_NUMERIC`` columns plus
                ``"SOLVENT A NAME"``, ``"SOLVENT B NAME"`` and ``"SolventB%"``.

        Returns:
            Double tensor with one row per row of ``X``.
        """
        X_numeric = add_numeric_features(X[INPUT_LABELS_NUMERIC].copy())
        A = self.featurizer.loc[X["SOLVENT A NAME"]].values
        B = self.featurizer.loc[X["SOLVENT B NAME"]].values
        # assumes "SolventB%" is a 0-1 fraction, not a 0-100 percentage — TODO confirm
        frac_B = X["SolventB%"].values.reshape(-1, 1)
        frac_A = 1 - frac_B
        mixed = A * frac_A + B * frac_B  # WEIGHTED AVERAGE
        X_out = np.concatenate([X_numeric.values, mixed], axis=1)
        return torch.tensor(X_out, dtype=torch.double)

    def get_solvent_ids(self, X):
        """Get solvent IDs for domain classification (use primary solvent A).

        IDs are positions in the alphabetically sorted index of the solvent
        table; solvent B and the mixing fraction are not used.

        Raises:
            KeyError: if a solvent A in ``X`` is not in the solvent table. The
                previous fallback labelled such rows as class 0, i.e. as the
                first known solvent, which corrupts the domain target.
        """
        solvent_to_id = {
            name: idx for idx, name in enumerate(sorted(self.featurizer.index.tolist()))
        }
        solvent_names = X["SOLVENT A NAME"].values
        unknown = sorted({str(s) for s in solvent_names if s not in solvent_to_id})
        if unknown:
            raise KeyError(f"Solvent(s) missing from the feature table: {unknown}")
        return torch.tensor([solvent_to_id[s] for s in solvent_names], dtype=torch.long)

print("Featurizers defined")

Featurizers defined


In [5]:
# Gradient Reversal Layer for domain-adversarial training
class GradientReversalFunction(torch.autograd.Function):
    """Pass values through unchanged; negate and scale gradients on the way back."""

    @staticmethod
    def forward(ctx, x, alpha):
        # Keep the scale for the backward pass; the output is a view of x.
        ctx.alpha = alpha
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        flipped = -grad_output
        # One entry per forward input: the reversed gradient for x and
        # None for alpha, which receives no gradient.
        return flipped * ctx.alpha, None

class GradientReversalLayer(nn.Module):
    """Module wrapper that applies ``GradientReversalFunction`` with a fixed ``alpha``."""

    def __init__(self, alpha=1.0):
        super().__init__()
        # Scale applied to the reversed gradient in the backward pass.
        self.alpha = alpha

    def forward(self, x):
        scale = self.alpha
        return GradientReversalFunction.apply(x, scale)

print("Gradient Reversal Layer defined")

Gradient Reversal Layer defined


In [6]:
# Domain-Adversarial Neural Network
class DANNModel(nn.Module):
    """Network with a shared feature extractor and two heads.

    The yield head outputs three sigmoid-squashed values; the domain head
    outputs ``n_solvents`` logits and sees the shared features through a
    gradient-reversal layer.

    Args:
        input_dim: Number of input features.
        n_solvents: Number of domain classes.
        hidden_dim: Width of the shared feature representation.
        alpha: Scale of the reversed gradient.
    """

    def __init__(self, input_dim, n_solvents, hidden_dim=64, alpha=1.0):
        super().__init__()

        # Shared trunk: input -> 128 -> hidden_dim, dropout after the first layer.
        trunk_layers = [
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, hidden_dim),
            nn.ReLU(),
        ]
        self.feature_extractor = nn.Sequential(*trunk_layers)

        # Yield head; the sigmoid bounds each output to (0, 1).
        yield_layers = [
            nn.Linear(hidden_dim, 32),
            nn.ReLU(),
            nn.Linear(32, 3),
            nn.Sigmoid(),
        ]
        self.predictor = nn.Sequential(*yield_layers)

        # Domain head, fed through the gradient-reversal layer.
        self.gradient_reversal = GradientReversalLayer(alpha)
        domain_layers = [
            nn.Linear(hidden_dim, 32),
            nn.ReLU(),
            nn.Linear(32, n_solvents),
        ]
        self.discriminator = nn.Sequential(*domain_layers)

    def forward(self, x):
        """Return ``(yield_predictions, domain_logits)`` for a batch ``x``."""
        shared = self.feature_extractor(x)
        yield_out = self.predictor(shared)
        domain_out = self.discriminator(self.gradient_reversal(shared))
        return yield_out, domain_out

    def predict_only(self, x):
        """Return only the yield predictions, skipping the domain head."""
        return self.predictor(self.feature_extractor(x))

print("DANNModel defined")

DANNModel defined


In [7]:
# Domain-Adversarial Model wrapper
class DomainAdversarialModel(BaseModel):
    """
    Uses domain-adversarial training to learn solvent-invariant features.

    The key idea is to train a feature extractor that:
    1. Produces features that are good for predicting yields
    2. Produces features that are BAD for predicting which solvent the sample came from

    This forces the model to learn features that don't depend on solvent identity,
    which should generalize better to unseen solvents.

    Args:
        data: ``"single"`` selects ``PrecomputedFeaturizer``; any other value
            selects ``PrecomputedFeaturizerMixed``.
        alpha: Scale of the reversed gradient that flows from the domain
            discriminator into the feature extractor.
        n_epochs: Number of full-batch optimisation steps.
        lr: Adam learning rate.
        verbose: Print the losses every 20 epochs.
    """

    def __init__(self, data="single", alpha=1.0, n_epochs=100, lr=0.001, verbose=False):
        self.data_mode = data
        # Hyper-parameters read back from a pandas row arrive as numpy
        # floats; range() rejects a float epoch count, so coerce here.
        self.alpha = float(alpha)
        self.n_epochs = int(n_epochs)
        self.lr = float(lr)
        self.verbose = verbose

        if data == "single":
            self.featurizer = PrecomputedFeaturizer()
        else:
            self.featurizer = PrecomputedFeaturizerMixed()

        self.model = None
        # One domain class per row of the solvent table, so the discriminator
        # always matches the id range produced by get_solvent_ids.
        self.n_solvents = len(self.featurizer.featurizer.index)

    def train_model(self, train_X, train_Y, device=None, verbose=False):
        """Fit a fresh ``DANNModel`` on one training fold.

        Args:
            train_X: Input DataFrame accepted by the selected featurizer.
            train_Y: Target DataFrame; its values are used as-is.
            device: Optional torch device for the model and the training
                tensors. ``None`` leaves everything where torch creates it.
            verbose: Enables progress printing for this call, in addition to
                the ``verbose`` flag given to the constructor.
        """
        log_progress = bool(verbose or self.verbose)

        X_tensor = self.featurizer.featurize(train_X)
        Y_tensor = torch.tensor(train_Y.values, dtype=torch.double)
        domain_ids = self.featurizer.get_solvent_ids(train_X)
        # NOTE(review): features reach the network unscaled — consider
        # standardising with training-fold statistics; confirm the impact.

        self.model = DANNModel(X_tensor.shape[1], self.n_solvents, alpha=self.alpha)
        self.model.double()

        if device is not None:
            self.model.to(device)
            X_tensor = X_tensor.to(device)
            Y_tensor = Y_tensor.to(device)
            domain_ids = domain_ids.to(device)

        optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
        yield_criterion = nn.MSELoss()
        domain_criterion = nn.CrossEntropyLoss()

        self.model.train()
        for epoch in range(self.n_epochs):
            optimizer.zero_grad()

            yield_pred, domain_logits = self.model(X_tensor)
            yield_loss = yield_criterion(yield_pred, Y_tensor)
            domain_loss = domain_criterion(domain_logits, domain_ids)

            # Total loss (domain loss is already reversed by gradient reversal layer)
            total_loss = yield_loss + domain_loss
            total_loss.backward()
            optimizer.step()

            if log_progress and (epoch + 1) % 20 == 0:
                print(f"Epoch {epoch+1}/{self.n_epochs}, Yield Loss: {yield_loss.item():.6f}, Domain Loss: {domain_loss.item():.6f}")

        self.model.eval()

    def predict(self, X):
        """Predict the three yields for each row of ``X``.

        Predictions are clipped at zero; rows whose total exceeds 1 are
        rescaled to sum to 1, other rows are left as they are.

        Returns:
            Double CPU tensor with one row per row of ``X``.

        Raises:
            RuntimeError: if called before ``train_model``.
        """
        if self.model is None:
            raise RuntimeError("train_model() must be called before predict()")

        X_tensor = self.featurizer.featurize(X)
        # Follow the model to whatever device train_model put it on.
        model_device = next(self.model.parameters()).device

        with torch.no_grad():
            predictions = self.model.predict_only(X_tensor.to(model_device))

        # Clip and normalize
        out = predictions.cpu().numpy()
        out = np.clip(out, a_min=0.0, a_max=None)
        if out.shape[1] > 1:
            totals = out.sum(axis=1, keepdims=True)
            divisor = np.maximum(totals, 1.0)
            out = out / divisor

        return torch.tensor(out, dtype=torch.double)

print("DomainAdversarialModel defined")

DomainAdversarialModel defined


In [8]:
# Quick CV evaluation
import tqdm

def _run_cv_folds(model_class, data_mode, split_generator, desc, model_kwargs):
    """Train a fresh model per fold and return stacked (predictions, targets)."""
    fold_preds = []
    fold_truth = []
    for fold_idx, split in tqdm.tqdm(enumerate(split_generator), desc=desc):
        (train_X, train_Y), (test_X, test_Y) = split
        model = model_class(data=data_mode, **model_kwargs)
        model.train_model(train_X, train_Y)
        fold_preds.append(model.predict(test_X).numpy())
        fold_truth.append(test_Y.values)
    return np.vstack(fold_preds), np.vstack(fold_truth)


def evaluate_cv(model_class, **kwargs):
    """Cross-validate ``model_class`` on both datasets and print the MSEs.

    The single-solvent data uses ``generate_leave_one_out_splits`` and the
    full data uses ``generate_leave_one_ramp_out_splits``. ``kwargs`` are
    forwarded to the model constructor for every fold.

    Returns:
        Tuple ``(combined_mse, mse_single, mse_full)`` where the combined
        value weights each MSE by its number of predicted entries.
    """
    # Single solvent
    X, Y = load_data("single_solvent")
    preds_single, true_single = _run_cv_folds(
        model_class, 'single', generate_leave_one_out_splits(X, Y), "single", kwargs
    )
    mse_single = np.mean((preds_single - true_single) ** 2)
    print(f"Single Solvent MSE: {mse_single:.6f}")

    # Full data
    X, Y = load_data("full")
    preds_full, true_full = _run_cv_folds(
        model_class, 'full', generate_leave_one_ramp_out_splits(X, Y), "full", kwargs
    )
    mse_full = np.mean((preds_full - true_full) ** 2)
    print(f"Full Data MSE: {mse_full:.6f}")

    # Combined: entry-count weighted average of the two MSEs
    n_single = preds_single.size
    n_full = preds_full.size
    weighted_sum = mse_single * n_single + mse_full * n_full
    combined_mse = weighted_sum / (n_single + n_full)
    print(f"\nCombined MSE (CV score): {combined_mse:.6f}")

    return combined_mse, mse_single, mse_full

print("Evaluation function defined")

Evaluation function defined


In [9]:
# Sweep the gradient-reversal strength (alpha); epochs and learning rate stay fixed
test_configs = [
    {'alpha': alpha_value, 'n_epochs': 100, 'lr': 0.001}
    for alpha_value in (0.5, 1.0, 2.0)
]

results = []
banner = "=" * 60

print(banner)
print("Testing DomainAdversarialModel with different alpha values")
print(banner)

for config in test_configs:
    print(f"\n--- Config: {config} ---")
    cv, single_mse, full_mse = evaluate_cv(DomainAdversarialModel, **config)
    # Leaderboard score implied by the CV-LB line quoted in the notebook header
    expected_lb = 4.09 * cv + 0.0546
    print(f"Expected LB from line: {expected_lb:.4f}")
    row = dict(config)
    row.update(cv=cv, single_mse=single_mse, full_mse=full_mse, expected_lb=expected_lb)
    results.append(row)

results_df = pd.DataFrame(results)
print("\n" + banner)
print("Summary of Results")
print(banner)
print(results_df.to_string(index=False))

Testing DomainAdversarialModel with different alpha values

--- Config: {'alpha': 0.5, 'n_epochs': 100, 'lr': 0.001} ---


single: 0it [00:00, ?it/s]

>>> Building solvent feature table...
Combined feature table shape (before corr filter): (26, 113)
Final solvent feature table shape: (26, 66)


single: 1it [00:00,  1.11it/s]

single: 2it [00:01,  1.98it/s]

single: 3it [00:01,  2.69it/s]

single: 4it [00:01,  3.24it/s]

single: 5it [00:01,  3.60it/s]

single: 6it [00:01,  3.91it/s]

single: 7it [00:02,  4.11it/s]

single: 8it [00:02,  4.23it/s]

single: 9it [00:02,  4.38it/s]

single: 10it [00:02,  4.48it/s]

single: 11it [00:03,  4.51it/s]

single: 12it [00:03,  4.60it/s]

single: 13it [00:03,  4.61it/s]

single: 14it [00:03,  4.66it/s]

single: 15it [00:03,  4.70it/s]

single: 16it [00:04,  4.66it/s]

single: 17it [00:04,  4.65it/s]

single: 18it [00:04,  4.68it/s]

single: 19it [00:04,  4.73it/s]

single: 20it [00:04,  4.68it/s]

single: 21it [00:05,  4.69it/s]

single: 22it [00:05,  4.68it/s]

single: 23it [00:05,  4.68it/s]

single: 24it [00:05,  4.51it/s]

single: 24it [00:05,  4.09it/s]




Single Solvent MSE: 0.153842


full: 0it [00:00, ?it/s]

full: 1it [00:00,  3.40it/s]

full: 2it [00:00,  3.12it/s]

full: 3it [00:00,  3.24it/s]

full: 4it [00:01,  3.35it/s]

full: 5it [00:01,  3.34it/s]

full: 6it [00:01,  3.43it/s]

full: 7it [00:02,  3.47it/s]

full: 8it [00:02,  3.61it/s]

full: 9it [00:02,  3.68it/s]

full: 10it [00:02,  3.76it/s]

full: 11it [00:03,  3.73it/s]

full: 12it [00:03,  3.70it/s]

full: 13it [00:03,  3.65it/s]

full: 13it [00:03,  3.54it/s]




Full Data MSE: 0.196189

Combined MSE (CV score): 0.181436
Expected LB from line: 0.7967

--- Config: {'alpha': 1.0, 'n_epochs': 100, 'lr': 0.001} ---


single: 0it [00:00, ?it/s]

single: 1it [00:00,  4.85it/s]

single: 2it [00:00,  4.81it/s]

single: 3it [00:00,  4.83it/s]

single: 4it [00:00,  4.80it/s]

single: 5it [00:01,  4.78it/s]

single: 6it [00:01,  4.74it/s]

single: 7it [00:01,  4.75it/s]

single: 8it [00:01,  4.83it/s]

single: 9it [00:01,  4.44it/s]

single: 10it [00:02,  4.47it/s]

single: 11it [00:02,  4.53it/s]

single: 12it [00:02,  4.53it/s]

single: 13it [00:02,  4.64it/s]

single: 14it [00:02,  4.67it/s]

single: 15it [00:03,  4.71it/s]

single: 16it [00:03,  4.72it/s]

single: 17it [00:03,  4.74it/s]

single: 18it [00:03,  4.74it/s]

single: 19it [00:04,  4.75it/s]

single: 20it [00:04,  4.73it/s]

single: 21it [00:04,  4.66it/s]

single: 22it [00:04,  4.69it/s]

single: 23it [00:04,  4.75it/s]

single: 24it [00:05,  4.75it/s]

single: 24it [00:05,  4.70it/s]




Single Solvent MSE: 0.156925


full: 0it [00:00, ?it/s]

full: 1it [00:00,  3.62it/s]

full: 2it [00:00,  3.72it/s]

full: 3it [00:00,  3.74it/s]

full: 4it [00:01,  3.74it/s]

full: 5it [00:01,  3.83it/s]

full: 6it [00:01,  3.81it/s]

full: 7it [00:01,  3.75it/s]

full: 8it [00:02,  3.69it/s]

full: 9it [00:02,  3.75it/s]

full: 10it [00:02,  3.74it/s]

full: 11it [00:02,  3.76it/s]

full: 12it [00:03,  3.73it/s]

full: 13it [00:03,  3.69it/s]

full: 13it [00:03,  3.73it/s]




Full Data MSE: 0.174328

Combined MSE (CV score): 0.168265
Expected LB from line: 0.7428

--- Config: {'alpha': 2.0, 'n_epochs': 100, 'lr': 0.001} ---


single: 0it [00:00, ?it/s]

single: 1it [00:00,  4.97it/s]

single: 2it [00:00,  4.87it/s]

single: 3it [00:00,  4.83it/s]

single: 4it [00:00,  4.86it/s]

single: 5it [00:01,  4.83it/s]

single: 6it [00:01,  4.80it/s]

single: 7it [00:01,  4.79it/s]

single: 8it [00:01,  4.81it/s]

single: 9it [00:01,  4.74it/s]

single: 10it [00:02,  4.72it/s]

single: 11it [00:02,  4.71it/s]

single: 12it [00:02,  4.71it/s]

single: 13it [00:02,  4.71it/s]

single: 14it [00:02,  4.71it/s]

single: 15it [00:03,  4.71it/s]

single: 16it [00:03,  4.63it/s]

single: 17it [00:03,  4.44it/s]

single: 18it [00:03,  4.51it/s]

single: 19it [00:04,  4.59it/s]

single: 20it [00:04,  4.65it/s]

single: 21it [00:04,  4.54it/s]

single: 22it [00:04,  4.55it/s]

single: 23it [00:04,  4.63it/s]

single: 24it [00:05,  4.67it/s]

single: 24it [00:05,  4.68it/s]




Single Solvent MSE: 0.146408


full: 0it [00:00, ?it/s]

full: 1it [00:00,  3.97it/s]

full: 2it [00:00,  3.94it/s]

full: 3it [00:00,  3.84it/s]

full: 4it [00:01,  3.91it/s]

full: 5it [00:01,  3.87it/s]

full: 6it [00:01,  3.85it/s]

full: 7it [00:01,  3.88it/s]

full: 8it [00:02,  3.85it/s]

full: 9it [00:02,  3.86it/s]

full: 10it [00:02,  3.81it/s]

full: 11it [00:02,  3.78it/s]

full: 12it [00:03,  3.82it/s]

full: 13it [00:03,  3.75it/s]

full: 13it [00:03,  3.83it/s]

Full Data MSE: 0.188014

Combined MSE (CV score): 0.173519
Expected LB from line: 0.7643

Summary of Results
 alpha  n_epochs    lr       cv  single_mse  full_mse  expected_lb
   0.5       100 0.001 0.181436    0.153842  0.196189     0.796674
   1.0       100 0.001 0.168265    0.156925  0.174328     0.742805
   2.0       100 0.001 0.173519    0.146408  0.188014     0.764294





In [None]:
# Choose the best configuration (lowest combined CV score)
best_idx = results_df['cv'].idxmin()
best_config = results_df.loc[best_idx]

print(f"\nBest configuration:")
print(f"  alpha: {best_config['alpha']}")
print(f"  CV: {best_config['cv']:.6f}")
print(f"  Expected LB from line: {best_config['expected_lb']:.4f}")

# Selecting one row of a mixed int/float frame yields a float64 Series, so
# n_epochs comes back as 100.0. Cast back to plain Python types: the model
# passes n_epochs to range(), which rejects floats.
best_alpha = float(best_config['alpha'])
best_n_epochs = int(best_config['n_epochs'])
best_lr = float(best_config['lr'])

In [None]:
# Save metrics of the best configuration together with the full sweep
import json

metrics = {
    'cv_score': float(best_config['cv']),
    'mse_single': float(best_config['single_mse']),
    'mse_full': float(best_config['full_mse']),
    'alpha': float(best_alpha),
    'n_epochs': int(best_n_epochs),
    'lr': float(best_lr),
    'all_results': results,
    'notes': 'DomainAdversarialModel - learns solvent-invariant features using gradient reversal.'
}

metrics_path = '/home/code/experiments/113_domain_adversarial/metrics.json'
# Create the experiment folder if needed so open() cannot fail on a missing
# directory after the whole sweep has already run.
os.makedirs(os.path.dirname(metrics_path), exist_ok=True)
with open(metrics_path, 'w') as f:
    json.dump(metrics, f, indent=2)

print("Metrics saved")

## Submission Cells (CORRECT FORMAT)

In [None]:
########### DO NOT CHANGE ANYTHING IN THIS CELL OTHER THAN THE MODEL #################
########### THIS MUST BE THE THIRD LAST CELL IN YOUR NOTEBOOK FOR A VALID SUBMISSION #################

import tqdm

X, Y = load_data("single_solvent")

split_generator = generate_leave_one_out_splits(X, Y)
all_predictions = []

for fold_idx, split in tqdm.tqdm(enumerate(split_generator)):
    (train_X, train_Y), (test_X, test_Y) = split

    model = DomainAdversarialModel(data='single', alpha=best_alpha, n_epochs=best_n_epochs, lr=best_lr)  # CHANGE THIS LINE ONLY
    model.train_model(train_X, train_Y)
    predictions = model.predict(test_X)

    # Move to CPU and convert to numpy
    predictions_np = predictions.detach().cpu().numpy()

    # Add metadata and flatten to long format
    for row_idx, row in enumerate(predictions_np):
        all_predictions.append({
            "task": 0,
            "fold": fold_idx,
            "row": row_idx,
            "target_1": row[0],
            "target_2": row[1],
            "target_3": row[2]
        })

# Save final submission
submission_single_solvent = pd.DataFrame(all_predictions)

########### DO NOT CHANGE ANYTHING IN THIS CELL OTHER THAN THE MODEL #################
########### THIS MUST BE THE THIRD LAST CELL IN YOUR NOTEBOOK FOR A VALID SUBMISSION #################

In [None]:
########### DO NOT CHANGE ANYTHING IN THIS CELL OTHER THAN THE MODEL #################
########### THIS MUST BE THE SECOND LAST CELL IN YOUR NOTEBOOK FOR A VALID SUBMISSION #################

X, Y = load_data("full")

split_generator = generate_leave_one_ramp_out_splits(X, Y)
all_predictions = []

for fold_idx, split in tqdm.tqdm(enumerate(split_generator)):
    (train_X, train_Y), (test_X, test_Y) = split

    model = DomainAdversarialModel(data='full', alpha=best_alpha, n_epochs=best_n_epochs, lr=best_lr)  # CHANGE THIS LINE ONLY
    model.train_model(train_X, train_Y)
    predictions = model.predict(test_X)

    # Move to CPU and convert to numpy
    predictions_np = predictions.detach().cpu().numpy()

    # Add metadata and flatten to long format
    for row_idx, row in enumerate(predictions_np):
        all_predictions.append({
            "task": 1,
            "fold": fold_idx,
            "row": row_idx,
            "target_1": row[0],
            "target_2": row[1],
            "target_3": row[2]
        })

# Save final submission
submission_full_data = pd.DataFrame(all_predictions)

########### DO NOT CHANGE ANYTHING IN THIS CELL OTHER THAN THE MODEL #################
########### THIS MUST BE THE SECOND LAST CELL IN YOUR NOTEBOOK FOR A VALID SUBMISSION #################

In [None]:
########### DO NOT CHANGE ANYTHING IN THIS CELL #################
########### THIS MUST BE THE FINAL CELL IN YOUR NOTEBOOK FOR A VALID SUBMISSION #################

submission = pd.concat([submission_single_solvent, submission_full_data])
submission = submission.reset_index(drop=True)
submission.index.name = "id"
submission.to_csv("/home/code/experiments/113_domain_adversarial/submission.csv", index=True)

# Also copy to main submission folder
import shutil
shutil.copy("/home/code/experiments/113_domain_adversarial/submission.csv", "/home/submission/submission.csv")

print(f"Submission shape: {submission.shape}")
print(f"Submission columns: {submission.columns.tolist()}")
print(f"\nFirst 5 rows:")
print(submission.head())
print(f"\nLast 5 rows:")
print(submission.tail())

# Read back and verify format
sub_check = pd.read_csv("/home/submission/submission.csv")
print(f"\nRead back columns: {sub_check.columns.tolist()}")
expected_cols = ['id', 'task', 'fold', 'row', 'target_1', 'target_2', 'target_3']
assert list(sub_check.columns) == expected_cols, f"Wrong columns: {list(sub_check.columns)}"
print(f"\n✅ FORMAT VERIFIED: {expected_cols}")
print(f"\n✅ MODEL CLASS: DomainAdversarialModel (matches CV computation)")

########### DO NOT CHANGE ANYTHING IN THIS CELL #################
########### THIS MUST BE THE FINAL CELL IN YOUR NOTEBOOK FOR A VALID SUBMISSION #################