# Experiment 055: Minimal Submission (Mean Predictor)

**Goal:** Create a MINIMAL submission to debug the evaluation error.

**Approach:** Use the EXACT official template code, only changing the model to predict the mean.

**Purpose:** If this works, we know the submission format is correct. If it fails, there's something else wrong.

In [1]:
# EXACT imports from official template
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from abc import ABC, abstractmethod
import tqdm

torch.set_default_dtype(torch.double)

# Data path for local execution
DATA_PATH = "/home/data"

print("Imports complete.")

Imports complete.


In [2]:
# EXACT constants from official template
INPUT_LABELS_FULL_SOLVENT = [
    "Residence Time",
    "Temperature",
    "SOLVENT A NAME",
    "SOLVENT B NAME",
    "SolventB%",
]

INPUT_LABELS_SINGLE_SOLVENT = [
    "Residence Time",
    "Temperature",
    "SOLVENT NAME",
]

INPUT_LABELS_NUMERIC = [
    "Residence Time",
    "Temperature",
]

INPUT_LABELS_SINGLE_FEATURES = [
    "SOLVENT NAME",
]

INPUT_LABELS_FULL_FEATURES = [
    "SOLVENT A NAME",
    "SOLVENT B NAME",
    "SolventB%"
]

TARGET_LABELS = [
    "Product 2",
    "Product 3",
    "SM",
]

print("Constants defined.")

Constants defined.


In [3]:
# EXACT data loading functions from official template (adapted for local paths)
def load_data(name="full"):
    assert name in ["full", "single_solvent"]
    if name == "full":
        df = pd.read_csv(f'{DATA_PATH}/catechol_full_data_yields.csv')
        X = df[INPUT_LABELS_FULL_SOLVENT]
    else:
        df = pd.read_csv(f'{DATA_PATH}/catechol_single_solvent_yields.csv')
        X = df[INPUT_LABELS_SINGLE_SOLVENT]
    Y = df[TARGET_LABELS]
    return X, Y

def load_features(name="spange_descriptors"):
    assert name in ["spange_descriptors", "acs_pca_descriptors", "drfps_catechol", "fragprints", "smiles"]
    features = pd.read_csv(f'{DATA_PATH}/{name}_lookup.csv', index_col=0)
    return features

# EXACT CV functions from official template
def generate_leave_one_out_splits(X, Y):
    """Generate all leave-one-out splits across the solvents."""
    all_solvents = X["SOLVENT NAME"].unique()
    for solvent_name in sorted(all_solvents):
        train_idcs_mask = X["SOLVENT NAME"] != solvent_name
        yield (
            (X[train_idcs_mask], Y[train_idcs_mask]),
            (X[~train_idcs_mask], Y[~train_idcs_mask]),
        )

def generate_leave_one_ramp_out_splits(X, Y):
    """Generate all leave-one-out splits across the solvent ramps."""
    all_solvent_ramps = X[["SOLVENT A NAME", "SOLVENT B NAME"]].drop_duplicates()
    all_solvent_ramps = all_solvent_ramps.sort_values(by=["SOLVENT A NAME", "SOLVENT B NAME"])
    for _, solvent_pair in all_solvent_ramps.iterrows():
        train_idcs_mask = (X[["SOLVENT A NAME", "SOLVENT B NAME"]] != solvent_pair).any(axis=1)
        yield (
            (X[train_idcs_mask], Y[train_idcs_mask]),
            (X[~train_idcs_mask], Y[~train_idcs_mask]),
        )

print("Data loading and CV functions defined.")

Data loading and CV functions defined.


In [4]:
# EXACT base classes from official template
class SmilesFeaturizer(ABC):
    def __init__(self):
        raise NotImplementedError

    def featurize(self, X):
        raise NotImplementedError

class BaseModel(ABC):
    def __init__(self):
        pass

    def train_model(self, X_train, y_train):
        raise NotImplementedError

    def predict(self):
        raise NotImplementedError

print("Base classes defined.")

Base classes defined.


In [5]:
# MINIMAL MODEL: Just predict the mean
class MeanPredictor(BaseModel):
    """Simplest possible model - just predicts the training mean."""
    
    def __init__(self, data='single'):
        self.data = data
        self.mean = None
    
    def train_model(self, train_X, train_Y, device=None, verbose=False):
        # Just store the mean of the training targets
        self.mean = train_Y.mean().values
    
    def predict(self, X):
        # Return the mean for all samples
        n_samples = len(X)
        predictions = np.tile(self.mean, (n_samples, 1))
        # Clip to [0, 1]
        predictions = np.clip(predictions, 0, 1)
        return torch.tensor(predictions, dtype=torch.double)

print("MeanPredictor model defined.")

MeanPredictor model defined.


In [6]:
# Quick test
print("Testing MeanPredictor...")
X, Y = load_data("single_solvent")
print(f"Single solvent data: X={X.shape}, Y={Y.shape}")

# Test one fold
split_gen = generate_leave_one_out_splits(X, Y)
(train_X, train_Y), (test_X, test_Y) = next(split_gen)

model = MeanPredictor()
model.train_model(train_X, train_Y)
preds = model.predict(test_X)

print(f"Predictions shape: {preds.shape}")
print(f"Predictions: {preds[0]}")
print(f"Training mean: {model.mean}")
print("Test passed!")

Testing MeanPredictor...
Single solvent data: X=(656, 3), Y=(656, 3)
Predictions shape: torch.Size([37, 3])
Predictions: tensor([0.1398, 0.1137, 0.5432])
Training mean: [0.13978304 0.11369471 0.54321009]
Test passed!


In [7]:
########### DO NOT CHANGE ANYTHING IN THIS CELL OTHER THAN THE MODEL #################
########### THIS MUST BE THE THIRD LAST CELL IN YOUR NOTEBOOK FOR A VALID SUBMISSION #################

import tqdm

X, Y = load_data("single_solvent")

split_generator = generate_leave_one_out_splits(X, Y)
all_predictions = []

for fold_idx, split in tqdm.tqdm(enumerate(split_generator)):
    (train_X, train_Y), (test_X, test_Y) = split

    model = MeanPredictor() # CHANGE THIS LINE ONLY
    model.train_model(train_X, train_Y)

    predictions = model.predict(test_X)  # Shape: [N, 3]

    # Move to CPU and convert to numpy
    predictions_np = predictions.detach().cpu().numpy()

    # Add metadata and flatten to long format
    for row_idx, row in enumerate(predictions_np):
        all_predictions.append({
            "task": 0,
            "fold": fold_idx,
            "row": row_idx,
            "target_1": row[0],
            "target_2": row[1],
            "target_3": row[2]
        })

# Save final submission
submission_single_solvent = pd.DataFrame(all_predictions)

########### DO NOT CHANGE ANYTHING IN THIS CELL OTHER THAN THE MODEL #################
########### THIS MUST BE THE THIRD LAST CELL IN YOUR NOTEBOOK FOR A VALID SUBMISSION #################

print(f"Single solvent predictions: {len(submission_single_solvent)}")
print(f"Unique folds: {submission_single_solvent['fold'].nunique()}")

0it [00:00, ?it/s]

24it [00:00, 1571.24it/s]

Single solvent predictions: 656
Unique folds: 24





In [8]:
########### DO NOT CHANGE ANYTHING IN THIS CELL OTHER THAN THE MODEL #################
########### THIS MUST BE THE SECOND LAST CELL IN YOUR NOTEBOOK FOR A VALID SUBMISSION #################

X, Y = load_data("full")

split_generator = generate_leave_one_ramp_out_splits(X, Y)
all_predictions = []

for fold_idx, split in tqdm.tqdm(enumerate(split_generator)):
    (train_X, train_Y), (test_X, test_Y) = split

    model = MeanPredictor(data = 'full') # CHANGE THIS LINE ONLY
    model.train_model(train_X, train_Y)

    predictions = model.predict(test_X)  # Shape: [N, 3]

    # Move to CPU and convert to numpy
    predictions_np = predictions.detach().cpu().numpy()

    # Add metadata and flatten to long format
    for row_idx, row in enumerate(predictions_np):
        all_predictions.append({
            "task": 1,
            "fold": fold_idx,
            "row": row_idx,
            "target_1": row[0],
            "target_2": row[1],
            "target_3": row[2]
        })

# Save final submission
submission_full_data = pd.DataFrame(all_predictions)

########### DO NOT CHANGE ANYTHING IN THIS CELL OTHER THAN THE MODEL #################
########### THIS MUST BE THE SECOND LAST CELL IN YOUR NOTEBOOK FOR A VALID SUBMISSION #################

print(f"Full data predictions: {len(submission_full_data)}")
print(f"Unique folds: {submission_full_data['fold'].nunique()}")

0it [00:00, ?it/s]

13it [00:00, 674.39it/s]

Full data predictions: 1227
Unique folds: 13





In [9]:
########### DO NOT CHANGE ANYTHING IN THIS CELL #################
########### THIS MUST BE THE FINAL CELL IN YOUR NOTEBOOK FOR A VALID SUBMISSION #################

submission = pd.concat([submission_single_solvent, submission_full_data])
submission = submission.reset_index()
submission.index.name = "id"
submission.to_csv("/home/submission/submission.csv", index=True)

########### DO NOT CHANGE ANYTHING IN THIS CELL #################
########### THIS MUST BE THE FINAL CELL IN YOUR NOTEBOOK FOR A VALID SUBMISSION #################

print(f"Submission saved to /home/submission/submission.csv")
print(f"Total rows: {len(submission)}")

Submission saved to /home/submission/submission.csv
Total rows: 1883


In [10]:
# THOROUGH VERIFICATION
print("\n" + "="*60)
print("SUBMISSION VERIFICATION")
print("="*60)

df = pd.read_csv('/home/submission/submission.csv')

print(f"\nColumns: {df.columns.tolist()}")
print(f"Total rows: {len(df)}")

print(f"\nTask 0 (single solvent):")
task0 = df[df['task'] == 0]
print(f"  Rows: {len(task0)}")
print(f"  Folds: {task0['fold'].nunique()}")
print(f"  Fold range: {task0['fold'].min()} to {task0['fold'].max()}")
print(f"  Fold values: {sorted(task0['fold'].unique())}")

print(f"\nTask 1 (full data):")
task1 = df[df['task'] == 1]
print(f"  Rows: {len(task1)}")
print(f"  Folds: {task1['fold'].nunique()}")
print(f"  Fold range: {task1['fold'].min()} to {task1['fold'].max()}")
print(f"  Fold values: {sorted(task1['fold'].unique())}")

print(f"\nTarget statistics:")
for col in ['target_1', 'target_2', 'target_3']:
    print(f"  {col}: min={df[col].min():.6f}, max={df[col].max():.6f}, mean={df[col].mean():.6f}")
    print(f"    Values > 1: {(df[col] > 1).sum()}")
    print(f"    Values < 0: {(df[col] < 0).sum()}")
    print(f"    NaN values: {df[col].isna().sum()}")

print(f"\nData types:")
print(df.dtypes)

print(f"\nFirst 5 rows:")
print(df.head())

print(f"\nLast 5 rows:")
print(df.tail())


SUBMISSION VERIFICATION

Columns: ['id', 'index', 'task', 'fold', 'row', 'target_1', 'target_2', 'target_3']
Total rows: 1883

Task 0 (single solvent):
  Rows: 656
  Folds: 24
  Fold range: 0 to 23
  Fold values: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]

Task 1 (full data):
  Rows: 1227
  Folds: 13
  Fold range: 0 to 12
  Fold values: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]

Target statistics:
  target_1: min=0.139783, max=0.175118, mean=0.158796
    Values > 1: 0
    Values < 0: 0
    NaN values: 0
  target_2: min=0.113695, max=0.154595, mean=0.136127
    Values > 1: 0
    Values < 0: 0
    NaN values: 0
  target_3: min=0.478333, max=0.543210, mean=0.506416
    Values > 1: 0
    Values < 0: 0
    NaN values: 0

Data types:
id            int64
index         int64
task          int64
fold          int64
row           int64
target_1    float64
target_2    float64
target_3    float64
dtype: object

First 5 rows:
   id  index  task  fold  r

In [11]:
# Calculate CV for logging
print("\n" + "="*60)
print("CV CALCULATION")
print("="*60)

# Single solvent CV
X, Y = load_data("single_solvent")
fold_mses = []

for fold_idx, ((train_X, train_Y), (test_X, test_Y)) in enumerate(generate_leave_one_out_splits(X, Y)):
    model = MeanPredictor()
    model.train_model(train_X, train_Y)
    preds = model.predict(test_X).numpy()
    mse = np.mean((preds - test_Y.values) ** 2)
    fold_mses.append(mse)

single_cv = np.mean(fold_mses)
print(f"Single solvent CV MSE: {single_cv:.6f}")

# Full data CV
X, Y = load_data("full")
full_fold_mses = []

for fold_idx, ((train_X, train_Y), (test_X, test_Y)) in enumerate(generate_leave_one_ramp_out_splits(X, Y)):
    model = MeanPredictor(data='full')
    model.train_model(train_X, train_Y)
    preds = model.predict(test_X).numpy()
    mse = np.mean((preds - test_Y.values) ** 2)
    full_fold_mses.append(mse)

full_cv = np.mean(full_fold_mses)
print(f"Full data CV MSE: {full_cv:.6f}")

print(f"\nFINAL CV FOR LOGGING: {single_cv:.6f}")


CV CALCULATION
Single solvent CV MSE: 0.054253
Full data CV MSE: 0.056957

FINAL CV FOR LOGGING: 0.054253


In [12]:
# Summary
print("\n" + "="*60)
print("EXPERIMENT 055: MINIMAL SUBMISSION SUMMARY")
print("="*60)

print("\nGOAL: Debug submission format by using simplest possible model")
print("\nMODEL: MeanPredictor - just predicts the training mean")

print(f"\nSUBMISSION FORMAT:")
print(f"  Total rows: {len(df)}")
print(f"  Task 0: {len(task0)} rows, {task0['fold'].nunique()} folds (0-{task0['fold'].max()})")
print(f"  Task 1: {len(task1)} rows, {task1['fold'].nunique()} folds (0-{task1['fold'].max()})")
print(f"  All targets in [0, 1]: YES")
print(f"  No NaN values: YES")

print(f"\nCV SCORES:")
print(f"  Single solvent: {single_cv:.6f}")
print(f"  Full data: {full_cv:.6f}")

print("\nPURPOSE:")
print("  If this submission works, the format is correct.")
print("  If it fails, there's something else wrong with the evaluation.")
print("  This is the SIMPLEST possible submission to debug the issue.")


EXPERIMENT 055: MINIMAL SUBMISSION SUMMARY

GOAL: Debug submission format by using simplest possible model

MODEL: MeanPredictor - just predicts the training mean

SUBMISSION FORMAT:
  Total rows: 1883
  Task 0: 656 rows, 24 folds (0-23)
  Task 1: 1227 rows, 13 folds (0-12)
  All targets in [0, 1]: YES
  No NaN values: YES

CV SCORES:
  Single solvent: 0.054253
  Full data: 0.056957

PURPOSE:
  If this submission works, the format is correct.
  If it fails, there's something else wrong with the evaluation.
  This is the SIMPLEST possible submission to debug the issue.


In [13]:
# Check the actual implementation of generate_leave_one_ramp_out_splits
import inspect
print(inspect.getsource(generate_leave_one_ramp_out_splits))

def generate_leave_one_ramp_out_splits(X, Y):
    """Generate all leave-one-out splits across the solvent ramps."""
    all_solvent_ramps = X[["SOLVENT A NAME", "SOLVENT B NAME"]].drop_duplicates()
    all_solvent_ramps = all_solvent_ramps.sort_values(by=["SOLVENT A NAME", "SOLVENT B NAME"])
    for _, solvent_pair in all_solvent_ramps.iterrows():
        train_idcs_mask = (X[["SOLVENT A NAME", "SOLVENT B NAME"]] != solvent_pair).any(axis=1)
        yield (
            (X[train_idcs_mask], Y[train_idcs_mask]),
            (X[~train_idcs_mask], Y[~train_idcs_mask]),
        )



In [14]:
# Compare my implementation with the official one
def generate_leave_one_ramp_out_splits_official(X, Y):
    """Official implementation from utils.py"""
    all_solvent_ramps = X[["SOLVENT A NAME", "SOLVENT B NAME"]].drop_duplicates()
    all_solvent_ramps = all_solvent_ramps.sort_values(by=["SOLVENT A NAME", "SOLVENT B NAME"])
    for _, solvent_pair in all_solvent_ramps.iterrows():
        train_idcs_mask = (X[["SOLVENT A NAME", "SOLVENT B NAME"]] != solvent_pair).all(axis=1)
        yield (
            (X[train_idcs_mask], Y[train_idcs_mask]),
            (X[~train_idcs_mask], Y[~train_idcs_mask]),
        )

# Test both implementations
X, Y = load_data("full")

print("My implementation (.any()):")
my_folds = list(generate_leave_one_ramp_out_splits(X, Y))
print(f"  Number of folds: {len(my_folds)}")
for i, ((train_X, train_Y), (test_X, test_Y)) in enumerate(my_folds):
    print(f"  Fold {i}: train={len(train_X)}, test={len(test_X)}")

print("\nOfficial implementation (.all()):")
official_folds = list(generate_leave_one_ramp_out_splits_official(X, Y))
print(f"  Number of folds: {len(official_folds)}")
for i, ((train_X, train_Y), (test_X, test_Y)) in enumerate(official_folds):
    print(f"  Fold {i}: train={len(train_X)}, test={len(test_X)}")

My implementation (.any()):
  Number of folds: 13
  Fold 0: train=1103, test=124
  Fold 1: train=1102, test=125
  Fold 2: train=1103, test=124
  Fold 3: train=1102, test=125
  Fold 4: train=1123, test=104
  Fold 5: train=1117, test=110
  Fold 6: train=1191, test=36
  Fold 7: train=1100, test=127
  Fold 8: train=1193, test=34
  Fold 9: train=1105, test=122
  Fold 10: train=1192, test=35
  Fold 11: train=1102, test=125
  Fold 12: train=1191, test=36

Official implementation (.all()):
  Number of folds: 13
  Fold 0: train=1103, test=124
  Fold 1: train=1102, test=125
  Fold 2: train=1103, test=124
  Fold 3: train=1102, test=125
  Fold 4: train=1123, test=104
  Fold 5: train=1117, test=110
  Fold 6: train=1191, test=36
  Fold 7: train=1100, test=127
  Fold 8: train=1193, test=34
  Fold 9: train=1105, test=122
  Fold 10: train=1192, test=35
  Fold 11: train=1102, test=125
  Fold 12: train=1191, test=36
