# Experiment 105: Extrapolation-Aware Conservative Predictions

## Goal
Implement extrapolation detection using solvent features and blend predictions toward training mean when extrapolating. This directly targets the CV-LB intercept problem.

## Key Insight
The CV-LB relationship is LB = 4.315 × CV + 0.0525. The intercept (0.0525) represents structural extrapolation error. By detecting when we're predicting for solvents far from the training distribution and making more conservative predictions, we might reduce this intercept.

## Approach
1. Use CatBoost + XGBoost ensemble as base model (from ens-model kernel)
2. Compute distance to nearest training solvent using SOLVENT FEATURES ONLY
3. Blend toward training mean when extrapolating
4. Test different blend weights

In [1]:
import numpy as np
import pandas as pd
import torch
import sys
import os
import warnings
warnings.filterwarnings('ignore')

# Add data path
sys.path.append('/home/data/')

from utils import (
    INPUT_LABELS_FULL_SOLVENT, INPUT_LABELS_SINGLE_SOLVENT, 
    INPUT_LABELS_NUMERIC, INPUT_LABELS_SINGLE_FEATURES, 
    INPUT_LABELS_FULL_FEATURES, load_data, load_features, 
    generate_leave_one_out_splits, generate_leave_one_ramp_out_splits
)

print("Imports successful")

Imports successful


In [2]:
# Base classes
from abc import ABC, abstractmethod
from functools import reduce
from sklearn.neighbors import NearestNeighbors

torch.set_default_dtype(torch.double)

class SmilesFeaturizer(ABC):
    """Interface for objects that turn experiment rows into model features.

    Constructing this base class directly raises ``NotImplementedError``;
    concrete featurizers define their own ``__init__`` and ``featurize``.
    """

    def __init__(self):
        raise NotImplementedError

    def featurize(self, X):
        """Convert the rows in ``X`` into features; subclasses must override.

        The original signature omitted ``self``, so an instance call bound the
        instance to the first data parameter. It now matches the
        ``featurize(self, X)`` form the subclasses in this file implement.
        """
        raise NotImplementedError

class BaseModel(ABC):
    """Minimal train/predict interface shared by every model in this notebook.

    Both methods raise ``NotImplementedError`` until a subclass overrides them.
    """

    def __init__(self):
        pass

    def train_model(self, X_train, y_train, device=None, verbose=False):
        """Fit the model on ``X_train`` / ``y_train``; subclasses must override.

        ``device`` and ``verbose`` are accepted (with defaults) because the
        concrete models in this file take them as optional arguments.
        """
        raise NotImplementedError

    def predict(self, X=None):
        """Return predictions for ``X``; subclasses must override.

        ``X`` defaults to ``None`` so the old zero-argument call keeps raising
        ``NotImplementedError`` rather than ``TypeError``.
        """
        raise NotImplementedError

In [3]:
# Feature engineering functions from ens-model kernel
_SOLVENT_TABLE_CACHE = None

def feature_priority(name: str) -> int:
    """Rank a feature column by the descriptor family encoded in its prefix.

    Higher numbers win when two correlated columns compete:
    ``spange_`` (5) > ``acs_`` (4) > ``drfps_`` (3) > ``frag_`` (2) >
    ``smiles_`` (1). Any other name gets 0. Matching is case-sensitive.
    """
    ranked_prefixes = (
        ("spange_", 5),
        ("acs_", 4),
        ("drfps_", 3),
        ("frag_", 2),
        ("smiles_", 1),
    )
    matches = (rank for prefix, rank in ranked_prefixes if name.startswith(prefix))
    return next(matches, 0)

def filter_correlated_features(df, threshold=0.8):
    """Drop constant columns and one column from each highly correlated pair.

    Only numeric columns are analysed; non-numeric columns are passed through.
    Pairs whose absolute Pearson correlation exceeds ``threshold`` are visited
    in column order. For each pair where neither column is already marked, the
    column with the lower ``feature_priority`` is dropped; on a tie, the one
    that appears later in ``df`` is dropped.

    Returns:
        ``(filtered_df, dropped_columns)``. When ``df`` has no numeric
        columns it is returned unchanged with an empty list.
    """
    numeric = df.select_dtypes(include=[np.number])
    if numeric.shape[1] == 0:
        return df, []

    # Zero-variance columns carry no signal and have undefined correlations.
    spread = numeric.std(axis=0)
    constant_cols = spread.index[(spread == 0).to_numpy()].tolist()
    if constant_cols:
        numeric = numeric.drop(columns=constant_cols)

    abs_corr = numeric.corr().abs()
    names = abs_corr.columns.tolist()
    # Undefined correlations count as 0; only the strict upper triangle is
    # scanned so each unordered pair is seen once, in row-major order.
    strength = abs_corr.fillna(0.0).to_numpy()
    first_idx, second_idx = np.nonzero(np.triu(strength > threshold, k=1))

    flagged = set()
    for r, c in zip(first_idx, second_idx):
        first, second = names[r], names[c]
        if first in flagged or second in flagged:
            continue
        rank_first = feature_priority(first)
        rank_second = feature_priority(second)
        if rank_first != rank_second:
            loser = second if rank_first > rank_second else first
        elif df.columns.get_loc(first) > df.columns.get_loc(second):
            loser = first
        else:
            loser = second
        flagged.add(loser)

    all_to_drop = list(set(constant_cols).union(flagged))
    return df.drop(columns=all_to_drop, errors="ignore"), all_to_drop

def add_numeric_features(X_numeric):
    """Return a copy of ``X_numeric`` with engineered process features.

    When both ``"Temperature"`` and ``"Residence Time"`` are present, the
    temperature is shifted by 273.15 (Celsius to Kelvin, assuming Celsius
    input) and four columns are appended: ``T_x_RT``, ``RT_log``, ``T_inv``
    and ``RT_scaled``. Otherwise the copy is returned unchanged.
    """
    enriched = X_numeric.copy()
    if not {"Temperature", "Residence Time"}.issubset(enriched.columns):
        return enriched

    kelvin = enriched["Temperature"] + 273.15
    residence = enriched["Residence Time"]
    return enriched.assign(
        Temperature=kelvin,
        T_x_RT=kelvin * residence,
        # The small offset keeps log() finite for a zero residence time.
        RT_log=np.log(residence + 1e-6),
        T_inv=1 / kelvin,
        # NOTE(review): the mean is taken over whatever rows are passed in,
        # so train and test batches are scaled by different constants.
        RT_scaled=residence / residence.mean(),
    )

def build_solvent_feature_table(threshold=0.90):
    """Build (and cache) the merged per-solvent descriptor table.

    Five feature sources are loaded with ``load_features``, prefixed by
    family (``spange_``, ``acs_``, ``drfps_``, ``frag_``, ``smiles_``),
    indexed by ``"SOLVENT NAME"`` and inner-joined. The joined table is then
    pruned with ``filter_correlated_features(threshold=threshold)``.

    The result is stored in the module-level ``_SOLVENT_TABLE_CACHE``. The
    cache used to ignore ``threshold``, so a later call with a different
    value silently got the table built for the first one. The threshold that
    produced the cached table is now remembered and the table is rebuilt when
    a different threshold is requested.

    Args:
        threshold: absolute-correlation cut-off passed to the correlation filter.

    Returns:
        DataFrame indexed by solvent name with the filtered feature columns.
    """
    global _SOLVENT_TABLE_CACHE
    # If no threshold was recorded (cache filled before this bookkeeping
    # existed), trust the cached table as the original code did.
    cached_threshold = getattr(build_solvent_feature_table, "_cached_threshold", threshold)
    if _SOLVENT_TABLE_CACHE is not None and cached_threshold == threshold:
        return _SOLVENT_TABLE_CACHE

    print(">>> Building solvent feature table...")
    # Source name -> column prefix; dict order fixes the join order.
    prefixes = {
        "spange_descriptors": "spange",
        "acs_pca_descriptors": "acs",
        "drfps_catechol": "drfps",
        "fragprints": "frag",
        "smiles": "smiles",
    }
    fingerprint_sources = {"drfps_catechol", "fragprints"}
    dfs = []

    for src, prefix in prefixes.items():
        df_src = load_features(src).copy()
        if "SOLVENT NAME" not in df_src.columns:
            df_src = df_src.reset_index().rename(columns={"index": "SOLVENT NAME"})

        if src in fingerprint_sources:
            # Remove columns that are all 0 or all 1, then columns whose
            # values sum to exactly 1 across solvents.
            df_src = df_src.loc[:, (df_src != 0).any(axis=0)]
            df_src = df_src.loc[:, (df_src != 1).any(axis=0)]
            counts = df_src.drop(columns=["SOLVENT NAME"]).sum(axis=0)
            df_src = df_src.drop(columns=counts[counts == 1].index)

        df_src = df_src.rename(
            columns={c: f"{prefix}_{c}" for c in df_src.columns if c != "SOLVENT NAME"}
        )

        # Raw SMILES strings are not numeric features; drop any such column.
        smiles_like = [c for c in df_src.columns if "SMILES" in c.upper()]
        df_src = df_src.drop(columns=smiles_like, errors="ignore")
        dfs.append(df_src.set_index("SOLVENT NAME"))

    featurizer = reduce(lambda l, r: l.join(r, how="inner"), dfs)
    print(f"Combined feature table shape (before corr filter): {featurizer.shape}")

    featurizer_filtered, dropped_cols = filter_correlated_features(featurizer, threshold=threshold)
    print(f"Final solvent feature table shape: {featurizer_filtered.shape}")

    _SOLVENT_TABLE_CACHE = featurizer_filtered
    build_solvent_feature_table._cached_threshold = threshold
    return featurizer_filtered

print("Feature engineering functions defined")

Feature engineering functions defined


In [None]:
# Featurizers
class PrecomputedFeaturizer(SmilesFeaturizer):
    """Featurizer for single-solvent experiments.

    Rows are described by the engineered numeric columns followed by the
    precomputed descriptors of the solvent named in ``"SOLVENT NAME"``.
    """

    def __init__(self):
        self.featurizer = build_solvent_feature_table()
        # One all-zero row goes through the numeric pipeline only to count
        # how many columns it produces; its values are never used.
        probe = pd.DataFrame([[0] * len(INPUT_LABELS_NUMERIC)], columns=INPUT_LABELS_NUMERIC)
        self.feats_dim = add_numeric_features(probe).shape[1] + self.featurizer.shape[1]

    def featurize(self, X):
        """Return a double tensor of numeric columns then solvent descriptors."""
        numeric_block = add_numeric_features(X[INPUT_LABELS_NUMERIC].copy()).values
        solvent_block = self.get_solvent_features(X)
        return torch.tensor(np.hstack([numeric_block, solvent_block]), dtype=torch.double)

    def get_solvent_features(self, X):
        """Get only solvent features for extrapolation detection"""
        solvent_names = X["SOLVENT NAME"]
        return self.featurizer.loc[solvent_names].values

class PrecomputedFeaturizerMixed(SmilesFeaturizer):
    """Featurizer for mixed-solvent experiments.

    The solvent part of each row is a linear blend of the descriptors of
    ``"SOLVENT A NAME"`` and ``"SOLVENT B NAME"``, weighted by ``"SolventB%"``
    (used directly as the weight of B, so presumably a 0-1 fraction;
    TODO confirm against the dataset).
    """

    def __init__(self):
        self.featurizer = build_solvent_feature_table()
        # One all-zero row goes through the numeric pipeline only to count
        # how many columns it produces; its values are never used.
        probe = pd.DataFrame([[0] * len(INPUT_LABELS_NUMERIC)], columns=INPUT_LABELS_NUMERIC)
        self.feats_dim = add_numeric_features(probe).shape[1] + self.featurizer.shape[1]

    def featurize(self, X):
        """Return a double tensor of numeric columns then blended descriptors."""
        numeric_block = add_numeric_features(X[INPUT_LABELS_NUMERIC].copy()).values
        blended = self.get_solvent_features(X)
        return torch.tensor(np.concatenate([numeric_block, blended], axis=1), dtype=torch.double)

    def get_solvent_features(self, X):
        """Get mixed solvent features for extrapolation detection"""
        table = self.featurizer
        desc_a = table.loc[X["SOLVENT A NAME"]].values
        desc_b = table.loc[X["SOLVENT B NAME"]].values
        share_b = X["SolventB%"].values.reshape(-1, 1)
        share_a = 1 - share_b
        return desc_a * share_a + desc_b * share_b

print("Featurizers defined")

In [None]:
# CatBoost Model
from catboost import CatBoostRegressor

class CatBoostModel(BaseModel):
    """Multi-target CatBoost regressor on the precomputed solvent features.

    ``data="single"`` selects the single-solvent featurizer and its
    hyper-parameters; any other value selects the mixed-solvent variants.
    """

    def __init__(self, data="single", verbose=False, random_state=42):
        self.data_mode = data
        self.verbose = verbose
        self.random_state = random_state

        # Settings common to both data modes.
        shared = dict(
            random_seed=random_state,
            loss_function="MultiRMSE",
            depth=3,
            bootstrap_type="Bayesian",
            grow_policy="SymmetricTree",
            rsm=0.75,
            verbose=verbose,
        )
        if data == "single":
            self.smiles_featurizer = PrecomputedFeaturizer()
            tuned = dict(
                learning_rate=0.07, n_estimators=1050,
                l2_leaf_reg=3.5, bagging_temperature=0.225,
            )
        else:
            self.smiles_featurizer = PrecomputedFeaturizerMixed()
            tuned = dict(
                learning_rate=0.06, n_estimators=1100,
                l2_leaf_reg=2.5, bagging_temperature=0.25,
            )
        self.cat_params = {**shared, **tuned}
        self.model = None
        self.n_targets = None

    def train_model(self, train_X, train_Y, device=None, verbose=False):
        """Fit one CatBoost model on all targets; ``device``/``verbose`` are unused."""
        features = self.smiles_featurizer.featurize(train_X).detach().cpu().numpy()
        targets = train_Y.values
        self.n_targets = targets.shape[1]
        self.model = CatBoostRegressor(**self.cat_params)
        self.model.fit(features, targets)

    def predict(self, X):
        """Predict, clip to be non-negative, and cap each row's sum at 1."""
        features = self.smiles_featurizer.featurize(X).detach().cpu().numpy()
        raw = np.asarray(self.model.predict(features))
        if raw.ndim == 1:
            raw = raw.reshape(-1, 1)
        preds = np.clip(raw, a_min=0.0, a_max=None)
        if preds.shape[1] > 1:
            # Rows summing to more than 1 are scaled down; others are untouched.
            row_sums = preds.sum(axis=1, keepdims=True)
            preds = preds / np.maximum(row_sums, 1.0)
        return torch.tensor(preds, dtype=torch.double)

print("CatBoostModel defined")

In [None]:
# XGBoost Model
from xgboost import XGBRegressor

class XGBModel(BaseModel):
    """One XGBoost regressor per target on the precomputed solvent features.

    ``data="single"`` selects the single-solvent featurizer and tree settings;
    any other value selects the mixed-solvent variants.
    """

    def __init__(self, data="single", random_state=42, verbose=False):
        self.data_mode = data
        self.verbose = verbose
        self.random_state = random_state

        # Settings common to both data modes.
        shared = dict(
            random_state=random_state,
            objective="reg:squarederror",
            subsample=0.5,
            reg_lambda=0.6,
            reg_alpha=0.0,
            n_estimators=1000,
            min_child_weight=1,
            max_depth=4,
            max_delta_step=1,
            learning_rate=0.02,
            gamma=0.0,
            colsample_bytree=0.3,
            colsample_bylevel=0.6,
        )
        if data == "single":
            self.smiles_featurizer = PrecomputedFeaturizer()
            tree_settings = dict(tree_method="hist", grow_policy="depthwise")
        else:
            self.smiles_featurizer = PrecomputedFeaturizerMixed()
            tree_settings = dict(tree_method="approx", grow_policy="lossguide")
        self.xgb_params = {**shared, **tree_settings}
        self.models = None
        self.n_targets = None

    def train_model(self, train_X, train_Y, device=None, verbose=False):
        """Fit an independent regressor per target column; ``device``/``verbose`` are unused."""
        features = self.smiles_featurizer.featurize(train_X).detach().cpu().numpy()
        targets = train_Y.values
        self.n_targets = targets.shape[1]
        self.models = []
        for target_idx in range(self.n_targets):
            regressor = XGBRegressor(**self.xgb_params)
            regressor.fit(features, targets[:, target_idx])
            self.models.append(regressor)

    def predict(self, X):
        """Predict per target, clip to be non-negative, and cap each row's sum at 1."""
        features = self.smiles_featurizer.featurize(X).detach().cpu().numpy()
        preds = np.column_stack([regressor.predict(features) for regressor in self.models])
        preds = np.clip(preds, a_min=0.0, a_max=None)
        if preds.shape[1] > 1:
            # Rows summing to more than 1 are scaled down; others are untouched.
            row_sums = preds.sum(axis=1, keepdims=True)
            preds = preds / np.maximum(row_sums, 1.0)
        return torch.tensor(preds, dtype=torch.double)

print("XGBModel defined")

In [None]:
# Ensemble Model (base model)
class EnsembleModel(BaseModel):
    """Weighted ensemble of CatBoostModel and XGBModel.

    Weights are 0.65/0.35 (CatBoost/XGBoost) for ``data="single"`` and
    0.60/0.40 for any other data mode.
    """

    def __init__(self, data="single", verbose=False):
        self.data_mode = data
        self.verbose = verbose

        cat_share, xgb_share = (0.65, 0.35) if data == "single" else (0.60, 0.40)
        self.weights = {"catboost": cat_share, "xgb": xgb_share}

        self.catboost_model = CatBoostModel(data=data, verbose=verbose)
        self.xgb_model = XGBModel(data=data, verbose=verbose)

    def train_model(self, train_X, train_Y, device=None, verbose=False):
        """Train both members on the same data, CatBoost first."""
        for member in (self.catboost_model, self.xgb_model):
            member.train_model(train_X, train_Y, device, verbose)

    def predict(self, X):
        """Blend member predictions, clip to be non-negative, cap each row's sum at 1."""
        cat_pred = self.catboost_model.predict(X).numpy()
        xgb_pred = self.xgb_model.predict(X).numpy()

        blended = self.weights["catboost"] * cat_pred + self.weights["xgb"] * xgb_pred
        blended = np.clip(blended, a_min=0.0, a_max=None)

        if blended.shape[1] > 1:
            # Rows summing to more than 1 are scaled down; others are untouched.
            row_sums = blended.sum(axis=1, keepdims=True)
            blended = blended / np.maximum(row_sums, 1.0)

        return torch.tensor(blended, dtype=torch.double)

print("EnsembleModel defined")

In [None]:
# Extrapolation-Aware Model - THE KEY INNOVATION
class ExtrapolationAwareModel(BaseModel):
    """
    Wraps a base model and blends predictions toward training mean
    when extrapolating to solvents far from the training distribution.

    Key insight: Use SOLVENT FEATURES ONLY for extrapolation detection,
    not the full feature set. This detects when we're predicting for
    a solvent that's structurally different from training solvents.

    Args:
        data: ``"single"`` for single-solvent data, anything else for mixtures.
        blend_weight: slope of the blend weight per unit of excess
            extrapolation score; the resulting weight is capped at 0.5.
        n_neighbors: number of distinct training feature rows averaged over
            when measuring distance.
        verbose: print calibration details during training.
    """
    def __init__(self, data="single", blend_weight=0.3, n_neighbors=5, verbose=False):
        self.data_mode = data
        self.blend_weight = blend_weight
        self.n_neighbors = n_neighbors
        self.verbose = verbose

        # Base model
        self.base_model = EnsembleModel(data=data, verbose=verbose)

        # Featurizer for solvent features
        if data == "single":
            self.featurizer = PrecomputedFeaturizer()
        else:
            self.featurizer = PrecomputedFeaturizerMixed()

        # Will be set during training
        self.nn_model = None
        self.distance_threshold = None
        self.train_mean = None
        self.train_solvent_features = None

    def train_model(self, train_X, train_Y, device=None, verbose=False):
        """Train the base model and calibrate the extrapolation threshold."""
        # Train base model
        self.base_model.train_model(train_X, train_Y, device, verbose)

        # Store training mean for blending
        self.train_mean = train_Y.values.mean(axis=0)

        # Get solvent features for training data
        self.train_solvent_features = self.featurizer.get_solvent_features(train_X)

        # Index only DISTINCT feature rows. Many training rows share the same
        # solvent features, so indexing every row makes the k nearest
        # neighbours exact duplicates at distance 0.
        reference = np.unique(
            np.asarray(self.train_solvent_features, dtype=float), axis=0
        )
        n_reference = len(reference)
        # Leave-self-out calibration needs k < number of reference rows.
        k = max(1, min(self.n_neighbors, n_reference - 1))
        self.nn_model = NearestNeighbors(n_neighbors=k)
        self.nn_model.fit(reference)

        if n_reference > 1:
            # kneighbors() without a query excludes each point from its own
            # neighbour list. The previous kneighbors(train_features) call
            # matched every row to itself at distance 0, which drove the
            # threshold toward 0 and saturated the blend weight at its cap.
            train_distances, _ = self.nn_model.kneighbors()
            self.distance_threshold = np.percentile(train_distances.mean(axis=1), 90)
        else:
            # A single reference point gives no distance scale to calibrate
            # against, so disable blending (score becomes 0 in predict).
            self.distance_threshold = np.inf

        if self.verbose or verbose:
            print(f"[ExtrapolationAwareModel] Distance threshold: {self.distance_threshold:.4f}")
            print(f"[ExtrapolationAwareModel] Train mean: {self.train_mean}")

    def predict(self, X):
        """Predict with the base model, shrinking toward the training mean
        for rows whose solvent features lie beyond the calibrated threshold."""
        # Get base predictions
        base_preds = self.base_model.predict(X).numpy()

        # Get solvent features for test data
        test_solvent_features = np.asarray(
            self.featurizer.get_solvent_features(X), dtype=float
        )

        # Extrapolation score: mean k-NN distance relative to the threshold
        distances, _ = self.nn_model.kneighbors(test_solvent_features)
        extrapolation_score = distances.mean(axis=1) / (self.distance_threshold + 1e-8)

        # Blend toward training mean when extrapolating
        # weight = 0 when extrapolation_score <= 1 (within training distribution)
        # weight grows linearly with the excess score and is capped at 0.5
        weight = np.clip((extrapolation_score - 1) * self.blend_weight, 0, 0.5)
        weight = weight.reshape(-1, 1)

        # Blend predictions
        final_preds = (1 - weight) * base_preds + weight * self.train_mean

        # Ensure valid predictions: non-negative, row sums capped at 1
        final_preds = np.clip(final_preds, a_min=0.0, a_max=None)
        if final_preds.shape[1] > 1:
            totals = final_preds.sum(axis=1, keepdims=True)
            divisor = np.maximum(totals, 1.0)
            final_preds = final_preds / divisor

        return torch.tensor(final_preds, dtype=torch.double)

print("ExtrapolationAwareModel defined")

In [None]:
# Test different blend weights
import tqdm

def evaluate_model(model_class, data_mode, blend_weight=None):
    """Cross-validate ``model_class`` and return ``(mse, predictions)``.

    ``data_mode == "single"`` uses the single-solvent data with
    ``generate_leave_one_out_splits``; any other value uses the full data
    with ``generate_leave_one_ramp_out_splits``. A fresh model is built per
    fold; ``blend_weight`` is forwarded to the constructor only when it is
    not ``None``. The MSE is the mean over targets of the per-target mean
    squared error on the held-out predictions.
    """
    if data_mode == "single":
        X, Y = load_data("single_solvent")
        folds = generate_leave_one_out_splits(X, Y)
    else:
        X, Y = load_data("full")
        folds = generate_leave_one_ramp_out_splits(X, Y)

    ctor_kwargs = {"data": data_mode}
    if blend_weight is not None:
        ctor_kwargs["blend_weight"] = blend_weight

    fold_frames = []
    for _, fold in tqdm.tqdm(enumerate(folds), desc=f"{data_mode}"):
        (train_X, train_Y), (test_X, test_Y) = fold

        model = model_class(**ctor_kwargs)
        model.train_model(train_X, train_Y)
        fold_preds = model.predict(test_X).numpy()

        # Keep the held-out rows' index so predictions align with Y below.
        fold_frames.append(
            pd.DataFrame(fold_preds, columns=test_Y.columns, index=test_Y.index)
        )

    all_predictions = pd.concat(fold_frames)
    Y_true = Y.loc[all_predictions.index]

    squared_error = (all_predictions - Y_true) ** 2
    mse = squared_error.mean().mean()
    return mse, all_predictions

print("Evaluation function defined")

In [None]:
# First, evaluate baseline EnsembleModel (no extrapolation awareness)
# Both calls run a complete cross-validation, retraining the ensemble per fold.
# The resulting globals are read again by the best-blend-weight and metrics cells.
print("=" * 60)
print("Evaluating baseline EnsembleModel (no extrapolation awareness)")
print("=" * 60)

baseline_single_mse, _ = evaluate_model(EnsembleModel, "single")
print(f"\nBaseline Single Solvent MSE: {baseline_single_mse:.6f}")

baseline_full_mse, _ = evaluate_model(EnsembleModel, "full")
print(f"Baseline Full Data MSE: {baseline_full_mse:.6f}")

# Weighted average of the two MSEs.
# NOTE(review): 656 and 1227 are presumably the row counts of the
# single-solvent and full datasets; confirm against load_data.
baseline_combined = (baseline_single_mse * 656 + baseline_full_mse * 1227) / (656 + 1227)
print(f"\nBaseline Combined MSE: {baseline_combined:.6f}")

In [None]:
# Test different blend weights for ExtrapolationAwareModel
# Each candidate triggers two full cross-validations (single + full data).
blend_weights = [0.1, 0.2, 0.3, 0.4, 0.5]
results = []  # one dict per blend weight; later turned into results_df

print("\n" + "=" * 60)
print("Testing ExtrapolationAwareModel with different blend weights")
print("=" * 60)

for bw in blend_weights:
    print(f"\n--- Blend weight: {bw} ---")

    single_mse, _ = evaluate_model(ExtrapolationAwareModel, "single", blend_weight=bw)
    print(f"Single Solvent MSE: {single_mse:.6f}")

    full_mse, _ = evaluate_model(ExtrapolationAwareModel, "full", blend_weight=bw)
    print(f"Full Data MSE: {full_mse:.6f}")

    # Same weighted average as the baseline cell.
    # NOTE(review): 656 / 1227 are presumably the dataset row counts; confirm.
    combined = (single_mse * 656 + full_mse * 1227) / (656 + 1227)
    print(f"Combined MSE: {combined:.6f}")

    results.append({
        'blend_weight': bw,
        'single_mse': single_mse,
        'full_mse': full_mse,
        'combined_mse': combined
    })

# results_df is read by the best-blend-weight and metrics cells.
results_df = pd.DataFrame(results)
print("\n" + "=" * 60)
print("Summary of Results")
print("=" * 60)
print(results_df.to_string(index=False))

In [None]:
# Find best blend weight
# Selection criterion: lowest combined (weighted) MSE in results_df.
best_idx = results_df['combined_mse'].idxmin()
best_bw = results_df.loc[best_idx, 'blend_weight']  # reused by the submission cells
best_mse = results_df.loc[best_idx, 'combined_mse']

print(f"\nBest blend weight: {best_bw}")
print(f"Best combined MSE: {best_mse:.6f}")
print(f"Baseline combined MSE: {baseline_combined:.6f}")
# Positive percentage means the extrapolation-aware model beats the baseline.
print(f"Improvement: {(baseline_combined - best_mse) / baseline_combined * 100:.2f}%")

# Compare to best known CV (0.0081 from exp_030)
# Positive difference means the current result is worse than that reference.
print(f"\nComparison to best known CV (0.0081):")
print(f"  Current best: {best_mse:.6f}")
print(f"  Difference: {(best_mse - 0.0081) / 0.0081 * 100:.2f}%")

In [None]:
# Save metrics
import json

# Summary of the baseline run, the winning blend weight, and the full sweep.
metrics = {
    'baseline_single_mse': float(baseline_single_mse),
    'baseline_full_mse': float(baseline_full_mse),
    'baseline_combined_mse': float(baseline_combined),
    'best_blend_weight': float(best_bw),
    'best_single_mse': float(results_df.loc[best_idx, 'single_mse']),
    'best_full_mse': float(results_df.loc[best_idx, 'full_mse']),
    'best_combined_mse': float(best_mse),
    'all_results': results
}

metrics_path = '/home/code/experiments/105_extrapolation_aware/metrics.json'
# Create the experiment folder if it is missing, so that open() does not
# fail with FileNotFoundError after the long cross-validation runs above.
os.makedirs(os.path.dirname(metrics_path), exist_ok=True)
with open(metrics_path, 'w') as f:
    json.dump(metrics, f, indent=2)

print("Metrics saved")

## Submission Cells

Using the best blend weight found above.

In [None]:
########### DO NOT CHANGE ANYTHING IN THIS CELL OTHER THAN THE MODEL #################
########### THIS MUST BE THE THIRD LAST CELL IN YOUR NOTEBOOK FOR A VALID SUBMISSION #################

import tqdm

X, Y = load_data("single_solvent")

split_generator = generate_leave_one_out_splits(X, Y)
all_predictions = []

for fold_idx, split in tqdm.tqdm(enumerate(split_generator)):
    (train_X, train_Y), (test_X, test_Y) = split

    # best_bw is the global set by the best-blend-weight cell, which must have run first.
    model = ExtrapolationAwareModel(data='single', blend_weight=best_bw) # CHANGE THIS LINE ONLY
    model.train_model(train_X, train_Y)
    predictions = model.predict(test_X)

    predictions_df = pd.DataFrame(
        predictions.numpy(),
        columns=test_Y.columns,
        index=test_Y.index
    )
    all_predictions.append(predictions_df)

submission_single_solvent = pd.concat(all_predictions)

In [None]:
########### DO NOT CHANGE ANYTHING IN THIS CELL OTHER THAN THE MODEL #################
########### THIS MUST BE THE SECOND LAST CELL IN YOUR NOTEBOOK FOR A VALID SUBMISSION #################

X, Y = load_data("full")

split_generator = generate_leave_one_ramp_out_splits(X, Y)
all_predictions = []

for fold_idx, split in tqdm.tqdm(enumerate(split_generator)):
    (train_X, train_Y), (test_X, test_Y) = split

    # best_bw is the global set by the best-blend-weight cell, which must have run first.
    model = ExtrapolationAwareModel(data='full', blend_weight=best_bw) # CHANGE THIS LINE ONLY
    model.train_model(train_X, train_Y)
    predictions = model.predict(test_X)

    predictions_df = pd.DataFrame(
        predictions.numpy(),
        columns=test_Y.columns,
        index=test_Y.index
    )
    all_predictions.append(predictions_df)

submission_full_data = pd.concat(all_predictions)

In [None]:
########### DO NOT CHANGE ANYTHING IN THIS CELL #################
########### THIS MUST BE THE FINAL CELL IN YOUR NOTEBOOK FOR A VALID SUBMISSION #################

# Single-solvent predictions come first, then full-data predictions; the
# original row index is kept as a column and a fresh 0..n-1 "id" index is written.
submission = pd.concat([submission_single_solvent, submission_full_data])
submission = submission.reset_index()
submission.index.name = "id"
submission.to_csv("/home/code/experiments/105_extrapolation_aware/submission.csv", index=True)

# Also copy to main submission folder
import shutil
shutil.copy("/home/code/experiments/105_extrapolation_aware/submission.csv", "/home/submission/submission.csv")

print(f"Submission shape: {submission.shape}")
print(f"Submission saved to /home/submission/submission.csv")

########### DO NOT CHANGE ANYTHING IN THIS CELL #################
########### THIS MUST BE THE FINAL CELL IN YOUR NOTEBOOK FOR A VALID SUBMISSION #################