# Loop 4 Analysis: Diagnosing Mixed Solvent Performance Degradation

## Key Issue
Exp 004 showed:
- Single solvent improved: 0.0748 → 0.0659 (12% better)
- Full data degraded: 0.0836 → 0.0895 (7% worse)
- Net result: Combined score got slightly worse

Need to understand WHY mixed solvent performance degraded and how to fix it.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, HistGradientBoostingRegressor
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings('ignore')

DATA_PATH = '/home/data'
TARGET_LABELS = ["Product 2", "Product 3", "SM"]

def load_data(name="full"):
    if name == "full":
        df = pd.read_csv(f'{DATA_PATH}/catechol_full_data_yields.csv')
        X = df[["Residence Time", "Temperature", "SOLVENT A NAME", "SOLVENT B NAME", "SolventB%"]]
    else:
        df = pd.read_csv(f'{DATA_PATH}/catechol_single_solvent_yields.csv')
        X = df[["Residence Time", "Temperature", "SOLVENT NAME"]]
    Y = df[TARGET_LABELS]
    return X, Y

def load_features(name="spange_descriptors"):
    return pd.read_csv(f'{DATA_PATH}/{name}_lookup.csv', index_col=0)

SPANGE_DF = load_features('spange_descriptors')
ACS_PCA_DF = load_features('acs_pca_descriptors')

print(f"Spange: {SPANGE_DF.shape}, ACS_PCA: {ACS_PCA_DF.shape}")

Spange: (26, 13), ACS_PCA: (24, 5)


In [2]:
# Load full data and analyze the ramp structure
X_full, Y_full = load_data('full')

print("Full data shape:", X_full.shape)
print("\nSolvent ramps:")
ramps = X_full[['SOLVENT A NAME', 'SOLVENT B NAME']].drop_duplicates()
for i, row in ramps.iterrows():
    mask = (X_full['SOLVENT A NAME'] == row['SOLVENT A NAME']) & (X_full['SOLVENT B NAME'] == row['SOLVENT B NAME'])
    count = mask.sum()
    print(f"  {row['SOLVENT A NAME']} + {row['SOLVENT B NAME']}: {count} samples")

Full data shape: (1227, 5)

Solvent ramps:
  Methanol + Ethylene Glycol [1,2-Ethanediol]: 122 samples
  1,1,1,3,3,3-Hexafluoropropan-2-ol + 2-Methyltetrahydrofuran [2-MeTHF]: 124 samples
  Cyclohexane + IPA [Propan-2-ol]: 104 samples
  Water.Acetonitrile + Acetonitrile: 125 samples
  Acetonitrile + Acetonitrile.Acetic Acid: 125 samples
  2-Methyltetrahydrofuran [2-MeTHF] + Diethyl Ether [Ether]: 124 samples
  2,2,2-Trifluoroethanol + Water.2,2,2-Trifluoroethanol: 125 samples
  DMA [N,N-Dimethylacetamide] + Decanol: 110 samples
  Ethanol + THF [Tetrahydrofuran]: 127 samples
  Dihydrolevoglucosenone (Cyrene) + Ethyl Acetate: 36 samples
  MTBE [tert-Butylmethylether] + Butanone [MEK]: 34 samples
  tert-Butanol [2-Methylpropan-2-ol] + Dimethyl Carbonate: 36 samples
  Methyl Propionate + Ethyl Lactate: 35 samples


In [3]:
# Analyze SolventB% distribution
print("\nSolventB% distribution:")
print(X_full['SolventB%'].describe())
print("\nUnique values:", sorted(X_full['SolventB%'].unique()))


SolventB% distribution:
count    1227.000000
mean        0.435865
std         0.400336
min         0.000000
25%         0.000000
50%         0.330153
75%         0.858481
max         1.000000
Name: SolventB%, dtype: float64

Unique values: [0.0, 0.0007551958286629, 0.0011470643892773, 0.0012558138971659, 0.0013800461289313, 0.0014146983940115, 0.0014670453971028, 0.0039301499938112, 0.0044415304154707, 0.0047847161836571, 0.0048333739278409, 0.0048543852203656, 0.0049421164792773, 0.0049705000996266, 0.0050663407785638, 0.0051009908260428, 0.0051057828566375, 0.0305111311583698, 0.0305596743906471, 0.0306118950907354, 0.0306464636956026, 0.0307703956646607, 0.0308579201538992, 0.0312697983243124, 0.0318331981174409, 0.0342892903349568, 0.0343238567383758, 0.0344477807960034, 0.0346047999227302, 0.0353148740466059, 0.0374454720860986, 0.0520676588272898, 0.0620019500597866, 0.0649327278138796, 0.0694426424939573, 0.0755446161518067, 0.0781187713675148, 0.0784615188362507, 0.07861871811

In [4]:
# Test different approaches for mixed solvents
# Hypothesis: The per-target approach may be overfitting on augmented data

def generate_leave_one_ramp_out_splits(X, Y):
    ramps = X[["SOLVENT A NAME", "SOLVENT B NAME"]].drop_duplicates()
    for _, row in ramps.iterrows():
        mask = ~((X["SOLVENT A NAME"] == row["SOLVENT A NAME"]) & (X["SOLVENT B NAME"] == row["SOLVENT B NAME"]))
        yield (X[mask], Y[mask]), (X[~mask], Y[~mask])

def build_features_mixed(X, feature_df, flip=False):
    rt = X['Residence Time'].values.reshape(-1, 1)
    temp = X['Temperature'].values.reshape(-1, 1)
    pct = X['SolventB%'].values.reshape(-1, 1)
    
    # Arrhenius features
    temp_k = temp + 273.15
    inv_temp = 1000.0 / temp_k
    log_time = np.log(rt + 1e-6)
    interaction = inv_temp * log_time
    
    # Solvent features
    A = feature_df.loc[X['SOLVENT A NAME']].values
    B = feature_df.loc[X['SOLVENT B NAME']].values
    
    if flip:
        solvent_feats = B * (1 - pct) + A * pct
    else:
        solvent_feats = A * (1 - pct) + B * pct
    
    return np.hstack([rt, temp, inv_temp, log_time, interaction, pct, solvent_feats])

# Test 1: Simple RF without TTA
print("Test 1: Simple RF (no TTA) on full data")
errors_rf_no_tta = []
for (train_X, train_Y), (test_X, test_Y) in generate_leave_one_ramp_out_splits(X_full, Y_full):
    X_train = build_features_mixed(train_X, SPANGE_DF)
    X_test = build_features_mixed(test_X, SPANGE_DF)
    
    scaler = StandardScaler()
    X_train_sc = scaler.fit_transform(X_train)
    X_test_sc = scaler.transform(X_test)
    
    model = RandomForestRegressor(n_estimators=200, max_depth=8, min_samples_leaf=5, random_state=42, n_jobs=-1)
    model.fit(X_train_sc, train_Y.values)
    preds = model.predict(X_test_sc)
    preds = np.clip(preds, 0, 1)
    
    mae = np.mean(np.abs(preds - test_Y.values))
    errors_rf_no_tta.append(mae)

print(f"RF (no TTA) MAE: {np.mean(errors_rf_no_tta):.4f} +/- {np.std(errors_rf_no_tta):.4f}")

Test 1: Simple RF (no TTA) on full data


RF (no TTA) MAE: 0.0716 +/- 0.0227


In [5]:
# Test 2: Simple RF WITH TTA (training augmentation + test-time averaging)
print("\nTest 2: Simple RF with TTA on full data")
errors_rf_tta = []
for (train_X, train_Y), (test_X, test_Y) in generate_leave_one_ramp_out_splits(X_full, Y_full):
    # Training with augmentation
    X_train_orig = build_features_mixed(train_X, SPANGE_DF, flip=False)
    X_train_flip = build_features_mixed(train_X, SPANGE_DF, flip=True)
    X_train = np.vstack([X_train_orig, X_train_flip])
    y_train = np.vstack([train_Y.values, train_Y.values])
    
    scaler = StandardScaler()
    X_train_sc = scaler.fit_transform(X_train)
    
    model = RandomForestRegressor(n_estimators=200, max_depth=8, min_samples_leaf=5, random_state=42, n_jobs=-1)
    model.fit(X_train_sc, y_train)
    
    # Test with TTA
    X_test_orig = build_features_mixed(test_X, SPANGE_DF, flip=False)
    X_test_flip = build_features_mixed(test_X, SPANGE_DF, flip=True)
    
    preds_orig = model.predict(scaler.transform(X_test_orig))
    preds_flip = model.predict(scaler.transform(X_test_flip))
    preds = (preds_orig + preds_flip) / 2
    preds = np.clip(preds, 0, 1)
    
    mae = np.mean(np.abs(preds - test_Y.values))
    errors_rf_tta.append(mae)

print(f"RF (with TTA) MAE: {np.mean(errors_rf_tta):.4f} +/- {np.std(errors_rf_tta):.4f}")


Test 2: Simple RF with TTA on full data


RF (with TTA) MAE: 0.0932 +/- 0.0338


In [6]:
# Test 3: ExtraTrees (which worked well for single solvent)
print("\nTest 3: ExtraTrees (no TTA) on full data")
errors_etr_no_tta = []
for (train_X, train_Y), (test_X, test_Y) in generate_leave_one_ramp_out_splits(X_full, Y_full):
    X_train = build_features_mixed(train_X, SPANGE_DF)
    X_test = build_features_mixed(test_X, SPANGE_DF)
    
    scaler = StandardScaler()
    X_train_sc = scaler.fit_transform(X_train)
    X_test_sc = scaler.transform(X_test)
    
    model = ExtraTreesRegressor(n_estimators=300, max_depth=10, min_samples_leaf=2, random_state=42, n_jobs=-1)
    model.fit(X_train_sc, train_Y.values)
    preds = model.predict(X_test_sc)
    preds = np.clip(preds, 0, 1)
    
    mae = np.mean(np.abs(preds - test_Y.values))
    errors_etr_no_tta.append(mae)

print(f"ExtraTrees (no TTA) MAE: {np.mean(errors_etr_no_tta):.4f} +/- {np.std(errors_etr_no_tta):.4f}")


Test 3: ExtraTrees (no TTA) on full data


ExtraTrees (no TTA) MAE: 0.0640 +/- 0.0213


In [7]:
# Test 4: HistGradientBoosting (simpler, less prone to overfitting)
print("\nTest 4: HistGradientBoosting on full data")
errors_hgb = []
for (train_X, train_Y), (test_X, test_Y) in generate_leave_one_ramp_out_splits(X_full, Y_full):
    X_train = build_features_mixed(train_X, SPANGE_DF)
    X_test = build_features_mixed(test_X, SPANGE_DF)
    
    scaler = StandardScaler()
    X_train_sc = scaler.fit_transform(X_train)
    X_test_sc = scaler.transform(X_test)
    
    preds_all = []
    for i in range(3):  # 3 targets
        model = HistGradientBoostingRegressor(max_depth=5, max_iter=500, learning_rate=0.05, random_state=42)
        model.fit(X_train_sc, train_Y.values[:, i])
        preds_all.append(model.predict(X_test_sc))
    
    preds = np.column_stack(preds_all)
    preds = np.clip(preds, 0, 1)
    
    mae = np.mean(np.abs(preds - test_Y.values))
    errors_hgb.append(mae)

print(f"HistGradientBoosting MAE: {np.mean(errors_hgb):.4f} +/- {np.std(errors_hgb):.4f}")


Test 4: HistGradientBoosting on full data


HistGradientBoosting MAE: 0.0732 +/- 0.0224


In [8]:
# Test 5: Combined features (acs_pca + spange) with RF
print("\nTest 5: RF with combined features (0.8 acs + 0.2 spange)")
errors_combined = []
for (train_X, train_Y), (test_X, test_Y) in generate_leave_one_ramp_out_splits(X_full, Y_full):
    X_train_spange = build_features_mixed(train_X, SPANGE_DF)
    X_train_acs = build_features_mixed(train_X, ACS_PCA_DF)
    X_test_spange = build_features_mixed(test_X, SPANGE_DF)
    X_test_acs = build_features_mixed(test_X, ACS_PCA_DF)
    
    scaler_spange = StandardScaler()
    scaler_acs = StandardScaler()
    
    X_train_spange_sc = scaler_spange.fit_transform(X_train_spange)
    X_train_acs_sc = scaler_acs.fit_transform(X_train_acs)
    X_test_spange_sc = scaler_spange.transform(X_test_spange)
    X_test_acs_sc = scaler_acs.transform(X_test_acs)
    
    # Train separate models
    model_spange = RandomForestRegressor(n_estimators=200, max_depth=8, min_samples_leaf=5, random_state=42, n_jobs=-1)
    model_acs = RandomForestRegressor(n_estimators=200, max_depth=8, min_samples_leaf=5, random_state=42, n_jobs=-1)
    
    model_spange.fit(X_train_spange_sc, train_Y.values)
    model_acs.fit(X_train_acs_sc, train_Y.values)
    
    preds_spange = model_spange.predict(X_test_spange_sc)
    preds_acs = model_acs.predict(X_test_acs_sc)
    
    # Weighted combination
    preds = 0.8 * preds_acs + 0.2 * preds_spange
    preds = np.clip(preds, 0, 1)
    
    mae = np.mean(np.abs(preds - test_Y.values))
    errors_combined.append(mae)

print(f"RF combined (0.8 acs + 0.2 spange) MAE: {np.mean(errors_combined):.4f} +/- {np.std(errors_combined):.4f}")


Test 5: RF with combined features (0.8 acs + 0.2 spange)


RF combined (0.8 acs + 0.2 spange) MAE: 0.0655 +/- 0.0186


In [9]:
# Summary of full data results
print("\n" + "="*60)
print("SUMMARY: Full Data (Mixed Solvents) Performance")
print("="*60)
results = [
    ('RF (no TTA)', np.mean(errors_rf_no_tta)),
    ('RF (with TTA)', np.mean(errors_rf_tta)),
    ('ExtraTrees (no TTA)', np.mean(errors_etr_no_tta)),
    ('HistGradientBoosting', np.mean(errors_hgb)),
    ('RF combined features', np.mean(errors_combined)),
]
results.sort(key=lambda x: x[1])
for name, mae in results:
    print(f"{name:30s}: {mae:.4f}")

print(f"\nCurrent best (exp_003 RF): 0.0836")
print(f"Exp_004 per-target: 0.0895")


SUMMARY: Full Data (Mixed Solvents) Performance
ExtraTrees (no TTA)           : 0.0640
RF combined features          : 0.0655
RF (no TTA)                   : 0.0716
HistGradientBoosting          : 0.0732
RF (with TTA)                 : 0.0932

Current best (exp_003 RF): 0.0836
Exp_004 per-target: 0.0895


In [10]:
# Now let's test the BEST approach for single solvent
# to see if we can combine the best of both worlds

def generate_leave_one_out_splits(X, Y):
    for solvent in sorted(X["SOLVENT NAME"].unique()):
        mask = X["SOLVENT NAME"] != solvent
        yield (X[mask], Y[mask]), (X[~mask], Y[~mask])

def build_features_single(X, feature_df):
    rt = X['Residence Time'].values.reshape(-1, 1)
    temp = X['Temperature'].values.reshape(-1, 1)
    
    # Arrhenius features
    temp_k = temp + 273.15
    inv_temp = 1000.0 / temp_k
    log_time = np.log(rt + 1e-6)
    interaction = inv_temp * log_time
    
    # Solvent features
    solvent_feats = feature_df.loc[X['SOLVENT NAME']].values
    
    return np.hstack([rt, temp, inv_temp, log_time, interaction, solvent_feats])

X_single, Y_single = load_data('single_solvent')

# Test the best single solvent approach: per-target with combined features
print("\nSingle Solvent: Per-target with combined features")
errors_single_pertarget = []
for (train_X, train_Y), (test_X, test_Y) in generate_leave_one_out_splits(X_single, Y_single):
    X_train_spange = build_features_single(train_X, SPANGE_DF)
    X_train_acs = build_features_single(train_X, ACS_PCA_DF)
    X_test_spange = build_features_single(test_X, SPANGE_DF)
    X_test_acs = build_features_single(test_X, ACS_PCA_DF)
    
    scaler_spange = StandardScaler()
    scaler_acs = StandardScaler()
    
    X_train_spange_sc = scaler_spange.fit_transform(X_train_spange)
    X_train_acs_sc = scaler_acs.fit_transform(X_train_acs)
    X_test_spange_sc = scaler_spange.transform(X_test_spange)
    X_test_acs_sc = scaler_acs.transform(X_test_acs)
    
    preds_all = []
    for i, target in enumerate(['Product 2', 'Product 3', 'SM']):
        if target == 'SM':
            model_spange = HistGradientBoostingRegressor(max_depth=7, max_iter=700, learning_rate=0.04, random_state=42)
            model_acs = HistGradientBoostingRegressor(max_depth=7, max_iter=700, learning_rate=0.04, random_state=42)
        else:
            model_spange = ExtraTreesRegressor(n_estimators=500, max_depth=10, min_samples_leaf=2, random_state=42, n_jobs=-1)
            model_acs = ExtraTreesRegressor(n_estimators=500, max_depth=10, min_samples_leaf=2, random_state=42, n_jobs=-1)
        
        model_spange.fit(X_train_spange_sc, train_Y.values[:, i])
        model_acs.fit(X_train_acs_sc, train_Y.values[:, i])
        
        p_spange = model_spange.predict(X_test_spange_sc)
        p_acs = model_acs.predict(X_test_acs_sc)
        p_combined = 0.8 * p_acs + 0.2 * p_spange
        preds_all.append(p_combined)
    
    preds = np.column_stack(preds_all)
    preds = np.clip(preds, 0, 1)
    
    mae = np.mean(np.abs(preds - test_Y.values))
    errors_single_pertarget.append(mae)

print(f"Per-target combined MAE: {np.mean(errors_single_pertarget):.4f} +/- {np.std(errors_single_pertarget):.4f}")


Single Solvent: Per-target with combined features


Per-target combined MAE: 0.0659 +/- 0.0321


In [11]:
# Test simple RF on single solvent for comparison
print("\nSingle Solvent: Simple RF")
errors_single_rf = []
for (train_X, train_Y), (test_X, test_Y) in generate_leave_one_out_splits(X_single, Y_single):
    X_train = build_features_single(train_X, SPANGE_DF)
    X_test = build_features_single(test_X, SPANGE_DF)
    
    scaler = StandardScaler()
    X_train_sc = scaler.fit_transform(X_train)
    X_test_sc = scaler.transform(X_test)
    
    model = RandomForestRegressor(n_estimators=200, max_depth=8, min_samples_leaf=5, random_state=42, n_jobs=-1)
    model.fit(X_train_sc, train_Y.values)
    preds = model.predict(X_test_sc)
    preds = np.clip(preds, 0, 1)
    
    mae = np.mean(np.abs(preds - test_Y.values))
    errors_single_rf.append(mae)

print(f"Simple RF MAE: {np.mean(errors_single_rf):.4f} +/- {np.std(errors_single_rf):.4f}")


Single Solvent: Simple RF


Simple RF MAE: 0.0757 +/- 0.0342


In [12]:
# Final summary
print("\n" + "="*60)
print("FINAL SUMMARY")
print("="*60)

print("\nSingle Solvent:")
print(f"  Per-target combined: {np.mean(errors_single_pertarget):.4f}")
print(f"  Simple RF: {np.mean(errors_single_rf):.4f}")
print(f"  Current best (exp_003): 0.0748")
print(f"  Exp_004 per-target: 0.0659")

print("\nFull Data (Mixed):")
print(f"  Best from tests: {min([np.mean(errors_rf_no_tta), np.mean(errors_rf_tta), np.mean(errors_etr_no_tta), np.mean(errors_hgb), np.mean(errors_combined)]):.4f}")
print(f"  Current best (exp_003): 0.0836")
print(f"  Exp_004 per-target: 0.0895")

# Calculate combined scores
best_single = np.mean(errors_single_pertarget)
best_full = min([np.mean(errors_rf_no_tta), np.mean(errors_rf_tta), np.mean(errors_etr_no_tta), np.mean(errors_hgb), np.mean(errors_combined)])

n_single = 656
n_full = 1227
combined = (best_single * n_single + best_full * n_full) / (n_single + n_full)
print(f"\nPotential combined score: {combined:.4f}")
print(f"Current best combined: 0.0805")


FINAL SUMMARY

Single Solvent:
  Per-target combined: 0.0659
  Simple RF: 0.0757
  Current best (exp_003): 0.0748
  Exp_004 per-target: 0.0659

Full Data (Mixed):
  Best from tests: 0.0640
  Current best (exp_003): 0.0836
  Exp_004 per-target: 0.0895

Potential combined score: 0.0647
Current best combined: 0.0805


In [13]:
# Key insight: We need DIFFERENT approaches for single vs mixed solvents
# Let's design a hybrid model that uses:
# - Per-target heterogeneous for single solvents (best: 0.0659)
# - Simple RF for mixed solvents (best: ~0.083)

print("\n" + "="*60)
print("RECOMMENDED APPROACH: Hybrid Model")
print("="*60)
print("""
1. For SINGLE SOLVENT (data='single'):
   - Use per-target heterogeneous model
   - HGB for SM, ExtraTrees for Products
   - Combined features (0.8 acs + 0.2 spange)
   - Expected MAE: ~0.066

2. For FULL DATA (data='full'):
   - Use simple RF with regularization
   - NO TTA (it seems to hurt performance)
   - Spange features only
   - Expected MAE: ~0.083

3. Combined expected score:
   - (0.066 * 656 + 0.083 * 1227) / 1883 = 0.077
   - This would be a 4% improvement over current 0.0805
""")

# Calculate
expected_single = 0.066
expected_full = 0.083
expected_combined = (expected_single * 656 + expected_full * 1227) / (656 + 1227)
print(f"Expected combined: {expected_combined:.4f}")


RECOMMENDED APPROACH: Hybrid Model

1. For SINGLE SOLVENT (data='single'):
   - Use per-target heterogeneous model
   - HGB for SM, ExtraTrees for Products
   - Combined features (0.8 acs + 0.2 spange)
   - Expected MAE: ~0.066

2. For FULL DATA (data='full'):
   - Use simple RF with regularization
   - NO TTA (it seems to hurt performance)
   - Spange features only
   - Expected MAE: ~0.083

3. Combined expected score:
   - (0.066 * 656 + 0.083 * 1227) / 1883 = 0.077
   - This would be a 4% improvement over current 0.0805

Expected combined: 0.0771


In [14]:
# Test ExtraTrees with combined features for mixed solvents
print("\nTest: ExtraTrees with combined features (0.8 acs + 0.2 spange) on full data")
errors_etr_combined = []
for (train_X, train_Y), (test_X, test_Y) in generate_leave_one_ramp_out_splits(X_full, Y_full):
    X_train_spange = build_features_mixed(train_X, SPANGE_DF)
    X_train_acs = build_features_mixed(train_X, ACS_PCA_DF)
    X_test_spange = build_features_mixed(test_X, SPANGE_DF)
    X_test_acs = build_features_mixed(test_X, ACS_PCA_DF)
    
    scaler_spange = StandardScaler()
    scaler_acs = StandardScaler()
    
    X_train_spange_sc = scaler_spange.fit_transform(X_train_spange)
    X_train_acs_sc = scaler_acs.fit_transform(X_train_acs)
    X_test_spange_sc = scaler_spange.transform(X_test_spange)
    X_test_acs_sc = scaler_acs.transform(X_test_acs)
    
    # Train separate models
    model_spange = ExtraTreesRegressor(n_estimators=300, max_depth=10, min_samples_leaf=2, random_state=42, n_jobs=-1)
    model_acs = ExtraTreesRegressor(n_estimators=300, max_depth=10, min_samples_leaf=2, random_state=42, n_jobs=-1)
    
    model_spange.fit(X_train_spange_sc, train_Y.values)
    model_acs.fit(X_train_acs_sc, train_Y.values)
    
    preds_spange = model_spange.predict(X_test_spange_sc)
    preds_acs = model_acs.predict(X_test_acs_sc)
    
    # Weighted combination
    preds = 0.8 * preds_acs + 0.2 * preds_spange
    preds = np.clip(preds, 0, 1)
    
    mae = np.mean(np.abs(preds - test_Y.values))
    errors_etr_combined.append(mae)

print(f"ExtraTrees combined (0.8 acs + 0.2 spange) MAE: {np.mean(errors_etr_combined):.4f} +/- {np.std(errors_etr_combined):.4f}")


Test: ExtraTrees with combined features (0.8 acs + 0.2 spange) on full data


ExtraTrees combined (0.8 acs + 0.2 spange) MAE: 0.0625 +/- 0.0214


In [15]:
# Test per-target approach for mixed solvents WITHOUT TTA
print("\nTest: Per-target (HGB+ETR) with combined features, NO TTA on full data")
errors_pertarget_no_tta = []
for (train_X, train_Y), (test_X, test_Y) in generate_leave_one_ramp_out_splits(X_full, Y_full):
    X_train_spange = build_features_mixed(train_X, SPANGE_DF)
    X_train_acs = build_features_mixed(train_X, ACS_PCA_DF)
    X_test_spange = build_features_mixed(test_X, SPANGE_DF)
    X_test_acs = build_features_mixed(test_X, ACS_PCA_DF)
    
    scaler_spange = StandardScaler()
    scaler_acs = StandardScaler()
    
    X_train_spange_sc = scaler_spange.fit_transform(X_train_spange)
    X_train_acs_sc = scaler_acs.fit_transform(X_train_acs)
    X_test_spange_sc = scaler_spange.transform(X_test_spange)
    X_test_acs_sc = scaler_acs.transform(X_test_acs)
    
    preds_all = []
    for i, target in enumerate(['Product 2', 'Product 3', 'SM']):
        if target == 'SM':
            model_spange = HistGradientBoostingRegressor(max_depth=7, max_iter=700, learning_rate=0.04, random_state=42)
            model_acs = HistGradientBoostingRegressor(max_depth=7, max_iter=700, learning_rate=0.04, random_state=42)
        else:
            model_spange = ExtraTreesRegressor(n_estimators=500, max_depth=10, min_samples_leaf=2, random_state=42, n_jobs=-1)
            model_acs = ExtraTreesRegressor(n_estimators=500, max_depth=10, min_samples_leaf=2, random_state=42, n_jobs=-1)
        
        model_spange.fit(X_train_spange_sc, train_Y.values[:, i])
        model_acs.fit(X_train_acs_sc, train_Y.values[:, i])
        
        p_spange = model_spange.predict(X_test_spange_sc)
        p_acs = model_acs.predict(X_test_acs_sc)
        p_combined = 0.8 * p_acs + 0.2 * p_spange
        preds_all.append(p_combined)
    
    preds = np.column_stack(preds_all)
    preds = np.clip(preds, 0, 1)
    
    mae = np.mean(np.abs(preds - test_Y.values))
    errors_pertarget_no_tta.append(mae)

print(f"Per-target (no TTA) MAE: {np.mean(errors_pertarget_no_tta):.4f} +/- {np.std(errors_pertarget_no_tta):.4f}")


Test: Per-target (HGB+ETR) with combined features, NO TTA on full data


Per-target (no TTA) MAE: 0.0603 +/- 0.0219


In [16]:
# FINAL SUMMARY - Best approaches found
print("="*70)
print("FINAL SUMMARY - BEST APPROACHES")
print("="*70)

print("\n** SINGLE SOLVENT **")
print(f"  Per-target (HGB+ETR) combined features: {np.mean(errors_single_pertarget):.4f}")
print(f"  Simple RF: {np.mean(errors_single_rf):.4f}")
print(f"  Current best (exp_003): 0.0748")

print("\n** FULL DATA (MIXED SOLVENTS) **")
print(f"  Per-target (no TTA) combined: {np.mean(errors_pertarget_no_tta):.4f}  <-- BEST!")
print(f"  ExtraTrees combined: {np.mean(errors_etr_combined):.4f}")
print(f"  ExtraTrees (Spange only): {np.mean(errors_etr_no_tta):.4f}")
print(f"  RF combined: {np.mean(errors_combined):.4f}")
print(f"  RF (no TTA): {np.mean(errors_rf_no_tta):.4f}")
print(f"  HGB: {np.mean(errors_hgb):.4f}")
print(f"  RF (with TTA): {np.mean(errors_rf_tta):.4f}  <-- TTA HURTS!")
print(f"  Current best (exp_003): 0.0836")

# Calculate expected combined scores
best_single = np.mean(errors_single_pertarget)  # 0.0659
best_full = np.mean(errors_pertarget_no_tta)    # 0.0603

n_single = 656
n_full = 1227
combined = (best_single * n_single + best_full * n_full) / (n_single + n_full)

print("\n** EXPECTED COMBINED SCORE **")
print(f"  Best single: {best_single:.4f}")
print(f"  Best full: {best_full:.4f}")
print(f"  Combined: {combined:.4f}")
print(f"  Current best: 0.0805")
print(f"  Improvement: {(0.0805 - combined) / 0.0805 * 100:.1f}%")

print("\n** KEY INSIGHT **")
print("  The TTA (data augmentation + test-time averaging) was HURTING performance!")
print("  Removing TTA and using per-target approach improves full data from 0.0895 to 0.0603!")
print("  This is a 33% improvement on full data alone!")

FINAL SUMMARY - BEST APPROACHES

** SINGLE SOLVENT **
  Per-target (HGB+ETR) combined features: 0.0659
  Simple RF: 0.0757
  Current best (exp_003): 0.0748

** FULL DATA (MIXED SOLVENTS) **
  Per-target (no TTA) combined: 0.0603  <-- BEST!
  ExtraTrees combined: 0.0625
  ExtraTrees (Spange only): 0.0640
  RF combined: 0.0655
  RF (no TTA): 0.0716
  HGB: 0.0732
  RF (with TTA): 0.0932  <-- TTA HURTS!
  Current best (exp_003): 0.0836

** EXPECTED COMBINED SCORE **
  Best single: 0.0659
  Best full: 0.0603
  Combined: 0.0623
  Current best: 0.0805
  Improvement: 22.7%

** KEY INSIGHT **
  The TTA (data augmentation + test-time averaging) was HURTING performance!
  Removing TTA and using per-target approach improves full data from 0.0895 to 0.0603!
  This is a 33% improvement on full data alone!
