In [None]:
# Environment check and basic imports
import platform
import sys

import torch

# Record the interpreter, framework, CUDA toolkit and CPU architecture at the top of
# the run log so results can later be tied to the environment that produced them.
environment_report = (
    ("Python :", sys.version),
    ("PyTorch:", torch.__version__),
    ("CUDA   :", torch.version.cuda),
    ("Arch   :", platform.machine()),
)
for report_label, report_value in environment_report:
    print(report_label, report_value)

In [None]:
# =====================================
# 🎯 HYPERPARAMETER TUNING VARIABLES
# =====================================
# Modify these variables to tune model performance
# These will override the values in polymer-GRIT-RRWP.yaml

print("🎛️ Setting up hyperparameter tuning variables...")

# ==== HIGH PRIORITY PARAMETERS (Major Impact on Performance) ====

# Learning Rate - Controls training speed and convergence
# Recommended: [1e-4, 5e-4, 1e-3, 2e-3]
BASE_LR = 1e-3

# Model Depth - Number of Transformer layers
# Recommended: [8, 10, 12, 14] (more layers = more capacity but slower)
GT_LAYERS = 10

# Hidden Dimension - Model width/capacity
# Recommended: [64, 128, 192] (higher = more capacity but more memory)
GT_DIM_HIDDEN = 64

# Batch Size - Training batch size
# Recommended: [16, 32, 64] (higher = more stable but more memory)
BATCH_SIZE = 32

# ==== MEDIUM PRIORITY PARAMETERS (Moderate Impact) ====

# Dropout Rate - Regularization strength
# Recommended: [0.0, 0.1, 0.2] (higher = more regularization)
GT_DROPOUT = 0.0

# Attention Heads - Multi-head attention
# Recommended: [4, 6, 8, 12] (must divide GT_DIM_HIDDEN evenly; checked below)
GT_N_HEADS = 8

# Weight Decay - L2 regularization
# Recommended: [1e-6, 1e-5, 1e-4] (higher = more regularization)
WEIGHT_DECAY = 1e-5

# Training Epochs - Total training steps
# Recommended: [150, 200, 300] (more = longer training)
MAX_EPOCH = 200

# Attention Dropout - Regularization for attention mechanism
# Recommended: [0.0, 0.1, 0.2, 0.3]
ATTN_DROPOUT = 0.2


def validate_hyperparameters(dim_hidden, n_heads, candidate_heads=(4, 6, 8, 12, 16)):
    """Check that the attention-head count is usable with the hidden dimension.

    Args:
        dim_hidden: Hidden dimension of the model (``GT_DIM_HIDDEN``).
        n_heads: Number of attention heads (``GT_N_HEADS``).
        candidate_heads: Head counts to suggest when ``n_heads`` does not divide
            ``dim_hidden``; only those that divide ``dim_hidden`` are listed.

    Returns:
        A list of warning lines ready to print. The list is empty when ``n_heads``
        is positive and divides ``dim_hidden`` evenly.
    """
    # Guard first: a zero head count would make the modulo below raise
    # ZeroDivisionError instead of producing a readable warning.
    if n_heads <= 0:
        return [f"⚠️  WARNING: GT_N_HEADS ({n_heads}) must be a positive integer"]
    if dim_hidden % n_heads != 0:
        return [
            f"⚠️  WARNING: GT_DIM_HIDDEN ({dim_hidden}) should be divisible by GT_N_HEADS ({n_heads})",
            f"   Recommended GT_N_HEADS for dim_hidden={dim_hidden}: {[i for i in candidate_heads if dim_hidden % i == 0]}",
        ]
    return []


# ==== PARAMETER SOURCE CONFIRMATION ====
print("=" * 80)
print("🔔 PARAMETER SOURCE CONFIRMATION")
print("=" * 80)
print("✅ USING NOTEBOOK HYPERPARAMETER VARIABLES (NOT polymer-GRIT-RRWP.yaml)")
print("📋 The following parameters will OVERRIDE the YAML file:")

print("\n📊 Current Hyperparameter Settings (FROM NOTEBOOK VARIABLES):")
print(f"   🎯 Learning Rate (BASE_LR): {BASE_LR} ← FROM NOTEBOOK")
print(f"   🎯 Model Layers (GT_LAYERS): {GT_LAYERS} ← FROM NOTEBOOK")
print(f"   🎯 Hidden Dimension (GT_DIM_HIDDEN): {GT_DIM_HIDDEN} ← FROM NOTEBOOK")
print(f"   🎯 Batch Size (BATCH_SIZE): {BATCH_SIZE} ← FROM NOTEBOOK")
print(f"   🎯 Dropout (GT_DROPOUT): {GT_DROPOUT} ← FROM NOTEBOOK")
print(f"   🎯 Attention Heads (GT_N_HEADS): {GT_N_HEADS} ← FROM NOTEBOOK")
print(f"   🎯 Weight Decay (WEIGHT_DECAY): {WEIGHT_DECAY} ← FROM NOTEBOOK")
print(f"   🎯 Max Epochs (MAX_EPOCH): {MAX_EPOCH} ← FROM NOTEBOOK")
print(f"   🎯 Attention Dropout (ATTN_DROPOUT): {ATTN_DROPOUT} ← FROM NOTEBOOK")

# ==== VALIDATION CHECKS ====
print("\n🔍 Parameter Validation:")
# Kept as a module-level name so the outcome of the check can be inspected afterwards.
HYPERPARAMETER_ISSUES = validate_hyperparameters(GT_DIM_HIDDEN, GT_N_HEADS)
if HYPERPARAMETER_ISSUES:
    for issue_line in HYPERPARAMETER_ISSUES:
        print(issue_line)
else:
    print(f"✅ GT_DIM_HIDDEN ({GT_DIM_HIDDEN}) is divisible by GT_N_HEADS ({GT_N_HEADS})")

print("\n" + "=" * 80)
# Only claim success when validation passed; otherwise the banner would contradict
# the warning printed just above it.
if HYPERPARAMETER_ISSUES:
    print("⚠️  Hyperparameter variables initialized WITH WARNINGS - fix them before training")
else:
    print("✅ Hyperparameter variables initialized successfully!")
print("🎯 These values will be used instead of polymer-GRIT-RRWP.yaml")
print("💡 To tune performance, modify the variables above and re-run this cell")
print("=" * 80)

In [None]:
# Kaggle Environment Setup
# Defines every input/working path used by later cells, runs the offline wheel
# installers on a best-effort basis, and prepares /kaggle/working.
from pathlib import Path
import os
import sys
import warnings
# NOTE(review): this silences *all* warnings for the rest of the session, including
# deprecation notices raised by the training code.
warnings.filterwarnings('ignore')

# Kaggle paths
TRAIN_CSV = Path('/kaggle/input/neurips-open-polymer-prediction-2025/train.csv')
TEST_CSV = Path('/kaggle/input/neurips-open-polymer-prediction-2025/test.csv')
SUPPLEMENT_DIR = Path('/kaggle/input/neurips-open-polymer-prediction-2025/train_supplement')
GRIT_SOURCE = Path('/kaggle/input/grit/pytorch/default/1/neurips_challenge/GRIT')
PIPELINE_SOURCE = Path('/kaggle/input/grit/pytorch/default/1/neurips_challenge/full_pipeline.py')
CONFIG_SOURCE = Path('/kaggle/input/grit/pytorch/default/1/neurips_challenge/configs')
KAGGLE_WORKING = Path('/kaggle/working')

print("🔍 Kaggle Environment Setup")
print(f"📖 Pipeline Source: {PIPELINE_SOURCE}")
print(f"📖 Config Source: {CONFIG_SOURCE}")
print(f"✏️  Working Directory: {KAGGLE_WORKING}")

# Install offline wheels
# Each installer runs independently, so a missing or failing script neither prevents
# the other from running nor hides which one failed.
OFFLINE_INSTALL_SCRIPTS = [
    Path('/kaggle/input/grit-wheels-supplement/neurips-offline-wheels-truly-offline/install_offline.py'),
    Path('/kaggle/input/grit-wheels/install_offline.py'),
]
failed_installers = []
for installer_path in OFFLINE_INSTALL_SCRIPTS:
    try:
        # The context manager closes the handle (the bare open().read() leaked it).
        with open(installer_path) as installer_file:
            installer_source = installer_file.read()
        # Executed in this cell's namespace; passing the real filename to compile()
        # makes tracebacks point at the installer script.
        exec(compile(installer_source, str(installer_path), 'exec'))
    except (Exception, SystemExit) as install_error:
        # SystemExit is caught on purpose in case an installer calls sys.exit();
        # KeyboardInterrupt is deliberately allowed to propagate.
        failed_installers.append(installer_path)
        print(f"⚠️  Offline wheels installation failed for {installer_path}: {install_error!r} (expected in local testing)")

if not failed_installers:
    print("✅ Offline wheels installed")

# Create working directories
working_dirs = ['graphs', 'results', 'cfg_runs', 'checkpoints', 'logs']
for subdir in working_dirs:
    (KAGGLE_WORKING / subdir).mkdir(parents=True, exist_ok=True)

# Add to Python path (guarded so re-running the cell does not add duplicates)
if str(KAGGLE_WORKING) not in sys.path:
    sys.path.insert(0, str(KAGGLE_WORKING))
print(f"✅ Environment ready")

In [None]:
# Create Dynamic Config with Notebook Hyperparameters
# Loads the base YAML shipped with the GRIT dataset, overwrites the tunable entries
# with the notebook variables, and writes the result to /kaggle/working.
import yaml

print("🎛️ Creating config with notebook hyperparameters...")

# Read original config from Kaggle input
original_config_path = CONFIG_SOURCE / 'polymer-GRIT-RRWP.yaml'
with open(original_config_path, 'r') as f:
    config = yaml.safe_load(f)

# safe_load returns None for an empty file; fail with a clear message instead of a
# TypeError on the first subscript below.
if not isinstance(config, dict):
    raise ValueError(f"Base config {original_config_path} is empty or not a YAML mapping")

print(f"📖 Read base config from: {original_config_path}")

# Ensure every section written below exists, so a base file that lacks one of them
# does not abort the cell with a KeyError.
for section_name in ('train', 'gt', 'optim', 'gnn'):
    if config.get(section_name) is None:
        config[section_name] = {}

# Override with notebook hyperparameters
config['train']['batch_size'] = BATCH_SIZE
config['gt']['layers'] = GT_LAYERS
config['gt']['n_heads'] = GT_N_HEADS
config['gt']['dim_hidden'] = GT_DIM_HIDDEN
config['gt']['dropout'] = GT_DROPOUT
config['gt']['attn_dropout'] = ATTN_DROPOUT
config['optim']['base_lr'] = BASE_LR
config['optim']['weight_decay'] = WEIGHT_DECAY
config['optim']['max_epoch'] = MAX_EPOCH

# Adaptive settings
# Warm-up is a quarter of the run with a floor of 10 epochs, capped below MAX_EPOCH
# so that a short run is never warm-up from start to finish.
num_warmup_epochs = min(max(10, MAX_EPOCH // 4), max(MAX_EPOCH - 1, 0))
config['optim']['num_warmup_epochs'] = num_warmup_epochs
config['optim']['min_lr'] = BASE_LR / 100
config['gnn']['dim_inner'] = GT_DIM_HIDDEN
config['gnn']['dropout'] = GT_DROPOUT

# Disable tensorboard for Kaggle compatibility
config['tensorboard_each_run'] = False

# Save to working directory
dynamic_config_path = KAGGLE_WORKING / 'polymer-GRIT-RRWP.yaml'
with open(dynamic_config_path, 'w') as f:
    yaml.safe_dump(config, f, sort_keys=False, default_flow_style=False)

print(f"✅ Dynamic config saved: {dynamic_config_path}")

print(f"\n📊 Using Notebook Hyperparameters:")
print(f"  🎯 Learning Rate: {BASE_LR}")
print(f"  🎯 GT Layers: {GT_LAYERS}")
print(f"  🎯 Hidden Dimension: {GT_DIM_HIDDEN}")
print(f"  🎯 Attention Heads: {GT_N_HEADS}")
print(f"  🎯 Batch Size: {BATCH_SIZE}")
print(f"  🎯 Dropout: {GT_DROPOUT}")
print(f"  🎯 Attention Dropout: {ATTN_DROPOUT}")
print(f"  🎯 Weight Decay: {WEIGHT_DECAY}")
print(f"  🎯 Max Epochs: {MAX_EPOCH}")
print(f"  🎯 Warm-up Epochs: {num_warmup_epochs}")
print(f"  📊 TensorBoard: Disabled (Kaggle compatibility)")

print(f"\n🔔 CONFIG CONFIRMED: Using notebook variables instead of original YAML")

In [None]:
# Execute Pipeline with Notebook Hyperparameters
# Step 1 of 3: read full_pipeline.py, rewrite its path expressions for the Kaggle
# directory layout by plain text substitution, and save the adapted copy.
import importlib.util
import torch

print("🚀 Executing full_pipeline.py with notebook hyperparameters...")

# Read and adapt pipeline for Kaggle paths
with open(PIPELINE_SOURCE, 'r') as f:
    pipeline_code = f.read()

# Apply comprehensive Kaggle path fixes
# Keys are exact source snippets (whitespace-sensitive); values are their replacements.
kaggle_fixes = {
    # Basic paths
    'GRIT_DIR = Path(__file__).resolve().parent / "GRIT"': 'GRIT_DIR = Path("/kaggle/working/GRIT")',
    'ROOT        = Path(__file__).resolve().parent': 'ROOT = Path("/kaggle/working")',
    'DATA_ROOT   = ROOT / "data"': 'DATA_ROOT = Path("/kaggle/input/neurips-open-polymer-prediction-2025")',
    'SUPP_DIR    = DATA_ROOT / "train_supplement"': 'SUPP_DIR = Path("/kaggle/input/neurips-open-polymer-prediction-2025/train_supplement")',
    'GRAPH_DIR   = SUPP_DIR / "graphs"': 'GRAPH_DIR = Path("/kaggle/working/graphs")',
    'RESULTS_DIR = ROOT / "results"': 'RESULTS_DIR = Path("/kaggle/working/results")',
    "sub_out.to_csv(ROOT/'submission.csv', index=False)": 'sub_out.to_csv("/kaggle/working/submission.csv", index=False)',
    'dataset = PolymerDS_class(root=DATA_ROOT, target_idx=gym_cfg.dataset.target_idx)': 'dataset = PolymerDS_class(root=Path("/kaggle/working"), target_idx=gym_cfg.dataset.target_idx)',

    # Fix the train_supplement/graphs path issue
    'Path(root) / "train_supplement" / "graphs" / "train_graphs.pt"': 'Path("/kaggle/working/graphs/train_graphs.pt")',
    'Path(root) / "train_supplement" / "graphs" / "test_graphs.pt"': 'Path("/kaggle/working/graphs/test_graphs.pt")',

    # Additional graph file references
    'torch.save(graphs, GRAPH_DIR / "train_graphs.pt")': 'torch.save(graphs, Path("/kaggle/working/graphs/train_graphs.pt"))',
    'torch.save(t_graphs, GRAPH_DIR / "test_graphs.pt")': 'torch.save(t_graphs, Path("/kaggle/working/graphs/test_graphs.pt"))',
    'torch.load(GRAPH_DIR / "train_graphs.pt"': 'torch.load(Path("/kaggle/working/graphs/train_graphs.pt")',
    'torch.load(GRAPH_DIR / "test_graphs.pt"': 'torch.load(Path("/kaggle/working/graphs/test_graphs.pt")',
    '(GRAPH_DIR / "train_graphs.pt").exists()': 'Path("/kaggle/working/graphs/train_graphs.pt").exists()',
    '(GRAPH_DIR / "test_graphs.pt").exists()': 'Path("/kaggle/working/graphs/test_graphs.pt").exists()',

    # Dataset file references
    'DATA_ROOT / "train.csv"': 'Path("/kaggle/input/neurips-open-polymer-prediction-2025/train.csv")',
    'DATA_ROOT / "test.csv"': 'Path("/kaggle/input/neurips-open-polymer-prediction-2025/test.csv")',
    'SUPP_DIR / "dataset1.csv"': 'Path("/kaggle/input/neurips-open-polymer-prediction-2025/train_supplement/dataset1.csv")',
    'SUPP_DIR / "dataset2.csv"': 'Path("/kaggle/input/neurips-open-polymer-prediction-2025/train_supplement/dataset2.csv")',
    'SUPP_DIR / "dataset3.csv"': 'Path("/kaggle/input/neurips-open-polymer-prediction-2025/train_supplement/dataset3.csv")',
    'SUPP_DIR / "dataset4.csv"': 'Path("/kaggle/input/neurips-open-polymer-prediction-2025/train_supplement/dataset4.csv")',

    # Other results paths
    'report_path = save_dir / "stage1_evaluation_report.csv"': 'report_path = Path("/kaggle/working/stage1_evaluation_report.csv")',
    'CONFIG_SAVE = RESULTS_DIR / "cfg_runs"': 'CONFIG_SAVE = Path("/kaggle/working/cfg_runs")',
}

# str.replace is a silent no-op when the pattern is absent, so record which patterns
# were not found in the text at the time they were applied and report them.
unmatched_fixes = []
for old, new in kaggle_fixes.items():
    if old in pipeline_code:
        pipeline_code = pipeline_code.replace(old, new)
    else:
        unmatched_fixes.append(old)

# Save adapted pipeline
kaggle_pipeline = KAGGLE_WORKING / 'full_pipeline_kaggle.py'
with open(kaggle_pipeline, 'w') as f:
    f.write(pipeline_code)

print(f"✅ Pipeline adapted for Kaggle: {kaggle_pipeline}")
if unmatched_fixes:
    # Informational only: the pipeline may simply not contain every pattern.
    print(f"ℹ️  {len(unmatched_fixes)} of {len(kaggle_fixes)} path fixes did not match the pipeline source (patterns are whitespace-sensitive):")
    for unmatched_pattern in unmatched_fixes:
        print(f"     {unmatched_pattern}")

# Copy and patch GRIT
# Step 2 of 3: copy the GRIT sources into the working directory and replace the
# `from ogb.utils import smiles2graph` import in the peptides dataset loaders with
# the inline implementation below.
import shutil

# Text substituted for the OGB import line. It is written verbatim into the patched
# files, so it must stay valid Python at column 0. safe_index catches only ValueError
# (the exception list.index raises for a missing element), and smiles2graph raises a
# ValueError for an unparsable SMILES instead of failing later on a None molecule.
ogb_implementation = '''# ===== OGB SMILES2GRAPH IMPLEMENTATION =====
import numpy as np
from rdkit import Chem

allowable_features = {
    'possible_atomic_num_list': list(range(1, 119)) + ['misc'],
    'possible_chirality_list': ['CHI_UNSPECIFIED', 'CHI_TETRAHEDRAL_CW', 'CHI_TETRAHEDRAL_CCW', 'CHI_OTHER', 'misc'],
    'possible_degree_list': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 'misc'],
    'possible_formal_charge_list': [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 'misc'],
    'possible_numH_list': [0, 1, 2, 3, 4, 5, 6, 7, 8, 'misc'],
    'possible_number_radical_e_list': [0, 1, 2, 3, 4, 'misc'],
    'possible_hybridization_list': ['SP', 'SP2', 'SP3', 'SP3D', 'SP3D2', 'misc'],
    'possible_is_aromatic_list': [False, True],
    'possible_is_in_ring_list': [False, True],
    'possible_bond_type_list': ['SINGLE', 'DOUBLE', 'TRIPLE', 'AROMATIC', 'misc'],
    'possible_bond_stereo_list': ['STEREONONE', 'STEREOZ', 'STEREOE', 'STEREOCIS', 'STEREOTRANS', 'STEREOANY'],
    'possible_is_conjugated_list': [False, True]
}

def safe_index(l, e):
    try:
        return l.index(e)
    except ValueError:
        return len(l) - 1

def atom_to_feature_vector(atom):
    atom_feature = [
        safe_index(allowable_features['possible_atomic_num_list'], atom.GetAtomicNum()),
        safe_index(allowable_features['possible_chirality_list'], str(atom.GetChiralTag())),
        safe_index(allowable_features['possible_degree_list'], atom.GetTotalDegree()),
        safe_index(allowable_features['possible_formal_charge_list'], atom.GetFormalCharge()),
        safe_index(allowable_features['possible_numH_list'], atom.GetTotalNumHs()),
        safe_index(allowable_features['possible_number_radical_e_list'], atom.GetNumRadicalElectrons()),
        safe_index(allowable_features['possible_hybridization_list'], str(atom.GetHybridization())),
        allowable_features['possible_is_aromatic_list'].index(atom.GetIsAromatic()),
        allowable_features['possible_is_in_ring_list'].index(atom.IsInRing()),
    ]
    return atom_feature

def bond_to_feature_vector(bond):
    bond_feature = [
        safe_index(allowable_features['possible_bond_type_list'], str(bond.GetBondType())),
        allowable_features['possible_bond_stereo_list'].index(str(bond.GetStereo())),
        allowable_features['possible_is_conjugated_list'].index(bond.GetIsConjugated()),
    ]
    return bond_feature

def smiles2graph(smiles_string):
    mol = Chem.MolFromSmiles(smiles_string)
    if mol is None:
        raise ValueError(f"Invalid SMILES string: {smiles_string!r}")
    atom_features_list = []
    for atom in mol.GetAtoms():
        atom_features_list.append(atom_to_feature_vector(atom))
    x = np.array(atom_features_list, dtype=np.int64)

    num_bond_features = 3
    if len(mol.GetBonds()) > 0:
        edges_list = []
        edge_features_list = []
        for bond in mol.GetBonds():
            i = bond.GetBeginAtomIdx()
            j = bond.GetEndAtomIdx()
            edge_feature = bond_to_feature_vector(bond)
            edges_list.append((i, j))
            edge_features_list.append(edge_feature)
            edges_list.append((j, i))
            edge_features_list.append(edge_feature)
        edge_index = np.array(edges_list, dtype=np.int64).T
        edge_attr = np.array(edge_features_list, dtype=np.int64)
    else:
        edge_index = np.empty((2, 0), dtype=np.int64)
        edge_attr = np.empty((0, num_bond_features), dtype=np.int64)

    graph = dict()
    graph['edge_index'] = edge_index
    graph['edge_feat'] = edge_attr
    graph['node_feat'] = x
    graph['num_nodes'] = len(x)
    return graph
# ===== END OGB IMPLEMENTATION ====='''

try:
    if GRIT_SOURCE.exists():
        # The input dataset is read-only, so work on a fresh copy; a copy left by a
        # previous run is removed first because copytree refuses an existing target.
        writable_grit = KAGGLE_WORKING / "GRIT"
        if writable_grit.exists():
            shutil.rmtree(writable_grit)
        shutil.copytree(GRIT_SOURCE, writable_grit)
        print(f"✅ GRIT copied to: {writable_grit}")

        # Fix OGB smiles2graph import issue
        print("🔧 Patching OGB smiles2graph imports...")

        # Patch files that use OGB
        ogb_files = [
            writable_grit / "grit" / "loader" / "dataset" / "peptides_structural.py",
            writable_grit / "grit" / "loader" / "dataset" / "peptides_functional.py"
        ]

        for ogb_file in ogb_files:
            if ogb_file.exists():
                with open(ogb_file, 'r') as f:
                    content = f.read()
                # Only rewrite files that actually contain the import line.
                if "from ogb.utils import smiles2graph" in content:
                    content = content.replace("from ogb.utils import smiles2graph", ogb_implementation)
                    with open(ogb_file, 'w') as f:
                        f.write(content)
                    print(f"  ✅ Patched: {ogb_file.name}")

    else:
        print("⚠️  GRIT source not found (expected in local testing)")
except Exception as e:
    # Best effort: report and continue so later cells can still show diagnostics.
    print(f"⚠️  GRIT setup failed: {e}")

# Execute pipeline
# Step 3 of 3: import the adapted pipeline file as a module and call its main()
# with command-line arguments pointing at the dynamic config.
import traceback

# Snapshot argv *before* the try block: the finally clause below restores it, and
# would raise NameError if a failure happened before the snapshot existed.
original_argv = sys.argv.copy()
try:
    spec = importlib.util.spec_from_file_location("kaggle_pipeline", kaggle_pipeline)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot build an import spec for {kaggle_pipeline}")
    pipeline_module = importlib.util.module_from_spec(spec)
    # Register the module under its name before executing it, as in the importlib
    # recipe for importing a source file directly, so that code resolving the module
    # by name can find it.
    sys.modules[spec.name] = pipeline_module

    # Set command line args
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    sys.argv = ['full_pipeline_kaggle.py', '--cfg', str(dynamic_config_path), '--device', device]

    print(f"⚙️  Config: {dynamic_config_path}")
    print(f"🖥️  Device: {device}")

    print(f"\n🎛️ TRAINING WITH NOTEBOOK HYPERPARAMETERS:")
    print(f"  🎯 Learning Rate: {BASE_LR}")
    print(f"  🎯 GT Layers: {GT_LAYERS}")
    print(f"  🎯 Max Epochs: {MAX_EPOCH}")

    # Execute
    spec.loader.exec_module(pipeline_module)
    if hasattr(pipeline_module, 'main'):
        pipeline_module.main()
        print(f"\n🎉 Training completed!")
    else:
        # Without main() nothing beyond the module body ran; say so explicitly.
        print("⚠️  full_pipeline_kaggle.py defines no main() - nothing was trained")

except Exception as e:
    print(f"❌ Error: {e}")
    # Show where the failure happened; the message alone is rarely enough to debug.
    traceback.print_exc()
    print("📋 This may be expected in local testing - will work on Kaggle")
    # Do not leave a partially initialised module registered after a failure.
    sys.modules.pop("kaggle_pipeline", None)

finally:
    sys.argv = original_argv

In [None]:
# Results Analysis
# Inspects the files the training run left in the working directory (submission and
# evaluation report) and prints a summary of the hyperparameters that were used.
import pandas as pd

print("="*60)
print("📊 TRAINING RESULTS ANALYSIS")
print("="*60)

# Check for results files using KAGGLE_WORKING paths
submission_file = KAGGLE_WORKING / "submission.csv"
eval_report = KAGGLE_WORKING / "stage1_evaluation_report.csv"
results_dir = KAGGLE_WORKING / "results"

print(f"📁 Results directory: {results_dir}")
print(f"📄 Submission file: {submission_file}")
print(f"📋 Evaluation report: {eval_report}")

# Prediction targets, defined once and reused for the column check and statistics.
target_cols = ['Tg', 'FFV', 'Tc', 'Density', 'Rg']
expected_cols = ['SMILES'] + target_cols

# True only when a submission exists, could be read, and has every expected column;
# the summary at the end uses it so it never reports success after a failed run.
submission_ok = False

# Analyze submission file
if submission_file.exists():
    print(f"\n✅ Submission file found: {submission_file}")

    try:
        submission = pd.read_csv(submission_file)
        print(f"📊 Submission shape: {submission.shape}")

        # Show column info
        actual_cols = list(submission.columns)
        print(f"📋 Columns: {actual_cols}")

        missing_cols = set(expected_cols) - set(actual_cols)
        if missing_cols:
            print(f"❌ Missing columns: {missing_cols}")
        else:
            print("✅ All required columns present")
            submission_ok = True

        # Show prediction statistics
        print(f"\n📈 Prediction Statistics:")
        for target in target_cols:
            if target in submission.columns:
                values = submission[target]
                print(f"  {target:8s}: mean={values.mean():7.3f}, std={values.std():7.3f}, range=[{values.min():6.3f}, {values.max():6.3f}]")

        # Show sample predictions
        print(f"\n📄 Sample Predictions:")
        print(submission.head())

    except Exception as e:
        submission_ok = False
        print(f"❌ Error reading submission: {e}")
else:
    print(f"❌ No submission file found")

# Analyze evaluation report
if eval_report.exists():
    print(f"\n✅ Evaluation report found: {eval_report}")
    try:
        eval_df = pd.read_csv(eval_report)
        print("\n🏆 Model Performance Summary:")

        # Show available columns; fall back to the whole report if none are present
        display_cols = ['Target', 'Performance_Grade', 'Performance_Level', 'Relative_Error', 'MAE', 'Test_MAE']
        available_cols = [col for col in display_cols if col in eval_df.columns]

        if available_cols:
            print(eval_df[available_cols].to_string(index=False))
        else:
            print(eval_df.to_string(index=False))

    except Exception as e:
        print(f"❌ Error reading evaluation report: {e}")
else:
    print(f"❌ No evaluation report found")

# Summary
print(f"\n{'='*60}")
print("🎯 HYPERPARAMETER TUNING SUMMARY")
print("="*60)
if submission_ok:
    print("✅ Successfully used notebook hyperparameters:")
else:
    print("⚠️  No valid submission was produced with these notebook hyperparameters:")
print(f"  🎯 Learning Rate: {BASE_LR}")
print(f"  🎯 GT Layers: {GT_LAYERS}")
print(f"  🎯 Hidden Dimension: {GT_DIM_HIDDEN}")
print(f"  🎯 Batch Size: {BATCH_SIZE}")
print(f"  🎯 Max Epochs: {MAX_EPOCH}")
print(f"\n💡 To tune performance:")
print("  1. Modify variables in Cell 2")
print("  2. Re-run cells 2-6")
print("  3. Compare results with previous runs")
print(f"\n📁 Output files:")
print(f"  📄 Submission: {submission_file}")
print(f"  📋 Evaluation: {eval_report}")
print(f"  📁 Results: {results_dir}")

# 🎛️ Hyperparameter Tuning Guide

## 📋 Quick Reference

To adjust model performance, modify the variables in **Cell 2** (Hyperparameter Variables) and re-run the notebook.

### 🎯 High Priority Parameters (Major Impact)

| Parameter | Current | Recommended Values | Impact |
|-----------|---------|-------------------|---------|
| `BASE_LR` | 1e-3 | [1e-4, 5e-4, 1e-3, 2e-3] | Learning speed & convergence |
| `GT_LAYERS` | 10 | [8, 10, 12, 14] | Model capacity & training time |
| `GT_DIM_HIDDEN` | 64 | [64, 128, 192] | Model width & memory usage |
| `BATCH_SIZE` | 32 | [16, 32, 64] | Training stability & memory |

### 🎚️ Medium Priority Parameters

| Parameter | Current | Recommended Values | Impact |
|-----------|---------|-------------------|---------|
| `GT_DROPOUT` | 0.0 | [0.0, 0.1, 0.2] | Regularization strength |
| `GT_N_HEADS` | 8 | [4, 6, 8, 12] (must divide `GT_DIM_HIDDEN`; with 64 only 4 and 8 qualify) | Attention mechanism |
| `WEIGHT_DECAY` | 1e-5 | [1e-6, 1e-5, 1e-4] | L2 regularization |
| `MAX_EPOCH` | 200 | [150, 200, 300] | Training duration |
| `ATTN_DROPOUT` | 0.2 | [0.0, 0.1, 0.2, 0.3] | Attention regularization |

## 🚀 Tuning Strategy

### For Better Performance:
- **Increase**: `GT_LAYERS` (10→12), `GT_DIM_HIDDEN` (64→128)
- **Adjust**: `BASE_LR` (try 5e-4 or 2e-3)
- **Add regularization**: `GT_DROPOUT` (0.0→0.1)

### For Faster Training:
- **Decrease**: `GT_LAYERS` (10→8), `MAX_EPOCH` (200→150)
- **Increase**: `BATCH_SIZE` (32→64)

### For Memory Issues:
- **Decrease**: `BATCH_SIZE` (32→16), `GT_DIM_HIDDEN` (64→32)
- **Reduce**: `GT_LAYERS` (10→8)

## 📊 Model Performance Grades

The notebook will show performance grades for each target:
- **A+/A**: Excellent (< 8% relative error)
- **B+/B**: Good (< 18% relative error)  
- **C**: Acceptable (< 25% relative error)
- **D**: Poor (> 25% relative error)

## 🔄 How to Tune

1. **Modify variables** in Cell 2 (Hyperparameter Variables)
2. **Re-run** the entire notebook from Cell 2 onwards
3. **Check results** in the final cell for performance grades
4. **Iterate** based on performance and time constraints

## 💡 Pro Tips

- Start with learning rate: try `BASE_LR = 5e-4` or `2e-3`
- For overfitting: increase `GT_DROPOUT` to 0.1-0.2
- For underfitting: increase `GT_LAYERS` or `GT_DIM_HIDDEN`
- Ensure `GT_DIM_HIDDEN` is divisible by `GT_N_HEADS`

In [None]:
# Results verification
# Final pass over the run's artefacts: re-reads the submission and the evaluation
# report, then prints a pass/fail checklist for each stage of the notebook.
print("=" * 60)
print("📋 RESULTS VERIFICATION")
print("=" * 60)

# Check submission file
submission_path = KAGGLE_WORKING / 'submission.csv'
if not submission_path.exists():
    print(f"❌ No submission file found at: {submission_path}")
else:
    print(f"✅ Submission file created: {submission_path}")

    try:
        import pandas as pd
        submission = pd.read_csv(submission_path)
        print(f"📊 Submission shape: {submission.shape}")

        # Check required columns
        expected_cols = ['SMILES', 'Tg', 'FFV', 'Tc', 'Density', 'Rg']
        actual_cols = list(submission.columns)

        print(f"📋 Columns found: {actual_cols}")
        missing_cols = set(expected_cols) - set(actual_cols)
        print(f"❌ Missing columns: {missing_cols}" if missing_cols else "✅ All required columns present")

        # Show prediction statistics (columns absent from the file are skipped)
        print("\n📈 Prediction Statistics:")
        for target in ['Tg', 'FFV', 'Tc', 'Density', 'Rg']:
            if target not in submission.columns:
                continue
            values = submission[target]
            print(f"  {target:8s}: mean={values.mean():7.3f}, std={values.std():7.3f}, range=[{values.min():6.3f}, {values.max():6.3f}]")

        # Show preview
        print(f"\n📄 Submission Preview:")
        print(submission.head())

    except Exception as e:
        print(f"❌ Error analyzing submission: {e}")

# Check evaluation report (nothing is printed when the report is absent)
eval_report_path = KAGGLE_WORKING / 'stage1_evaluation_report.csv'
if eval_report_path.exists():
    print(f"\n✅ Training evaluation report: {eval_report_path}")
    try:
        import pandas as pd
        eval_df = pd.read_csv(eval_report_path)
        print("🏆 Model Performance Summary:")
        display_cols = ['Target', 'Performance_Grade', 'Performance_Level', 'Relative_Error']
        # Keep only the summary columns this report actually contains.
        available_cols = []
        for col in display_cols:
            if col in eval_df.columns:
                available_cols.append(col)
        if available_cols:
            print(eval_df[available_cols].to_string(index=False))
    except Exception as e:
        print(f"❌ Error reading evaluation report: {e}")

# Final status
print(f"\n{'='*60}")
print("🎯 FINAL STATUS")
print("="*60)

# Every condition is evaluated here, once, when the list is built.
status_checks = [
    ("Competition data found", TRAIN_CSV.exists() and TEST_CSV.exists()),
    ("GRIT copied successfully", (KAGGLE_WORKING / "GRIT").exists()),
    ("Configuration created", (KAGGLE_WORKING / 'polymer-GRIT-RRWP.yaml').exists()),
    ("Pipeline adapted", (KAGGLE_WORKING / 'full_pipeline_kaggle.py').exists()),
    ("Submission generated", submission_path.exists()),
]

for check_name, status in status_checks:
    icon = "✅" if status else "❌"
    print(f"{icon} {check_name}")

# The run is good only if every individual check passed.
all_good = all(passed for _, passed in status_checks)

if not (all_good and submission_path.exists()):
    print(f"\n⚠️  Issues detected - check error messages above")
else:
    print(f"\n🎉 SUCCESS! Kaggle submission ready")
    print(f"📄 Submit file: /kaggle/working/submission.csv")
    print(f"📦 File size: {submission_path.stat().st_size / 1024:.1f} KB")