In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from tqdm.notebook import tqdm
from tqdm._tqdm_notebook import tqdm_notebook
import os
import gc
import warnings
warnings.filterwarnings('ignore')

from config import CONTEST_PATH, TRAIN_CSV, SUPPLEMENT_PATH

Please use `tqdm.notebook.*` instead of `tqdm._tqdm_notebook.*`
  from tqdm._tqdm_notebook import tqdm_notebook


# EXPLORATION

In [3]:
for dirname, _, filenames in os.walk(CONTEST_PATH):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/media/azizd/TOSHIBA-HDD/data/kaggle_data/neurips-open-polymer-prediction-2025/sample_submission.csv
/media/azizd/TOSHIBA-HDD/data/kaggle_data/neurips-open-polymer-prediction-2025/test.csv
/media/azizd/TOSHIBA-HDD/data/kaggle_data/neurips-open-polymer-prediction-2025/train.csv
/media/azizd/TOSHIBA-HDD/data/kaggle_data/neurips-open-polymer-prediction-2025/train_supplement/dataset1.csv
/media/azizd/TOSHIBA-HDD/data/kaggle_data/neurips-open-polymer-prediction-2025/train_supplement/dataset2.csv
/media/azizd/TOSHIBA-HDD/data/kaggle_data/neurips-open-polymer-prediction-2025/train_supplement/dataset3.csv
/media/azizd/TOSHIBA-HDD/data/kaggle_data/neurips-open-polymer-prediction-2025/train_supplement/dataset4.csv


In [4]:
main_df = pd.read_csv(TRAIN_CSV)
main_df.head(10)

Unnamed: 0,id,SMILES,Tg,FFV,Tc,Density,Rg
0,87817,*CC(*)c1ccccc1C(=O)OCCCCCC,,0.374645,0.205667,,
1,106919,*Nc1ccc([C@H](CCC)c2ccc(C3(c4ccc([C@@H](CCC)c5...,,0.37041,,,
2,388772,*Oc1ccc(S(=O)(=O)c2ccc(Oc3ccc(C4(c5ccc(Oc6ccc(...,,0.37886,,,
3,519416,*Nc1ccc(-c2c(-c3ccc(C)cc3)c(-c3ccc(C)cc3)c(N*)...,,0.387324,,,
4,539187,*Oc1ccc(OC(=O)c2cc(OCCCCCCCCCOCC3CCCN3c3ccc([N...,,0.35547,,,
5,950661,*OC(=O)CCCCCCCCC(=O)OC1COC2C(*)COC12,,0.339093,,,
6,1012066,*Nc1ccc([C@H]2[C@@H]3C[C@H]4C[C@@H](C3)C[C@@H]...,,0.347616,,,
7,1022104,*C(=O)Nc1ccc(Oc2ccc(Oc3ccc(NC(=O)c4ccc5c(c4)C(...,,0.377545,,,
8,1722264,*CC(*)(C)C(=O)OCCCCCCCCCOc1ccc2cc(C(=O)Oc3cccc...,,0.359176,,,
9,2265305,*Nc1ccc(-c2ccc(-c3ccc(N*)cc3)cc2)cc1,,0.341878,,,


In [5]:
main_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7973 entries, 0 to 7972
Data columns (total 7 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   id       7973 non-null   int64  
 1   SMILES   7973 non-null   object 
 2   Tg       511 non-null    float64
 3   FFV      7030 non-null   float64
 4   Tc       737 non-null    float64
 5   Density  613 non-null    float64
 6   Rg       614 non-null    float64
dtypes: float64(5), int64(1), object(1)
memory usage: 436.1+ KB


In [6]:
supp1_df = pd.read_csv("/media/azizd/TOSHIBA-HDD/data/kaggle_data/neurips-open-polymer-prediction-2025/train_supplement/dataset1.csv")
supp1_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 874 entries, 0 to 873
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   SMILES   874 non-null    object 
 1   TC_mean  874 non-null    float64
dtypes: float64(1), object(1)
memory usage: 13.8+ KB


In [7]:
supp2_df = pd.read_csv("/media/azizd/TOSHIBA-HDD/data/kaggle_data/neurips-open-polymer-prediction-2025/train_supplement/dataset2.csv")
supp2_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7208 entries, 0 to 7207
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   SMILES  7208 non-null   object
dtypes: object(1)
memory usage: 56.4+ KB


In [8]:
supp3_df = pd.read_csv("/media/azizd/TOSHIBA-HDD/data/kaggle_data/neurips-open-polymer-prediction-2025/train_supplement/dataset3.csv")
supp3_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 46 entries, 0 to 45
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   SMILES  46 non-null     object 
 1   Tg      46 non-null     float64
dtypes: float64(1), object(1)
memory usage: 864.0+ bytes


In [9]:
supp4_df = pd.read_csv("/media/azizd/TOSHIBA-HDD/data/kaggle_data/neurips-open-polymer-prediction-2025/train_supplement/dataset4.csv")
supp4_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 862 entries, 0 to 861
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   SMILES  862 non-null    object 
 1   FFV     862 non-null    float64
dtypes: float64(1), object(1)
memory usage: 13.6+ KB


# PREPROCESSING

In [10]:
# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)

target_columns = ['Tg', 'FFV', 'Tc', 'Density', 'Rg']

In [11]:
# Load and combine all data sources
def load_and_combine_data():
    # Load main training data
    main_df = pd.read_csv(TRAIN_CSV)
    
    # Load supplementary datasets
    supplement_dfs = []
    supplement_path = SUPPLEMENT_PATH
    
    for i in range(1, 5):
        file_path = os.path.join(supplement_path, f'dataset{i}.csv')
        if os.path.exists(file_path):
            df = pd.read_csv(file_path)
            
            # Ensure required columns exist
            for col in target_columns:
                if col not in df.columns:
                    df[col] = np.nan
                    
            # Only keep relevant columns
            df = df[['SMILES'] + target_columns]
            supplement_dfs.append(df)
    
    # Combine all data
    combined_df = pd.concat([main_df] + supplement_dfs, ignore_index=True)
    
    # Remove duplicate SMILES (keep first occurrence)
    combined_df = combined_df.drop_duplicates(subset=['SMILES'])
    
    # Clean SMILES strings
    combined_df['SMILES'] = combined_df['SMILES'].str.strip()
    
    return combined_df

In [12]:
#!pip install rdkit-pypi tqdm

In [13]:
# Calculate molecular descriptors (RDKit required)
def compute_molecular_features(smiles):
    try:
        from rdkit import Chem
        from rdkit.Chem import Descriptors
        
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return np.zeros(10)  # Return zeros for invalid SMILES
            
        features = [
            Descriptors.MolWt(mol),                # Molecular weight
            Descriptors.HeavyAtomCount(mol),        # Number of heavy atoms
            Descriptors.NumHAcceptors(mol),         # Hydrogen bond acceptors
            Descriptors.NumHDonors(mol),            # Hydrogen bond donors
            Descriptors.TPSA(mol),                  # Polar surface area
            Descriptors.MolLogP(mol),               # Octanol-water partition coefficient
            Descriptors.RingCount(mol),             # Number of rings
            Descriptors.NumRotatableBonds(mol),     # Rotatable bonds
            Descriptors.FractionCSP3(mol),          # Fraction of sp3 carbons
            Descriptors.NumAromaticRings(mol)       # Aromatic rings
        ]
        return np.array(features)
    
    except ImportError:
        # Fallback to simple features if RDKit not available
        return np.array([
            len(smiles),                           # SMILES length
            smiles.count('C'),                     # Carbon atoms
            smiles.count('O'),                     # Oxygen atoms
            smiles.count('N'),                     # Nitrogen atoms
            smiles.count('('),                     # Branch openings
            smiles.count(')'),                     # Branch closings
            smiles.count('='),                     # Double bonds
            smiles.count('#'),                     # Triple bonds
            smiles.count('1'),                     # Ring closures
            smiles.count('@')                      # Chirality indicators
        ])

In [14]:
# Main data processing pipeline
combined_df = load_and_combine_data()

combined_df.head()

Unnamed: 0,id,SMILES,Tg,FFV,Tc,Density,Rg
0,87817.0,*CC(*)c1ccccc1C(=O)OCCCCCC,,0.374645,0.205667,,
1,106919.0,*Nc1ccc([C@H](CCC)c2ccc(C3(c4ccc([C@@H](CCC)c5...,,0.37041,,,
2,388772.0,*Oc1ccc(S(=O)(=O)c2ccc(Oc3ccc(C4(c5ccc(Oc6ccc(...,,0.37886,,,
3,519416.0,*Nc1ccc(-c2c(-c3ccc(C)cc3)c(-c3ccc(C)cc3)c(N*)...,,0.387324,,,
4,539187.0,*Oc1ccc(OC(=O)c2cc(OCCCCCCCCCOCC3CCCN3c3ccc([N...,,0.35547,,,


In [15]:
# Calculate molecular features
print("Computing molecular features...")
tqdm.pandas()
combined_df['features'] = combined_df['SMILES'].progress_apply(compute_molecular_features)

combined_df.head()

Computing molecular features...


  0%|          | 0/10345 [00:00<?, ?it/s]


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.1.3 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/home/azizd/miniconda3/envs/kaggle-gpu/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/azizd/miniconda3/envs/kaggle-gpu/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/azizd/miniconda3/envs/kaggle-gpu/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/home/azizd/miniconda3/envs/kaggle-gpu/lib/python3.10/site-packages/traitlets/config/applicat

AttributeError: _ARRAY_API not found


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.1.3 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/home/azizd/miniconda3/envs/kaggle-gpu/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/azizd/miniconda3/envs/kaggle-gpu/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/azizd/miniconda3/envs/kaggle-gpu/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/home/azizd/miniconda3/envs/kaggle-gpu/lib/python3.10/site-packages/traitlets/config/applicat

AttributeError: _ARRAY_API not found


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.1.3 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/home/azizd/miniconda3/envs/kaggle-gpu/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/azizd/miniconda3/envs/kaggle-gpu/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/azizd/miniconda3/envs/kaggle-gpu/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/home/azizd/miniconda3/envs/kaggle-gpu/lib/python3.10/site-packages/traitlets/config/applicat

AttributeError: _ARRAY_API not found


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.1.3 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/home/azizd/miniconda3/envs/kaggle-gpu/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/azizd/miniconda3/envs/kaggle-gpu/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/azizd/miniconda3/envs/kaggle-gpu/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/home/azizd/miniconda3/envs/kaggle-gpu/lib/python3.10/site-packages/traitlets/config/applicat

AttributeError: _ARRAY_API not found


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.1.3 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/home/azizd/miniconda3/envs/kaggle-gpu/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/azizd/miniconda3/envs/kaggle-gpu/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/azizd/miniconda3/envs/kaggle-gpu/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/home/azizd/miniconda3/envs/kaggle-gpu/lib/python3.10/site-packages/traitlets/config/applicat

AttributeError: _ARRAY_API not found


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.1.3 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/home/azizd/miniconda3/envs/kaggle-gpu/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/azizd/miniconda3/envs/kaggle-gpu/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/azizd/miniconda3/envs/kaggle-gpu/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/home/azizd/miniconda3/envs/kaggle-gpu/lib/python3.10/site-packages/traitlets/config/applicat

AttributeError: _ARRAY_API not found

Unnamed: 0,id,SMILES,Tg,FFV,Tc,Density,Rg,features
0,87817.0,*CC(*)c1ccccc1C(=O)OCCCCCC,,0.374645,0.205667,,,"[232.3229999999999, 17.0, 2.0, 0.0, 26.3, 3.98..."
1,106919.0,*Nc1ccc([C@H](CCC)c2ccc(C3(c4ccc([C@@H](CCC)c5...,,0.37041,,,,"[598.9190000000003, 45.0, 2.0, 2.0, 24.06, 12...."
2,388772.0,*Oc1ccc(S(=O)(=O)c2ccc(Oc3ccc(C4(c5ccc(Oc6ccc(...,,0.37886,,,,"[1003.2069999999997, 73.0, 9.0, 0.0, 122.27000..."
3,519416.0,*Nc1ccc(-c2c(-c3ccc(C)cc3)c(-c3ccc(C)cc3)c(N*)...,,0.387324,,,,"[542.7260000000002, 42.0, 2.0, 2.0, 24.06, 11...."
4,539187.0,*Oc1ccc(OC(=O)c2cc(OCCCCCCCCCOCC3CCCN3c3ccc([N...,,0.35547,,,,"[965.1539999999994, 70.0, 14.0, 0.0, 182.28, 1..."


In [46]:
# Split data
train_val, test_df = train_test_split(combined_df, test_size=0.15, random_state=42)
train_df, val_df = train_test_split(train_val, test_size=0.15, random_state=42)

# Build vocabulary from all SMILES
all_smiles = "".join(combined_df['SMILES'])
vocab = sorted(set(all_smiles))
char_to_idx = {char: idx+1 for idx, char in enumerate(vocab)}
char_to_idx['<unk>'] = len(char_to_idx) + 1
max_length = max(len(s) for s in combined_df['SMILES'])

# Calculate normalization parameters from TRAINING SET
property_means = train_df[target_columns].mean(skipna=True)
property_stds = train_df[target_columns].std(skipna=True)

# Handle zero std
property_stds = property_stds.replace(0, 1.0)

# MODEL

In [47]:
class PolymerDataset(Dataset):
    def __init__(self, df):
        self.df = df.reset_index(drop=True)
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        smile = row['SMILES']
        
        # Encode SMILES sequence
        encoded = [char_to_idx.get(c, char_to_idx['<unk>']) for c in smile]
        padded = np.zeros(max_length, dtype=np.int64)
        padded[:len(encoded)] = encoded
        
        # Convert target values to float and handle NaNs
        target_values = np.zeros(len(target_columns), dtype=np.float32)
        mask = np.zeros(len(target_columns), dtype=bool)
        
        for i, col in enumerate(target_columns):
            value = row[col]
            # Handle numeric values and convert strings to float
            if pd.isna(value):
                target_values[i] = 0.0
                mask[i] = False
            else:
                try:
                    # Convert to float if it's a string representation
                    target_values[i] = float(value)
                    mask[i] = True
                except (TypeError, ValueError):
                    target_values[i] = 0.0
                    mask[i] = False
        
        # Normalize targets using precomputed stats
        normalized_targets = (target_values - property_means.values) / property_stds.values
        
        # Get molecular features
        features = row['features']
        
        return (
            torch.tensor(padded, dtype=torch.long),
            torch.tensor(features, dtype=torch.float),
            torch.tensor(normalized_targets, dtype=torch.float),
            torch.tensor(mask, dtype=torch.float)
        )

In [48]:
# Create datasets
train_data = PolymerDataset(train_df)
val_data = PolymerDataset(val_df)
test_data = PolymerDataset(test_df)

In [49]:
class PolymerPredictor(nn.Module):
    def __init__(self, vocab_size, feature_dim=10, embed_dim=256, 
                 lstm_dim=512, output_dim=5, dropout=0.3):
        super().__init__()
        
        # SMILES Embedding Layer
        self.embedding = nn.Embedding(len(char_to_idx)+1, embed_dim, padding_idx=0)
        
        # BiLSTM Layers
        self.bilstm = nn.LSTM(
            embed_dim,
            lstm_dim // 2,
            bidirectional=True,
            batch_first=True,
            num_layers=2
        )
        
        # Attention Mechanism
        self.attention = nn.Sequential(
            nn.Linear(lstm_dim, 128),
            nn.Tanh(),
            nn.Linear(128, 1)
        )
        # Feature Processing
        self.feature_fc = nn.Sequential(
            nn.Linear(feature_dim, 128),
            nn.ReLU(),
            nn.Dropout(dropout)
        )
        # Combined Processing
        self.combined_fc = nn.Sequential(
            nn.Linear(lstm_dim + 128, 512),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, output_dim)
        )
            
    def forward(self, seq_input, feature_input):
        # Process SMILES sequence
        embedded = self.embedding(seq_input)
        lstm_out, _ = self.bilstm(embedded)
        
        # Attention mechanism
        attn_weights = torch.softmax(self.attention(lstm_out).squeeze(-1), dim=1)
        attn_weights = attn_weights.unsqueeze(-1)
        context = torch.sum(lstm_out * attn_weights, dim=1)
        
        # Process molecular features
        features = self.feature_fc(feature_input)
        
        # Combine representations
        combined = torch.cat([context, features], dim=1)
        
        return self.combined_fc(combined)


In [50]:
class RobustHuberLoss(nn.Module):
    def __init__(self, weights=None, l2_lambda=1e-5, huber_delta=1.0):
        super().__init__()
        self.weights = weights
        self.l2_lambda = l2_lambda
        self.huber = nn.HuberLoss(reduction='none', delta=huber_delta)
        
    def forward(self, outputs, targets, mask, model):
        # Calculate base Huber loss
        huber_loss = self.huber(outputs, targets)
        
        # Apply property weights if provided
        if self.weights is not None:
            huber_loss = huber_loss * self.weights.unsqueeze(0)
        
        # Apply mask to ignore NaN targets
        masked_loss = mask * huber_loss
        loss = masked_loss.sum() / (mask.sum() + 1e-8)
        
        # Add L2 regularization
        if self.l2_lambda > 0:
            l2_reg = torch.tensor(0., device=outputs.device)
            for param in model.parameters():
                l2_reg += torch.norm(param)
            loss += self.l2_lambda * l2_reg
        
        return loss

In [51]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize model
vocab_size = len(vocab)
model = PolymerPredictor(vocab_size).to(device)


In [52]:
# Calculate property weights based on data availability
train_counts = train_df[target_columns].count()
property_weights = torch.tensor([
    5.0 / train_counts['Tg'], # higher weight due to Tg being weak ring on previous results
    1.0 / train_counts['FFV'],
    1.0 / train_counts['Tc'],
    1.0 / train_counts['Density'],
    1.0 / train_counts['Rg']
], device=device)
property_weights = property_weights / property_weights.sum()

# Initialize loss function and optimizer
criterion = RobustHuberLoss(weights=property_weights, l2_lambda=1e-5, huber_delta=1.0)
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=5
)

# Create data loaders
batch_size = 128
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size)
test_loader = DataLoader(test_data, batch_size=batch_size)

In [53]:

# Evaluation function
def evaluate_model(model, data_loader, criterion, denorm=True):
    model.eval()
    all_preds = []
    all_targets = []
    all_masks = []
    total_loss = 0.0
    
    with torch.no_grad():
        for seq, features, targets, masks in data_loader:
            seq, features, targets, masks = (
                seq.to(device), features.to(device), 
                targets.to(device), masks.to(device)
            )
            
            outputs = model(seq, features)
            loss = criterion(outputs, targets, masks, model)
            total_loss += loss.item()
            
            # Store for metrics
            all_preds.append(outputs.cpu().numpy())
            all_targets.append(targets.cpu().numpy())
            all_masks.append(masks.cpu().numpy())
    
    # Concatenate all batches
    all_preds = np.vstack(all_preds)
    all_targets = np.vstack(all_targets)
    all_masks = np.vstack(all_masks)
    
    # Denormalize if needed
    if denorm:
        all_preds = (all_preds * property_stds.values) + property_means.values
        all_targets = (all_targets * property_stds.values) + property_means.values
    
    # Calculate metrics per property
    metrics = {}
    for i, prop in enumerate(target_columns):
        prop_mask = all_masks[:, i].astype(bool)
        preds = all_preds[prop_mask, i]
        targs = all_targets[prop_mask, i]
        
        if len(preds) > 0:
            metrics[prop] = {
                'MAE': mean_absolute_error(targs, preds),
                'RMSE': np.sqrt(mean_squared_error(targs, preds)),
                'R²': r2_score(targs, preds),
                'Samples': len(preds)
            }
        else:
            metrics[prop] = {
                'MAE': np.nan,
                'RMSE': np.nan,
                'R²': np.nan,
                'Samples': 0
            }
    
    # Calculate overall metrics
    valid_preds = []
    valid_targets = []
    for i in range(all_preds.shape[0]):
        for j in range(all_preds.shape[1]):
            if all_masks[i, j]:
                valid_preds.append(all_preds[i, j])
                valid_targets.append(all_targets[i, j])
    
    valid_preds = np.array(valid_preds)
    valid_targets = np.array(valid_targets)
    
    if len(valid_preds) > 0:
        overall_metrics = {
            'MAE': mean_absolute_error(valid_targets, valid_preds),
            'RMSE': np.sqrt(mean_squared_error(valid_targets, valid_preds)),
            'R²': r2_score(valid_targets, valid_preds),
            'Samples': len(valid_preds)
        }
    else:
        overall_metrics = {
            'MAE': np.nan,
            'RMSE': np.nan,
            'R²': np.nan,
            'Samples': 0
        }
    
    return {
        'loss': total_loss / len(data_loader),
        'per_property': metrics,
        'overall': overall_metrics
    }


In [54]:
# Training loop
def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, epochs=300):
    best_val_loss = float('inf')
    patience = 15
    no_improve = 0
    
    for epoch in range(epochs):
        # Training phase
        model.train()
        train_loss = 0.0
        for seq, features, targets, masks in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
            seq, features, targets, masks = (
                seq.to(device), features.to(device), 
                targets.to(device), masks.to(device)
            )
            
            optimizer.zero_grad()
            outputs = model(seq, features)
            loss = criterion(outputs, targets, masks, model)
            loss.backward()
            
            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            
            optimizer.step()
            train_loss += loss.item()
        
        avg_train_loss = train_loss / len(train_loader)
        
        # Validation phase
        val_metrics = evaluate_model(model, val_loader, criterion, denorm=False)
        val_loss = val_metrics['loss']
        
        # Update learning rate
        scheduler.step(val_loss)
        
        # Print epoch summary
        print(f"\nEpoch {epoch+1:03d}")
        print(f"  Train Loss: {avg_train_loss:.4f} | Val Loss: {val_loss:.4f}")
        print(f"  Overall Val MAE: {val_metrics['overall']['MAE']:.4f}, RMSE: {val_metrics['overall']['RMSE']:.4f}, R²: {val_metrics['overall']['R²']:.4f}")
        
        # Early stopping and model checkpoint
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), 'best_model.pth')
            no_improve = 0
            print("  ↳ Model improved! Saving checkpoint...")
        else:
            no_improve += 1
            print(f"  ↳ No improvement for {no_improve}/{patience} epochs")
            if no_improve >= patience:
                print("Early stopping triggered!")
                break
    
    # Load best model
    model.load_state_dict(torch.load('best_model.pth'))
    return model

# Train the model
print("\nStarting model training...")
model = train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, epochs=300)


Starting model training...


Epoch 1:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 001
  Train Loss: 0.0657 | Val Loss: 0.0270
  Overall Val MAE: 0.6940, RMSE: 0.9048, R²: 0.0728
  ↳ Model improved! Saving checkpoint...


Epoch 2:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 002
  Train Loss: 0.0299 | Val Loss: 0.0248
  Overall Val MAE: 0.6489, RMSE: 0.8676, R²: 0.1476
  ↳ Model improved! Saving checkpoint...


Epoch 3:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 003
  Train Loss: 0.0265 | Val Loss: 0.0226
  Overall Val MAE: 0.6005, RMSE: 0.8118, R²: 0.2536
  ↳ Model improved! Saving checkpoint...


Epoch 4:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 004
  Train Loss: 0.0248 | Val Loss: 0.0210
  Overall Val MAE: 0.5796, RMSE: 0.7923, R²: 0.2890
  ↳ Model improved! Saving checkpoint...


Epoch 5:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 005
  Train Loss: 0.0228 | Val Loss: 0.0205
  Overall Val MAE: 0.5785, RMSE: 0.8037, R²: 0.2684
  ↳ Model improved! Saving checkpoint...


Epoch 6:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 006
  Train Loss: 0.0222 | Val Loss: 0.0213
  Overall Val MAE: 0.5703, RMSE: 0.7856, R²: 0.3011
  ↳ No improvement for 1/15 epochs


Epoch 7:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 007
  Train Loss: 0.0210 | Val Loss: 0.0182
  Overall Val MAE: 0.5247, RMSE: 0.7411, R²: 0.3779
  ↳ Model improved! Saving checkpoint...


Epoch 8:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 008
  Train Loss: 0.0207 | Val Loss: 0.0199
  Overall Val MAE: 0.5431, RMSE: 0.7631, R²: 0.3405
  ↳ No improvement for 1/15 epochs


Epoch 9:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 009
  Train Loss: 0.0197 | Val Loss: 0.0170
  Overall Val MAE: 0.5240, RMSE: 0.7472, R²: 0.3677
  ↳ Model improved! Saving checkpoint...


Epoch 10:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 010
  Train Loss: 0.0188 | Val Loss: 0.0173
  Overall Val MAE: 0.5120, RMSE: 0.7298, R²: 0.3968
  ↳ No improvement for 1/15 epochs


Epoch 11:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 011
  Train Loss: 0.0181 | Val Loss: 0.0172
  Overall Val MAE: 0.5155, RMSE: 0.7279, R²: 0.3999
  ↳ No improvement for 2/15 epochs


Epoch 12:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 012
  Train Loss: 0.0173 | Val Loss: 0.0171
  Overall Val MAE: 0.5054, RMSE: 0.7022, R²: 0.4416
  ↳ No improvement for 3/15 epochs


Epoch 13:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 013
  Train Loss: 0.0170 | Val Loss: 0.0154
  Overall Val MAE: 0.4779, RMSE: 0.6819, R²: 0.4734
  ↳ Model improved! Saving checkpoint...


Epoch 14:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 014
  Train Loss: 0.0160 | Val Loss: 0.0159
  Overall Val MAE: 0.4764, RMSE: 0.6853, R²: 0.4681
  ↳ No improvement for 1/15 epochs


Epoch 15:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 015
  Train Loss: 0.0152 | Val Loss: 0.0152
  Overall Val MAE: 0.4565, RMSE: 0.6506, R²: 0.5206
  ↳ Model improved! Saving checkpoint...


Epoch 16:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 016
  Train Loss: 0.0149 | Val Loss: 0.0148
  Overall Val MAE: 0.4490, RMSE: 0.6351, R²: 0.5432
  ↳ Model improved! Saving checkpoint...


Epoch 17:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 017
  Train Loss: 0.0146 | Val Loss: 0.0145
  Overall Val MAE: 0.4225, RMSE: 0.6013, R²: 0.5905
  ↳ Model improved! Saving checkpoint...


Epoch 18:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 018
  Train Loss: 0.0142 | Val Loss: 0.0145
  Overall Val MAE: 0.4351, RMSE: 0.6207, R²: 0.5636
  ↳ Model improved! Saving checkpoint...


Epoch 19:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 019
  Train Loss: 0.0134 | Val Loss: 0.0141
  Overall Val MAE: 0.4082, RMSE: 0.5962, R²: 0.5974
  ↳ Model improved! Saving checkpoint...


Epoch 20:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 020
  Train Loss: 0.0128 | Val Loss: 0.0145
  Overall Val MAE: 0.4176, RMSE: 0.5946, R²: 0.5996
  ↳ No improvement for 1/15 epochs


Epoch 21:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 021
  Train Loss: 0.0123 | Val Loss: 0.0132
  Overall Val MAE: 0.3891, RMSE: 0.5674, R²: 0.6354
  ↳ Model improved! Saving checkpoint...


Epoch 22:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 022
  Train Loss: 0.0124 | Val Loss: 0.0145
  Overall Val MAE: 0.4048, RMSE: 0.5859, R²: 0.6112
  ↳ No improvement for 1/15 epochs


Epoch 23:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 023
  Train Loss: 0.0120 | Val Loss: 0.0136
  Overall Val MAE: 0.3873, RMSE: 0.5596, R²: 0.6454
  ↳ No improvement for 2/15 epochs


Epoch 24:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 024
  Train Loss: 0.0116 | Val Loss: 0.0140
  Overall Val MAE: 0.3937, RMSE: 0.5691, R²: 0.6332
  ↳ No improvement for 3/15 epochs


Epoch 25:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 025
  Train Loss: 0.0108 | Val Loss: 0.0138
  Overall Val MAE: 0.3851, RMSE: 0.5694, R²: 0.6328
  ↳ No improvement for 4/15 epochs


Epoch 26:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 026
  Train Loss: 0.0107 | Val Loss: 0.0131
  Overall Val MAE: 0.3653, RMSE: 0.5427, R²: 0.6665
  ↳ Model improved! Saving checkpoint...


Epoch 27:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 027
  Train Loss: 0.0104 | Val Loss: 0.0137
  Overall Val MAE: 0.3617, RMSE: 0.5508, R²: 0.6564
  ↳ No improvement for 1/15 epochs


Epoch 28:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 028
  Train Loss: 0.0109 | Val Loss: 0.0142
  Overall Val MAE: 0.3723, RMSE: 0.5461, R²: 0.6622
  ↳ No improvement for 2/15 epochs


Epoch 29:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 029
  Train Loss: 0.0101 | Val Loss: 0.0128
  Overall Val MAE: 0.3595, RMSE: 0.5276, R²: 0.6847
  ↳ Model improved! Saving checkpoint...


Epoch 30:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 030
  Train Loss: 0.0092 | Val Loss: 0.0130
  Overall Val MAE: 0.3432, RMSE: 0.5187, R²: 0.6953
  ↳ No improvement for 1/15 epochs


Epoch 31:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 031
  Train Loss: 0.0090 | Val Loss: 0.0125
  Overall Val MAE: 0.3399, RMSE: 0.5112, R²: 0.7040
  ↳ Model improved! Saving checkpoint...


Epoch 32:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 032
  Train Loss: 0.0088 | Val Loss: 0.0124
  Overall Val MAE: 0.3386, RMSE: 0.5105, R²: 0.7049
  ↳ Model improved! Saving checkpoint...


Epoch 33:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 033
  Train Loss: 0.0087 | Val Loss: 0.0132
  Overall Val MAE: 0.3246, RMSE: 0.5034, R²: 0.7131
  ↳ No improvement for 1/15 epochs


Epoch 34:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 034
  Train Loss: 0.0088 | Val Loss: 0.0128
  Overall Val MAE: 0.3401, RMSE: 0.5086, R²: 0.7070
  ↳ No improvement for 2/15 epochs


Epoch 35:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 035
  Train Loss: 0.0083 | Val Loss: 0.0128
  Overall Val MAE: 0.3372, RMSE: 0.5084, R²: 0.7073
  ↳ No improvement for 3/15 epochs


Epoch 36:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 036
  Train Loss: 0.0082 | Val Loss: 0.0122
  Overall Val MAE: 0.3288, RMSE: 0.5024, R²: 0.7141
  ↳ Model improved! Saving checkpoint...


Epoch 37:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 037
  Train Loss: 0.0082 | Val Loss: 0.0123
  Overall Val MAE: 0.3190, RMSE: 0.4836, R²: 0.7351
  ↳ No improvement for 1/15 epochs


Epoch 38:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 038
  Train Loss: 0.0078 | Val Loss: 0.0125
  Overall Val MAE: 0.3185, RMSE: 0.4801, R²: 0.7389
  ↳ No improvement for 2/15 epochs


Epoch 39:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 039
  Train Loss: 0.0076 | Val Loss: 0.0124
  Overall Val MAE: 0.3133, RMSE: 0.4811, R²: 0.7379
  ↳ No improvement for 3/15 epochs


Epoch 40:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 040
  Train Loss: 0.0073 | Val Loss: 0.0122
  Overall Val MAE: 0.3109, RMSE: 0.4766, R²: 0.7427
  ↳ No improvement for 4/15 epochs


Epoch 41:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 041
  Train Loss: 0.0075 | Val Loss: 0.0122
  Overall Val MAE: 0.3034, RMSE: 0.4700, R²: 0.7498
  ↳ No improvement for 5/15 epochs


Epoch 42:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 042
  Train Loss: 0.0072 | Val Loss: 0.0121
  Overall Val MAE: 0.3032, RMSE: 0.4728, R²: 0.7468
  ↳ Model improved! Saving checkpoint...


Epoch 43:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 043
  Train Loss: 0.0072 | Val Loss: 0.0120
  Overall Val MAE: 0.3030, RMSE: 0.4633, R²: 0.7569
  ↳ Model improved! Saving checkpoint...


Epoch 44:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 044
  Train Loss: 0.0071 | Val Loss: 0.0127
  Overall Val MAE: 0.2995, RMSE: 0.4681, R²: 0.7519
  ↳ No improvement for 1/15 epochs


Epoch 45:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 045
  Train Loss: 0.0069 | Val Loss: 0.0127
  Overall Val MAE: 0.3083, RMSE: 0.4759, R²: 0.7435
  ↳ No improvement for 2/15 epochs


Epoch 46:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 046
  Train Loss: 0.0071 | Val Loss: 0.0124
  Overall Val MAE: 0.2951, RMSE: 0.4664, R²: 0.7536
  ↳ No improvement for 3/15 epochs


Epoch 47:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 047
  Train Loss: 0.0069 | Val Loss: 0.0122
  Overall Val MAE: 0.2971, RMSE: 0.4639, R²: 0.7563
  ↳ No improvement for 4/15 epochs


Epoch 48:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 048
  Train Loss: 0.0066 | Val Loss: 0.0123
  Overall Val MAE: 0.2964, RMSE: 0.4567, R²: 0.7638
  ↳ No improvement for 5/15 epochs


Epoch 49:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 049
  Train Loss: 0.0064 | Val Loss: 0.0124
  Overall Val MAE: 0.2916, RMSE: 0.4607, R²: 0.7596
  ↳ No improvement for 6/15 epochs


Epoch 50:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 050
  Train Loss: 0.0061 | Val Loss: 0.0118
  Overall Val MAE: 0.2791, RMSE: 0.4491, R²: 0.7716
  ↳ Model improved! Saving checkpoint...


Epoch 51:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 051
  Train Loss: 0.0057 | Val Loss: 0.0120
  Overall Val MAE: 0.2778, RMSE: 0.4458, R²: 0.7749
  ↳ No improvement for 1/15 epochs


Epoch 52:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 052
  Train Loss: 0.0056 | Val Loss: 0.0120
  Overall Val MAE: 0.2732, RMSE: 0.4449, R²: 0.7759
  ↳ No improvement for 2/15 epochs


Epoch 53:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 053
  Train Loss: 0.0055 | Val Loss: 0.0117
  Overall Val MAE: 0.2717, RMSE: 0.4406, R²: 0.7801
  ↳ Model improved! Saving checkpoint...


Epoch 54:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 054
  Train Loss: 0.0054 | Val Loss: 0.0120
  Overall Val MAE: 0.2742, RMSE: 0.4431, R²: 0.7776
  ↳ No improvement for 1/15 epochs


Epoch 55:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 055
  Train Loss: 0.0053 | Val Loss: 0.0118
  Overall Val MAE: 0.2716, RMSE: 0.4391, R²: 0.7817
  ↳ No improvement for 2/15 epochs


Epoch 56:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 056
  Train Loss: 0.0052 | Val Loss: 0.0117
  Overall Val MAE: 0.2691, RMSE: 0.4347, R²: 0.7860
  ↳ Model improved! Saving checkpoint...


Epoch 57:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 057
  Train Loss: 0.0053 | Val Loss: 0.0119
  Overall Val MAE: 0.2782, RMSE: 0.4458, R²: 0.7749
  ↳ No improvement for 1/15 epochs


Epoch 58:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 058
  Train Loss: 0.0053 | Val Loss: 0.0119
  Overall Val MAE: 0.2734, RMSE: 0.4441, R²: 0.7767
  ↳ No improvement for 2/15 epochs


Epoch 59:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 059
  Train Loss: 0.0053 | Val Loss: 0.0119
  Overall Val MAE: 0.2693, RMSE: 0.4406, R²: 0.7801
  ↳ No improvement for 3/15 epochs


Epoch 60:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 060
  Train Loss: 0.0050 | Val Loss: 0.0118
  Overall Val MAE: 0.2652, RMSE: 0.4363, R²: 0.7844
  ↳ No improvement for 4/15 epochs


Epoch 61:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 061
  Train Loss: 0.0051 | Val Loss: 0.0120
  Overall Val MAE: 0.2717, RMSE: 0.4407, R²: 0.7800
  ↳ No improvement for 5/15 epochs


Epoch 62:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 062
  Train Loss: 0.0052 | Val Loss: 0.0120
  Overall Val MAE: 0.2722, RMSE: 0.4372, R²: 0.7835
  ↳ No improvement for 6/15 epochs


Epoch 63:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 063
  Train Loss: 0.0050 | Val Loss: 0.0118
  Overall Val MAE: 0.2590, RMSE: 0.4331, R²: 0.7876
  ↳ No improvement for 7/15 epochs


Epoch 64:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 064
  Train Loss: 0.0048 | Val Loss: 0.0118
  Overall Val MAE: 0.2629, RMSE: 0.4315, R²: 0.7892
  ↳ No improvement for 8/15 epochs


Epoch 65:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 065
  Train Loss: 0.0047 | Val Loss: 0.0116
  Overall Val MAE: 0.2586, RMSE: 0.4265, R²: 0.7940
  ↳ Model improved! Saving checkpoint...


Epoch 66:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 066
  Train Loss: 0.0047 | Val Loss: 0.0117
  Overall Val MAE: 0.2609, RMSE: 0.4304, R²: 0.7902
  ↳ No improvement for 1/15 epochs


Epoch 67:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 067
  Train Loss: 0.0047 | Val Loss: 0.0117
  Overall Val MAE: 0.2596, RMSE: 0.4298, R²: 0.7908
  ↳ No improvement for 2/15 epochs


Epoch 68:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 068
  Train Loss: 0.0046 | Val Loss: 0.0117
  Overall Val MAE: 0.2606, RMSE: 0.4262, R²: 0.7943
  ↳ No improvement for 3/15 epochs


Epoch 69:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 069
  Train Loss: 0.0047 | Val Loss: 0.0117
  Overall Val MAE: 0.2591, RMSE: 0.4257, R²: 0.7948
  ↳ No improvement for 4/15 epochs


Epoch 70:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 070
  Train Loss: 0.0046 | Val Loss: 0.0116
  Overall Val MAE: 0.2583, RMSE: 0.4270, R²: 0.7935
  ↳ Model improved! Saving checkpoint...


Epoch 71:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 071
  Train Loss: 0.0045 | Val Loss: 0.0115
  Overall Val MAE: 0.2567, RMSE: 0.4248, R²: 0.7956
  ↳ Model improved! Saving checkpoint...


Epoch 72:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 072
  Train Loss: 0.0045 | Val Loss: 0.0116
  Overall Val MAE: 0.2587, RMSE: 0.4275, R²: 0.7930
  ↳ No improvement for 1/15 epochs


Epoch 73:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 073
  Train Loss: 0.0045 | Val Loss: 0.0118
  Overall Val MAE: 0.2640, RMSE: 0.4342, R²: 0.7865
  ↳ No improvement for 2/15 epochs


Epoch 74:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 074
  Train Loss: 0.0045 | Val Loss: 0.0117
  Overall Val MAE: 0.2586, RMSE: 0.4272, R²: 0.7933
  ↳ No improvement for 3/15 epochs


Epoch 75:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 075
  Train Loss: 0.0044 | Val Loss: 0.0117
  Overall Val MAE: 0.2582, RMSE: 0.4294, R²: 0.7912
  ↳ No improvement for 4/15 epochs


Epoch 76:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 076
  Train Loss: 0.0046 | Val Loss: 0.0116
  Overall Val MAE: 0.2556, RMSE: 0.4242, R²: 0.7963
  ↳ No improvement for 5/15 epochs


Epoch 77:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 077
  Train Loss: 0.0045 | Val Loss: 0.0118
  Overall Val MAE: 0.2660, RMSE: 0.4328, R²: 0.7879
  ↳ No improvement for 6/15 epochs


Epoch 78:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 078
  Train Loss: 0.0044 | Val Loss: 0.0115
  Overall Val MAE: 0.2530, RMSE: 0.4220, R²: 0.7984
  ↳ No improvement for 7/15 epochs


Epoch 79:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 079
  Train Loss: 0.0043 | Val Loss: 0.0116
  Overall Val MAE: 0.2549, RMSE: 0.4244, R²: 0.7960
  ↳ No improvement for 8/15 epochs


Epoch 80:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 080
  Train Loss: 0.0042 | Val Loss: 0.0115
  Overall Val MAE: 0.2537, RMSE: 0.4233, R²: 0.7971
  ↳ No improvement for 9/15 epochs


Epoch 81:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 081
  Train Loss: 0.0042 | Val Loss: 0.0115
  Overall Val MAE: 0.2565, RMSE: 0.4233, R²: 0.7971
  ↳ No improvement for 10/15 epochs


Epoch 82:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 082
  Train Loss: 0.0041 | Val Loss: 0.0114
  Overall Val MAE: 0.2537, RMSE: 0.4193, R²: 0.8009
  ↳ Model improved! Saving checkpoint...


Epoch 83:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 083
  Train Loss: 0.0042 | Val Loss: 0.0116
  Overall Val MAE: 0.2538, RMSE: 0.4229, R²: 0.7975
  ↳ No improvement for 1/15 epochs


Epoch 84:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 084
  Train Loss: 0.0042 | Val Loss: 0.0114
  Overall Val MAE: 0.2535, RMSE: 0.4204, R²: 0.7998
  ↳ No improvement for 2/15 epochs


Epoch 85:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 085
  Train Loss: 0.0042 | Val Loss: 0.0115
  Overall Val MAE: 0.2564, RMSE: 0.4233, R²: 0.7971
  ↳ No improvement for 3/15 epochs


Epoch 86:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 086
  Train Loss: 0.0042 | Val Loss: 0.0116
  Overall Val MAE: 0.2537, RMSE: 0.4205, R²: 0.7998
  ↳ No improvement for 4/15 epochs


Epoch 87:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 087
  Train Loss: 0.0042 | Val Loss: 0.0115
  Overall Val MAE: 0.2552, RMSE: 0.4221, R²: 0.7982
  ↳ No improvement for 5/15 epochs


Epoch 88:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 088
  Train Loss: 0.0041 | Val Loss: 0.0115
  Overall Val MAE: 0.2536, RMSE: 0.4208, R²: 0.7995
  ↳ No improvement for 6/15 epochs


Epoch 89:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 089
  Train Loss: 0.0041 | Val Loss: 0.0114
  Overall Val MAE: 0.2525, RMSE: 0.4196, R²: 0.8006
  ↳ No improvement for 7/15 epochs


Epoch 90:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 090
  Train Loss: 0.0040 | Val Loss: 0.0114
  Overall Val MAE: 0.2522, RMSE: 0.4182, R²: 0.8019
  ↳ No improvement for 8/15 epochs


Epoch 91:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 091
  Train Loss: 0.0041 | Val Loss: 0.0115
  Overall Val MAE: 0.2542, RMSE: 0.4189, R²: 0.8012
  ↳ No improvement for 9/15 epochs


Epoch 92:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 092
  Train Loss: 0.0040 | Val Loss: 0.0115
  Overall Val MAE: 0.2556, RMSE: 0.4204, R²: 0.7999
  ↳ No improvement for 10/15 epochs


Epoch 93:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 093
  Train Loss: 0.0041 | Val Loss: 0.0115
  Overall Val MAE: 0.2540, RMSE: 0.4180, R²: 0.8021
  ↳ No improvement for 11/15 epochs


Epoch 94:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 094
  Train Loss: 0.0040 | Val Loss: 0.0115
  Overall Val MAE: 0.2526, RMSE: 0.4168, R²: 0.8032
  ↳ No improvement for 12/15 epochs


Epoch 95:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 095
  Train Loss: 0.0040 | Val Loss: 0.0115
  Overall Val MAE: 0.2538, RMSE: 0.4185, R²: 0.8017
  ↳ No improvement for 13/15 epochs


Epoch 96:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 096
  Train Loss: 0.0040 | Val Loss: 0.0114
  Overall Val MAE: 0.2534, RMSE: 0.4168, R²: 0.8032
  ↳ No improvement for 14/15 epochs


Epoch 97:   0%|          | 0/59 [00:00<?, ?it/s]


Epoch 097
  Train Loss: 0.0040 | Val Loss: 0.0115
  Overall Val MAE: 0.2537, RMSE: 0.4183, R²: 0.8019
  ↳ No improvement for 15/15 epochs
Early stopping triggered!


In [55]:
# Evaluate on validation set
val_metrics = evaluate_model(model, val_loader, criterion, denorm=True)
print("\nValidation Metrics:")
for prop, metrics in val_metrics['per_property'].items():
    print(f"  {prop} ({metrics['Samples']} samples):")
    print(f"    MAE: {metrics['MAE']:.4f}  RMSE: {metrics['RMSE']:.4f}  R²: {metrics['R²']:.4f}")

print("\nOverall Validation:")
print(f"  MAE: {val_metrics['overall']['MAE']:.4f}")
print(f"  RMSE: {val_metrics['overall']['RMSE']:.4f}")
print(f"  R²: {val_metrics['overall']['R²']:.4f}")

# Evaluate on test set
test_metrics = evaluate_model(model, test_loader, criterion, denorm=True)
print("\nTest Set Metrics:")
for prop, metrics in test_metrics['per_property'].items():
    print(f"  {prop} ({metrics['Samples']} samples):")
    print(f"    MAE: {metrics['MAE']:.4f}  RMSE: {metrics['RMSE']:.4f}  R²: {metrics['R²']:.4f}")

print("\nOverall Test:")
print(f"  MAE: {test_metrics['overall']['MAE']:.4f}")
print(f"  RMSE: {test_metrics['overall']['RMSE']:.4f}")
print(f"  R²: {test_metrics['overall']['R²']:.4f}")


Validation Metrics:
  Tg (66 samples):
    MAE: 61.7070  RMSE: 80.7569  R²: 0.3058
  FFV (961 samples):
    MAE: 0.0066  RMSE: 0.0112  R²: 0.8356
  Tc (94 samples):
    MAE: 0.0278  RMSE: 0.0398  R²: 0.8031
  Density (84 samples):
    MAE: 0.0313  RMSE: 0.0504  R²: 0.8545
  Rg (84 samples):
    MAE: 1.7347  RMSE: 2.6822  R²: 0.6912

Overall Validation:
  MAE: 3.2816
  RMSE: 18.2865
  R²: 0.5427

Test Set Metrics:
  Tg (82 samples):
    MAE: 58.7250  RMSE: 72.6959  R²: 0.4536
  FFV (1098 samples):
    MAE: 0.0062  RMSE: 0.0106  R²: 0.8634
  Tc (116 samples):
    MAE: 0.0262  RMSE: 0.0393  R²: 0.8038
  Density (93 samples):
    MAE: 0.0377  RMSE: 0.0546  R²: 0.7301
  Rg (94 samples):
    MAE: 1.7128  RMSE: 2.4187  R²: 0.7317

Overall Test:
  MAE: 3.3647
  RMSE: 17.1049
  R²: 0.6808
