In [1]:
# =============================================================================
# SEGMENT 1: Environment Setup & Chemical Sanitization
# =============================================================================
import pandas as pd
import numpy as np
import warnings
import os

# RDKit libraries
from rdkit import Chem
from rdkit.Chem import AllChem, Descriptors
from rdkit import RDLogger

# Suppress warnings
RDLogger.DisableLog('rdApp.*')
warnings.filterwarnings("ignore")

# Define file paths (Update these paths if running on a different machine)
TRAIN_PATH = "C:/Users/Benjamin Gu/Desktop/MML_project/train.csv"
TEST_PATH = "C:/Users/Benjamin Gu/Desktop/MML_project/test.csv"

def load_data(train_path, test_path):
   if not os.path.exists(train_path) or not os.path.exists(test_path):
       raise FileNotFoundError(f"Files not found: {train_path} or {test_path}")
   
   train_df = pd.read_csv(train_path)
   test_df = pd.read_csv(test_path)
   print(f"‚úÖ Data Loaded. Train: {train_df.shape}, Test: {test_df.shape}")
   return train_df, test_df

def sanitize_polymer_smiles(smiles, capping_group='C'):
   # Type check to prevent errors with NaN or non-string inputs
   if pd.isna(smiles) or not isinstance(smiles, str):
       return None
   
   # Methyl Capping: Replace wildcard '*' with 'C'
   sanitized_smiles = smiles.replace('*', capping_group)
   try:
       mol = Chem.MolFromSmiles(sanitized_smiles)
       if mol:
           # Return standardized Canonical SMILES
           return Chem.MolToSmiles(mol, canonical=True)
       return None
   except:
       return None

def preprocess_pipeline(df, name="Dataset"):
   print(f"\nüîÑ Processing {name}...")
   df_clean = df.copy()
   
   # Check if SMILES column exists
   target_col = 'SMILES'
   if target_col not in df_clean.columns:
       raise KeyError(f"Column '{target_col}' not found in {name}. Please check CSV headers.")

   # 1. Apply sanitization ONLY to the SMILES column
   print("   Running Methyl Capping...")
   df_clean['Sanitized_SMILES'] = df_clean[target_col].apply(
       lambda x: sanitize_polymer_smiles(x, capping_group='C')
   )
   
   # 2. Check for failures
   n_failures = df_clean['Sanitized_SMILES'].isna().sum()
   
   if n_failures > 0:
       print(f"   ‚ö†Ô∏è Warning: {n_failures} molecules failed sanitization and will be dropped.")
       df_clean = df_clean.dropna(subset=['Sanitized_SMILES']).reset_index(drop=True)
   else:
       print(f"   ‚úÖ All molecules sanitized successfully.")
       
   print(f"   Final shape for {name}: {df_clean.shape}")
   return df_clean

# --- Execution ---
if __name__ == "__main__":
   # Load Data
   raw_train, raw_test = load_data(TRAIN_PATH, TEST_PATH)
   
   # Run Preprocessing Pipeline
   clean_train = preprocess_pipeline(raw_train, name="Train Set")
   clean_test = preprocess_pipeline(raw_test, name="Test Set")

   print("\nüîç Verification:")
   # Display first two rows for verification
   cols_to_show = ['id', 'SMILES', 'Sanitized_SMILES']
   if 'id' in clean_train.columns:
       print(clean_train[cols_to_show].head(2))
   else:
       print(clean_train[['SMILES', 'Sanitized_SMILES']].head(2))

‚úÖ Data Loaded. Train: (7973, 7), Test: (3, 2)

üîÑ Processing Train Set...
   Running Methyl Capping...
   ‚úÖ All molecules sanitized successfully.
   Final shape for Train Set: (7973, 8)

üîÑ Processing Test Set...
   Running Methyl Capping...
   ‚úÖ All molecules sanitized successfully.
   Final shape for Test Set: (3, 3)

üîç Verification:
       id                                             SMILES  \
0   87817                         *CC(*)c1ccccc1C(=O)OCCCCCC   
1  106919  *Nc1ccc([C@H](CCC)c2ccc(C3(c4ccc([C@@H](CCC)c5...   

                                    Sanitized_SMILES  
0                         CCCCCCOC(=O)c1ccccc1C(C)CC  
1  CCCCCC1CCC(c2ccc([C@@H](CCC)c3ccc(NC)cc3)cc2)(...  


In [2]:
# =============================================================================
# SEGMENT 2: Hybrid Feature Engineering
# =============================================================================
from transformers import AutoTokenizer, AutoModel
import torch
from tqdm import tqdm

def calculate_rdkit_features(smiles):
   mol = Chem.MolFromSmiles(smiles)
   if not mol: return {}
   return {
       'MolLogP': Descriptors.MolLogP(mol),
       'TPSA': Descriptors.TPSA(mol),
       'MolWt': Descriptors.MolWt(mol),
       'BertzCT': Descriptors.BertzCT(mol),
       'NumRotatableBonds': Descriptors.NumRotatableBonds(mol),
       'RingCount': Descriptors.RingCount(mol)
   }

def process_rdkit_features(df):
   print("   ‚öóÔ∏è Calculating RDKit descriptors...")
   
   # Calculate features for every SMILES in the list
   features_list = [calculate_rdkit_features(x) for x in df['Sanitized_SMILES']]
   
   # Convert list of dictionaries to DataFrame
   feat_df = pd.DataFrame(features_list)
   
   # Concatenate original data with new features
   return pd.concat([df.reset_index(drop=True), feat_df.reset_index(drop=True)], axis=1)

def get_chemberta_embeddings(smiles_list):
   print("   ü§ñ Extracting Transformer Embeddings...")
   model_name = 'DeepChem/ChemBERTa-77M-MLM'
   
   # Load Model
   tokenizer = AutoTokenizer.from_pretrained(model_name)
   model = AutoModel.from_pretrained(model_name)
   
   # Check for GPU
   device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
   model.to(device)
   print(f"      Running on: {device}")
   
   batch_size = 32
   all_embeddings = []
   
   for i in tqdm(range(0, len(smiles_list), batch_size)):
       batch = smiles_list[i:i+batch_size]
       
       # Tokenize
       inputs = tokenizer(batch, padding=True, truncation=True, return_tensors="pt", max_length=512).to(device)
       
       with torch.no_grad():
           outputs = model(**inputs)
       
       # Mean pooling (average over tokens to get sentence embedding)
       embeddings = outputs.last_hidden_state.mean(dim=1).cpu().numpy()
       all_embeddings.append(embeddings)
       
   return np.vstack(all_embeddings)

def get_morgan_fingerprints(df, n_bits=2048):
   print("  üñê Generating Morgan Fingerprints...")
   
   # Helper function: Convert fingerprint to numpy array
   def _get_fp(smiles):
       mol = Chem.MolFromSmiles(smiles)
       if not mol: return np.zeros((n_bits,))\
       fp = AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=n_bits)
       arr = np.zeros((0,), dtype=np.int8)
       from rdkit.DataStructs import ConvertToNumpyArray
       ConvertToNumpyArray(fp, arr)
       return arr
       
   # Calculate fingerprints
   fps = np.stack(df['Sanitized_SMILES'].apply(_get_fp).values)
   
   # Convert to DataFrame
   col_names = [f'FP_{i}' for i in range(n_bits)]
   return pd.DataFrame(fps, columns=col_names)

# --- Execution ---

# 1. Generate Morgan Fingerprints
train_fp = get_morgan_fingerprints(clean_train)
test_fp = get_morgan_fingerprints(clean_test)

# 2. RDKit Features
clean_train = process_rdkit_features(clean_train)
clean_test = process_rdkit_features(clean_test)

# 3. ChemBERTa Embeddings
print("   (Note: Running on full dataset may take time on CPU)")

# Convert column to list for tokenizer
train_smiles = clean_train['Sanitized_SMILES'].tolist()
test_smiles = clean_test['Sanitized_SMILES'].tolist()

train_emb = get_chemberta_embeddings(train_smiles)
test_emb = get_chemberta_embeddings(test_smiles)

# Convert embeddings to DataFrame with auto-generated column names
emb_cols = [f'ChemBERTa_{i}' for i in range(train_emb.shape[1])]
train_emb_df = pd.DataFrame(train_emb, columns=emb_cols)
test_emb_df = pd.DataFrame(test_emb, columns=emb_cols)

# 4. Concatenate all features
full_train = pd.concat([clean_train.reset_index(drop=True), train_emb_df, train_fp], axis=1)
full_test = pd.concat([clean_test.reset_index(drop=True), test_emb_df, test_fp], axis=1)

print(f"‚úÖ Feature Engineering Done. Full Train Shape: {full_train.shape}")

  üñê Generating Morgan Fingerprints...
  üñê Generating Morgan Fingerprints...
   ‚öóÔ∏è Calculating RDKit descriptors...
   ‚öóÔ∏è Calculating RDKit descriptors...
   (Note: Running on full dataset may take time on CPU)
   ü§ñ Extracting Transformer Embeddings...


Some weights of RobertaModel were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


      Running on: cuda


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 250/250 [00:01<00:00, 200.55it/s]


   ü§ñ Extracting Transformer Embeddings...


Some weights of RobertaModel were not initialized from the model checkpoint at DeepChem/ChemBERTa-77M-MLM and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


      Running on: cuda


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 285.72it/s]

‚úÖ Feature Engineering Done. Full Train Shape: (7973, 2446)





In [3]:
# =============================================================================
# SEGMENT 3: Scaffold Splitting
# =============================================================================
from rdkit.Chem.Scaffolds import MurckoScaffold
from rdkit import Chem

def get_scaffold(smiles):
   # Type check to prevent errors
   if not isinstance(smiles, str):
       return 'Generic'
   
   try:
       mol = Chem.MolFromSmiles(smiles)
       if mol:
           scaffold = MurckoScaffold.GetScaffoldForMol(mol)
           return Chem.MolToSmiles(scaffold)
       return 'Generic'
   except:
       return 'Generic'

print("\nüß¨ Generating Scaffolds...")

# Apply to 'Sanitized_SMILES' column and save to new 'Scaffold' column
full_train['Scaffold'] = full_train['Sanitized_SMILES'].apply(get_scaffold)

# Apply to test set as well (useful for post-analysis)
if 'Sanitized_SMILES' in full_test.columns:
   full_test['Scaffold'] = full_test['Sanitized_SMILES'].apply(get_scaffold)

print(f"   Unique Scaffolds in Train: {full_train['Scaffold'].nunique()}")

# Verify the Scaffold column generation
print(full_train[['Sanitized_SMILES', 'Scaffold']].head())


üß¨ Generating Scaffolds...
   Unique Scaffolds in Train: 3072
                                       Sanitized_SMILES  \
0                            CCCCCCOC(=O)c1ccccc1C(C)CC   
1     CCCCCC1CCC(c2ccc([C@@H](CCC)c3ccc(NC)cc3)cc2)(...   
2     COc1ccc(S(=O)(=O)c2ccc(Oc3ccc(C4(c5ccc(Oc6ccc(...   
3     CNc1ccc(-c2c(-c3ccc(C)cc3)c(-c3ccc(C)cc3)c(NC)...   
4     COc1ccc(OC(=O)c2cc(OCCCCCCCCCOCC3CCCN3c3ccc([N...   
...                                                 ...   
7968       CCCCCCCCc1cc(OC)cc(OC(=O)c2cccc(C(C)=O)c2)c1   
7969  CC(=O)OCCN(CCOC(=O)c1ccc2c(c1)C(=O)N(c1cccc(N3...   
7970  CCCCCCCCNC(=O)c1cc(C)cc(N2C(=O)c3ccc(-c4ccc5c(...   
7971                                   CC=C(C)c1ccccc1C   
7972  Cc1ccc(OCCCCCCCCCCCOC(=O)CCCCC(=O)OCCCCCCCCCCC...   

                                               Scaffold  
0                                              c1ccccc1  
1     c1ccc(Cc2ccc(C3(c4ccc(Cc5ccccc5)cc4)CCCCC3)cc2...  
2     O=C1C(=Cc2ccccc2)CCCC1=Cc1ccc(Oc2ccc(S(=O)(=O)

In [6]:
# =============================================================================
# SEGMENT 4: AutoGluon Multi-Target Training
# =============================================================================
from autogluon.tabular import TabularPredictor
import shutil
import pandas as pd
import os

# 1. Define target columns (Properties to predict)
TARGETS = ['Tg', 'FFV', 'Tc', 'Density', 'Rg']

# 2. Define metadata columns to exclude from training
# id: Just an identifier
# SMILES/Sanitized_SMILES: Raw strings (converted to features already)
# Scaffold: Used for splitting, not direct training
METADATA_COLS = ['id', 'SMILES', 'Sanitized_SMILES', 'Scaffold']

# ‚è±Ô∏è Time Allocation Strategy (Total ~30 mins)
time_allocation = {
  'FFV': 900,      # 15 mins: High priority
  'Tg': 180,       # 3 mins: Smaller dataset
  'Tc': 240,       # 4 mins
  'Density': 240,  # 4 mins
  'Rg': 240        # 4 mins
}

# Setup output container
submission = pd.DataFrame({'id': full_test['id']})
MODEL_ROOT = 'ag_models'

for target in TARGETS:
  # Get time limit for current target, default to 300s
  current_time_limit = time_allocation.get(target, 300)
  
  print(f"\nüéØ Training for Target: {target} | ‚è≥ Time Limit: {current_time_limit}s")
  
  # 1. Filter valid data (remove rows where target is NaN)
  train_data = full_train[full_train[target].notna()].copy()
  
  if len(train_data) == 0:
      print(f"   ‚ö†Ô∏è No training data for {target}, skipping...")
      continue

  # 2. Drop metadata and other target columns to prevent leakage
  other_targets = [t for t in TARGETS if t != target]
  drop_cols = METADATA_COLS + other_targets
  train_data = train_data.drop(columns=drop_cols, errors='ignore')
  
  # 3. Train Model
  save_path = os.path.join(MODEL_ROOT, target)
  if os.path.exists(save_path): shutil.rmtree(save_path)
  
  predictor = TabularPredictor(label=target, path=save_path, problem_type='regression')
  
  # Note: If time is tight, 'best_quality' might downgrade automatically.
  # We aim for 'best_quality' to utilize Bagging/Stacking.
  predictor.fit(
      train_data, 
      presets='best_quality',  
      time_limit=current_time_limit,   
      ag_args_fit={'num_gpus': 1} # Comment this out if GPU is not available
  )
  
  # 4. Predict
  test_features = full_test.drop(columns=METADATA_COLS, errors='ignore')
  submission[target] = predictor.predict(test_features)

print("\n‚úÖ Training Complete.")
submission.to_csv("submission.csv", index=False)
print("üìÑ Saved submission.csv")

Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.4.0
Python Version:     3.9.23
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26100
CPU Count:          16
Memory Avail:       16.50 GB / 31.11 GB (53.0%)
Disk Space Avail:   14.74 GB / 1862.21 GB (0.8%)
Presets specified: ['best_quality']
Using hyperparameters preset: hyperparameters='zeroshot'
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacked overfitting.
	Running DyStack fo


üéØ Training for Target: Tg | ‚è≥ Time Limit: 180s


Beginning AutoGluon training ... Time limit = 45s
AutoGluon will save models to "C:\Users\Benjamin Gu\Desktop\MML_project\ag_models\Tg\ds_sub_fit\sub_fit_ho"
Train Data Rows:    454
Train Data Columns: 2438
Label Column:       Tg
Problem Type:       regression
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    16893.49 MB
	Train Data (Original)  Memory Usage: 1.57 MB (0.0% of available memory)
	Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
	Stage 1 Generators:
		Fitting AsTypeFeatureGenerator...
			Note: Converting 1511 features to boolean dtype as they only contain 2 unique values.
	Stage 2 Generators:
		Fitting FillNaFeatureGenerator...
	Stage 3 Generators:
		Fitting IdentityFeatureGenerator...
	Stage 4 Generators:
		Fitting DropUniqueFeatureGenerator...
	Stage 5 Generators:
		Fitting DropDupl


üéØ Training for Target: FFV | ‚è≥ Time Limit: 900s


Beginning AutoGluon training ... Time limit = 225s
AutoGluon will save models to "C:\Users\Benjamin Gu\Desktop\MML_project\ag_models\FFV\ds_sub_fit\sub_fit_ho"
Train Data Rows:    6248
Train Data Columns: 2438
Label Column:       FFV
Problem Type:       regression
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    16776.47 MB
	Train Data (Original)  Memory Usage: 21.64 MB (0.1% of available memory)
	Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
	Stage 1 Generators:
		Fitting AsTypeFeatureGenerator...
			Note: Converting 2037 features to boolean dtype as they only contain 2 unique values.
	Stage 2 Generators:
		Fitting FillNaFeatureGenerator...
	Stage 3 Generators:
		Fitting IdentityFeatureGenerator...
	Stage 4 Generators:
		Fitting DropUniqueFeatureGenerator...
	Stage 5 Generators:
		Fitting Dro

[1000]	valid_set's rmse: 0.00973463


	Ran out of time, early stopping on iteration 1622. Best iteration is:
	[1622]	valid_set's rmse: 0.0096686
	Training S1F2 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.0172238


	Ran out of time, early stopping on iteration 1675. Best iteration is:
	[1632]	valid_set's rmse: 0.0171889
	Training S1F3 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.0151593


	Ran out of time, early stopping on iteration 1731. Best iteration is:
	[1719]	valid_set's rmse: 0.0151221
	Training S1F4 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.00971409


	Ran out of time, early stopping on iteration 1792. Best iteration is:
	[1760]	valid_set's rmse: 0.00967862
	Training S1F5 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.0106584


	Ran out of time, early stopping on iteration 1946. Best iteration is:
	[1944]	valid_set's rmse: 0.0106059
	Training S1F6 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.0101433
[2000]	valid_set's rmse: 0.0100973


	Ran out of time, early stopping on iteration 2094. Best iteration is:
	[2085]	valid_set's rmse: 0.0100942
	Training S1F7 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.0145299
[2000]	valid_set's rmse: 0.0144583


	Ran out of time, early stopping on iteration 2320. Best iteration is:
	[2207]	valid_set's rmse: 0.0144505
	Training S1F8 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.00985083
[2000]	valid_set's rmse: 0.00981746


	Ran out of time, early stopping on iteration 2811. Best iteration is:
	[2529]	valid_set's rmse: 0.0098078
	-0.0124	 = Validation score   (-root_mean_squared_error)
	140.24s	 = Training   runtime
	0.15s	 = Validation runtime
Fitting model: LightGBM_BAG_L1 ... Training model for up to 5.67s of the 79.16s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=8, gpus=1)
	Training S1F1 with GPU, note that this may negatively impact model quality compared to CPU training.
	Ran out of time, early stopping on iteration 1. Best iteration is:
	[1]	valid_set's rmse: 0.0276517
	Time limit exceeded... Skipping LightGBM_BAG_L1.
Fitting model: RandomForestMSE_BAG_L1 ... Training model for up to 4.19s of the 77.67s of remaining time.
	-0.015	 = Validation score   (-root_mean_squared_error)
	60.0s	 = Training   runtime
	1.95s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ... Training model for up to 220.36s of the 15.30s

[1000]	valid_set's rmse: 0.00919546
[2000]	valid_set's rmse: 0.00908408
[3000]	valid_set's rmse: 0.00907
[4000]	valid_set's rmse: 0.00906146
[5000]	valid_set's rmse: 0.00906019
[6000]	valid_set's rmse: 0.00905963
[7000]	valid_set's rmse: 0.00905956
[8000]	valid_set's rmse: 0.00905945


	Ran out of time, early stopping on iteration 8376. Best iteration is:
	[7846]	valid_set's rmse: 0.00905943
	Training S1F2 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.0180526
[2000]	valid_set's rmse: 0.0179781
[3000]	valid_set's rmse: 0.017965
[4000]	valid_set's rmse: 0.0179626
[5000]	valid_set's rmse: 0.0179629


	Training S1F3 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.0107186
[2000]	valid_set's rmse: 0.0106481
[3000]	valid_set's rmse: 0.0106321
[4000]	valid_set's rmse: 0.0106283
[5000]	valid_set's rmse: 0.0106258
[6000]	valid_set's rmse: 0.0106254
[7000]	valid_set's rmse: 0.0106252
[8000]	valid_set's rmse: 0.0106252
[9000]	valid_set's rmse: 0.0106252


	Ran out of time, early stopping on iteration 9367. Best iteration is:
	[7761]	valid_set's rmse: 0.0106251
	Training S1F4 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.0111692
[2000]	valid_set's rmse: 0.0110814
[3000]	valid_set's rmse: 0.0110719
[4000]	valid_set's rmse: 0.0110677
[5000]	valid_set's rmse: 0.0110661
[6000]	valid_set's rmse: 0.0110658
[7000]	valid_set's rmse: 0.0110658
[8000]	valid_set's rmse: 0.0110657
[9000]	valid_set's rmse: 0.0110657


	Ran out of time, early stopping on iteration 9707. Best iteration is:
	[9539]	valid_set's rmse: 0.0110657
	Training S1F5 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.0175707
[2000]	valid_set's rmse: 0.0174785
[3000]	valid_set's rmse: 0.0174648
[4000]	valid_set's rmse: 0.01746
[5000]	valid_set's rmse: 0.0174587
[6000]	valid_set's rmse: 0.0174583
[7000]	valid_set's rmse: 0.0174582
[8000]	valid_set's rmse: 0.0174581
[9000]	valid_set's rmse: 0.0174581
[10000]	valid_set's rmse: 0.0174581


	Training S1F6 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.0103131
[2000]	valid_set's rmse: 0.0102459
[3000]	valid_set's rmse: 0.0102368


	Training S1F7 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.0172698
[2000]	valid_set's rmse: 0.0171689
[3000]	valid_set's rmse: 0.0171576
[4000]	valid_set's rmse: 0.0171515
[5000]	valid_set's rmse: 0.0171515
[6000]	valid_set's rmse: 0.0171506
[7000]	valid_set's rmse: 0.0171504
[8000]	valid_set's rmse: 0.0171504
[9000]	valid_set's rmse: 0.0171503
[10000]	valid_set's rmse: 0.0171503


	Training S1F8 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.0101842
[2000]	valid_set's rmse: 0.0100984
[3000]	valid_set's rmse: 0.0100841
[4000]	valid_set's rmse: 0.0100841
[5000]	valid_set's rmse: 0.0100826
[6000]	valid_set's rmse: 0.0100821
[7000]	valid_set's rmse: 0.010082
[8000]	valid_set's rmse: 0.0100819
[9000]	valid_set's rmse: 0.0100818
[10000]	valid_set's rmse: 0.0100818


	-0.0134	 = Validation score   (-root_mean_squared_error)
	532.94s	 = Training   runtime
	0.35s	 = Validation runtime
Fitting model: LightGBM_BAG_L1 ... Training model for up to 134.91s of the 134.91s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=8, gpus=1)
	Training S1F1 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.00949193


	Ran out of time, early stopping on iteration 1223. Best iteration is:
	[1220]	valid_set's rmse: 0.00945937
	Training S1F2 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.0185569


	Ran out of time, early stopping on iteration 1269. Best iteration is:
	[1258]	valid_set's rmse: 0.0185427
	Training S1F3 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F4 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.0112728


	Ran out of time, early stopping on iteration 1386. Best iteration is:
	[1196]	valid_set's rmse: 0.0112578
	Training S1F5 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F6 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.0115078


	Training S1F7 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.0173556
[2000]	valid_set's rmse: 0.0173122


	Ran out of time, early stopping on iteration 2365. Best iteration is:
	[2330]	valid_set's rmse: 0.0173087
	Training S1F8 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.0106117


	-0.0142	 = Validation score   (-root_mean_squared_error)
	115.32s	 = Training   runtime
	0.13s	 = Validation runtime
Fitting model: RandomForestMSE_BAG_L1 ... Training model for up to 18.70s of the 18.70s of remaining time.
	-0.0157	 = Validation score   (-root_mean_squared_error)
	70.34s	 = Training   runtime
	2.97s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ... Training model for up to 360.00s of the -55.07s of remaining time.
	Ensemble Weights: {'LightGBMXT_BAG_L1': 0.875, 'LightGBM_BAG_L1': 0.125}
	-0.0134	 = Validation score   (-root_mean_squared_error)
	0.0s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 729.27s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 1801.0 rows/s (879 batch size)
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("C:\Users\Benjamin Gu\Desktop\MML_project\ag_models\FFV")
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.4.0
Python Version:     3.


üéØ Training for Target: Tc | ‚è≥ Time Limit: 240s


AutoGluon will save models to "C:\Users\Benjamin Gu\Desktop\MML_project\ag_models\Tc\ds_sub_fit\sub_fit_ho"
Train Data Rows:    655
Train Data Columns: 2438
Label Column:       Tc
Problem Type:       regression
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    16742.83 MB
	Train Data (Original)  Memory Usage: 2.27 MB (0.0% of available memory)
	Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
	Stage 1 Generators:
		Fitting AsTypeFeatureGenerator...
			Note: Converting 1257 features to boolean dtype as they only contain 2 unique values.
	Stage 2 Generators:
		Fitting FillNaFeatureGenerator...
	Stage 3 Generators:
		Fitting IdentityFeatureGenerator...
	Stage 4 Generators:
		Fitting DropUniqueFeatureGenerator...
	Stage 5 Generators:
		Fitting DropDuplicatesFeatureGenerator...
	Useless Original Featur

[1000]	valid_set's rmse: 0.0379574


	Ran out of time, early stopping on iteration 1453. Best iteration is:
	[1365]	valid_set's rmse: 0.0379257
	Training S1F5 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F6 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F7 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F8 with GPU, note that this may negatively impact model quality compared to CPU training.
	-0.0386	 = Validation score   (-root_mean_squared_error)
	18.01s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: LightGBM_BAG_L1 ... Training model for up to 20.33s of the 39.63s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=8, gpus=1)
	Training S1F1 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F2 with GPU, note that this may negat

[1000]	valid_set's rmse: 0.0464875
[2000]	valid_set's rmse: 0.0464654


	Training S1F6 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F7 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.0426541
[2000]	valid_set's rmse: 0.0425489
[3000]	valid_set's rmse: 0.0425367
[4000]	valid_set's rmse: 0.0425361


	Training S1F8 with GPU, note that this may negatively impact model quality compared to CPU training.
	-0.039	 = Validation score   (-root_mean_squared_error)
	41.37s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: LightGBM_BAG_L1 ... Training model for up to 76.34s of the 135.39s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=8, gpus=1)
	Training S1F1 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F2 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F3 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F4 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F5 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F6 with GPU, note that this may negatively

[1000]	valid_set's rmse: 0.0436868
[2000]	valid_set's rmse: 0.0435846
[3000]	valid_set's rmse: 0.0435756
[4000]	valid_set's rmse: 0.0435741


	Ran out of time, early stopping on iteration 4464. Best iteration is:
	[4456]	valid_set's rmse: 0.0435739
	Training S1F8 with GPU, note that this may negatively impact model quality compared to CPU training.
	-0.0403	 = Validation score   (-root_mean_squared_error)
	41.76s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: RandomForestMSE_BAG_L1 ... Training model for up to 34.22s of the 93.28s of remaining time.
	-0.0413	 = Validation score   (-root_mean_squared_error)
	3.75s	 = Training   runtime
	0.16s	 = Validation runtime
Fitting model: CatBoost_BAG_L1 ... Training model for up to 30.16s of the 89.22s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=8, gpus=1)
	Training S1F1 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F2 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F3 with GPU, note 

[1000]	valid_set's rmse: 0.0315462
[2000]	valid_set's rmse: 0.0314916
[3000]	valid_set's rmse: 0.0314861


	Ran out of time, early stopping on iteration 3134. Best iteration is:
	[3076]	valid_set's rmse: 0.0314858
	Training S1F7 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F8 with GPU, note that this may negatively impact model quality compared to CPU training.
	-0.0405	 = Validation score   (-root_mean_squared_error)
	26.34s	 = Training   runtime
	0.05s	 = Validation runtime
Fitting model: LightGBM_BAG_L2 ... Training model for up to 32.10s of the 32.07s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=8, gpus=1)
	Training S1F1 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F2 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F3 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F4 with GPU, note that this may negat


üéØ Training for Target: Density | ‚è≥ Time Limit: 240s


Beginning AutoGluon training ... Time limit = 60s
AutoGluon will save models to "C:\Users\Benjamin Gu\Desktop\MML_project\ag_models\Density\ds_sub_fit\sub_fit_ho"
Train Data Rows:    544
Train Data Columns: 2438
Label Column:       Density
Problem Type:       regression
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    16899.40 MB
	Train Data (Original)  Memory Usage: 1.88 MB (0.0% of available memory)
	Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
	Stage 1 Generators:
		Fitting AsTypeFeatureGenerator...
			Note: Converting 1248 features to boolean dtype as they only contain 2 unique values.
	Stage 2 Generators:
		Fitting FillNaFeatureGenerator...
	Stage 3 Generators:
		Fitting IdentityFeatureGenerator...
	Stage 4 Generators:
		Fitting DropUniqueFeatureGenerator...
	Stage 5 Generators:
		Fittin

[1000]	valid_set's rmse: 0.0497492


	Ran out of time, early stopping on iteration 1337. Best iteration is:
	[1337]	valid_set's rmse: 0.0493294
	Training S1F3 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.0488274


	Ran out of time, early stopping on iteration 1409. Best iteration is:
	[1407]	valid_set's rmse: 0.0487642
	Training S1F4 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F5 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F6 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.0722989


	Training S1F7 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.0537664


	Training S1F8 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.080191
[2000]	valid_set's rmse: 0.0799725
[3000]	valid_set's rmse: 0.0799556


	Ran out of time, early stopping on iteration 3940. Best iteration is:
	[3929]	valid_set's rmse: 0.0799511
	-0.067	 = Validation score   (-root_mean_squared_error)
	35.64s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: LightGBM_BAG_L1 ... Training model for up to 2.65s of the 22.00s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=8, gpus=1)
	Training S1F1 with GPU, note that this may negatively impact model quality compared to CPU training.
	Ran out of time, early stopping on iteration 1. Best iteration is:
	[1]	valid_set's rmse: 0.136017
	Time limit exceeded... Skipping LightGBM_BAG_L1.
Fitting model: RandomForestMSE_BAG_L1 ... Training model for up to 1.90s of the 21.25s of remaining time.
	-0.0798	 = Validation score   (-root_mean_squared_error)
	2.79s	 = Training   runtime
	0.1s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ... Training model for up to 58.04s of the 18.19s of 

[1000]	valid_set's rmse: 0.100371
[2000]	valid_set's rmse: 0.100212


	Training S1F2 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.0838172


	Training S1F3 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F4 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.0498368


	Training S1F5 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F6 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F7 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.0614437
[2000]	valid_set's rmse: 0.0610556
[3000]	valid_set's rmse: 0.0610126
[4000]	valid_set's rmse: 0.0610075
[5000]	valid_set's rmse: 0.0610064
[6000]	valid_set's rmse: 0.0610062
[7000]	valid_set's rmse: 0.0610062
[8000]	valid_set's rmse: 0.0610061
[9000]	valid_set's rmse: 0.0610061
[10000]	valid_set's rmse: 0.0610061


	Training S1F8 with GPU, note that this may negatively impact model quality compared to CPU training.
	-0.0694	 = Validation score   (-root_mean_squared_error)
	61.64s	 = Training   runtime
	0.05s	 = Validation runtime
Fitting model: LightGBM_BAG_L1 ... Training model for up to 55.61s of the 114.52s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=8, gpus=1)
	Training S1F1 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.10245


	Ran out of time, early stopping on iteration 1206. Best iteration is:
	[1200]	valid_set's rmse: 0.102365
	Training S1F2 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F3 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F4 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F5 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 0.0692918


	Ran out of time, early stopping on iteration 1875. Best iteration is:
	[1872]	valid_set's rmse: 0.0691861
	Training S1F6 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F7 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F8 with GPU, note that this may negatively impact model quality compared to CPU training.
	-0.0726	 = Validation score   (-root_mean_squared_error)
	30.14s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: RandomForestMSE_BAG_L1 ... Training model for up to 25.12s of the 84.03s of remaining time.
	-0.0821	 = Validation score   (-root_mean_squared_error)
	3.08s	 = Training   runtime
	0.15s	 = Validation runtime
Fitting model: CatBoost_BAG_L1 ... Training model for up to 21.74s of the 80.65s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=8, gpus=1)
	Training S1F1 with GPU, note 

[1000]	valid_set's rmse: 0.0574507
[2000]	valid_set's rmse: 0.0572917


	-0.0687	 = Validation score   (-root_mean_squared_error)
	21.98s	 = Training   runtime
	0.05s	 = Validation runtime
Fitting model: LightGBM_BAG_L2 ... Training model for up to 36.32s of the 36.30s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=8, gpus=1)
	Training S1F1 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F2 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F3 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F4 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F5 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F6 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F7 with GPU, note that this may negatively


üéØ Training for Target: Rg | ‚è≥ Time Limit: 240s


Beginning AutoGluon training ... Time limit = 60s
AutoGluon will save models to "C:\Users\Benjamin Gu\Desktop\MML_project\ag_models\Rg\ds_sub_fit\sub_fit_ho"
Train Data Rows:    545
Train Data Columns: 2438
Label Column:       Rg
Problem Type:       regression
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    16894.33 MB
	Train Data (Original)  Memory Usage: 1.89 MB (0.0% of available memory)
	Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
	Stage 1 Generators:
		Fitting AsTypeFeatureGenerator...
			Note: Converting 1260 features to boolean dtype as they only contain 2 unique values.
	Stage 2 Generators:
		Fitting FillNaFeatureGenerator...
	Stage 3 Generators:
		Fitting IdentityFeatureGenerator...
	Stage 4 Generators:
		Fitting DropUniqueFeatureGenerator...
	Stage 5 Generators:
		Fitting DropDupl

[1000]	valid_set's rmse: 2.03908


	Training S1F2 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F3 with GPU, note that this may negatively impact model quality compared to CPU training.


[1000]	valid_set's rmse: 2.58338
[2000]	valid_set's rmse: 2.58096
[3000]	valid_set's rmse: 2.58056
[4000]	valid_set's rmse: 2.58048


	Ran out of time, early stopping on iteration 4401. Best iteration is:
	[4396]	valid_set's rmse: 2.58047
	Training S1F4 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F5 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F6 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F7 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F8 with GPU, note that this may negatively impact model quality compared to CPU training.
	-2.435	 = Validation score   (-root_mean_squared_error)
	33.8s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: LightGBM_BAG_L1 ... Training model for up to 83.87s of the 142.90s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=8, gpus=1)
	Training S1F1 with GPU, note that this may negative

[1000]	valid_set's rmse: 2.96797
[2000]	valid_set's rmse: 2.96182
[3000]	valid_set's rmse: 2.9608


	Ran out of time, early stopping on iteration 3113. Best iteration is:
	[3069]	valid_set's rmse: 2.96077
	Training S1F6 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F7 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F8 with GPU, note that this may negatively impact model quality compared to CPU training.
	-2.4298	 = Validation score   (-root_mean_squared_error)
	22.78s	 = Training   runtime
	0.05s	 = Validation runtime
Fitting model: LightGBM_BAG_L2 ... Training model for up to 35.65s of the 35.63s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=8, gpus=1)
	Training S1F1 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F2 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F3 with GPU, note that this may negativ

[1000]	valid_set's rmse: 3.0353


	Ran out of time, early stopping on iteration 1211. Best iteration is:
	[1033]	valid_set's rmse: 3.03488
	Training S1F6 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F7 with GPU, note that this may negatively impact model quality compared to CPU training.
	Training S1F8 with GPU, note that this may negatively impact model quality compared to CPU training.
	-2.4209	 = Validation score   (-root_mean_squared_error)
	18.98s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: RandomForestMSE_BAG_L2 ... Training model for up to 16.38s of the 16.36s of remaining time.
	-2.4637	 = Validation score   (-root_mean_squared_error)
	3.13s	 = Training   runtime
	0.16s	 = Validation runtime
Fitting model: CatBoost_BAG_L2 ... Training model for up to 12.94s of the 12.92s of remaining time.
	Fitting 8 child models (S1F1 - S1F8) | Fitting with SequentialLocalFoldFittingStrategy (sequential: cpus=8, gpus=1)
	Training S1F1 with GPU, note th


‚úÖ Training Complete. 30 Minutes Well Spent!
üìÑ Saved submission.csv
