In [1]:
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
from sklearn.impute import SimpleImputer
import matplotlib.pyplot as plt
import seaborn as sns
import os
from tqdm import tqdm
from tsfresh import extract_features, select_features
from tsfresh.utilities.dataframe_functions import impute

In [2]:
DATA_PATH = '../data/'
FEATURE_PATH = '../'
train_log_df = pd.read_csv(os.path.join(DATA_PATH, 'train_log.csv'))
metadata = train_log_df[['object_id', 'Z', 'EBV', 'target']]
print("Metadata loaded successfully")

Metadata loaded successfully


In [3]:
# Load ALL Lightcurve Data for tsFresh
all_lc_df_list = []
print("Loading all lightcurve data...")
for split_folder in tqdm(train_log_df['split'].unique()):
    path = os.path.join(DATA_PATH, split_folder, 'train_full_lightcurves.csv')
    df = pd.read_csv(path)
    all_lc_df_list.append(df)

full_lc_df = pd.concat(all_lc_df_list)
print(f"Full lightcurve data loaded. Shape: {full_lc_df.shape}")

Loading all lightcurve data...


100%|██████████| 20/20 [00:00<00:00, 94.79it/s]

Full lightcurve data loaded. Shape: (479384, 5)





In [4]:
from tsfresh import extract_features, select_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.feature_extraction import EfficientFCParameters


# Feature Extraction with tsFresh
print("Starting tsFresh feature extraction per filter...")

band_features = []
for f in full_lc_df['Filter'].unique():
    subset = full_lc_df[full_lc_df['Filter'] == f][['object_id', 'Time (MJD)', 'Flux']].copy()
    subset.columns = ['id', 'time', 'value']
    
    # Drop NaNs
    subset = subset.dropna(subset=['value'])
    
    feats = extract_features(
        subset,
        column_id='id',
        column_sort='time',
        default_fc_parameters=EfficientFCParameters(),
        disable_progressbar=False,
        n_jobs=4
    )
    # tag features with filter name
    feats.columns = [f"{col}_{f}" for col in feats.columns]
    band_features.append(feats)

# Merge all filter-specific features
extracted_features = pd.concat(band_features, axis=1)

# Impute NaN/Infs
impute(extracted_features)

# Feature selection
y_for_selection = metadata.set_index('object_id').loc[extracted_features.index]['target']
relevant_features = select_features(
    extracted_features,
    y_for_selection,
    fdr_level=0.005
)

print(f"tsFresh finished successfully. Found {relevant_features.shape[1]} relevant features.")


Starting tsFresh feature extraction per filter...


Feature Extraction: 100%|██████████| 20/20 [00:34<00:00,  1.73s/it]
Feature Extraction: 100%|██████████| 20/20 [00:36<00:00,  1.83s/it]
Feature Extraction: 100%|██████████| 20/20 [00:31<00:00,  1.60s/it]
Feature Extraction: 100%|██████████| 20/20 [00:32<00:00,  1.61s/it]
Feature Extraction: 100%|██████████| 20/20 [00:31<00:00,  1.59s/it]
Feature Extraction: 100%|██████████| 20/20 [00:30<00:00,  1.54s/it]
 'value__query_similarity_count__query_None__threshold_0.0_r'
 'value__fft_coefficient__attr_"real"__coeff_93_y'
 'value__fft_coefficient__attr_"real"__coeff_94_y'
 'value__fft_coefficient__attr_"real"__coeff_95_y'
 'value__fft_coefficient__attr_"real"__coeff_96_y'
 'value__fft_coefficient__attr_"real"__coeff_97_y'
 'value__fft_coefficient__attr_"real"__coeff_98_y'
 'value__fft_coefficient__attr_"real"__coeff_99_y'
 'value__fft_coefficient__attr_"imag"__coeff_93_y'
 'value__fft_coefficient__attr_"imag"__coeff_94_y'
 'value__fft_coefficient__attr_"imag"__coeff_95_y'
 'value__fft_coeffic

tsFresh finished successfully. Found 198 relevant features.


In [5]:
# Combine Features and Prepare for Modeling
# Marge the new tsFresh with best old features
final_features = metadata.merge( 
    relevant_features,
    left_on='object_id',
    right_index=True,
    how='inner'
)

# Clean column names 
final_features.columns = (
    final_features.columns
    .str.replace('[^A-Za-z0-9_]+', '_', regex=True)  
    .str.strip('_')                                  
)

print("Saving final_features.csv...")
final_features.to_csv('final_features.csv', index=False)
print("File saved successfully.")

# Prepare X and y
y = final_features['target']
X = final_features.drop(columns=['object_id', 'target'])
features = X.columns.tolist()

print(f"Data prepared for final modeling. New features shape: {X.shape}")

Saving final_features.csv...
File saved successfully.
Data prepared for final modeling. New features shape: (3043, 200)


In [6]:
import optuna
import lightgbm as lgb
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
import numpy as np
import pandas as pd

# Standard scale
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X = pd.DataFrame(X_scaled, columns=X.columns)

# Cross-validation
N_SPLITS = 5
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=42)

# Objective function for Optuna
def objective(trial):
    params = {
        'objective': 'binary',
        'metric': 'binary_logloss',
        'boosting_type': 'gbdt',
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1, log=True),
        'num_leaves': trial.suggest_int('num_leaves', 20, 150),
        'max_depth': trial.suggest_int('max_depth', 3, 20),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 1.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.0, 1.0),
        'n_estimators': 2000,
        'device': 'gpu',
        'gpu_platform_id': 0,
        'gpu_device_id': 0,
        'verbose': -1
    }

    f1_scores = []

    for train_idx, val_idx in skf.split(X, y):
        X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
        X_val, y_val = X.iloc[val_idx], y.iloc[val_idx]

        # Compute scale_pos_weight
        neg_count = y_train.value_counts()[0]
        pos_count = y_train.value_counts()[1]
        params['scale_pos_weight'] = neg_count / pos_count

        model = lgb.LGBMClassifier(**params)
        model.fit(
            X_train, y_train,
            eval_set=[(X_val, y_val)],
            eval_metric='f1',
            callbacks=[lgb.early_stopping(stopping_rounds=100, verbose=False)]
        )

        val_preds_proba = model.predict_proba(X_val)[:, 1]
        thresholds = np.linspace(0.01, 0.99, 100)
        f1_values = [f1_score(y_val, (val_preds_proba > t).astype(int)) for t in thresholds]
        f1_scores.append(np.max(f1_values))

    return np.mean(f1_scores)

# Run Optuna study
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50, show_progress_bar=True)

print("\nBest F1 Score:", study.best_value)
print("Best Hyperparameters:", study.best_params)


  from .autonotebook import tqdm as notebook_tqdm
[I 2025-11-06 15:39:38,775] A new study created in memory with name: no-name-0de1a288-c1dd-4b71-bba7-0849fd28f094
Best trial: 0. Best value: 0.496018:   2%|▏         | 1/50 [00:14<11:44, 14.39s/it]

[I 2025-11-06 15:39:53,162] Trial 0 finished with value: 0.4960182279994023 and parameters: {'learning_rate': 0.02395753895685934, 'num_leaves': 57, 'max_depth': 8, 'min_child_samples': 57, 'subsample': 0.844336096890923, 'colsample_bytree': 0.638007240637462, 'reg_alpha': 0.10007190678904954, 'reg_lambda': 0.1021877360137542}. Best is trial 0 with value: 0.4960182279994023.


Best trial: 0. Best value: 0.496018:   4%|▍         | 2/50 [00:21<08:03, 10.07s/it]

[I 2025-11-06 15:40:00,215] Trial 1 finished with value: 0.49183944037426397 and parameters: {'learning_rate': 0.061022691141937674, 'num_leaves': 48, 'max_depth': 15, 'min_child_samples': 62, 'subsample': 0.7242533844054863, 'colsample_bytree': 0.8630972288835417, 'reg_alpha': 0.9413421325213572, 'reg_lambda': 0.9333634510889371}. Best is trial 0 with value: 0.4960182279994023.


Best trial: 0. Best value: 0.496018:   6%|▌         | 3/50 [00:30<07:23,  9.44s/it]

[I 2025-11-06 15:40:08,904] Trial 2 finished with value: 0.4948629709837933 and parameters: {'learning_rate': 0.027884842518968615, 'num_leaves': 31, 'max_depth': 13, 'min_child_samples': 80, 'subsample': 0.6860417078459281, 'colsample_bytree': 0.7725017425498204, 'reg_alpha': 0.8864849523114802, 'reg_lambda': 0.40543135040066003}. Best is trial 0 with value: 0.4960182279994023.


Best trial: 0. Best value: 0.496018:   8%|▊         | 4/50 [00:37<06:38,  8.67s/it]

[I 2025-11-06 15:40:16,386] Trial 3 finished with value: 0.4718755920500669 and parameters: {'learning_rate': 0.09937676106125903, 'num_leaves': 116, 'max_depth': 8, 'min_child_samples': 18, 'subsample': 0.5475753361178246, 'colsample_bytree': 0.9597677781997178, 'reg_alpha': 0.10490486989439751, 'reg_lambda': 0.7542776607379303}. Best is trial 0 with value: 0.4960182279994023.


Best trial: 0. Best value: 0.496018:  10%|█         | 5/50 [00:53<08:21, 11.14s/it]

[I 2025-11-06 15:40:31,921] Trial 4 finished with value: 0.4944396732236391 and parameters: {'learning_rate': 0.01774948745011243, 'num_leaves': 126, 'max_depth': 16, 'min_child_samples': 46, 'subsample': 0.8825318118378633, 'colsample_bytree': 0.9518256128606128, 'reg_alpha': 0.3711619694777867, 'reg_lambda': 0.981158428321287}. Best is trial 0 with value: 0.4960182279994023.


Best trial: 5. Best value: 0.49758:  12%|█▏        | 6/50 [01:05<08:23, 11.45s/it] 

[I 2025-11-06 15:40:43,977] Trial 5 finished with value: 0.49757963878445804 and parameters: {'learning_rate': 0.02577469023339054, 'num_leaves': 48, 'max_depth': 8, 'min_child_samples': 15, 'subsample': 0.9431679769083888, 'colsample_bytree': 0.5798384258555445, 'reg_alpha': 0.0076034876884099045, 'reg_lambda': 0.2654833658891008}. Best is trial 5 with value: 0.49757963878445804.


Best trial: 5. Best value: 0.49758:  14%|█▍        | 7/50 [01:18<08:40, 12.10s/it]

[I 2025-11-06 15:40:57,412] Trial 6 finished with value: 0.48839203495584976 and parameters: {'learning_rate': 0.01964511665557542, 'num_leaves': 32, 'max_depth': 16, 'min_child_samples': 26, 'subsample': 0.6424861523445403, 'colsample_bytree': 0.6704538684588557, 'reg_alpha': 0.145617636984527, 'reg_lambda': 0.492569548695024}. Best is trial 5 with value: 0.49757963878445804.


Best trial: 7. Best value: 0.499661:  16%|█▌        | 8/50 [01:30<08:28, 12.10s/it]

[I 2025-11-06 15:41:09,505] Trial 7 finished with value: 0.49966125148214696 and parameters: {'learning_rate': 0.02779240384985236, 'num_leaves': 30, 'max_depth': 10, 'min_child_samples': 25, 'subsample': 0.9295059946508166, 'colsample_bytree': 0.5941610365536186, 'reg_alpha': 0.9758021430480254, 'reg_lambda': 0.7675259161677483}. Best is trial 7 with value: 0.49966125148214696.


Best trial: 8. Best value: 0.504203:  18%|█▊        | 9/50 [01:38<07:22, 10.78s/it]

[I 2025-11-06 15:41:17,396] Trial 8 finished with value: 0.5042032753888682 and parameters: {'learning_rate': 0.045785564901657735, 'num_leaves': 36, 'max_depth': 11, 'min_child_samples': 62, 'subsample': 0.6860364337363414, 'colsample_bytree': 0.7540585489783467, 'reg_alpha': 0.34215378642445193, 'reg_lambda': 0.32986322524622425}. Best is trial 8 with value: 0.5042032753888682.


Best trial: 8. Best value: 0.504203:  20%|██        | 10/50 [01:52<07:53, 11.84s/it]

[I 2025-11-06 15:41:31,614] Trial 9 finished with value: 0.4707920571332688 and parameters: {'learning_rate': 0.018552869292439916, 'num_leaves': 114, 'max_depth': 17, 'min_child_samples': 32, 'subsample': 0.8252086588917169, 'colsample_bytree': 0.8162053032657173, 'reg_alpha': 0.33529742998740486, 'reg_lambda': 0.0026409464930383653}. Best is trial 8 with value: 0.5042032753888682.


Best trial: 8. Best value: 0.504203:  22%|██▏       | 11/50 [01:54<05:42,  8.78s/it]

[I 2025-11-06 15:41:33,440] Trial 10 finished with value: 0.3339186576008982 and parameters: {'learning_rate': 0.011676894555585958, 'num_leaves': 84, 'max_depth': 3, 'min_child_samples': 100, 'subsample': 0.5521986631116153, 'colsample_bytree': 0.5142632475267945, 'reg_alpha': 0.6813412065749962, 'reg_lambda': 0.2841503927102284}. Best is trial 8 with value: 0.5042032753888682.


Best trial: 11. Best value: 0.508641:  24%|██▍       | 12/50 [02:02<05:27,  8.63s/it]

[I 2025-11-06 15:41:41,726] Trial 11 finished with value: 0.5086410256410256 and parameters: {'learning_rate': 0.05032294694113933, 'num_leaves': 82, 'max_depth': 20, 'min_child_samples': 41, 'subsample': 0.7739699461470442, 'colsample_bytree': 0.6938187668404225, 'reg_alpha': 0.5676366212792807, 'reg_lambda': 0.7012711881986196}. Best is trial 11 with value: 0.5086410256410256.


Best trial: 11. Best value: 0.508641:  26%|██▌       | 13/50 [02:10<05:07,  8.32s/it]

[I 2025-11-06 15:41:49,347] Trial 12 finished with value: 0.49870817779098936 and parameters: {'learning_rate': 0.04748312696747512, 'num_leaves': 76, 'max_depth': 20, 'min_child_samples': 72, 'subsample': 0.7856945498240531, 'colsample_bytree': 0.7248031109815016, 'reg_alpha': 0.5627375874638445, 'reg_lambda': 0.67866832472091}. Best is trial 11 with value: 0.5086410256410256.


Best trial: 11. Best value: 0.508641:  28%|██▊       | 14/50 [02:19<05:02,  8.41s/it]

[I 2025-11-06 15:41:57,953] Trial 13 finished with value: 0.49796275646743976 and parameters: {'learning_rate': 0.04283600603637093, 'num_leaves': 98, 'max_depth': 20, 'min_child_samples': 44, 'subsample': 0.6469821572953989, 'colsample_bytree': 0.6943634432955724, 'reg_alpha': 0.6768930487604407, 'reg_lambda': 0.5918648783883704}. Best is trial 11 with value: 0.5086410256410256.


Best trial: 11. Best value: 0.508641:  30%|███       | 15/50 [02:24<04:26,  7.62s/it]

[I 2025-11-06 15:42:03,736] Trial 14 finished with value: 0.49457650627136757 and parameters: {'learning_rate': 0.07292193754458894, 'num_leaves': 144, 'max_depth': 12, 'min_child_samples': 83, 'subsample': 0.7551504713039731, 'colsample_bytree': 0.8788195082895853, 'reg_alpha': 0.37531825358081217, 'reg_lambda': 0.3652429458195289}. Best is trial 11 with value: 0.5086410256410256.


Best trial: 11. Best value: 0.508641:  32%|███▏      | 16/50 [02:28<03:33,  6.27s/it]

[I 2025-11-06 15:42:06,865] Trial 15 finished with value: 0.35692336697468974 and parameters: {'learning_rate': 0.03963930312129907, 'num_leaves': 66, 'max_depth': 4, 'min_child_samples': 37, 'subsample': 0.6269751701475536, 'colsample_bytree': 0.7783750718896141, 'reg_alpha': 0.5178271401823077, 'reg_lambda': 0.5504584671797474}. Best is trial 11 with value: 0.5086410256410256.


Best trial: 11. Best value: 0.508641:  34%|███▍      | 17/50 [02:36<03:52,  7.06s/it]

[I 2025-11-06 15:42:15,770] Trial 16 finished with value: 0.49075865897555204 and parameters: {'learning_rate': 0.06157665491551288, 'num_leaves': 100, 'max_depth': 10, 'min_child_samples': 7, 'subsample': 0.9924994139885432, 'colsample_bytree': 0.7312010407527537, 'reg_alpha': 0.2641452494310695, 'reg_lambda': 0.187395480533727}. Best is trial 11 with value: 0.5086410256410256.


Best trial: 11. Best value: 0.508641:  36%|███▌      | 18/50 [02:45<04:03,  7.61s/it]

[I 2025-11-06 15:42:24,649] Trial 17 finished with value: 0.49147558699282834 and parameters: {'learning_rate': 0.03766100943360947, 'num_leaves': 21, 'max_depth': 14, 'min_child_samples': 63, 'subsample': 0.716226018947325, 'colsample_bytree': 0.8385854293873307, 'reg_alpha': 0.7340315329850792, 'reg_lambda': 0.845271158623532}. Best is trial 11 with value: 0.5086410256410256.


Best trial: 18. Best value: 0.508908:  38%|███▊      | 19/50 [02:51<03:40,  7.12s/it]

[I 2025-11-06 15:42:30,636] Trial 18 finished with value: 0.5089078009699812 and parameters: {'learning_rate': 0.08402419460994226, 'num_leaves': 64, 'max_depth': 18, 'min_child_samples': 49, 'subsample': 0.785368290340142, 'colsample_bytree': 0.6199720674439635, 'reg_alpha': 0.44009766534461653, 'reg_lambda': 0.43631137395236064}. Best is trial 18 with value: 0.5089078009699812.


Best trial: 18. Best value: 0.508908:  40%|████      | 20/50 [02:57<03:19,  6.65s/it]

[I 2025-11-06 15:42:36,184] Trial 19 finished with value: 0.5004655685506749 and parameters: {'learning_rate': 0.09092402296322015, 'num_leaves': 72, 'max_depth': 18, 'min_child_samples': 45, 'subsample': 0.8006058466739026, 'colsample_bytree': 0.52847281397882, 'reg_alpha': 0.5907659728143324, 'reg_lambda': 0.4868356575745836}. Best is trial 18 with value: 0.5089078009699812.


Best trial: 18. Best value: 0.508908:  42%|████▏     | 21/50 [03:03<03:08,  6.50s/it]

[I 2025-11-06 15:42:42,326] Trial 20 finished with value: 0.4927886697581309 and parameters: {'learning_rate': 0.0724148048887888, 'num_leaves': 95, 'max_depth': 18, 'min_child_samples': 52, 'subsample': 0.8763653897987107, 'colsample_bytree': 0.615160900808309, 'reg_alpha': 0.8102279076905761, 'reg_lambda': 0.647080222435811}. Best is trial 18 with value: 0.5089078009699812.


Best trial: 18. Best value: 0.508908:  44%|████▍     | 22/50 [03:10<03:04,  6.57s/it]

[I 2025-11-06 15:42:49,078] Trial 21 finished with value: 0.48216450216450213 and parameters: {'learning_rate': 0.05598783826133701, 'num_leaves': 56, 'max_depth': 20, 'min_child_samples': 69, 'subsample': 0.7786246621876117, 'colsample_bytree': 0.6678164662671169, 'reg_alpha': 0.4666755679271917, 'reg_lambda': 0.38090725937176506}. Best is trial 18 with value: 0.5089078009699812.


Best trial: 18. Best value: 0.508908:  46%|████▌     | 23/50 [03:18<03:09,  7.00s/it]

[I 2025-11-06 15:42:57,076] Trial 22 finished with value: 0.4836199095022625 and parameters: {'learning_rate': 0.05059576929576405, 'num_leaves': 44, 'max_depth': 18, 'min_child_samples': 39, 'subsample': 0.694465426641908, 'colsample_bytree': 0.7089162014070648, 'reg_alpha': 0.4482523617686959, 'reg_lambda': 0.44036592428627913}. Best is trial 18 with value: 0.5089078009699812.


Best trial: 18. Best value: 0.508908:  48%|████▊     | 24/50 [03:24<02:54,  6.70s/it]

[I 2025-11-06 15:43:03,084] Trial 23 finished with value: 0.4795315444130067 and parameters: {'learning_rate': 0.07604879299028007, 'num_leaves': 63, 'max_depth': 10, 'min_child_samples': 53, 'subsample': 0.5859275517604806, 'colsample_bytree': 0.6517041445013334, 'reg_alpha': 0.27898046088197515, 'reg_lambda': 0.2348432841013306}. Best is trial 18 with value: 0.5089078009699812.


Best trial: 18. Best value: 0.508908:  50%|█████     | 25/50 [03:32<02:58,  7.12s/it]

[I 2025-11-06 15:43:11,188] Trial 24 finished with value: 0.4909315389194397 and parameters: {'learning_rate': 0.0357508149208624, 'num_leaves': 81, 'max_depth': 14, 'min_child_samples': 78, 'subsample': 0.5020223873617089, 'colsample_bytree': 0.5587486950249464, 'reg_alpha': 0.2121466204942235, 'reg_lambda': 0.3206551855001668}. Best is trial 18 with value: 0.5089078009699812.


Best trial: 18. Best value: 0.508908:  52%|█████▏    | 26/50 [03:37<02:37,  6.56s/it]

[I 2025-11-06 15:43:16,425] Trial 25 finished with value: 0.4941433531716508 and parameters: {'learning_rate': 0.08383186688009367, 'num_leaves': 38, 'max_depth': 12, 'min_child_samples': 93, 'subsample': 0.743477308333572, 'colsample_bytree': 0.7700971809787275, 'reg_alpha': 0.4316558626394278, 'reg_lambda': 0.5901531468880066}. Best is trial 18 with value: 0.5089078009699812.


Best trial: 18. Best value: 0.508908:  54%|█████▍    | 27/50 [03:43<02:24,  6.28s/it]

[I 2025-11-06 15:43:22,062] Trial 26 finished with value: 0.49516633619740025 and parameters: {'learning_rate': 0.06227245457227041, 'num_leaves': 88, 'max_depth': 5, 'min_child_samples': 60, 'subsample': 0.6712340213385297, 'colsample_bytree': 0.6219679375221029, 'reg_alpha': 0.6098169321118848, 'reg_lambda': 0.1369776312167996}. Best is trial 18 with value: 0.5089078009699812.


Best trial: 18. Best value: 0.508908:  56%|█████▌    | 28/50 [03:53<02:42,  7.37s/it]

[I 2025-11-06 15:43:31,968] Trial 27 finished with value: 0.5075681282003742 and parameters: {'learning_rate': 0.03273202689293662, 'num_leaves': 21, 'max_depth': 19, 'min_child_samples': 32, 'subsample': 0.7567078565485951, 'colsample_bytree': 0.8046476722736409, 'reg_alpha': 0.525721523200149, 'reg_lambda': 0.6910329705015805}. Best is trial 18 with value: 0.5089078009699812.


Best trial: 18. Best value: 0.508908:  58%|█████▊    | 29/50 [04:03<02:52,  8.21s/it]

[I 2025-11-06 15:43:42,132] Trial 28 finished with value: 0.49664350033756344 and parameters: {'learning_rate': 0.03268689067063787, 'num_leaves': 21, 'max_depth': 19, 'min_child_samples': 32, 'subsample': 0.8458700619439056, 'colsample_bytree': 0.8244198946612763, 'reg_alpha': 0.523896961090264, 'reg_lambda': 0.7685392663490727}. Best is trial 18 with value: 0.5089078009699812.


Best trial: 18. Best value: 0.508908:  60%|██████    | 30/50 [04:17<03:17,  9.86s/it]

[I 2025-11-06 15:43:55,859] Trial 29 finished with value: 0.501630782858983 and parameters: {'learning_rate': 0.022233175607571865, 'num_leaves': 60, 'max_depth': 19, 'min_child_samples': 52, 'subsample': 0.8378199990383551, 'colsample_bytree': 0.9167247912215738, 'reg_alpha': 0.7672234320753846, 'reg_lambda': 0.6868052505772949}. Best is trial 18 with value: 0.5089078009699812.


Best trial: 18. Best value: 0.508908:  62%|██████▏   | 31/50 [04:37<04:07, 13.03s/it]

[I 2025-11-06 15:44:16,263] Trial 30 finished with value: 0.49557541312153647 and parameters: {'learning_rate': 0.015267380321465253, 'num_leaves': 53, 'max_depth': 17, 'min_child_samples': 24, 'subsample': 0.7565296089997596, 'colsample_bytree': 0.689089148247529, 'reg_alpha': 0.6085930344487026, 'reg_lambda': 0.8546219822726712}. Best is trial 18 with value: 0.5089078009699812.


Best trial: 31. Best value: 0.518039:  64%|██████▍   | 32/50 [04:45<03:26, 11.45s/it]

[I 2025-11-06 15:44:24,035] Trial 31 finished with value: 0.5180388030633127 and parameters: {'learning_rate': 0.044340625339063855, 'num_leaves': 42, 'max_depth': 6, 'min_child_samples': 37, 'subsample': 0.7235462093434964, 'colsample_bytree': 0.7601101703577037, 'reg_alpha': 0.41776414727628636, 'reg_lambda': 0.5722306906603363}. Best is trial 31 with value: 0.5180388030633127.


Best trial: 31. Best value: 0.518039:  66%|██████▌   | 33/50 [04:53<02:55, 10.34s/it]

[I 2025-11-06 15:44:31,800] Trial 32 finished with value: 0.4837398645498935 and parameters: {'learning_rate': 0.05233149509341002, 'num_leaves': 46, 'max_depth': 7, 'min_child_samples': 39, 'subsample': 0.7219361497935063, 'colsample_bytree': 0.7843569775188739, 'reg_alpha': 0.4077552076994192, 'reg_lambda': 0.5578622964098022}. Best is trial 31 with value: 0.5180388030633127.


Best trial: 31. Best value: 0.518039:  68%|██████▊   | 34/50 [05:03<02:44, 10.26s/it]

[I 2025-11-06 15:44:41,859] Trial 33 finished with value: 0.49184726152936414 and parameters: {'learning_rate': 0.029948604902374665, 'num_leaves': 68, 'max_depth': 6, 'min_child_samples': 36, 'subsample': 0.7794155523129174, 'colsample_bytree': 0.8039894026038052, 'reg_alpha': 0.4986300676242558, 'reg_lambda': 0.6273154747759213}. Best is trial 31 with value: 0.5180388030633127.


Best trial: 34. Best value: 0.537197:  70%|███████   | 35/50 [05:13<02:33, 10.24s/it]

[I 2025-11-06 15:44:52,059] Trial 34 finished with value: 0.5371970983176417 and parameters: {'learning_rate': 0.03215969291539223, 'num_leaves': 24, 'max_depth': 15, 'min_child_samples': 47, 'subsample': 0.8033809288783094, 'colsample_bytree': 0.8699727843770074, 'reg_alpha': 0.6751003782070576, 'reg_lambda': 0.7239832435177513}. Best is trial 34 with value: 0.5371970983176417.


Best trial: 34. Best value: 0.537197:  72%|███████▏  | 36/50 [05:22<02:17,  9.84s/it]

[I 2025-11-06 15:45:00,963] Trial 35 finished with value: 0.5007793981707025 and parameters: {'learning_rate': 0.0428910771071196, 'num_leaves': 40, 'max_depth': 15, 'min_child_samples': 47, 'subsample': 0.8098332915923261, 'colsample_bytree': 0.8727840137939591, 'reg_alpha': 0.8378124966693135, 'reg_lambda': 0.8504557249374214}. Best is trial 34 with value: 0.5371970983176417.


Best trial: 34. Best value: 0.537197:  74%|███████▍  | 37/50 [05:28<01:54,  8.84s/it]

[I 2025-11-06 15:45:07,463] Trial 36 finished with value: 0.496225751622171 and parameters: {'learning_rate': 0.06540157261491442, 'num_leaves': 30, 'max_depth': 15, 'min_child_samples': 55, 'subsample': 0.8612678244716813, 'colsample_bytree': 0.9970585012967834, 'reg_alpha': 0.6594686836797082, 'reg_lambda': 0.43080570334674145}. Best is trial 34 with value: 0.5371970983176417.


Best trial: 34. Best value: 0.537197:  76%|███████▌  | 38/50 [05:36<01:41,  8.42s/it]

[I 2025-11-06 15:45:14,922] Trial 37 finished with value: 0.4907628082496084 and parameters: {'learning_rate': 0.05524957868470837, 'num_leaves': 76, 'max_depth': 16, 'min_child_samples': 48, 'subsample': 0.8928292336340397, 'colsample_bytree': 0.8954264652895148, 'reg_alpha': 0.6494437923702954, 'reg_lambda': 0.7293083046535577}. Best is trial 34 with value: 0.5371970983176417.


Best trial: 34. Best value: 0.537197:  78%|███████▊  | 39/50 [05:50<01:52, 10.26s/it]

[I 2025-11-06 15:45:29,476] Trial 38 finished with value: 0.4780134795443015 and parameters: {'learning_rate': 0.025025617873288348, 'num_leaves': 56, 'max_depth': 17, 'min_child_samples': 17, 'subsample': 0.9058834697017223, 'colsample_bytree': 0.6361109843365205, 'reg_alpha': 0.7356397580575891, 'reg_lambda': 0.5173593502890431}. Best is trial 34 with value: 0.5371970983176417.


Best trial: 34. Best value: 0.537197:  80%|████████  | 40/50 [06:04<01:52, 11.28s/it]

[I 2025-11-06 15:45:43,141] Trial 39 finished with value: 0.516808408982322 and parameters: {'learning_rate': 0.02206498979502789, 'num_leaves': 51, 'max_depth': 13, 'min_child_samples': 43, 'subsample': 0.8143348222844428, 'colsample_bytree': 0.5607277292514508, 'reg_alpha': 0.2812926484660412, 'reg_lambda': 0.7977042981625149}. Best is trial 34 with value: 0.5371970983176417.


Best trial: 34. Best value: 0.537197:  82%|████████▏ | 41/50 [06:18<01:48, 12.07s/it]

[I 2025-11-06 15:45:57,039] Trial 40 finished with value: 0.49510969226034557 and parameters: {'learning_rate': 0.021980951586820632, 'num_leaves': 52, 'max_depth': 8, 'min_child_samples': 27, 'subsample': 0.8180632116860321, 'colsample_bytree': 0.5384316820256437, 'reg_alpha': 0.2791971235745001, 'reg_lambda': 0.9286717062937628}. Best is trial 34 with value: 0.5371970983176417.


Best trial: 34. Best value: 0.537197:  84%|████████▍ | 42/50 [06:35<01:48, 13.56s/it]

[I 2025-11-06 15:46:14,069] Trial 41 finished with value: 0.4977943852077516 and parameters: {'learning_rate': 0.015396208915945354, 'num_leaves': 48, 'max_depth': 13, 'min_child_samples': 41, 'subsample': 0.7321547410157949, 'colsample_bytree': 0.5880662253253072, 'reg_alpha': 0.1409686652528444, 'reg_lambda': 0.8250312455086432}. Best is trial 34 with value: 0.5371970983176417.


Best trial: 34. Best value: 0.537197:  86%|████████▌ | 43/50 [06:44<01:26, 12.40s/it]

[I 2025-11-06 15:46:23,764] Trial 42 finished with value: 0.506353224468317 and parameters: {'learning_rate': 0.03519910801270555, 'num_leaves': 31, 'max_depth': 15, 'min_child_samples': 49, 'subsample': 0.7983343526649627, 'colsample_bytree': 0.561913030912907, 'reg_alpha': 0.03811657637172039, 'reg_lambda': 0.7932846405917827}. Best is trial 34 with value: 0.5371970983176417.


Best trial: 34. Best value: 0.537197:  88%|████████▊ | 44/50 [06:55<01:11, 11.87s/it]

[I 2025-11-06 15:46:34,417] Trial 43 finished with value: 0.49735853646669587 and parameters: {'learning_rate': 0.03047475647473633, 'num_leaves': 38, 'max_depth': 13, 'min_child_samples': 44, 'subsample': 0.7708621495952916, 'colsample_bytree': 0.6112893277805979, 'reg_alpha': 0.21880345890567868, 'reg_lambda': 0.9199092589248643}. Best is trial 34 with value: 0.5371970983176417.


Best trial: 34. Best value: 0.537197:  90%|█████████ | 45/50 [07:06<00:58, 11.63s/it]

[I 2025-11-06 15:46:45,463] Trial 44 finished with value: 0.5182694895596481 and parameters: {'learning_rate': 0.027374619238405392, 'num_leaves': 26, 'max_depth': 14, 'min_child_samples': 58, 'subsample': 0.8494830739254006, 'colsample_bytree': 0.6563293792359308, 'reg_alpha': 0.3775081306354229, 'reg_lambda': 0.7188227203571572}. Best is trial 34 with value: 0.5371970983176417.


Best trial: 34. Best value: 0.537197:  92%|█████████▏| 46/50 [07:17<00:45, 11.43s/it]

[I 2025-11-06 15:46:56,432] Trial 45 finished with value: 0.4783353201944305 and parameters: {'learning_rate': 0.02041458303102323, 'num_leaves': 26, 'max_depth': 11, 'min_child_samples': 58, 'subsample': 0.8498855662927892, 'colsample_bytree': 0.5711320866540759, 'reg_alpha': 0.33365906468513745, 'reg_lambda': 0.6194640969529218}. Best is trial 34 with value: 0.5371970983176417.


Best trial: 34. Best value: 0.537197:  94%|█████████▍| 47/50 [07:26<00:32, 10.78s/it]

[I 2025-11-06 15:47:05,704] Trial 46 finished with value: 0.4819223263075722 and parameters: {'learning_rate': 0.02685552882194387, 'num_leaves': 44, 'max_depth': 14, 'min_child_samples': 65, 'subsample': 0.8288788904503946, 'colsample_bytree': 0.6650927679028817, 'reg_alpha': 0.39271544289060495, 'reg_lambda': 0.7260002047980888}. Best is trial 34 with value: 0.5371970983176417.


Best trial: 34. Best value: 0.537197:  96%|█████████▌| 48/50 [07:40<00:23, 11.58s/it]

[I 2025-11-06 15:47:19,134] Trial 47 finished with value: 0.46959654489385505 and parameters: {'learning_rate': 0.017510241941768097, 'num_leaves': 27, 'max_depth': 9, 'min_child_samples': 56, 'subsample': 0.9056675170648367, 'colsample_bytree': 0.9289996348125511, 'reg_alpha': 0.3369041723607979, 'reg_lambda': 0.4719261990227097}. Best is trial 34 with value: 0.5371970983176417.


Best trial: 34. Best value: 0.537197:  98%|█████████▊| 49/50 [07:51<00:11, 11.48s/it]

[I 2025-11-06 15:47:30,387] Trial 48 finished with value: 0.4667633347045112 and parameters: {'learning_rate': 0.010730385765350344, 'num_leaves': 35, 'max_depth': 12, 'min_child_samples': 68, 'subsample': 0.949965625015573, 'colsample_bytree': 0.5994083586704654, 'reg_alpha': 0.47855626825285824, 'reg_lambda': 0.8952834631863896}. Best is trial 34 with value: 0.5371970983176417.


Best trial: 34. Best value: 0.537197: 100%|██████████| 50/50 [08:03<00:00,  9.67s/it]

[I 2025-11-06 15:47:42,374] Trial 49 finished with value: 0.5015626906757964 and parameters: {'learning_rate': 0.02382120160076752, 'num_leaves': 41, 'max_depth': 16, 'min_child_samples': 50, 'subsample': 0.6990080281515016, 'colsample_bytree': 0.7388186399441403, 'reg_alpha': 0.2094088658189187, 'reg_lambda': 0.7973638287257452}. Best is trial 34 with value: 0.5371970983176417.

Best F1 Score: 0.5371970983176417
Best Hyperparameters: {'learning_rate': 0.03215969291539223, 'num_leaves': 24, 'max_depth': 15, 'min_child_samples': 47, 'subsample': 0.8033809288783094, 'colsample_bytree': 0.8699727843770074, 'reg_alpha': 0.6751003782070576, 'reg_lambda': 0.7239832435177513}





# Analysis

1. Here we have generated `features` per filter. This preserved band specific information (eg. "color" evolution of the transient), which is clearly highly predictive

2. Used **Optuna** to find identify the best hyperparameters

3. Pushed the final score from **0.4281** to **0.5224**

Here we have likely reached the point of diminishing returns for this feature engineereing on tabular data approach. To get even higher score we need neural networks. On the next notebooks we will try Recurrent Neural Networks (RNNs) and Transformers.