In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from scipy.stats import skew, kurtosis 
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from scipy.stats import linregress
import gc

BASE_PATH = '/kaggle/input/mallorn-dataset'
SEQ_LEN = 150        
BATCH_SIZE = 64      
NUM_EPOCHS = 30   
LEARNING_RATE = 1e-5
VAL_SIZE = 0.2

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def extract_features(fluxes, times, flux_errs, filters):
    if len(fluxes) == 0: 
        return [0]*10
    f = fluxes
    t = times
    fil = np.array(filters)
    max_f = np.max(f)
    min_f = np.min(f)
    mean = np.mean(f) 
    std = np.std(f)
    sk = skew(f) if std > 0 else 0 
    kt = kurtosis(f) if std > 0 else 0
    amplitude = (np.max(f) - np.min(f)) / 2

    flux_by_band = {b: [] for b in range(6)}
    for val, band in zip(f, filters):
        flux_by_band[band].append(val)
        
    means_band = {b: np.mean(vals) if len(vals)>0 else np.nan for b, vals in flux_by_band.items()}
    
    u_g = np.nan_to_num(means_band[0] - means_band[1]) 
    g_r = np.nan_to_num(means_band[1] - means_band[2]) 
    r_i = np.nan_to_num(means_band[2] - means_band[3]) 
    
    flux_ratio = max_f / (np.abs(np.median(f)) + 1e-6)
    
    idx_max = np.argmax(f)
    time_max = t[idx_max]
    slope = 0
    if idx_max < len(f) - 1:
        decay_flux = f[idx_max:]
        decay_time = t[idx_max:]
        if len(np.unique(decay_time)) > 1:
            try:
                slope, _, _, _, _ = linregress(decay_time, decay_flux)
                if np.isnan(slope): slope = 0
            except ValueError:
                slope = 0 
        else:
            slope = 0

    n = len(f)
    if n > 1 and std > 0:
        residuals = (f - mean) / (std + 1e-6)
        stetson_k = (1 / np.sqrt(n)) * np.sum(np.abs(residuals)) / np.sqrt(np.mean(residuals**2))
    else:
        stetson_k = 0
    t_peak = t[idx_max]
    t_rise = t_peak - t[0]
    t_decay = t[-1] - t_peak
    rise_decay_ratio = 0
    if t_decay > 0:
        rise_decay_ratio = t_rise / (t_decay + 1e-6)

    power_law_index = 0
    
    if idx_max < n - 2: 
        post_peak_f = f[idx_max+1:]
        post_peak_t = t[idx_max+1:]
        valid_mask = post_peak_f > 0
        if np.sum(valid_mask) > 1: 
            y_log = np.log(post_peak_f[valid_mask])
            x_log = np.log(post_peak_t[valid_mask] - t_peak + 1.0) 
            if len(np.unique(x_log)) > 1:
                try:
                    p_index, _, _, _, _ = linregress(x_log, y_log)
                    if not np.isnan(p_index):
                        power_law_index = p_index
                except: power_law_index = 0
    
    mask_peak = (t <= t_peak + 10)
    mask_tail = (t > t_peak + 20)
    def get_color_gr(mask):
        f_masked = f[mask]
        fil_masked = fil[mask]
        g_vals = f_masked[fil_masked == 1] 
        r_vals = f_masked[fil_masked == 2] 
        if len(g_vals) > 0 and len(r_vals) > 0:
            return np.mean(g_vals) - np.mean(r_vals)
        return 0.0 
    gr_peak = get_color_gr(mask_peak)
    gr_tail = get_color_gr(mask_tail)
    
    delta_color_gr = gr_tail - gr_peak if gr_tail != 0 and gr_peak != 0 else 0
    half_max_flux = max_f / 2.0
    width_half_max = 0

    mask_high = f > half_max_flux
    if np.sum(mask_high) >= 2:
        t_high = t[mask_high]
        width_half_max = t_high.max() - t_high.min()
    else:
        width_half_max = 0
        
    t_rise_50 = t_peak - t[f > half_max_flux].min() if np.sum(f > half_max_flux) > 0 else 0
    t_fall_50 = t[f > half_max_flux].max() - t_peak if np.sum(f > half_max_flux) > 0 else 0
    
    asymmetry_50 = t_fall_50 / (t_rise_50 + 1e-6)
    
    return [mean, std, sk, kt, amplitude, 
            u_g, g_r, r_i, flux_ratio, len(f),
            means_band[0], means_band[1], means_band[2], slope, stetson_k,
            rise_decay_ratio, power_law_index, delta_color_gr, width_half_max, asymmetry_50]


df_log = pd.read_csv('/kaggle/input/mallorn-dataset/train_log.csv')
TRAIN_Z_MAX = df_log['Z'].max()
TRAIN_EBV_MAX = df_log['EBV'].max()
df_log['Z'] = df_log['Z'] / (df_log['Z'].max() + 1e-6)
df_log['EBV'] = df_log['EBV'] / (df_log['EBV'].max() + 1e-6)

label_encoder = LabelEncoder()
df_log['label_encoded'] = label_encoder.fit_transform(df_log['SpecType'])
num_classes = len(label_encoder.classes_)
tde_index = label_encoder.transform(['TDE'])[0]

class_weights = torch.ones(num_classes, dtype=torch.float32).to(device)

class_weights[tde_index] = 5


static_dict = df_log.set_index('object_id')[['Z', 'EBV']].T.to_dict('list')
id_to_label = dict(zip(df_log['object_id'], df_log['label_encoded']))

all_X_seq = []   
all_X_static = [] 
all_y = []
band_map = {'u': 0, 'g': 1, 'r': 2, 'i': 3, 'z': 4, 'y': 5}
class FocalLoss(nn.Module):
    def __init__(self, alpha=None, gamma=2.0, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction
        
    def forward(self, inputs, targets):
        ce_loss = F.cross_entropy(inputs, targets, weight=self.alpha, reduction='none')
        
        pt = torch.exp(-ce_loss)
        
        focal_loss = (1 - pt) ** self.gamma * ce_loss

        if self.reduction == 'mean':
            return torch.mean(focal_loss)
        elif self.reduction == 'sum':
            return torch.sum(focal_loss)
        else:
            return focal_loss
            
for i in range(1, 21): 
    folder_name = f"split_{i:02d}"
    file_path = f"{BASE_PATH}/{folder_name}/train_full_lightcurves.csv"
    df_chunk = pd.read_csv(file_path, usecols=['object_id', 'Flux', 'Time (MJD)', 'Flux_err', 'Filter'], dtype={'Flux': 'float32', 'Time (MJD)': 'float32'})
    grouped = df_chunk.groupby('object_id').agg({'Flux': list, 'Time (MJD)': list, 'Flux_err': list, 'Filter': list}).to_dict(orient='index')
    
    for obj_id, data in grouped.items():
        fluxes = np.array(data['Flux'], dtype=np.float32)
        times = np.array(data['Time (MJD)'], dtype=np.float32)
        filters = np.array([band_map.get(f, 0) for f in data['Filter']])
        flux_errs = np.array(data['Flux_err'], dtype=np.float32)
        
        sorted_idx = np.argsort(times)
        times = times[sorted_idx]
        fluxes = fluxes[sorted_idx]
        flux_errs = flux_errs[sorted_idx]
        filters = filters[sorted_idx]
        
        phys_feats = extract_features(fluxes, times, flux_errs, filters)
        
        f_mean = np.mean(fluxes)
        f_std = np.std(fluxes) + 1e-6
        fluxes = (fluxes - f_mean) / f_std
        fluxes = fluxes.tolist()
        
        flux_errs = flux_errs / f_std
        flux_errs = flux_errs.tolist()
        
        start_time = times[0]
        norm_times = times - start_time
        norm_times = norm_times.tolist()

        filters = filters.tolist()

        if len(fluxes) > SEQ_LEN:
            fluxes = fluxes[:SEQ_LEN]
            norm_times = norm_times[:SEQ_LEN]
            flux_errs = flux_errs[:SEQ_LEN]
            filters = filters[:SEQ_LEN]
        else:
            pad_len = SEQ_LEN - len(fluxes)
            fluxes = fluxes + [0.0] * pad_len
            norm_times = norm_times + [0.0] * pad_len
            flux_errs = flux_errs + [0.0] * pad_len 
            filters = filters + [0] * pad_len
            
        combined_seq = [[f, t, err, filt] for f, t, err, filt in zip(fluxes, norm_times, flux_errs, filters)]
        current_static = static_dict[obj_id] + phys_feats
        
        all_X_seq.append(combined_seq)
        all_X_static.append(current_static)
        all_y.append(id_to_label[obj_id])
        
    del df_chunk, grouped
    gc.collect()

X_seq_tensor = torch.tensor(all_X_seq, dtype=torch.float32)
X_seq_tensor = torch.nan_to_num(X_seq_tensor, nan=0.0)

X_static_tensor = torch.tensor(all_X_static, dtype=torch.float32)
X_static_tensor = torch.nan_to_num(X_static_tensor, nan=0.0)

mean_static = X_static_tensor.mean(dim=0)
std_static = X_static_tensor.std(dim=0) + 1e-6
X_static_tensor = (X_static_tensor - mean_static) / std_static

y_tensor = torch.tensor(all_y, dtype=torch.long)

STATIC_INPUT_DIM = X_static_tensor.shape[1] 

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=500): 
        super(PositionalEncoding, self).__init__()
        
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:, :x.size(1), :]
        return x

class Transformer(nn.Module):
    def __init__(self, seq_len, static_input_dim, d_model, num_classes):
        super(Transformer, self).__init__()
        self.filter_embedding = nn.Embedding(num_embeddings=6, embedding_dim=16)
        
        total_input_dim = 3 + 16 + static_input_dim 
        self.input_proj = nn.Linear(total_input_dim, d_model)
        self.pos_encoder = PositionalEncoding(d_model, max_len=seq_len + 50)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model = d_model,
            nhead = 4,
            dim_feedforward=256,
            batch_first=True,
            dropout = 0.3
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers = 4)
        
        self.fc = nn.Sequential(
            nn.Linear(d_model, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(128, num_classes)
        )

    def forward(self, x_seq, x_static):
        batch_size, seq_len, _ = x_seq.shape
        
        numeric_feats = x_seq[:, :, :3]
        filter_ids = x_seq[:, :, 3].long()
        filter_emb = self.filter_embedding(filter_ids)
        
        padding_mask = (numeric_feats[:, :, 0] != 0).float().unsqueeze(-1)
        
        static_expanded = x_static.unsqueeze(1).repeat(1, seq_len, 1)
        static_expanded = static_expanded * padding_mask
        
        combined_input = torch.cat([numeric_feats, filter_emb, static_expanded], dim=-1)
        
        x = self.input_proj(combined_input)
        
        src_key_padding_mask = (numeric_feats[:, :, 0] == 0)
        x = self.transformer(x, src_key_padding_mask=src_key_padding_mask)
        
        x = torch.nan_to_num(x, nan=0.0)
        x_max = x.max(dim=1)[0]
        x_mean = x.mean(dim=1)
        x_feat = x_max + x_mean
        
        res = self.fc(x_feat)
        return res

class LSTMModel(nn.Module):
    def __init__(self, static_input_dim, hidden_dim, num_classes, num_layers=2):
        super(LSTMModel, self).__init__()
        
        self.filter_embedding = nn.Embedding(num_embeddings=6, embedding_dim=16)
        
        input_dim = 3 + 16 
        
        self.lstm = nn.LSTM(
            input_size=input_dim, 
            hidden_size=hidden_dim, 
            num_layers=num_layers, 
            batch_first=True, 
            bidirectional=True,
            dropout=0.3
        )
        
        self.static_proj = nn.Sequential(
            nn.Linear(static_input_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.2)
        )
        
        combined_dim = (hidden_dim * 4) + hidden_dim
        
        self.fc = nn.Sequential(
            nn.Linear(combined_dim, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(128, num_classes)
        )

    def forward(self, x_seq, x_static):
        numeric_feats = x_seq[:, :, :3]
        filter_ids = x_seq[:, :, 3].long()
        filter_emb = self.filter_embedding(filter_ids)
        lstm_input = torch.cat([numeric_feats, filter_emb], dim=-1)
        out, _ = self.lstm(lstm_input)
        
        avg_pool = torch.mean(out, dim=1)
        max_pool, _ = torch.max(out, dim=1)
        seq_feat = torch.cat([avg_pool, max_pool], dim=1) 
        
        static_feat = self.static_proj(x_static) 
        
        final_feat = torch.cat([seq_feat, static_feat], dim=1)
        return self.fc(final_feat)
        
N_FOLDS = 4
skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=42)
fold_best_thresholds = []
for fold, (train_idx, val_idx) in enumerate(skf.split(X_seq_tensor, y_tensor.cpu().numpy())):
    print(f"\n{'='*20} FOLD {fold+1}/{N_FOLDS} {'='*20}")

    X_seq_train_fold = X_seq_tensor[train_idx]
    X_static_train_fold = X_static_tensor[train_idx]
    y_train_fold = y_tensor[train_idx]
    
    X_seq_val_fold = X_seq_tensor[val_idx]
    X_static_val_fold = X_static_tensor[val_idx]
    y_val_fold = y_tensor[val_idx]

    train_ds = torch.utils.data.TensorDataset(X_seq_train_fold, X_static_train_fold, y_train_fold)
    val_ds = torch.utils.data.TensorDataset(X_seq_val_fold, X_static_val_fold, y_val_fold)
    
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False)

    model = LSTMModel(
        static_input_dim=STATIC_INPUT_DIM,
        hidden_dim=128,      
        num_classes=num_classes,
        num_layers=2         
    ).to(device)
    
    criterion = FocalLoss(alpha=class_weights, gamma=2.0).to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-4) 
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)
    fold_best_f1 = 0.0
    fold_best_thr = 0.5
    
    for epoch in range(NUM_EPOCHS):
        model.train()
        total_loss = 0
        
        for batch_seq, batch_static, batch_y in train_loader:
            batch_seq, batch_static, batch_y = batch_seq.to(device), batch_static.to(device), batch_y.to(device)
            
            optimizer.zero_grad()
            outputs = model(batch_seq, batch_static)
            loss = criterion(outputs, batch_y)
            
            if torch.isnan(loss): continue
            
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            
            total_loss += loss.item()
            
        avg_train_loss = total_loss / len(train_loader)
        model.eval()
        val_loss = 0
        all_probs = []   
        all_targets = []
        with torch.no_grad():
            for batch_seq, batch_static, batch_y in val_loader:
                batch_seq, batch_static, batch_y = batch_seq.to(device), batch_static.to(device), batch_y.to(device)
                
                outputs = model(batch_seq, batch_static)
                loss = criterion(outputs, batch_y)
                val_loss += loss.item()
                
                probs = torch.softmax(outputs, dim=1)[:, tde_index]
                
                all_probs.extend(probs.cpu().numpy())
                all_targets.extend((batch_y == tde_index).cpu().numpy().astype(int))
        avg_val_loss = val_loss / len(val_loader)
        scheduler.step(avg_val_loss)
        print(f"\nEpoch [{epoch+1}/{NUM_EPOCHS}] | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")       
        thresholds = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]
        
        all_probs = np.array(all_probs)
        all_targets = np.array(all_targets)
        
        epoch_best_f1 = 0
        epoch_best_thr = 0
        for thr in thresholds:
            preds = (all_probs > thr).astype(int)
            f1 = f1_score(all_targets, preds, zero_division=0)
            if f1 > epoch_best_f1:
                epoch_best_f1 = f1
                epoch_best_thr = thr
        print(f"   >>> Epoch Best F1: {epoch_best_f1:.4f} at Thr: {epoch_best_thr}")
        if epoch_best_f1 > fold_best_f1 and epoch > 20:
            fold_best_f1 = epoch_best_f1
            fold_best_thr = epoch_best_thr
            
            save_name = f'best_model_fold_{fold}.pth'
            
            torch.save({
                'model_state_dict': model.state_dict(),
                'threshold': fold_best_thr,
                'f1_score': fold_best_f1
            }, save_name)
            
            print(f"   >>> [SAVED] New Best Model for Fold {fold} (F1: {fold_best_f1:.4f})")
            
    print(f"--- Finished Fold {fold+1}. Best F1: {fold_best_f1:.4f} at Threshold: {fold_best_thr} ---")
    fold_best_thresholds.append(fold_best_thr)
    del model, optimizer, train_loader, val_loader, train_ds, val_ds
    torch.cuda.empty_cache()
    gc.collect()

print("Best Thresholds per fold:", fold_best_thresholds)
np.save('folds_thresholds.npy', np.array(fold_best_thresholds))



Epoch [1/30] | Train Loss: 2.9052 | Val Loss: 2.6610
   >>> Epoch Best F1: 0.0000 at Thr: 0

Epoch [2/30] | Train Loss: 2.3667 | Val Loss: 2.1572
   >>> Epoch Best F1: 0.1392 at Thr: 0.1

Epoch [3/30] | Train Loss: 1.9905 | Val Loss: 1.8795
   >>> Epoch Best F1: 0.1170 at Thr: 0.1

Epoch [4/30] | Train Loss: 1.6861 | Val Loss: 1.6009
   >>> Epoch Best F1: 0.1460 at Thr: 0.2

Epoch [5/30] | Train Loss: 1.4397 | Val Loss: 1.3513
   >>> Epoch Best F1: 0.1525 at Thr: 0.2

Epoch [6/30] | Train Loss: 1.2813 | Val Loss: 1.2193
   >>> Epoch Best F1: 0.1434 at Thr: 0.2

Epoch [7/30] | Train Loss: 1.1592 | Val Loss: 1.0975
   >>> Epoch Best F1: 0.1714 at Thr: 0.3

Epoch [8/30] | Train Loss: 1.0623 | Val Loss: 1.0622
   >>> Epoch Best F1: 0.1592 at Thr: 0.2

Epoch [9/30] | Train Loss: 1.0132 | Val Loss: 1.0093
   >>> Epoch Best F1: 0.2338 at Thr: 0.3

Epoch [10/30] | Train Loss: 0.9867 | Val Loss: 0.9823
   >>> Epoch Best F1: 0.2778 at Thr: 0.3

Epoch [11/30] | Train Loss: 0.9434 | Val Loss: 0.

In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from scipy.stats import skew, kurtosis
import gc


BASE_PATH = '/kaggle/input/mallorn-dataset'
SEQ_LEN = 150        
BATCH_SIZE_TEST = 128      
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
FINAL_THRESHOLD = 0.35


df_train_full = pd.read_csv(f'{BASE_PATH}/train_log.csv')
unique_labels = sorted(df_train_full['SpecType'].unique())
num_classes = len(unique_labels)


TRAIN_Z_MAX = df_train_full['Z'].max()
TRAIN_EBV_MAX = df_train_full['EBV'].max()
static_dict_train = df_train_full.set_index('object_id')[['Z', 'EBV']].T.to_dict('list')

df_test_log = pd.read_csv(f'{BASE_PATH}/test_log.csv')
df_test_log['Z'] = df_test_log['Z'] / (TRAIN_Z_MAX + 1e-6)
df_test_log['EBV'] = df_test_log['EBV'] / (TRAIN_EBV_MAX + 1e-6)
static_dict_test = df_test_log.set_index('object_id')[['Z', 'EBV']].T.to_dict('list')

models = []
for fold in range(4):
    model = LSTMModel(
        static_input_dim=STATIC_INPUT_DIM,
        hidden_dim=128,      
        num_classes=num_classes,
        num_layers=2         
    ).to(device)
    checkpoint = torch.load(f'best_model_fold_{fold}.pth', map_location=DEVICE)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    models.append(model)

results = {}
mean_static = mean_static.to(DEVICE)
std_static = std_static.to(DEVICE)
with torch.no_grad(): 
    for i in range(1, 21): 
        folder_name = f"split_{i:02d}"
        file_path = f"{BASE_PATH}/{folder_name}/test_full_lightcurves.csv"
        df_chunk = pd.read_csv(file_path, usecols=['object_id', 'Flux', 'Time (MJD)', 'Flux_err', 'Filter'], 
                               dtype={'Flux': 'float32', 'Time (MJD)': 'float32'})
        
        grouped = df_chunk.groupby('object_id').agg({
            'Flux': list, 'Time (MJD)': list, 'Flux_err': list, 'Filter': list
        }).to_dict(orient='index')
        
        batch_ids, batch_seq, batch_static = [], [], []
        
        for obj_id, data in grouped.items():
            fluxes = np.array(data['Flux'], dtype=np.float32)
            times = np.array(data['Time (MJD)'], dtype=np.float32)
            flux_errs = np.array(data['Flux_err'], dtype=np.float32)
            filters = np.array([band_map.get(f, 0) for f in data['Filter']])

            sorted_idx = np.argsort(times)
            times = times[sorted_idx]
            fluxes = fluxes[sorted_idx]
            flux_errs = flux_errs[sorted_idx]
            filters = filters[sorted_idx]
            
            phys_feats = extract_features(fluxes, times, flux_errs, filters)
            
            f_std = np.std(fluxes) + 1e-6
            fluxes = ((fluxes - np.mean(fluxes)) / f_std).tolist()
            flux_errs = (flux_errs / f_std).tolist()
            norm_times = (times - times[0]).tolist()
            filters = filters.tolist()
            
            if len(fluxes) > SEQ_LEN:
                fluxes, norm_times, flux_errs, filters = fluxes[:SEQ_LEN], norm_times[:SEQ_LEN], flux_errs[:SEQ_LEN], filters[:SEQ_LEN]
            else:
                pad = SEQ_LEN - len(fluxes)
                fluxes += [0]*pad; norm_times += [0]*pad; flux_errs += [0]*pad; filters += [0]*pad
            
            batch_ids.append(obj_id)
            batch_seq.append([[f, t, err, filt] for f, t, err, filt in zip(fluxes, norm_times, flux_errs, filters)])
            batch_static.append(static_dict_test[obj_id] + phys_feats)
            
            if len(batch_ids) >= BATCH_SIZE_TEST:
                X_seq = torch.tensor(batch_seq, dtype=torch.float32).to(DEVICE)
                X_stat = torch.tensor(batch_static, dtype=torch.float32).to(DEVICE)
                
                X_seq = torch.nan_to_num(X_seq, nan=0.0)
                X_stat = torch.nan_to_num(X_stat, nan=0.0)
                X_stat = (X_stat - mean_static) / std_static
                X_stat = torch.nan_to_num(X_stat, nan=0.0) 
                
                avg_probs = np.zeros(len(batch_ids))
                for model in models:
                    avg_probs += torch.softmax(model(X_seq, X_stat), dim=1)[:, tde_index].cpu().numpy()
                avg_probs /= len(models)
                
                preds = (avg_probs > FINAL_THRESHOLD).astype(int)
                for oid, p in zip(batch_ids, preds): results[oid] = p
                batch_ids, batch_seq, batch_static = [], [], []

        if len(batch_ids) > 0:
            X_seq = torch.tensor(batch_seq, dtype=torch.float32).to(DEVICE)
            X_stat = torch.tensor(batch_static, dtype=torch.float32).to(DEVICE)
            X_seq = torch.nan_to_num(X_seq, nan=0.0)
            X_stat = torch.nan_to_num(X_stat, nan=0.0)
            X_stat = (X_stat - mean_static) / std_static
            X_stat = torch.nan_to_num(X_stat, nan=0.0)
            
            avg_probs = np.zeros(len(batch_ids))
            for model in models:
                avg_probs += torch.softmax(model(X_seq, X_stat), dim=1)[:, tde_index].cpu().numpy()
            avg_probs /= len(models)
            preds = (avg_probs > FINAL_THRESHOLD).astype(int)
            for oid, p in zip(batch_ids, preds): 
                results[oid] = p
        
        del df_chunk, grouped
        gc.collect()


submission_df = pd.DataFrame({'object_id': df_test_log['object_id']})
submission_df['prediction'] = submission_df['object_id'].map(results).fillna(0).astype(int)
n_pos = submission_df['prediction'].sum()
print(f" Tìm thấy {n_pos} TDE trên tổng số {len(submission_df)} mẫu.")
submission_df.to_csv('submission.csv', index=False)

 Tìm thấy 853 TDE trên tổng số 7135 mẫu.
