In [None]:
import os, glob, cv2
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# For deep models (ResNet, DenseNet, etc.)
import torchvision.models as models
import timm

# For gradient boosting models
from xgboost import XGBClassifier
from catboost import CatBoostClassifier, Pool
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [None]:
def get_image_files(folder):
    """Return the sorted list of paths in ``folder`` matching the glob ``*.*``.

    Args:
        folder: Directory to search (not searched recursively).

    Returns:
        A list of path strings in ascending (string) sort order; empty when
        nothing matches or the folder does not exist.
    """
    pattern = os.path.join(folder, "*.*")
    matches = glob.glob(pattern)
    # String ordering, so e.g. "img_10.png" sorts before "img_2.png".
    matches.sort()
    return matches

def get_chunk(file_list, chunk_index, total_chunks=14):
    """Return the ``chunk_index``-th slice of ``file_list`` out of ``total_chunks``.

    Every chunk holds ``len(file_list) // total_chunks`` items, except the
    last one (``chunk_index == total_chunks - 1``), which also takes whatever
    is left over after the integer division.

    Args:
        file_list: Sequence to slice.
        chunk_index: Zero-based index of the requested chunk.
        total_chunks: Number of chunks the sequence is divided into.

    Returns:
        The requested slice of ``file_list``.
    """
    per_chunk = len(file_list) // total_chunks
    first = per_chunk * chunk_index
    if chunk_index == total_chunks - 1:
        # Final chunk: run to the end so the remainder is not dropped.
        return file_list[first:]
    return file_list[first:first + per_chunk]

def read_image(path, target_size=(32,32)):
    """Load an image, convert BGR->RGB, resize it and scale it by 1/255.

    Args:
        path: Path handed to ``cv2.imread``.
        target_size: Size tuple handed to ``cv2.resize``.

    Returns:
        A ``float32`` NumPy array holding the resized RGB image divided by 255.

    Raises:
        FileNotFoundError: If ``cv2.imread`` returns ``None`` for ``path``.
    """
    bgr = cv2.imread(path)
    if bgr is None:
        raise FileNotFoundError(f"Image not found: {path}")
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(rgb, target_size)
    scaled = resized.astype(np.float32) / 255.0
    return scaled

In [None]:
class ImageStatsTransform:
    """Callable that standardises each channel of a tensor by its own statistics."""

    def __call__(self, x):
        """Subtract the per-channel mean and divide by the per-channel std.

        Args:
            x: Tensor laid out as (C, H, W); statistics are taken over dims 1 and 2.

        Returns:
            Tensor of the same shape as ``x``.
        """
        spatial_dims = [1, 2]
        channel_mean = x.mean(dim=spatial_dims, keepdim=True)
        channel_std = x.std(dim=spatial_dims, keepdim=True)
        # The small constant keeps the division finite when a channel is constant.
        return (x - channel_mean) / (channel_std + 1e-6)

In [None]:
def compute_physics_features_tensor(x, eps=1e-6):
    """Summarise a batch of 3-channel images as four per-sample scalars.

    The first three channels are treated as [ecal, hcal, tracks]. Each feature
    is an average over the two spatial dimensions:

    1. ``ecal / (hcal + eps)``
    2. ``tracks``
    3. ``ecal - hcal``
    4. ``|ecal - hcal| / (ecal + hcal + eps)``

    Args:
        x: Tensor laid out as (B, 3, H, W).
        eps: Constant added to the denominators.

    Returns:
        Tensor of shape (B, 4) with the features in the order listed above.
    """
    # Slices keep the channel axis, so each plane is (B, 1, H, W).
    ecal, hcal, tracks = (x[:, c:c + 1, :, :] for c in range(3))

    def spatial_mean(t):
        return torch.mean(t, dim=[2, 3])

    features = [
        spatial_mean(ecal / (hcal + eps)),
        spatial_mean(tracks),
        spatial_mean(ecal - hcal),
        spatial_mean(torch.abs(ecal - hcal) / (ecal + hcal + eps)),
    ]
    return torch.cat(features, dim=1)  # (B, 4)


In [None]:
class ChannelWiseFPN(nn.Module):
    """Multi-scale convolutional feature extractor applied to each input channel.

    Every input channel is processed on its own by the same three convolutions
    (1x1, 3x3, 5x5, all padded to keep the spatial size); their outputs are
    concatenated and fused down to ``out_channels`` maps. The per-channel
    results are concatenated along the channel axis.

    Args:
        in_channels: Accepted for interface compatibility; not used by the
            layers, which always take a single-channel plane.
        out_channels: Number of feature maps produced per input channel.
    """

    def __init__(self, in_channels=3, out_channels=16):
        super().__init__()
        # Registers conv1x1, conv3x3 and conv5x5 (in that order); padding k // 2
        # preserves height and width for odd kernel sizes.
        for k in (1, 3, 5):
            setattr(self, f"conv{k}x{k}",
                    nn.Conv2d(1, out_channels, kernel_size=k, padding=k // 2))
        fuse_layers = [
            nn.Conv2d(3 * out_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=1),
        ]
        self.fuse_conv = nn.Sequential(*fuse_layers)

    def forward(self, x):
        """Map (B, C, H, W) to (B, C * out_channels, H, W)."""
        scales = (self.conv1x1, self.conv3x3, self.conv5x5)
        per_channel = []
        for idx in range(x.shape[1]):
            plane = x.narrow(1, idx, 1)  # (B, 1, H, W) view of channel idx
            multi_scale = torch.cat([conv(plane) for conv in scales], dim=1)
            per_channel.append(self.fuse_conv(multi_scale))
        return torch.cat(per_channel, dim=1)  # (B, in_channels*out_channels, H, W)

In [None]:
class ResNetWithChannelFPN(nn.Module):
    """Three-stream ResNet-18 classifier on channel-wise FPN features.

    Streams: the original image, the difference ``x_orig - x_rec``, and the
    graph image. Each stream has its own ``ChannelWiseFPN(3, 16)`` (48 output
    channels) and its own pretrained ResNet-18 whose first convolution is
    replaced to accept 48 channels and whose ``fc`` layer is an identity.
    The three embeddings are averaged, concatenated with the four physics
    features of ``x_orig`` and passed to a linear classifier.

    Args:
        num_classes: Size of the classifier output.
    """

    def __init__(self, num_classes=2):
        super().__init__()
        self.fpn_orig = ChannelWiseFPN(3, 16)
        self.fpn_diff = ChannelWiseFPN(3, 16)
        self.fpn_graph = ChannelWiseFPN(3, 16)
        self.resnet_orig = models.resnet18(pretrained=True)
        self.resnet_diff = models.resnet18(pretrained=True)
        self.resnet_graph = models.resnet18(pretrained=True)
        for backbone in (self.resnet_orig, self.resnet_diff, self.resnet_graph):
            # New stem for the 48-channel FPN output; freshly initialised.
            backbone.conv1 = nn.Conv2d(48, 64, kernel_size=7, stride=2, padding=3, bias=False)
            backbone.fc = nn.Identity()  # Output: 512
        self.classifier = nn.Linear(512 + 4, num_classes)

    def forward(self, x_orig, x_rec, x_graph):
        """Return class logits of shape (B, num_classes)."""
        streams = (
            (self.fpn_orig, self.resnet_orig, x_orig),
            (self.fpn_diff, self.resnet_diff, x_orig - x_rec),
            (self.fpn_graph, self.resnet_graph, x_graph),
        )
        emb_orig, emb_diff, emb_graph = [net(fpn(inp)) for fpn, net, inp in streams]
        pooled = (emb_orig + emb_diff + emb_graph) / 3.0
        physics = compute_physics_features_tensor(x_orig)
        return self.classifier(torch.cat([pooled, physics], dim=1))

In [None]:
class DenseNetWithChannelFPN(nn.Module):
    """Three-stream DenseNet-121 classifier on channel-wise FPN features.

    Streams: the original image, the difference ``x_orig - x_rec``, and the
    graph image. Each stream has its own ``ChannelWiseFPN(3, 16)`` (48 output
    channels) and its own pretrained DenseNet-121 whose ``features.conv0`` is
    replaced to accept 48 channels and whose ``classifier`` is an identity.
    The three embeddings are averaged, concatenated with the four physics
    features of ``x_orig`` and passed to a linear classifier.

    Args:
        num_classes: Size of the classifier output.
    """

    def __init__(self, num_classes=2):
        super().__init__()
        self.fpn_orig = ChannelWiseFPN(3, 16)
        self.fpn_diff = ChannelWiseFPN(3, 16)
        self.fpn_graph = ChannelWiseFPN(3, 16)
        self.densenet_orig = models.densenet121(pretrained=True)
        self.densenet_diff = models.densenet121(pretrained=True)
        self.densenet_graph = models.densenet121(pretrained=True)
        for backbone in (self.densenet_orig, self.densenet_diff, self.densenet_graph):
            # New stem for the 48-channel FPN output; freshly initialised.
            backbone.features.conv0 = nn.Conv2d(48, 64, kernel_size=7, stride=2, padding=3, bias=False)
            backbone.classifier = nn.Identity()  # Output: 1024
        self.classifier = nn.Linear(1024 + 4, num_classes)

    def forward(self, x_orig, x_rec, x_graph):
        """Return class logits of shape (B, num_classes)."""
        streams = (
            (self.fpn_orig, self.densenet_orig, x_orig),
            (self.fpn_diff, self.densenet_diff, x_orig - x_rec),
            (self.fpn_graph, self.densenet_graph, x_graph),
        )
        emb_orig, emb_diff, emb_graph = [net(fpn(inp)) for fpn, net, inp in streams]
        pooled = (emb_orig + emb_diff + emb_graph) / 3.0
        physics = compute_physics_features_tensor(x_orig)
        return self.classifier(torch.cat([pooled, physics], dim=1))


In [None]:
class EfficientNetWithChannelFPN(nn.Module):
    """Three-stream EfficientNet-B0 classifier on channel-wise FPN features.

    Streams: the original image, the difference ``x_orig - x_rec``, and the
    graph image. Each stream has its own ``ChannelWiseFPN(3, 16)`` (48 output
    channels) and its own pretrained timm ``efficientnet_b0`` whose
    ``conv_stem`` is rebuilt for 48 input channels (other stem settings are
    copied from the original stem) and whose ``classifier`` is an identity.
    The three embeddings are averaged, concatenated with the four physics
    features of ``x_orig`` and passed to a linear classifier.

    Args:
        num_classes: Size of the classifier output.
    """

    def __init__(self, num_classes=2):
        super().__init__()
        self.fpn_orig = ChannelWiseFPN(3, 16)
        self.fpn_diff = ChannelWiseFPN(3, 16)
        self.fpn_graph = ChannelWiseFPN(3, 16)
        self.effnet_orig = timm.create_model('efficientnet_b0', pretrained=True)
        self.effnet_diff = timm.create_model('efficientnet_b0', pretrained=True)
        self.effnet_graph = timm.create_model('efficientnet_b0', pretrained=True)
        for backbone in (self.effnet_orig, self.effnet_diff, self.effnet_graph):
            old_stem = backbone.conv_stem
            # Same geometry as the pretrained stem, but 48 input channels
            # (weights are freshly initialised).
            backbone.conv_stem = nn.Conv2d(
                48,
                old_stem.out_channels,
                kernel_size=old_stem.kernel_size,
                stride=old_stem.stride,
                padding=old_stem.padding,
                bias=False,
            )
            backbone.classifier = nn.Identity()  # Output: 1280
        self.classifier = nn.Linear(1280 + 4, num_classes)

    def forward(self, x_orig, x_rec, x_graph):
        """Return class logits of shape (B, num_classes)."""
        streams = (
            (self.fpn_orig, self.effnet_orig, x_orig),
            (self.fpn_diff, self.effnet_diff, x_orig - x_rec),
            (self.fpn_graph, self.effnet_graph, x_graph),
        )
        emb_orig, emb_diff, emb_graph = [net(fpn(inp)) for fpn, net, inp in streams]
        pooled = (emb_orig + emb_diff + emb_graph) / 3.0
        physics = compute_physics_features_tensor(x_orig)
        return self.classifier(torch.cat([pooled, physics], dim=1))

In [None]:
class ViTWithChannelFPN(nn.Module):
    """Three-stream ViT-Base/16 classifier on channel-wise FPN features.

    Streams: the original image, the difference ``x_orig - x_rec``, and the
    graph image. Each stream has its own ``ChannelWiseFPN(3, 16)`` (48 output
    channels) and its own pretrained timm ``vit_base_patch16_224`` whose patch
    projection is rebuilt for 48 input channels and whose ``head`` is an
    identity. The three embeddings are averaged, concatenated with the four
    physics features of ``x_orig`` and passed to a linear classifier.

    The FPN keeps the input's height and width, while the backbone is built
    for 224x224 inputs. FPN features of any other size are bilinearly resized
    to 224x224 before entering the backbone; inputs that are already 224x224
    take exactly the same path as before.

    Args:
        num_classes: Size of the classifier output.
    """

    # Spatial size expected by the 'vit_base_patch16_224' backbone.
    BACKBONE_INPUT_SIZE = (224, 224)

    def __init__(self, num_classes=2):
        super(ViTWithChannelFPN, self).__init__()
        self.fpn_orig = ChannelWiseFPN(3, 16)
        self.fpn_diff = ChannelWiseFPN(3, 16)
        self.fpn_graph = ChannelWiseFPN(3, 16)
        self.vit_orig = timm.create_model('vit_base_patch16_224', pretrained=True)
        self.vit_diff = timm.create_model('vit_base_patch16_224', pretrained=True)
        self.vit_graph = timm.create_model('vit_base_patch16_224', pretrained=True)
        for net in [self.vit_orig, self.vit_diff, self.vit_graph]:
            # Same patch geometry as the pretrained projection, but 48 input
            # channels (weights are freshly initialised).
            net.patch_embed.proj = nn.Conv2d(48, net.patch_embed.proj.out_channels,
                                             kernel_size=net.patch_embed.proj.kernel_size,
                                             stride=net.patch_embed.proj.stride,
                                             padding=net.patch_embed.proj.padding)
            net.head = nn.Identity()
        self.classifier = nn.Linear(768 + 4, num_classes)

    def _stream_features(self, fpn, backbone, x):
        """Run one stream: FPN, resize to the backbone's input size if needed, backbone."""
        feats = fpn(x)
        if tuple(feats.shape[-2:]) != self.BACKBONE_INPUT_SIZE:
            # timm's patch embedding checks the input size by default, so e.g.
            # 32x32 images would otherwise be rejected by the backbone.
            feats = nn.functional.interpolate(
                feats, size=self.BACKBONE_INPUT_SIZE, mode="bilinear", align_corners=False
            )
        return backbone(feats)

    def forward(self, x_orig, x_rec, x_graph):
        """Return class logits of shape (B, num_classes)."""
        x_diff = x_orig - x_rec
        feat_orig = self._stream_features(self.fpn_orig, self.vit_orig, x_orig)
        feat_diff = self._stream_features(self.fpn_diff, self.vit_diff, x_diff)
        feat_graph = self._stream_features(self.fpn_graph, self.vit_graph, x_graph)
        deep_feat = (feat_orig + feat_diff + feat_graph) / 3.0
        # Physics features are computed on the un-resized original input.
        phys_feat = compute_physics_features_tensor(x_orig)
        combined = torch.cat([deep_feat, phys_feat], dim=1)
        return self.classifier(combined)

In [None]:
class SwinWithChannelFPN(nn.Module):
    """Three-stream Swin-Base classifier on channel-wise FPN features.

    Streams: the original image, the difference ``x_orig - x_rec``, and the
    graph image. Each stream has its own ``ChannelWiseFPN(3, 16)`` (48 output
    channels) and its own pretrained timm ``swin_base_patch4_window7_224``
    whose patch projection is rebuilt for 48 input channels and whose
    classification layer is removed. The three embeddings are averaged,
    concatenated with the four physics features of ``x_orig`` and passed to a
    linear classifier.

    The FPN keeps the input's height and width, while the backbone is built
    for 224x224 inputs. FPN features of any other size are bilinearly resized
    to 224x224 before entering the backbone; inputs that are already 224x224
    take exactly the same path as before.

    Args:
        num_classes: Size of the classifier output.
    """

    # Spatial size expected by the 'swin_base_patch4_window7_224' backbone.
    BACKBONE_INPUT_SIZE = (224, 224)

    def __init__(self, num_classes=2):
        super(SwinWithChannelFPN, self).__init__()
        self.fpn_orig = ChannelWiseFPN(3, 16)
        self.fpn_diff = ChannelWiseFPN(3, 16)
        self.fpn_graph = ChannelWiseFPN(3, 16)
        self.swin_orig = timm.create_model('swin_base_patch4_window7_224', pretrained=True)
        self.swin_diff = timm.create_model('swin_base_patch4_window7_224', pretrained=True)
        self.swin_graph = timm.create_model('swin_base_patch4_window7_224', pretrained=True)
        for net in [self.swin_orig, self.swin_diff, self.swin_graph]:
            # Same patch geometry as the pretrained projection, but 48 input
            # channels (weights are freshly initialised).
            net.patch_embed.proj = nn.Conv2d(48, net.patch_embed.proj.out_channels,
                                             kernel_size=net.patch_embed.proj.kernel_size,
                                             stride=net.patch_embed.proj.stride,
                                             padding=net.patch_embed.proj.padding)
            # Use timm's own API to drop the classification layer. In timm
            # releases where the Swin head also performs the global pooling,
            # overwriting ``net.head`` with an identity would skip the pooling
            # and return an unpooled feature map that cannot be concatenated
            # with the (B, 4) physics features. reset_classifier(0) keeps the
            # pooled (B, 1024) output and adds no parameters.
            net.reset_classifier(0)
        self.classifier = nn.Linear(1024 + 4, num_classes)

    def _stream_features(self, fpn, backbone, x):
        """Run one stream: FPN, resize to the backbone's input size if needed, backbone."""
        feats = fpn(x)
        if tuple(feats.shape[-2:]) != self.BACKBONE_INPUT_SIZE:
            # The Swin backbone is configured for a fixed 224x224 input, so
            # e.g. 32x32 images would otherwise be rejected.
            feats = nn.functional.interpolate(
                feats, size=self.BACKBONE_INPUT_SIZE, mode="bilinear", align_corners=False
            )
        return backbone(feats)

    def forward(self, x_orig, x_rec, x_graph):
        """Return class logits of shape (B, num_classes)."""
        x_diff = x_orig - x_rec
        feat_orig = self._stream_features(self.fpn_orig, self.swin_orig, x_orig)
        feat_diff = self._stream_features(self.fpn_diff, self.swin_diff, x_diff)
        feat_graph = self._stream_features(self.fpn_graph, self.swin_graph, x_graph)
        deep_feat = (feat_orig + feat_diff + feat_graph) / 3.0
        # Physics features are computed on the un-resized original input.
        phys_feat = compute_physics_features_tensor(x_orig)
        combined = torch.cat([deep_feat, phys_feat], dim=1)
        return self.classifier(combined)

In [None]:
def load_checkpoint(model, optimizer, filename):
    """Restore model (and optionally optimizer) state from a checkpoint file.

    The checkpoint is expected to be a dict with a ``"model_state"`` entry and,
    optionally, ``"optimizer_state"`` and ``"epoch"`` entries.

    Args:
        model: Module whose ``load_state_dict`` receives ``"model_state"``.
        optimizer: Optimizer to restore, or ``None`` to load weights only
            (e.g. for inference).
        filename: Path of the checkpoint file.

    Returns:
        ``True`` if the checkpoint was found and loaded, ``False`` if
        ``filename`` is not an existing file (a message is printed either way).
    """
    if not os.path.isfile(filename):
        print(f"No checkpoint found at {filename}")
        return False

    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    checkpoint = torch.load(filename, map_location="cpu")
    model.load_state_dict(checkpoint["model_state"])
    # The optimizer state is only needed to resume training.
    if optimizer is not None and "optimizer_state" in checkpoint:
        optimizer.load_state_dict(checkpoint["optimizer_state"])
    print(f"Loaded checkpoint from {filename} (epoch {checkpoint.get('epoch', 'unknown')})")
    return True

In [None]:
class TestQuarkGluonDataset(Dataset):
    """Dataset yielding aligned (original, reconstructed, graph) image tensors.

    Sample ``idx`` is built from the ``idx``-th path of each of the three
    lists, so the lists must be index-aligned and of equal length.

    Args:
        orig_files: Paths of the original images.
        rec_files: Paths of the reconstructed images.
        graph_files: Paths of the graph images.
        transform: Optional callable applied to each (C, H, W) tensor.

    Raises:
        ValueError: If the three path lists differ in length.
    """

    def __init__(self, orig_files, rec_files, graph_files, transform=None):
        sizes = {
            "orig_files": len(orig_files),
            "rec_files": len(rec_files),
            "graph_files": len(graph_files),
        }
        # Fail early: with mismatched lists, indexing would either raise an
        # IndexError in the middle of iteration or silently pair unrelated files.
        if len(set(sizes.values())) > 1:
            raise ValueError(f"File lists must have the same length, got {sizes}")
        self.orig_files = orig_files
        self.rec_files = rec_files
        self.graph_files = graph_files
        self.transform = transform

    def __len__(self):
        return len(self.orig_files)

    def _load(self, path):
        """Read one image and return it as a channels-first tensor."""
        # read_image returns channels-last; permute to (C, H, W).
        tensor = torch.as_tensor(read_image(path)).permute(2, 0, 1)
        if self.transform:
            tensor = self.transform(tensor)
        return tensor

    def __getitem__(self, idx):
        """Return a dict with keys ``original``, ``reconstructed`` and ``graph``."""
        return {
            "original": self._load(self.orig_files[idx]),
            "reconstructed": self._load(self.rec_files[idx]),
            "graph": self._load(self.graph_files[idx]),
        }

In [None]:
def get_deep_ensemble_predictions(model_list, dataloader, device):
    """Average the class-1 probabilities of several models over a dataset.

    The data is iterated exactly once: every batch is moved to ``device`` and
    fed to each model in turn. This avoids re-reading the data once per model
    and also works when ``dataloader`` is a one-shot iterable.

    Args:
        model_list: Models called as ``model(x_orig, x_rec, x_graph)`` and
            returning (B, num_classes) logits. All are put in eval mode.
        dataloader: Iterable of dict batches with keys ``"original"``,
            ``"reconstructed"`` and ``"graph"``.
        device: Device the batch tensors are moved to.

    Returns:
        1-D NumPy array with one averaged class-1 probability per sample, in
        iteration order; an empty float32 array if ``dataloader`` yields nothing.

    Raises:
        ValueError: If ``model_list`` is empty.
    """
    ensemble = list(model_list)
    if not ensemble:
        raise ValueError("model_list must contain at least one model")
    for model in ensemble:
        model.eval()

    # per_model[i] collects the per-batch probability arrays of ensemble[i].
    per_model = [[] for _ in ensemble]
    with torch.no_grad():
        for batch in dataloader:
            x_orig = batch["original"].to(device)
            x_rec = batch["reconstructed"].to(device)
            x_graph = batch["graph"].to(device)
            for collected, model in zip(per_model, ensemble):
                logits = model(x_orig, x_rec, x_graph)
                probs = torch.softmax(logits, dim=1)[:, 1]  # probability of class 1
                collected.append(probs.cpu().numpy())

    if not per_model[0]:
        return np.empty((0,), dtype=np.float32)

    # Average predictions across models
    stacked = np.stack([np.concatenate(chunks) for chunks in per_model], axis=0)
    return np.mean(stacked, axis=0)

In [None]:
def main_final():
    """Run the full inference pipeline and write the final ensemble predictions.

    Steps:
      1. Collect original, reconstructed and graph file paths from fixed
         Kaggle input folders and wrap them in a ``TestQuarkGluonDataset``.
      2. Build the five deep models, load their checkpoints from the working
         directory and average their class-1 probabilities.
      3. Attach the deep predictions to ``metadata.csv`` (which must contain
         the columns ``n0``, ``pt`` and ``label``), engineer a few extra
         features and fit XGBoost, CatBoost and HistGradientBoosting on an
         80/20 split.
      4. Average the deep-ensemble and GBT-ensemble probabilities and save
         everything to ``final_ensemble_predictions.csv``.

    Raises:
        ValueError: If the number of deep predictions differs from the number
            of metadata rows.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    transform = ImageStatsTransform()

    # Update folder paths based on new structure.
    orig_folder = "/kaggle/input/genie-extracted-dataset"
    rec_folders = [
        "/kaggle/input/genie-output-part-1/reconstructions",
        "/kaggle/input/genie-common-task-1-output-part-2final/reconstructions",
    ]
    graph_folders = [
        "/kaggle/input/common-task-2-dataset-part-1/processed_jet_graphs",
        "/kaggle/input/output-part-2-of-task-2/processed_jet_graphs",
        "/kaggle/input/output-part-3-of-task-2/processed_jet_graphs",
        "/kaggle/input/part-4-task-2-output/processed_jet_graphs",
    ]
    # File names to skip, keyed by a substring identifying the folder they live in.
    ignored_graph_files = {
        "common-task-2-dataset-part-1": "processed_chunk_120000_130000.pt",
        "output-part-2-of-task-2": "processed_chunk_40000_50000.pt",
        "output-part-3-of-task-2": "processed_chunk_80000_90000.pt",
    }

    # Load original image files.
    orig_files = get_image_files(orig_folder)

    # Gather reconstructed files from both parts.
    # NOTE(review): samples are paired purely by position in these sorted
    # lists — confirm the three sources really sort into the same order.
    rec_files = []
    for folder in rec_folders:
        rec_files.extend(get_image_files(folder))

    # Gather graph files from all parts, filtering out specified files.
    # NOTE(review): the ignored names end in ".pt", yet every graph path is
    # later opened with read_image (cv2.imread) by the dataset — confirm these
    # folders actually contain image files.
    graph_files = []
    for folder in graph_folders:
        skip = {name for marker, name in ignored_graph_files.items() if marker in folder}
        graph_files.extend(
            path for path in get_image_files(folder)
            if os.path.basename(path) not in skip
        )

    # Create the test dataset and dataloader.
    test_dataset = TestQuarkGluonDataset(orig_files, rec_files, graph_files, transform=transform)
    test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=2)

    # Instantiate all five deep models and load their checkpoints.
    model_specs = [
        (ResNetWithChannelFPN, "resnet_checkpoint.pth"),
        (DenseNetWithChannelFPN, "dense_checkpoint.pth"),
        (EfficientNetWithChannelFPN, "effnet_checkpoint.pth"),
        (ViTWithChannelFPN, "vit_checkpoint.pth"),
        (SwinWithChannelFPN, "swin_checkpoint.pth"),
    ]
    deep_models = []
    for model_cls, checkpoint_path in model_specs:
        model = model_cls(num_classes=2).to(device)
        # Dummy optimizer for checkpoint loading. It is built from THIS model's
        # parameters: Optimizer.load_state_dict rejects a saved state whose
        # parameter-group sizes differ from the optimizer's, so one optimizer
        # created from the ResNet parameters cannot load the other checkpoints.
        # NOTE(review): assumes training optimised all model parameters in a
        # single group — confirm against the training script.
        optimizer = optim.Adam(model.parameters(), lr=1e-4)
        load_checkpoint(model, optimizer, checkpoint_path)
        deep_models.append(model)

    deep_preds = get_deep_ensemble_predictions(deep_models, test_loader, device)

    # -------------------------------
    # Merge Deep Predictions with Metadata
    # -------------------------------
    # Load metadata (must contain sample_id, n0, pt, label, etc.)
    meta_df = pd.read_csv("/kaggle/input/metadata/metadata.csv")
    # Assume the order of images in meta_df corresponds to the order in the test dataset.
    if len(deep_preds) != len(meta_df):
        raise ValueError(
            f"Got {len(deep_preds)} deep predictions for {len(meta_df)} metadata rows; "
            "the image lists and metadata.csv must describe the same samples."
        )
    meta_df["deep_ensemble_pred"] = deep_preds

    # Engineer additional meta features.
    meta_df["n0_pt_ratio"] = meta_df["n0"] / (meta_df["pt"] + 1e-6)
    meta_df["n0_plus_pt"] = meta_df["n0"] + meta_df["pt"]
    # With a single deep column this mean equals "deep_ensemble_pred"; kept so
    # the feature set and the output CSV columns stay unchanged.
    deep_cols = ["deep_ensemble_pred"]
    meta_df["meta_deep_avg"] = meta_df[deep_cols].mean(axis=1)

    # -------------------------------
    # Train Diverse Gradient Boosting Models on Meta Data
    # -------------------------------
    # Features used: deep ensemble prediction, n0, pt, and engineered features.
    features = ["deep_ensemble_pred", "n0", "pt", "n0_pt_ratio", "n0_plus_pt", "meta_deep_avg"]
    target = "label"  # Must exist in meta_df.

    X = meta_df[features]
    y = meta_df[target]
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train XGBoost.
    xgb_model = XGBClassifier(use_label_encoder=False, eval_metric="logloss", random_state=42)
    xgb_model.fit(X_train, y_train)
    xgb_preds = xgb_model.predict_proba(X_val)[:, 1]

    # Train CatBoost (suppress output).
    cat_model = CatBoostClassifier(silent=True, random_state=42)
    cat_model.fit(X_train, y_train)
    cat_preds = cat_model.predict_proba(X_val)[:, 1]

    # Train Histogram-based Gradient Boosting.
    hist_model = HistGradientBoostingClassifier(random_state=42)
    hist_model.fit(X_train, y_train)
    hist_preds = hist_model.predict_proba(X_val)[:, 1]

    # Ensemble the GBT predictions (average).
    gbt_ensemble_val = (xgb_preds + cat_preds + hist_preds) / 3.0
    print("Validation Accuracy of GBT Ensemble:",
          accuracy_score(y_val, (gbt_ensemble_val > 0.5).astype(int)))

    # Predict on the full meta data using the three models and average.
    # NOTE(review): X includes the 80% of rows the models were fitted on, so
    # the saved predictions for those rows are in-sample; only the validation
    # accuracy printed above is measured on held-out rows.
    xgb_full = xgb_model.predict_proba(X)[:, 1]
    cat_full = cat_model.predict_proba(X)[:, 1]
    hist_full = hist_model.predict_proba(X)[:, 1]
    gbt_ensemble_full = (xgb_full + cat_full + hist_full) / 3.0

    # -------------------------------
    # Final Ensemble: Average Deep Ensemble and GBT Ensemble Predictions
    # -------------------------------
    meta_df["final_pred_prob"] = (meta_df["deep_ensemble_pred"] + gbt_ensemble_full) / 2.0
    meta_df["final_prediction"] = (meta_df["final_pred_prob"] > 0.5).astype(int)

    # Save final predictions.
    meta_df.to_csv("final_ensemble_predictions.csv", index=False)
    print("Final ensemble predictions saved to final_ensemble_predictions.csv")

In [None]:
# Entry point: run the whole pipeline when this code is executed directly
# (as a script or notebook cell), but not when it is imported as a module.
if __name__ == "__main__":
    main_final()