**Group Name:** Group 10

**Members:** Wang Shenglin, Fu Qianli, Guo Hoi I

In [2]:
import os
import cv2
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import timm
from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from sklearn.metrics import precision_recall_fscore_support, accuracy_score, f1_score

# ==================== Config ====================
img_folder = "" # the path of test images folder
label_csv_path = "" # the path of CSV 


ensemble_config = [
    "checkpoints/convnextv2_large.fcmae_ft_in22k_in1k_384_best.pth:convnextv2_large.fcmae_ft_in22k_in1k_384:384:1.0",
    "checkpoints/tf_efficientnet_b7_best.pth:tf_efficientnet_b7.ns_jft_in1k:384:1.0",
    "checkpoints/tf_efficientnet_b5_best.pth:tf_efficientnet_b5.ns_jft_in1k:512:1.2",
    "checkpoints/deit3_large_patch16_384.fb_in22k_ft_in1k_best.pth:deit3_large_patch16_384.fb_in22k_ft_in1k:384:0.8"
]

tta = 8
threshold = 0.5
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# ==========================================================
# =============== Dataset + DataLoader ======================
class AIGCDataset(Dataset):
    def __init__(self, df, img_dir):
        self.df = df
        self.img_dir = img_dir

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_id = row["ID"]
        label = row["label"]

        img_path = os.path.join(self.img_dir, str(img_id) + ".jpg")

        return {
            "img_path": img_path,
            "id": img_id,
            "label": int(label)
        }


@torch.no_grad()
def predict_tta(model, img, transform, tta_times=8):
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    preds = []

    for i in range(tta_times):
        if i == 0:
            aug = img
        elif i == 1:
            aug = cv2.flip(img, 1)
        elif i == 2:
            aug = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
        elif i == 3:
            aug = cv2.rotate(img, cv2.ROTATE_180)
        elif i == 4:
            aug = cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE)
        elif i == 5:
            aug = cv2.flip(img, 0)
        elif i == 6:
            aug = cv2.flip(img, -1)
        else:
            aug = img

        augmented = transform(image=aug)
        x = augmented["image"].unsqueeze(0).to(device)

        logit = model(x)
        prob = torch.sigmoid(logit).item()
        preds.append(prob)

    return np.mean(preds)


# -----------------------------------------------------------
# ------------------- Model Loading ------------------------------
def load_model(ckpt_path, model_name, size):
    if "tf_efficientnet_b5" in model_name:
        model_name = 'tf_efficientnet_b5'
    elif "tf_efficientnet_b7" in model_name:
        model_name = 'tf_efficientnet_b7'

    model = timm.create_model(model_name, pretrained=False, num_classes=1)
    
    state_dict = torch.load(ckpt_path, map_location="cpu")
    if "model" in state_dict:
        state_dict = state_dict["model"]
    if "state_dict" in state_dict:
        state_dict = state_dict["state_dict"]

    if list(state_dict.keys())[0].startswith("module."):
        state_dict = {k[7:]: v for k, v in state_dict.items()}

    model.load_state_dict(state_dict, strict=True)
    model.to(device)
    model.eval()

    transform = A.Compose([
        A.Resize(size, size),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ])
    return model, transform


# ==========================================================
# ----------------------- Main Train---------------------------
print("Loading ground truth CSV...")
gt_df = pd.read_csv(label_csv_path, dtype={"ID": str})
dataset = AIGCDataset(gt_df, img_folder)
loader = DataLoader(dataset, batch_size=64, shuffle=False, num_workers=4)

models = []
transforms = []
weights = []
print("Loading models...")

for item in ensemble_config:
    ckpt, name, sz, w = item.split(':')
    sz = int(sz)
    w = float(w)
    print(f" → {os.path.basename(ckpt)} | {name} | {sz} | weight={w}")

    model, transform = load_model(ckpt, name, sz)
    models.append(model)
    transforms.append(transform)
    weights.append(w)

weights = np.array(weights)
weights = weights / weights.sum()
print("Normalized weights:", weights)


results = []

print("\nStarting inference with DataLoader ...\n")
for batch in tqdm(loader):
    img_path = batch["img_path"][0]
    img_id = batch["id"][0]
    true_label = int(batch["label"][0])

    img = cv2.imread(img_path)
    if img is None:
        print(f"Image read failed: {img_path}")
        pred_prob = 0.5
    else:
        probs = []
        for model, transform, w in zip(models, transforms, weights):
            prob = predict_tta(model, img, transform, tta)
            probs.append(prob * w)
        pred_prob = sum(probs)

    pred_label = 1 if pred_prob > threshold else 0

    results.append({
        "ID": img_id,
        "true_label": true_label,
        "pred_label": pred_label,
        "pred_prob": pred_prob
    })

result_df = pd.DataFrame(results)

# ================== Metric ==================
y_true = result_df["true_label"].values
y_pred = result_df["pred_label"].values

acc = accuracy_score(y_true, y_pred)
precision, recall, f1, _ = precision_recall_fscore_support(
    y_true, y_pred, average="binary", zero_division=0
)

final_score = 0.3 * precision + 0.3 * recall + 0.4 * f1

print("\n================ Final Scores ================")
print(f"Accuracy : {acc:.6f}")
print(f"Precision : {precision:.6f}")
print(f"Recall : {recall:.6f}")
print(f"F1 Score : {f1:.6f}")
print(f"→→ Final Score = {final_score:.6f} ←←")
print("=============================================\n")

Using device: cuda
Loading ground truth CSV...
Loading models...
 → convnextv2_large.fcmae_ft_in22k_in1k_384_best.pth | convnextv2_large.fcmae_ft_in22k_in1k_384 | 384 | weight=1.0


  state_dict = torch.load(ckpt_path, map_location="cpu")


 → tf_efficientnet_b7_best_retrain.pth | tf_efficientnet_b7.ns_jft_in1k | 384 | weight=1.0
 → tf_efficientnet_b5_best.pth | tf_efficientnet_b5.ns_jft_in1k | 512 | weight=1.2
 → deit3_large_patch16_384.fb_in22k_ft_in1k_best.pth | deit3_large_patch16_384.fb_in22k_ft_in1k | 384 | weight=0.8
Normalized weights: [0.25 0.25 0.3  0.2 ]

Starting inference with DataLoader ...



100%|█████████████████████████████████████████| 157/157 [00:50<00:00,  3.09it/s]


Accuracy : 1.000000
Precision : 1.000000
Recall : 1.000000
F1 Score : 1.000000
→→ Final Score = 1.000000 ←←




