In [1]:
import sys
import os
import random
import json
import gc
import cv2
import pandas as pd
import numpy as np
import tensorflow as tf

from tqdm import tqdm
from PIL import Image
from sklearn.metrics import accuracy_score
from functools import partial
from albumentations import (Compose, OneOf, Normalize, Resize, RandomResizedCrop, RandomCrop, CenterCrop, 
                            HorizontalFlip, VerticalFlip, Rotate, ShiftScaleRotate, Transpose)
from albumentations.pytorch import ToTensorV2
from albumentations import ImageOnlyTransform

from tensorflow import keras

sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')

import timm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from torch.utils.data import DataLoader, Dataset

2025-05-25 12:17:42.246372: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748175462.436451      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748175462.489907      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
  data = fetch_version_info()


In [2]:
# Root of the competition dataset and the folder holding the images to score.
path = "/kaggle/input/cassava-leaf-disease-classification/"
image_path = f"{path}test_images/"

# Size handed to tf.image.resize / tf.reshape by image_augmentations.
IMAGE_SIZE = (512, 512)

# One row per file found in the test folder; every label starts at 0.
submission_df = pd.DataFrame({"image_id": os.listdir(image_path)})
submission_df["label"] = 0

In [3]:
def _fold_paths(template):
    """Expand ``template`` (containing ``{fold}``) into the paths for folds 0-4."""
    return [template.format(fold=fold) for fold in range(5)]


# Registry of PyTorch checkpoints. Each inference cell is gated by
# `if "<key>" in used_models_pytorch`, so a key that does not match its gate
# (for example "noefficientnet" versus the "efficientnet" gate) disables that cell.
used_models_pytorch = {
    "noefficientnet": _fold_paths('../input/efficientnet-mixup-512/efficientnetv2_rw_s_fold{fold}_best.pth'),
    # "convnext":[f'../input/convnext-mixup/convnext_tiny_fold{fold}_best.pth' for fold in [0,1,2,3,4]],
    "newconvnext": ['../input/convnextv2-small/convnextv2_small_cassava_8903.pth'],
    # "noresnext_old": [f'../input/resnext50/resnext50_32x4d_fold{fold}_best.pth' for fold in [0,1,2,3,4]],
    "resnext_new": _fold_paths('../input/testrestnet/resnext50_32x4d_fold{fold}_best.pth'),
    "tf_efficientnetv2_xl": _fold_paths(
        "../input/5fold-effxl-2/eff_fold{fold}/home/ccwang/dennis/dennislin0906/cvdl-final/5fold/tf_efficientnetv2_xl_fold{fold}_best.pth"
    ),
    "tf_efficientnetv2_l_in21k": _fold_paths(
        "../input/5fold-eff-l-v2/eff_fold{fold}/home/ccwang/dennis/dennislin0906/cvdl-final/v2_l_5fold/tf_efficientnetv2_l_fold{fold}_best.pth"
    ),
    "swintransformer": ["../input/swintransformer/swin_base_cassava_8857.pth"],
}

# Registry of Keras / TF-Hub model locations.
used_models_keras = {"mobilenet": "../input/mobilenet/1426ba4824f415259798eb0bd7379e39cb679725"}


In [4]:

class ConvnextModel(nn.Module):
    """timm backbone whose ``head.fc`` layer is swapped for a 5-output linear layer.

    Args:
        model_name: Name passed to ``timm.create_model``.
        pretrained: Forwarded to ``timm.create_model``.
    """

    def __init__(self, model_name='convnext_tiny', pretrained=False):
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=pretrained)
        # Keep the input width of the existing head, change only the output size.
        backbone.head.fc = nn.Linear(backbone.head.fc.in_features, 5)
        self.model = backbone

    def forward(self, x):
        """Return the backbone's output for batch ``x``."""
        return self.model(x)

class TestDataset(Dataset):
    """Inference-only dataset that yields one RGB image per row of ``df``.

    Args:
        df: DataFrame with an ``image_path_id`` column holding image file paths.
        transform: Optional callable invoked as ``transform(image=...)``; its
            ``'image'`` entry is returned instead of the raw array.

    Raises:
        FileNotFoundError: From ``__getitem__`` when OpenCV cannot read the file.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['image_path_id'].values
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        file_name = self.file_names[idx]
        image = cv2.imread(file_name)
        if image is None:
            # cv2.imread reports a missing or undecodable file by returning None;
            # fail here with the path instead of deep inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {file_name}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            image = self.transform(image=image)['image']
        return image

if "convnext" in used_models_pytorch:
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def get_transforms():
        """Resize to 512x512, apply random transpose/flips (p=0.5 each), normalize, convert to tensor."""
        return Compose([Resize(512, 512),
                        Transpose(p=0.5),
                        HorizontalFlip(p=0.5),
                        VerticalFlip(p=0.5),
                        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                        ToTensorV2()])

    def inference(model, states, test_loader, device):
        """Return softmax probabilities averaged over every checkpoint in ``states``.

        Args:
            model: Network whose weights are overwritten by each checkpoint in turn.
            states: Loaded checkpoints; each stores its weights under ``'model'``.
            test_loader: Iterable yielding image batches.
            device: Device the model and each batch are moved to.

        Returns:
            Array with one row of class probabilities per image, in loader order.
        """
        model.to(device)
        model.eval()  # load_state_dict leaves the train/eval flag untouched

        probabilities = []
        for images in test_loader:
            images = images.to(device)
            fold_probs = []
            for state in states:
                model.load_state_dict(state['model'])
                with torch.no_grad():
                    logits = model(images)
                fold_probs.append(logits.softmax(1).to('cpu').numpy())
            probabilities.append(np.mean(fold_probs, axis=0))
        return np.concatenate(probabilities)

    predictions_convnext = pd.DataFrame(columns=("image_id",))
    predictions_convnext["image_id"] = submission_df["image_id"].values
    predictions_convnext['image_path_id'] = image_path + predictions_convnext['image_id'].astype(str)

    model = ConvnextModel('convnext_tiny', pretrained=False)
    # map_location keeps the load working when GPU-saved checkpoints are read on a CPU-only host.
    states = [torch.load(f, map_location='cpu') for f in used_models_pytorch["convnext"]]

    test_dataset = TestDataset(predictions_convnext, transform=get_transforms())
    test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=4, pin_memory=True)
    predictions = inference(model, states, test_loader, device)

    predictions_convnext['convnext'] = [np.squeeze(p) for p in predictions]
    predictions_convnext = predictions_convnext.drop(["image_path_id"], axis=1)

    # Drop the references first: empty_cache can only release memory that is
    # no longer held by a live tensor.
    del model, states
    gc.collect()
    torch.cuda.empty_cache()

In [5]:
# class coatnet(nn.Module):
#     def __init__(self, model_name='coatnet_0_rw_224.sw_in1k', pretrained=False):
#         super().__init__()
#         self.model = timm.create_model(model_name, pretrained=pretrained)
#         self.model.reset_classifier(num_classes=5)

#     def forward(self, x):
#         return self.model(x)

# class TestDataset(Dataset):
#     def __init__(self, df, transform=None):
#         self.df = df
#         self.file_names = df['image_path_id'].values
#         self.transform = transform

#     def __len__(self):
#         return len(self.df)

#     def __getitem__(self, idx):
#         file_name = self.file_names[idx]
#         image = cv2.imread(file_name)
#         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
#         if self.transform:
#             augmented = self.transform(image=image)
#             image = augmented['image']
#         return image

# if "coatnet" in used_models_pytorch:
#     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

#     def get_transforms():
#         return Compose([Resize(224, 224),
#                         Transpose(p=0.5),
#                         HorizontalFlip(p=0.5),
#                         VerticalFlip(p=0.5),
#                         Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
#                         ToTensorV2()])

#     def inference(model, states, test_loader, device):
#         model.to(device)

#         probabilities = []
#         for i, (images) in enumerate(test_loader):
#             images = images.to(device)
#             avg_preds = []
#             for state in states:
#                 model.load_state_dict(state['model'])
#                 model.eval()
#                 with torch.no_grad():
#                     y_preds = model(images)
#                 avg_preds.append(y_preds.softmax(1).to('cpu').numpy())
#             avg_preds = np.mean(avg_preds, axis=0)
#             probabilities.append(avg_preds)
#         return np.concatenate(probabilities)
    

#     predictions_coatnet = pd.DataFrame(columns=("image_id",))
#     predictions_coatnet["image_id"] = submission_df["image_id"].values
#     predictions_coatnet['image_path_id'] = image_path + predictions_coatnet['image_id'].astype(str)

#     model = coatnet('coatnet_0_rw_224.sw_in1k', pretrained=False)
#     states = [torch.load(f) for f in used_models_pytorch["coatnet"]]

#     test_dataset = TestDataset(predictions_coatnet, transform=get_transforms())
#     test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=4, pin_memory=True)
#     predictions = inference(model, states, test_loader, device)

#     predictions_coatnet['coatnet'] = [np.squeeze(p) for p in predictions]
#     predictions_coatnet = predictions_coatnet.drop(["image_path_id"], axis=1)
    

#     torch.cuda.empty_cache()
#     try:
#         del(model)
#         del(states)
#     except:
#         pass
#     gc.collect()

In [6]:
class EfficientNetV2(nn.Module):
    """timm backbone with its classification layer replaced by a 5-output linear layer.

    The layer to replace is located by attribute: ``model.classifier`` if the
    backbone has one, otherwise ``model.head.fc``.

    Args:
        model_name: Name passed to ``timm.create_model``.
        pretrained: Forwarded to ``timm.create_model``.

    Raises:
        NotImplementedError: If the backbone exposes neither attribute.
    """

    def __init__(self, model_name='efficientnetv2_rw_s', pretrained=False):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained)

        # Choose the head according to the backbone's layout. `head` is fetched
        # with a default so a backbone without it reaches the NotImplementedError
        # below instead of failing with AttributeError.
        head = getattr(self.model, 'head', None)
        if hasattr(self.model, 'classifier'):  # e.g., efficientnetv2
            n_features = self.model.classifier.in_features
            self.model.classifier = nn.Linear(n_features, 5)
        elif hasattr(head, 'fc'):  # e.g., convnext
            n_features = head.fc.in_features
            head.fc = nn.Linear(n_features, 5)
        else:
            raise NotImplementedError("Unsupported model structure for head replacement.")

    def forward(self, x):
        """Return the backbone's output for batch ``x``."""
        return self.model(x)
        
class TestDataset(Dataset):
    """Inference-only dataset that yields one RGB image per row of ``df``.

    Args:
        df: DataFrame with an ``image_path_id`` column holding image file paths.
        transform: Optional callable invoked as ``transform(image=...)``; its
            ``'image'`` entry is returned instead of the raw array.

    Raises:
        FileNotFoundError: From ``__getitem__`` when OpenCV cannot read the file.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['image_path_id'].values
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        file_name = self.file_names[idx]
        image = cv2.imread(file_name)
        if image is None:
            # cv2.imread reports a missing or undecodable file by returning None;
            # fail here with the path instead of deep inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {file_name}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            image = self.transform(image=image)['image']
        return image

if "efficientnet" in used_models_pytorch:
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def get_transforms():
        """Center-crop to 512x512, apply random transpose/flips (p=0.5 each), normalize, convert to tensor."""
        return Compose([CenterCrop(512,512),
                        Transpose(p=0.5),
                        HorizontalFlip(p=0.5),
                        VerticalFlip(p=0.5),
                        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                        ToTensorV2()])

    def inference(model, states, test_loader, device):
        """Return softmax probabilities averaged over every checkpoint in ``states``.

        Args:
            model: Network whose weights are overwritten by each checkpoint in turn.
            states: Loaded checkpoints; each stores its weights under ``'model'``.
            test_loader: Iterable yielding image batches.
            device: Device the model and each batch are moved to.

        Returns:
            Array with one row of class probabilities per image, in loader order.
        """
        model.to(device)
        model.eval()  # load_state_dict leaves the train/eval flag untouched

        probabilities = []
        for images in test_loader:
            images = images.to(device)
            fold_probs = []
            for state in states:
                model.load_state_dict(state['model'])
                with torch.no_grad():
                    logits = model(images)
                fold_probs.append(logits.softmax(1).to('cpu').numpy())
            probabilities.append(np.mean(fold_probs, axis=0))
        return np.concatenate(probabilities)

    predictions_efficient = pd.DataFrame(columns=("image_id",))
    predictions_efficient["image_id"] = submission_df["image_id"].values
    predictions_efficient['image_path_id'] = image_path + predictions_efficient['image_id'].astype(str)

    model = EfficientNetV2('efficientnetv2_rw_s', pretrained=False)
    # map_location keeps the load working when GPU-saved checkpoints are read on a CPU-only host.
    states = [torch.load(f, map_location='cpu') for f in used_models_pytorch["efficientnet"]]

    test_dataset = TestDataset(predictions_efficient, transform=get_transforms())
    test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=4, pin_memory=True)
    predictions = inference(model, states, test_loader, device)

    predictions_efficient['efficientnet'] = [np.squeeze(p) for p in predictions]
    predictions_efficient = predictions_efficient.drop(["image_path_id"], axis=1)

    # Drop the references first: empty_cache can only release memory that is
    # no longer held by a live tensor.
    del model, states
    gc.collect()
    torch.cuda.empty_cache()

In [7]:
# ====================================================
# model initialization
# ====================================================

class CustomResNext(nn.Module):
    """timm backbone whose ``fc`` layer is swapped for a 5-output linear layer.

    Args:
        model_name: Name passed to ``timm.create_model``.
        pretrained: Forwarded to ``timm.create_model``.
    """

    def __init__(self, model_name='resnext50_32x4d.a1h_in1k', pretrained=False):
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=pretrained)
        # Same input width as the existing fc layer, 5 outputs.
        backbone.fc = nn.Linear(backbone.fc.in_features, 5)
        self.model = backbone

    def forward(self, x):
        """Return the backbone's output for batch ``x``."""
        return self.model(x)
        
class TestDataset(Dataset):
    """Inference-only dataset that yields one RGB image per row of ``df``.

    Args:
        df: DataFrame with an ``image_path_id`` column holding image file paths.
        transform: Optional callable invoked as ``transform(image=...)``; its
            ``'image'`` entry is returned instead of the raw array.

    Raises:
        FileNotFoundError: From ``__getitem__`` when OpenCV cannot read the file.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['image_path_id'].values
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        file_name = self.file_names[idx]
        image = cv2.imread(file_name)
        if image is None:
            # cv2.imread reports a missing or undecodable file by returning None;
            # fail here with the path instead of deep inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {file_name}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            image = self.transform(image=image)['image']
        return image


In [8]:
if "resnext_old" in used_models_pytorch:
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def get_transforms():
        """Resize to 512x512, apply random transpose/flips (p=0.5 each), normalize, convert to tensor."""
        return Compose([Resize(512, 512),
                        Transpose(p=0.5),
                        HorizontalFlip(p=0.5),
                        VerticalFlip(p=0.5),
                        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                        ToTensorV2()])

    def inference(model, states, test_loader, device):
        """Return softmax probabilities averaged over every checkpoint in ``states``.

        Args:
            model: Network whose weights are overwritten by each checkpoint in turn.
            states: Loaded checkpoints; each stores its weights under ``'model'``.
            test_loader: Iterable yielding image batches.
            device: Device the model and each batch are moved to.

        Returns:
            Array with one row of class probabilities per image, in loader order.
        """
        model.to(device)
        model.eval()  # load_state_dict leaves the train/eval flag untouched

        probabilities = []
        for images in test_loader:
            images = images.to(device)
            fold_probs = []
            for state in states:
                model.load_state_dict(state['model'])
                with torch.no_grad():
                    logits = model(images)
                fold_probs.append(logits.softmax(1).to('cpu').numpy())
            probabilities.append(np.mean(fold_probs, axis=0))
        return np.concatenate(probabilities)

    predictions_resnext_old = pd.DataFrame(columns=("image_id",))
    predictions_resnext_old["image_id"] = submission_df["image_id"].values
    predictions_resnext_old['image_path_id'] = image_path + predictions_resnext_old['image_id'].astype(str)

    model = CustomResNext('resnext50_32x4d', pretrained=False)
    # map_location keeps the load working when GPU-saved checkpoints are read on a CPU-only host.
    states = [torch.load(f, map_location='cpu') for f in used_models_pytorch["resnext_old"]]

    test_dataset = TestDataset(predictions_resnext_old, transform=get_transforms())
    test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=4, pin_memory=True)
    predictions = inference(model, states, test_loader, device)

    predictions_resnext_old['resnext_old'] = [np.squeeze(p) for p in predictions]
    predictions_resnext_old = predictions_resnext_old.drop(["image_path_id"], axis=1)

    # Drop the references first: empty_cache can only release memory that is
    # no longer held by a live tensor.
    del model, states
    gc.collect()
    torch.cuda.empty_cache()

In [9]:
if "resnext_new" in used_models_pytorch:
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def get_transforms():
        """Resize to 512x512, apply random transpose/flips (p=0.5 each), normalize, convert to tensor."""
        return Compose([Resize(512, 512),
                        Transpose(p=0.5),
                        HorizontalFlip(p=0.5),
                        VerticalFlip(p=0.5),
                        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                        ToTensorV2()])

    def inference(model, states, test_loader, device):
        """Return softmax probabilities averaged over every checkpoint in ``states``.

        Args:
            model: Network whose weights are overwritten by each checkpoint in turn.
            states: Loaded checkpoints; each stores its weights under ``'model'``.
            test_loader: Iterable yielding image batches.
            device: Device the model and each batch are moved to.

        Returns:
            Array with one row of class probabilities per image, in loader order.
        """
        model.to(device)
        model.eval()  # load_state_dict leaves the train/eval flag untouched

        probabilities = []
        for images in test_loader:
            images = images.to(device)
            fold_probs = []
            for state in states:
                model.load_state_dict(state['model'])
                with torch.no_grad():
                    logits = model(images)
                fold_probs.append(logits.softmax(1).to('cpu').numpy())
            probabilities.append(np.mean(fold_probs, axis=0))
        return np.concatenate(probabilities)

    predictions_resnext_new = pd.DataFrame(columns=("image_id",))
    predictions_resnext_new["image_id"] = submission_df["image_id"].values
    predictions_resnext_new['image_path_id'] = image_path + predictions_resnext_new['image_id'].astype(str)

    model = CustomResNext('resnext50_32x4d', pretrained=False)
    # map_location keeps the load working when GPU-saved checkpoints are read on a CPU-only host.
    states = [torch.load(f, map_location='cpu') for f in used_models_pytorch["resnext_new"]]

    test_dataset = TestDataset(predictions_resnext_new, transform=get_transforms())
    test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=4, pin_memory=True)
    predictions = inference(model, states, test_loader, device)

    predictions_resnext_new['resnext_new'] = [np.squeeze(p) for p in predictions]
    predictions_resnext_new = predictions_resnext_new.drop(["image_path_id"], axis=1)

    # Drop the references first: empty_cache can only release memory that is
    # no longer held by a live tensor.
    del model, states
    gc.collect()
    torch.cuda.empty_cache()

  states = [torch.load(f) for f in used_models_pytorch["resnext_new"]]


In [10]:
class EfficientNetClassifier(nn.Module):
    """``tf_efficientnetv2_xl`` feature extractor followed by a linear classifier.

    The backbone is created with ``features_only=True`` and ``out_indices=[-1]``,
    so it returns a single feature map; that map is average-pooled, flattened and
    projected to ``num_classes`` outputs.

    Args:
        num_classes: Width of the final linear layer.
        pretrained: Forwarded to ``timm.create_model``.
    """

    def __init__(self, num_classes: int = 5, pretrained: bool = True):
        super().__init__()
        backbone = timm.create_model(
            "tf_efficientnetv2_xl",
            pretrained=pretrained,
            features_only=True,
            out_indices=[-1],
        )
        self.eff = backbone
        # Channel count of the single returned feature map, as reported by timm.
        eff_ch = backbone.feature_info.channels()[-1]

        pooling = nn.AdaptiveAvgPool2d(1)
        projection = nn.Linear(eff_ch, num_classes)
        self.classifier = nn.Sequential(pooling, nn.Flatten(), projection)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes) for image batch ``x``."""
        feature_maps = self.eff(x)
        return self.classifier(feature_maps[0])


class TestDataset(Dataset):
    """Inference-only dataset that yields one RGB image per row of ``df``.

    Args:
        df: DataFrame with an ``image_path_id`` column holding image file paths.
        transform: Optional callable invoked as ``transform(image=...)``; its
            ``'image'`` entry is returned instead of the raw array.

    Raises:
        FileNotFoundError: From ``__getitem__`` when OpenCV cannot read the file.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['image_path_id'].values
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        file_name = self.file_names[idx]
        image = cv2.imread(file_name)
        if image is None:
            # cv2.imread reports a missing or undecodable file by returning None;
            # fail here with the path instead of deep inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {file_name}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            image = self.transform(image=image)['image']
        return image


In [11]:
def _clean_state_dict(state):
    """Strip a leading ``"module."`` from state-dict keys.

    Only the prefix is removed; a ``"module."`` that occurs later in a key is
    kept. An empty mapping, or one with no prefixed key, is returned unchanged
    (the same object).

    Args:
        state: Mapping from parameter name to value.

    Returns:
        ``state`` itself when nothing needs stripping, otherwise a new dict
        with the prefix removed from every key that carries it.
    """
    prefix = "module."
    if not any(key.startswith(prefix) for key in state):
        return state
    return {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in state.items()
    }



In [12]:
if "tf_efficientnetv2_xl" in used_models_pytorch:
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def get_transforms():
        """Resize to 512x512, apply random transpose/flips (p=0.5 each), normalize, convert to tensor."""
        return Compose([Resize(512, 512),
                        Transpose(p=0.5),
                        HorizontalFlip(p=0.5),
                        VerticalFlip(p=0.5),
                        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                        ToTensorV2()])

    def inference(model, states, test_loader, device):
        """Return softmax probabilities averaged over every checkpoint in ``states``.

        Args:
            model: Network whose weights are overwritten by each checkpoint in turn.
            states: Checkpoint file paths; each file stores its weights under ``'model'``.
            test_loader: Iterable yielding image batches.
            device: Device the model and each batch are moved to.

        Returns:
            Array with one row of class probabilities per image, in loader order.
        """
        # Read each checkpoint from disk exactly once. Loading inside the batch
        # loop would re-read every file for every batch.
        state_dicts = [_clean_state_dict(torch.load(path, map_location="cpu")["model"])
                       for path in states]

        model.to(device)
        model.eval()  # load_state_dict leaves the train/eval flag untouched

        probabilities = []
        for images in test_loader:
            images = images.to(device)
            fold_probs = []
            for weights in state_dicts:
                model.load_state_dict(weights, strict=False)
                with torch.no_grad():
                    logits = model(images)
                fold_probs.append(logits.softmax(1).to('cpu').numpy())
            probabilities.append(np.mean(fold_probs, axis=0))
        return np.concatenate(probabilities)

    predictions_test = pd.DataFrame(columns=("image_id",))
    predictions_test["image_id"] = submission_df["image_id"].values
    predictions_test['image_path_id'] = image_path + predictions_test['image_id'].astype(str)

    model = EfficientNetClassifier(pretrained=False)
    states = [f for f in used_models_pytorch["tf_efficientnetv2_xl"]]

    test_dataset = TestDataset(predictions_test, transform=get_transforms())
    test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=4, pin_memory=True)
    predictions = inference(model, states, test_loader, device)

    predictions_test['tf_efficientnetv2_xl'] = [np.squeeze(p) for p in predictions]
    predictions_test = predictions_test.drop(["image_path_id"], axis=1)

    # Drop the references first: empty_cache can only release memory that is
    # no longer held by a live tensor.
    del model, states
    gc.collect()
    torch.cuda.empty_cache()

  ckpt = torch.load(state, map_location="cpu")


In [13]:
class TestDataset(Dataset):
    """Inference-only dataset that yields one RGB image per row of ``df``.

    Args:
        df: DataFrame with an ``image_path_id`` column holding image file paths.
        transform: Optional callable invoked as ``transform(image=...)``; its
            ``'image'`` entry is returned instead of the raw array.

    Raises:
        FileNotFoundError: From ``__getitem__`` when OpenCV cannot read the file.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['image_path_id'].values
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        file_name = self.file_names[idx]
        image = cv2.imread(file_name)
        if image is None:
            # cv2.imread reports a missing or undecodable file by returning None;
            # fail here with the path instead of deep inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {file_name}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            image = self.transform(image=image)['image']
        return image

if "swintransformer" in used_models_pytorch:
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def get_transforms():
        """Resize to 384x384, normalize, convert to tensor (the random flips are disabled here)."""
        return Compose([Resize(384, 384),
                        # Transpose(p=0.5),
                        # HorizontalFlip(p=0.5),
                        # VerticalFlip(p=0.5),
                        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                        ToTensorV2()])

    def inference(model, states, test_loader, device):
        """Return softmax probabilities averaged over every checkpoint in ``states``.

        Args:
            model: Network whose weights are overwritten by each checkpoint in turn.
            states: Checkpoint file paths; each file is used directly as a state dict.
            test_loader: Iterable yielding image batches.
            device: Device the model and each batch are moved to.

        Returns:
            Array with one row of class probabilities per image, in loader order.
        """
        # Read each checkpoint from disk exactly once. Loading inside the batch
        # loop would re-read every file for every batch.
        state_dicts = [_clean_state_dict(torch.load(path, map_location="cpu"))
                       for path in states]

        model.to(device)
        model.eval()  # load_state_dict leaves the train/eval flag untouched

        probabilities = []
        for images in test_loader:
            images = images.to(device)
            fold_probs = []
            for weights in state_dicts:
                model.load_state_dict(weights, strict=False)
                with torch.no_grad():
                    logits = model(images)
                fold_probs.append(logits.softmax(1).to('cpu').numpy())
            probabilities.append(np.mean(fold_probs, axis=0))
        return np.concatenate(probabilities)

    predictions_swintransformer = pd.DataFrame(columns=("image_id",))
    predictions_swintransformer["image_id"] = submission_df["image_id"].values
    predictions_swintransformer['image_path_id'] = image_path + predictions_swintransformer['image_id'].astype(str)

    model = timm.create_model("swin_base_patch4_window12_384", pretrained=False, num_classes=5)
    states = [f for f in used_models_pytorch["swintransformer"]]

    test_dataset = TestDataset(predictions_swintransformer, transform=get_transforms())
    test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=4, pin_memory=True)
    predictions = inference(model, states, test_loader, device)

    predictions_swintransformer['swintransformer'] = [np.squeeze(p) for p in predictions]
    predictions_swintransformer = predictions_swintransformer.drop(["image_path_id"], axis=1)

    # Drop the references first: empty_cache can only release memory that is
    # no longer held by a live tensor.
    del model, states
    gc.collect()
    torch.cuda.empty_cache()

  ckpt = torch.load(state, map_location="cpu")


In [14]:
class TestDataset(Dataset):
    """Inference-only dataset that yields one RGB image per row of ``df``.

    Args:
        df: DataFrame with an ``image_path_id`` column holding image file paths.
        transform: Optional callable invoked as ``transform(image=...)``; its
            ``'image'`` entry is returned instead of the raw array.

    Raises:
        FileNotFoundError: From ``__getitem__`` when OpenCV cannot read the file.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['image_path_id'].values
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        file_name = self.file_names[idx]
        image = cv2.imread(file_name)
        if image is None:
            # cv2.imread reports a missing or undecodable file by returning None;
            # fail here with the path instead of deep inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {file_name}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            image = self.transform(image=image)['image']
        return image

if "newconvnext" in used_models_pytorch:
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def get_transforms():
        """Resize to 512x512, apply random transpose/flips (p=0.5 each), normalize, convert to tensor."""
        return Compose([Resize(512, 512),
                        Transpose(p=0.5),
                        HorizontalFlip(p=0.5),
                        VerticalFlip(p=0.5),
                        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                        ToTensorV2()])

    def inference(model, states, test_loader, device):
        """Return softmax probabilities averaged over every checkpoint in ``states``.

        Prints one line per checkpoint whose keys do not line up with ``model``,
        because ``strict=False`` would otherwise ignore them silently.

        Args:
            model: Network whose weights are overwritten by each checkpoint in turn.
            states: Checkpoint file paths; each file is used directly as a state dict.
            test_loader: Iterable yielding image batches.
            device: Device the model and each batch are moved to.

        Returns:
            Array with one row of class probabilities per image, in loader order.
        """
        # Read each checkpoint from disk exactly once. Loading inside the batch
        # loop would re-read every file for every batch.
        state_dicts = []
        for path in states:
            weights = _clean_state_dict(torch.load(path, map_location="cpu"))
            report = model.load_state_dict(weights, strict=False)
            if report.missing_keys or report.unexpected_keys:
                print(f"[newconvnext] {path}: {len(report.missing_keys)} missing and "
                      f"{len(report.unexpected_keys)} unexpected keys were ignored (strict=False)")
            state_dicts.append(weights)

        model.to(device)
        model.eval()  # load_state_dict leaves the train/eval flag untouched

        probabilities = []
        for images in test_loader:
            images = images.to(device)
            fold_probs = []
            for weights in state_dicts:
                model.load_state_dict(weights, strict=False)
                with torch.no_grad():
                    logits = model(images)
                fold_probs.append(logits.softmax(1).to('cpu').numpy())
            probabilities.append(np.mean(fold_probs, axis=0))
        return np.concatenate(probabilities)

    predictions_newconvnext = pd.DataFrame(columns=("image_id",))
    predictions_newconvnext["image_id"] = submission_df["image_id"].values
    predictions_newconvnext['image_path_id'] = image_path + predictions_newconvnext['image_id'].astype(str)

    # NOTE(review): the registered checkpoint file is named "convnextv2_small..."
    # while the architecture built here is "convnextv2_tiny" - confirm they match.
    model = timm.create_model("convnextv2_tiny", pretrained=False, num_classes=5)
    states = [f for f in used_models_pytorch["newconvnext"]]

    test_dataset = TestDataset(predictions_newconvnext, transform=get_transforms())
    test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=4, pin_memory=True)
    predictions = inference(model, states, test_loader, device)

    predictions_newconvnext['newconvnext'] = [np.squeeze(p) for p in predictions]
    predictions_newconvnext = predictions_newconvnext.drop(["image_path_id"], axis=1)

    # Drop the references first: empty_cache can only release memory that is
    # no longer held by a live tensor.
    del model, states
    gc.collect()
    torch.cuda.empty_cache()

  ckpt = torch.load(state, map_location="cpu")


In [15]:
class GeM(nn.Module):
    """Generalized-mean pooling over the two trailing dimensions.

    Computes ``avg_pool(clamp(x, min=eps) ** p) ** (1 / p)`` with an output
    size of 1.

    Args:
        p: Initial (or fixed) exponent.
        eps: Lower clamp applied to the input before the power.
        trainable: If True, ``p`` is an ``nn.Parameter``; otherwise it is a
            fixed, non-persistent buffer.
    """

    def __init__(self, p=3.0, eps=1e-6, trainable=True):
        super().__init__()
        self.eps = eps
        if trainable:
            self.p = nn.Parameter(torch.ones(1) * p)  # learnable exponent
        else:
            # A buffer follows the module through .to()/.cuda(), unlike a plain
            # tensor attribute. persistent=False keeps it out of state_dict, so
            # the checkpoint key set is unchanged.
            self.register_buffer("p", torch.tensor([p]), persistent=False)

    def forward(self, x):
        # x shape: (B, C, H, W)
        return F.adaptive_avg_pool2d(x.clamp(min=self.eps).pow(self.p), 1).pow(1.0 / self.p)

    def __repr__(self):
        return f"{self.__class__.__name__}(p={self.p.data.tolist()[0]:.4f}, eps={self.eps})"

class EfficientNetClassifier(nn.Module):
    """``tf_efficientnetv2_l_in21k`` features, GeM pooling and a two-layer head.

    The backbone is created with ``features_only=True`` and ``out_indices=[-1]``,
    so it returns a single feature map. That map is GeM-pooled, flattened and
    passed through Linear(512) -> SiLU -> Dropout(0.3) -> Linear(num_classes).

    Args:
        num_classes: Width of the final linear layer.
        pretrained: Forwarded to ``timm.create_model``.
    """

    def __init__(self, num_classes: int = 5, pretrained: bool = True):
        super().__init__()
        backbone = timm.create_model(
            "tf_efficientnetv2_l_in21k",
            pretrained=pretrained,
            drop_path_rate=0.2,
            features_only=True,
            out_indices=[-1],
        )
        self.eff = backbone
        # Channel count of the single returned feature map, as reported by timm.
        eff_ch = backbone.feature_info.channels()[-1]

        # GeM global pooling with a learnable exponent.
        self.pool = GeM(p=3.0, trainable=True)

        hidden = nn.Linear(eff_ch, 512)
        output = nn.Linear(512, num_classes)
        self.head = nn.Sequential(hidden, nn.SiLU(), nn.Dropout(0.3), output)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes) for image batch ``x``."""
        feature_map = self.eff(x)[0]
        pooled = self.pool(feature_map).flatten(1)
        return self.head(pooled)

class TestDataset(Dataset):
    """Inference-only dataset that yields one RGB image per row of ``df``.

    Args:
        df: DataFrame with an ``image_path_id`` column holding image file paths.
        transform: Optional callable invoked as ``transform(image=...)``; its
            ``'image'`` entry is returned instead of the raw array.

    Raises:
        FileNotFoundError: From ``__getitem__`` when OpenCV cannot read the file.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['image_path_id'].values
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        file_name = self.file_names[idx]
        image = cv2.imread(file_name)
        if image is None:
            # cv2.imread reports a missing or undecodable file by returning None;
            # fail here with the path instead of deep inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {file_name}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            image = self.transform(image=image)['image']
        return image

if "tf_efficientnetv2_l_in21k" in used_models_pytorch:
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def get_transforms():
        """Resize to 512x512, apply random transpose/flips (p=0.5 each), normalize, convert to tensor."""
        return Compose([Resize(512,512),
                        Transpose(p=0.5),
                        HorizontalFlip(p=0.5),
                        VerticalFlip(p=0.5),
                        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                        ToTensorV2()])

    def inference(model, states, test_loader, device):
        """Return softmax probabilities averaged over every checkpoint in ``states``.

        Args:
            model: Network whose weights are overwritten by each checkpoint in turn.
            states: Checkpoint file paths; each file stores its weights under ``'model'``.
            test_loader: Iterable yielding image batches.
            device: Device the model and each batch are moved to.

        Returns:
            Array with one row of class probabilities per image, in loader order.
        """
        # Read each checkpoint from disk exactly once. Loading inside the batch
        # loop would re-read every file for every batch.
        state_dicts = [_clean_state_dict(torch.load(path, map_location="cpu")["model"])
                       for path in states]

        model.to(device)
        model.eval()  # load_state_dict leaves the train/eval flag untouched

        probabilities = []
        for images in test_loader:
            images = images.to(device)
            fold_probs = []
            for weights in state_dicts:
                model.load_state_dict(weights, strict=False)
                with torch.no_grad():
                    logits = model(images)
                fold_probs.append(logits.softmax(1).to('cpu').numpy())
            probabilities.append(np.mean(fold_probs, axis=0))
        return np.concatenate(probabilities)

    predictions_tf_efficientnetv2_l_in21k = pd.DataFrame(columns=("image_id",))
    predictions_tf_efficientnetv2_l_in21k["image_id"] = submission_df["image_id"].values
    predictions_tf_efficientnetv2_l_in21k['image_path_id'] = image_path + predictions_tf_efficientnetv2_l_in21k['image_id'].astype(str)

    model = EfficientNetClassifier(pretrained=False)
    states = [f for f in used_models_pytorch["tf_efficientnetv2_l_in21k"]]

    test_dataset = TestDataset(predictions_tf_efficientnetv2_l_in21k, transform=get_transforms())
    test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=4, pin_memory=True)
    predictions = inference(model, states, test_loader, device)

    predictions_tf_efficientnetv2_l_in21k['tf_efficientnetv2_l_in21k'] = [np.squeeze(p) for p in predictions]
    predictions_tf_efficientnetv2_l_in21k = predictions_tf_efficientnetv2_l_in21k.drop(["image_path_id"], axis=1)

    # Drop the references first: empty_cache can only release memory that is
    # no longer held by a live tensor.
    del model, states
    gc.collect()
    torch.cuda.empty_cache()

  model = create_fn(
  ckpt = torch.load(state, map_location="cpu")


In [16]:
import tensorflow as tf
import tensorflow_hub as hub

def build_mobilenet3(img_size=(224, 224), weights="../input/mobilenet/1426ba4824f415259798eb0bd7379e39cb679725"):
    """Wrap a TF-Hub model as a frozen Keras layer.

    Args:
        img_size: Two-element size; the layer's input shape is ``(img_size[0], img_size[1], 3)``.
        weights: Handle or path passed to ``hub.KerasLayer``.

    Returns:
        The ``hub.KerasLayer`` instance, created with ``trainable=False``.
    """
    first, second = img_size[0], img_size[1]
    # Load the Hub model with its weights frozen.
    return hub.KerasLayer(weights, input_shape=(first, second, 3), trainable=False)

In [17]:
def image_augmentations(image):
    """Randomly flip, transpose and rotate one image, then resize it to IMAGE_SIZE.

    Args:
        image: A single image tensor/array (assumed height x width x 3 -- the
            final reshape to ``[*IMAGE_SIZE, 3]`` requires three channels).

    Returns:
        The augmented image resized and reshaped to ``[*IMAGE_SIZE, 3]``.
    """
    # Both gates are drawn before the random flips, in this order.
    transpose_gate = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    rotation_gate = tf.random.uniform([], 0, 1.0, dtype=tf.float32)

    augmented = tf.image.random_flip_left_right(image)
    augmented = tf.image.random_flip_up_down(augmented)

    # Transpose with probability 0.25.
    if transpose_gate > 0.75:
        augmented = tf.image.transpose(augmented)

    # Each quarter of the unit interval selects 3, 2, 1 or 0 quarter turns.
    for threshold, quarter_turns in ((0.75, 3), (0.5, 2), (0.25, 1)):
        if rotation_gate > threshold:
            augmented = tf.image.rot90(augmented, k=quarter_turns)
            break

    resized = tf.image.resize(augmented, size=IMAGE_SIZE)
    return tf.reshape(resized, [*IMAGE_SIZE, 3])

def read_preprocess_file(img_path, normalize=False):
    """Load an image file into a float32 array.

    Args:
        img_path: Path of the image file to read.
        normalize: When true, pixel values are divided by 255.0 before the
            float32 cast; otherwise the raw values are kept.

    Returns:
        A tuple ``((size0, size1), pixels)`` where the first item holds the two
        entries of PIL's ``Image.size`` (width, height) and ``pixels`` is the
        image data as a ``np.float32`` array.
    """
    # Image.open is lazy and keeps the file handle open; the context manager
    # closes it as soon as the pixel data has been materialised by np.array().
    with Image.open(img_path) as image:
        original_size = (image.size[0], image.size[1])
        pixels = np.array(image)
    if normalize:
        pixels = pixels / 255.0
    return original_size, pixels.astype(np.float32)

def create_image_tiles(origin_dim, processed_img, crop_size=512):
    """Cut an image into four corner tiles plus one resized centre crop.

    Args:
        origin_dim: ``(width, height)`` of the image, as returned by
            ``read_preprocess_file``.
        processed_img: Image array indexed as ``[row, column, channel]``.
        crop_size: Edge length of every returned tile (default 512).

    Returns:
        ``np.ndarray`` of five tiles: the four (overlapping) corner crops in
        row-major order, followed by the centre crop resized to
        ``crop_size x crop_size``.

    Raises:
        ValueError: If the image is smaller than ``crop_size`` in either
            dimension, since the corner tiles could not all be full-size.
    """
    width, height = origin_dim[0], origin_dim[1]
    if width < crop_size or height < crop_size:
        raise ValueError(
            f"image of size {width}x{height} is smaller than crop_size={crop_size}")

    # Four corner patches; they overlap whenever the image is less than 2*crop_size wide/high.
    tiles = [
        processed_img[top:top + crop_size, left:left + crop_size, :]
        for top in (0, height - crop_size)
        for left in (0, width - crop_size)
    ]

    # Largest centred square. For an 800x600 image this is rows 0:600, columns 100:700,
    # i.e. exactly the previously hard-coded slice; other sizes are no longer stretched.
    side = min(width, height)
    top = (height - side) // 2
    left = (width - side) // 2
    center = processed_img[top:top + side, left:left + side, :]
    tiles.append(cv2.resize(center, dsize=(crop_size, crop_size)))
    return np.array(tiles)

def augment_tiles_light(tiles, ttas=2):
    """Build a test-time-augmentation batch from a stack of tiles.

    Every tile except the last is repeated ``ttas`` times and passed through
    ``image_augmentations``; the last tile (the centre crop) is appended
    ``ttas`` times without augmentation so it keeps the same weight in the batch.

    Args:
        tiles: 4-D array of tiles whose last entry is the centre crop.
        ttas: Number of augmented copies per tile.

    Returns:
        Tensor with the augmented tiles followed by the unaugmented centre copies.
    """
    corner_tiles = tiles[:-1, :, :, :]
    center_tile = tiles[-1, :, :, :]

    repeated_corners = tf.concat([corner_tiles] * ttas, axis=0)
    augmented_corners = tf.map_fn(image_augmentations, repeated_corners)

    # Repeat the centre crop so it has the same ratio as the augmented images.
    center_copies = np.broadcast_to(center_tile, (ttas,) + tiles.shape[1:])
    return tf.concat([augmented_corners, center_copies], axis=0)

def cut_crop_image(processed_img):
    """Centre-crop an image to its middle 80%, resize to 224x224 and add a batch axis.

    Args:
        processed_img: Image array/tensor accepted by ``tf.image.central_crop``.

    Returns:
        The resized crop with a new leading axis of length 1.
    """
    center_region = tf.image.central_crop(processed_img, 0.8)
    resized = tf.image.resize(center_region, (224, 224))
    batch_of_one = np.expand_dims(resized, 0)
    return batch_of_one

# CropNet's last output column is an "unknown" class; its probability is spread
# evenly over the remaining classes so the scores match the problem's label set.
def distribute_unknown(propabilities):
    """Fold the trailing "unknown" column into the other class columns.

    Args:
        propabilities: 2-D array-like indexed as ``[sample, class]`` whose
            last column is the "unknown" class.

    Returns:
        Array with one column fewer: every remaining column has an equal share
        of the sample's "unknown" probability added to it.
    """
    # Derive the divisor from the input instead of hard-coding 5, so each row's
    # total is preserved for any number of known classes (5 for 6-column input).
    n_known = propabilities.shape[1] - 1
    known = propabilities[:, :-1]
    if n_known == 0:
        # Only the "unknown" column exists; nothing to distribute it over.
        return known
    unknown_share = propabilities[:, -1] / n_known
    return known + np.expand_dims(unknown_share, 1)

def multi_predict_tfhublayer(img_path, modelinstance):
    """Predict class scores for one image with a TF-Hub layer.

    The image is read with pixel values scaled by 1/255, centre-cropped and
    resized by ``cut_crop_image``, passed through ``modelinstance``, and the
    trailing "unknown" column is redistributed by ``distribute_unknown``.

    Args:
        img_path: Path of the image file.
        modelinstance: Callable model taking the single-image batch.

    Returns:
        The scores averaged over the batch axis.
    """
    _, scaled_pixels = read_preprocess_file(img_path, True)
    single_image_batch = cut_crop_image(scaled_pixels)
    raw_scores = modelinstance(single_image_batch)
    known_class_scores = distribute_unknown(raw_scores)
    return np.mean(known_class_scores, axis=0)

def multi_predict_keras(img_path, modelinstance, *args):
    """Predict class scores for one image with a Keras model using tiled TTA.

    The image is read without normalisation, cut into tiles, augmented by
    ``augment_tiles_light`` and scored in a single batch.

    Args:
        img_path: Path of the image file.
        modelinstance: Callable model taking the augmented tile batch.
        *args: Accepted and ignored.

    Returns:
        The model output averaged over the batch axis.
    """
    origin_dim, pixels = read_preprocess_file(img_path)
    tiles = create_image_tiles(origin_dim, pixels)
    tta_batch = augment_tiles_light(tiles)
    batch_scores = modelinstance(tta_batch)
    return np.mean(batch_scores, axis=0)

def predict_and_vote(image_list, modelinstances, onlykeras):
    """Run every registered model on every image and collect the results.

    Args:
        image_list: Sized iterable of image paths.
        modelinstances: Iterable of ``(predict_function, model)`` pairs; each
            function is called as ``predict_function(img_path, model)``.
        onlykeras: When true, the per-model outputs are summed and the argmax
            (a class index) is stored per image; otherwise the stacked
            per-model outputs are stored.

    Returns:
        List with one entry per image, in input order.
    """
    results = []
    progress = tqdm(total=len(image_list))
    with progress:
        for path in image_list:
            # The bar is advanced before the image is processed.
            progress.update(1)
            per_model_scores = [predict(path, model) for predict, model in modelinstances]
            stacked_scores = np.vstack(per_model_scores)
            if not onlykeras:
                results.append(stacked_scores)
            else:
                results.append(np.argmax(np.sum(stacked_scores, axis=0)))
    return results


# (predict_function, model) pairs consumed by predict_and_vote.
inference_models = []

if "mobilenet" in used_models_keras:
    # Frozen TF-Hub layer loaded from the configured path.
    model_mobilenet = build_mobilenet3(weights=used_models_keras["mobilenet"])
    inference_models += [(multi_predict_tfhublayer, model_mobilenet)]

# if "efficientnetb4" in used_models_keras:
#     model_efficientnetb4 =  keras.models.load_model(used_models_keras["efficientnetb4"], compile=False)
#     inference_models.append((multi_predict_keras, model_efficientnetb4))

# if "efficientnetb5" in used_models_keras:
#     model_efficientnetb5 =  keras.models.load_model(used_models_keras["efficientnetb5"])
#     inference_models.append((multi_predict_keras, model_efficientnetb5))

# False => keep the stacked per-model scores in "label" instead of a voted class index.
onlykeras = False
submission_df["label"] = predict_and_vote(
    (image_path + submission_df["image_id"]).tolist(),
    inference_models,
    onlykeras,
)

I0000 00:00:1748175577.416893      19 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 14985 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0
  0%|          | 0/1 [00:00<?, ?it/s]I0000 00:00:1748175582.247171     153 cuda_dnn.cc:529] Loaded cuDNN version 90300
100%|██████████| 1/1 [00:02<00:00,  2.17s/it]


In [18]:
# Release every reference to the TF models *before* asking Keras and the garbage
# collector to reclaim memory; otherwise there is nothing for them to free.
try:
    inference_models.clear()
except NameError:
    # The inference cell was not run, so there is nothing to release.
    pass

# Emptying the list alone is not enough: the module-level name still points at
# the model. It only exists when "mobilenet" was enabled, hence the tolerant pop.
globals().pop("model_mobilenet", None)

tf.keras.backend.clear_session()

gc.collect()

0

In [19]:
# "label" currently holds, per image, the stacked outputs of the Keras/TF-Hub models
# (one row per model). Split them into one column per model, named after its key.
# NOTE(review): assumes inference_models was filled in the same order as
# used_models_keras -- confirm when enabling more than one Keras model.
# This replaces set_axis(..., inplace=False), which raises TypeError on pandas >= 2.0,
# and works for any number of models instead of exactly one or two.
for model_position, model_name in enumerate(used_models_keras):
    submission_df[model_name] = [
        stacked_scores[model_position] for stacked_scores in submission_df["label"]
    ]

# Reset the label column; the final class index is computed after stacking.
submission_df["label"] = 0

# if "coatnet" in used_models_pytorch:
#     submission_df = submission_df.merge(predictions_coatnet, on="image_id")

# Attach the probability column of every enabled PyTorch model, keyed by image_id.
if "convnext" in used_models_pytorch:
    submission_df = submission_df.merge(predictions_convnext, on="image_id")

if "swintransformer" in used_models_pytorch:
    submission_df = submission_df.merge(predictions_swintransformer, on="image_id")
if "efficientnet" in used_models_pytorch:
    submission_df = submission_df.merge(predictions_efficient, on="image_id")
if "tf_efficientnetv2_xl" in used_models_pytorch:
    submission_df = submission_df.merge(predictions_test, on="image_id")
if "tf_efficientnetv2_l_in21k" in used_models_pytorch:
    submission_df = submission_df.merge(predictions_tf_efficientnetv2_l_in21k, on="image_id")
if "resnext_old" in used_models_pytorch:
    submission_df = submission_df.merge(predictions_resnext_old, on="image_id")
if "resnext_new" in used_models_pytorch:
    submission_df = submission_df.merge(predictions_resnext_new, on="image_id")
if "newconvnext" in used_models_pytorch:
    submission_df = submission_df.merge(predictions_newconvnext, on="image_id")
# if "efficientnetb3" in used_models_pytorch:
#     submission_df = submission_df.merge(predictions_cutmix, on="image_id")
    
# if "vit2020" in used_models_pytorch:
#     submission_df = submission_df.merge(predictions_vit, on="image_id")
    
# if "vit2019" in used_models_pytorch:
#     submission_df = submission_df.merge(predictions_vit2019, on="image_id")

In [20]:
# True  => staged averaging of fixed model groups, then sum with mobilenet.
# False => plain sum over every model that has a prediction column.
stacked_mean = True


def _combine_rowwise(frame, columns, reducer):
    """Reduce the per-class vectors stored in `columns` element-wise, row by row.

    Returns a Series holding, for each row, a list with one `reducer` result per class.
    """
    return frame.apply(
        lambda row: [reducer(values) for values in zip(*(row[name] for name in columns))],
        axis=1,
    )


if stacked_mean:
    # Each stage averages the previous stage with further models, so models added
    # in later stages carry more weight in the final vector than earlier ones.
    submission_df["stage_1"] = _combine_rowwise(
        submission_df, ["swintransformer", "newconvnext"], np.mean)
    submission_df["stage_2"] = _combine_rowwise(
        submission_df, ["stage_1", "resnext_new", "tf_efficientnetv2_xl"], np.mean)
    submission_df["stage_3"] = _combine_rowwise(
        submission_df, ["stage_2", "tf_efficientnetv2_l_in21k"], np.mean)
    final_columns = ["mobilenet", "stage_3"]
else:
    # Only use models whose predictions were actually merged: the model dicts may
    # contain placeholder keys for disabled models, which would raise KeyError.
    configured_models = list(used_models_pytorch.keys()) + list(used_models_keras.keys())
    final_columns = [name for name in configured_models if name in submission_df.columns]

# Final class = index of the largest summed score.
submission_df["label"] = _combine_rowwise(submission_df, final_columns, np.sum).map(
    lambda class_scores: int(np.argmax(class_scores)))

In [21]:
# Preview the first row to sanity-check the per-model probability columns and the final label.
submission_df.head(1)

Unnamed: 0,image_id,label,mobilenet,swintransformer,tf_efficientnetv2_xl,tf_efficientnetv2_l_in21k,resnext_new,newconvnext,stage_1,stage_2,stage_3
0,2216849948.jpg,2,"[0.0037992653, 0.0037114928, 0.87805223, 0.005...","[2.7445092e-05, 0.0001162478, 0.01800231, 7.43...","[0.006624048, 0.059600253, 0.17610297, 0.02277...","[0.05921451, 0.07604618, 0.2396855, 0.03580349...","[0.023723358, 0.0414746, 0.42054933, 0.0440246...","[0.0041348706, 0.0070461934, 0.20124628, 0.047...","[0.0020811579, 0.0035812205, 0.1096243, 0.0238...","[0.010809521, 0.034885358, 0.23542555, 0.03021...","[0.035012014, 0.055465773, 0.23755553, 0.03301..."


In [22]:
submission_df[["image_id","label"]].to_csv("submission.csv", index=False)
!head submission.csv

image_id,label
2216849948.jpg,2
