In [1]:
!pip install timm > /dev/null

In [2]:
# Importing necessary libraries for file handling and data manipulation
import os  # Provides functions to interact with the operating system (e.g., file paths)
import pandas as pd  # Data manipulation and analysis, especially for tabular data

# Importing image processing and augmentation tools
from PIL import Image  # Python Imaging Library (PIL) for opening, manipulating, and saving image files
from tqdm import tqdm  # Provides a progress bar for loops

# PyTorch libraries for deep learning and neural networks
import torch  # Core PyTorch library for tensor computation and automatic differentiation
import torch.nn as nn  # Neural network module in PyTorch
from torch.utils.data import Dataset, DataLoader  # Utilities for handling datasets and loading batches of data

# Torchvision for image preprocessing and transformation
from torchvision import transforms  # Common transformations for image preprocessing
from torchvision.transforms import AutoAugment, AutoAugmentPolicy  # Augmentation techniques to improve model robustness

# Learning rate scheduler for fine-tuning training process
from torch.optim.lr_scheduler import CosineAnnealingLR  # Schedules learning rate based on cosine annealing

# Machine learning evaluation and model selection utilities
from sklearn.metrics import f1_score  # Metric for evaluating model performance, especially for classification tasks
from sklearn.model_selection import train_test_split  # Utility for splitting dataset into training and testing sets

# Importing timm library for pre-trained models and model architectures
import timm  # Provides access to a collection of state-of-the-art pretrained models


In [3]:
label_mapping = {
    "Amphibia": 0,
    "Animalia": 1,
    "Arachnida": 2,
    "Aves": 3,
    "Fungi": 4,
    "Insecta": 5,
    "Mammalia": 6,
    "Mollusca": 7,
    "Plantae": 8,
    "Reptilia": 9,
}

In [4]:
class FloraFaunaDataset(Dataset):
    def __init__(self, image_paths, labels=None, transform=None, is_test=False):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform
        self.is_test = is_test

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        try:
            image = Image.open(img_path).convert("RGB")
        except Exception as e:
            print(f"Error loading image {img_path}: {e}")
            image = Image.new("RGB", (224, 224))
        if self.transform:
            image = self.transform(image)
        if self.is_test:
            return image, os.path.basename(img_path)
        else:
            label = self.labels[idx]
            return image, label

In [5]:
image_paths = []
labels = []

train_root = "/kaggle/input/deep-learning-practice-image-classification/train"
for label in os.listdir(train_root):
    class_dir = os.path.join(train_root, label)
    if os.path.isdir(class_dir):
        for img_name in os.listdir(class_dir):
            image_paths.append(os.path.join(class_dir, img_name))
            labels.append(label_mapping[label])

In [6]:
train_paths, val_paths, train_labels, val_labels = train_test_split(
    image_paths, labels, test_size=0.1, stratify=labels, random_state=42
)

In [7]:
train_transform = transforms.Compose(
    [
        transforms.Resize((448, 448)),
        transforms.CenterCrop(448),
        transforms.RandomHorizontalFlip(),
        AutoAugment(policy=AutoAugmentPolicy.IMAGENET),
        transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
    ]
)

val_transform = transforms.Compose(
    [
        transforms.Resize((448, 448)),
        transforms.CenterCrop(448),
        transforms.RandomHorizontalFlip(),
        AutoAugment(policy=AutoAugmentPolicy.IMAGENET),
        transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
    ]
)

test_transform = val_transform

In [8]:
batch_size = 64
num_workers = 4

In [9]:
train_dataset = FloraFaunaDataset(train_paths, train_labels, transform=train_transform)
val_dataset = FloraFaunaDataset(val_paths, val_labels, transform=val_transform)

train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
    pin_memory=True,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True,
)

In [10]:
model = timm.create_model("eva02_large_patch14_448.mim_m38m_ft_in22k_in1k", pretrained=True)
# model = timm.create_model("eva_giant_patch14_224.clip_ft_in1k", pretrained=True)

num_features = model.head.in_features
model.head = nn.Linear(num_features, 10)  # 10 classes

model

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

Eva(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 1024, kernel_size=(14, 14), stride=(14, 14))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (rope): RotaryEmbeddingCat()
  (blocks): ModuleList(
    (0-23): 24 x EvaBlock(
      (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
      (attn): EvaAttention(
        (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
        (k_proj): Linear(in_features=1024, out_features=1024, bias=False)
        (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (norm): Identity()
        (proj): Linear(in_features=1024, out_features=1024, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (drop_path1): Identity()
      (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
      (mlp): SwiGLU(
        (fc1_g): Linear(in_features=1024, out_features=2730, bias=True)
        (fc1_x): Linear(in

In [11]:
for param in model.parameters():
    param.requires_grad = False

for param in model.head.parameters():
    param.requires_grad = True

In [12]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=0.0001, weight_decay=1e-4)

In [13]:
scheduler = CosineAnnealingLR(optimizer, T_max=10)

In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

scaler = torch.amp.GradScaler()

In [15]:
def train_epoch(epoch, num_epochs):
    model.train()
    running_loss = 0.0
    all_preds = []
    all_labels = []

    for images, labels in tqdm(
        train_loader, desc=f"Training Epoch {epoch+1}/{num_epochs}"
    ):
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        optimizer.zero_grad()
        with torch.cuda.amp.autocast():
            outputs = model(images)
            loss = criterion(outputs, labels)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item() * images.size(0)

        preds = torch.argmax(outputs, dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

    epoch_loss = running_loss / len(train_dataset)
    epoch_f1 = f1_score(all_labels, all_preds, average="weighted")

    print(f"Train Loss: {epoch_loss:.4f}, Train Weighted F1 Score: {epoch_f1:.4f}")


def validate_epoch():
    model.eval()
    val_running_loss = 0.0
    val_preds = []
    val_labels_list = []

    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc="Validation"):
            images = images.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)
            with torch.cuda.amp.autocast():
                outputs = model(images)
                loss = criterion(outputs, labels)

            val_running_loss += loss.item() * images.size(0)

            preds = torch.argmax(outputs, dim=1)
            val_preds.extend(preds.cpu().numpy())
            val_labels_list.extend(labels.cpu().numpy())

    val_loss = val_running_loss / len(val_dataset)
    val_f1 = f1_score(val_labels_list, val_preds, average="weighted")

    print(f"Val Loss: {val_loss:.4f}, Val Weighted F1 Score: {val_f1:.4f}")
    return val_loss, val_f1

In [16]:
num_epochs = 30
best_val_loss = float("inf")
patience = 5
trigger_times = 0

for epoch in range(num_epochs):
    train_epoch(epoch, num_epochs)
    val_loss, val_f1 = validate_epoch()
    scheduler.step()

    # Check for improvement
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        trigger_times = 0
        # Save the best model
        torch.save(model.state_dict(), "/kaggle/working/best_model_eva_448.pth")
        print("Saving Best Model...")
    else:
        trigger_times += 1
        print(f"Early stopping counter: {trigger_times} out of {patience}")
        if trigger_times >= patience:
            print("Early stopping!")
            break

  with torch.cuda.amp.autocast():
Training Epoch 1/30: 100%|██████████| 141/141 [19:22<00:00,  8.24s/it]


Train Loss: 1.2139, Train Weighted F1 Score: 0.6266


  with torch.cuda.amp.autocast():
Validation: 100%|██████████| 16/16 [02:13<00:00,  8.32s/it]


Val Loss: 0.5441, Val Weighted F1 Score: 0.8749
Saving Best Model...


  with torch.cuda.amp.autocast():
Training Epoch 2/30: 100%|██████████| 141/141 [19:20<00:00,  8.23s/it]


Train Loss: 0.4114, Train Weighted F1 Score: 0.9094


  with torch.cuda.amp.autocast():
Validation: 100%|██████████| 16/16 [02:12<00:00,  8.30s/it]


Val Loss: 0.3258, Val Weighted F1 Score: 0.9242
Saving Best Model...


  with torch.cuda.amp.autocast():
Training Epoch 3/30: 100%|██████████| 141/141 [19:20<00:00,  8.23s/it]


Train Loss: 0.2817, Train Weighted F1 Score: 0.9334


  with torch.cuda.amp.autocast():
Validation: 100%|██████████| 16/16 [02:12<00:00,  8.31s/it]


Val Loss: 0.2746, Val Weighted F1 Score: 0.9360
Saving Best Model...


  with torch.cuda.amp.autocast():
Training Epoch 4/30: 100%|██████████| 141/141 [19:20<00:00,  8.23s/it]


Train Loss: 0.2364, Train Weighted F1 Score: 0.9437


  with torch.cuda.amp.autocast():
Validation: 100%|██████████| 16/16 [02:12<00:00,  8.29s/it]


Val Loss: 0.2297, Val Weighted F1 Score: 0.9390
Saving Best Model...


  with torch.cuda.amp.autocast():
Training Epoch 5/30: 100%|██████████| 141/141 [19:20<00:00,  8.23s/it]


Train Loss: 0.2190, Train Weighted F1 Score: 0.9448


  with torch.cuda.amp.autocast():
Validation: 100%|██████████| 16/16 [02:12<00:00,  8.29s/it]


Val Loss: 0.2257, Val Weighted F1 Score: 0.9441
Saving Best Model...


  with torch.cuda.amp.autocast():
Training Epoch 6/30: 100%|██████████| 141/141 [19:21<00:00,  8.23s/it]


Train Loss: 0.2043, Train Weighted F1 Score: 0.9486


  with torch.cuda.amp.autocast():
Validation: 100%|██████████| 16/16 [02:12<00:00,  8.30s/it]


Val Loss: 0.2071, Val Weighted F1 Score: 0.9491
Saving Best Model...


  with torch.cuda.amp.autocast():
Training Epoch 7/30: 100%|██████████| 141/141 [19:20<00:00,  8.23s/it]


Train Loss: 0.1938, Train Weighted F1 Score: 0.9528


  with torch.cuda.amp.autocast():
Validation: 100%|██████████| 16/16 [02:12<00:00,  8.30s/it]


Val Loss: 0.1985, Val Weighted F1 Score: 0.9441
Saving Best Model...


  with torch.cuda.amp.autocast():
Training Epoch 8/30: 100%|██████████| 141/141 [19:21<00:00,  8.23s/it]


Train Loss: 0.1907, Train Weighted F1 Score: 0.9526


  with torch.cuda.amp.autocast():
Validation: 100%|██████████| 16/16 [02:12<00:00,  8.31s/it]


Val Loss: 0.2187, Val Weighted F1 Score: 0.9392
Early stopping counter: 1 out of 5


  with torch.cuda.amp.autocast():
Training Epoch 9/30: 100%|██████████| 141/141 [19:21<00:00,  8.24s/it]


Train Loss: 0.1805, Train Weighted F1 Score: 0.9547


  with torch.cuda.amp.autocast():
Validation: 100%|██████████| 16/16 [02:13<00:00,  8.31s/it]


Val Loss: 0.2103, Val Weighted F1 Score: 0.9420
Early stopping counter: 2 out of 5


  with torch.cuda.amp.autocast():
Training Epoch 10/30: 100%|██████████| 141/141 [19:21<00:00,  8.24s/it]


Train Loss: 0.1816, Train Weighted F1 Score: 0.9554


  with torch.cuda.amp.autocast():
Validation: 100%|██████████| 16/16 [02:12<00:00,  8.31s/it]


Val Loss: 0.1925, Val Weighted F1 Score: 0.9520
Saving Best Model...


  with torch.cuda.amp.autocast():
Training Epoch 11/30: 100%|██████████| 141/141 [19:21<00:00,  8.24s/it]


Train Loss: 0.1786, Train Weighted F1 Score: 0.9559


  with torch.cuda.amp.autocast():
Validation: 100%|██████████| 16/16 [02:12<00:00,  8.30s/it]


Val Loss: 0.1927, Val Weighted F1 Score: 0.9493
Early stopping counter: 1 out of 5


  with torch.cuda.amp.autocast():
Training Epoch 12/30: 100%|██████████| 141/141 [19:21<00:00,  8.24s/it]


Train Loss: 0.1759, Train Weighted F1 Score: 0.9554


  with torch.cuda.amp.autocast():
Validation: 100%|██████████| 16/16 [02:12<00:00,  8.30s/it]


Val Loss: 0.1992, Val Weighted F1 Score: 0.9501
Early stopping counter: 2 out of 5


  with torch.cuda.amp.autocast():
Training Epoch 13/30: 100%|██████████| 141/141 [19:21<00:00,  8.23s/it]


Train Loss: 0.1840, Train Weighted F1 Score: 0.9549


  with torch.cuda.amp.autocast():
Validation: 100%|██████████| 16/16 [02:13<00:00,  8.33s/it]


Val Loss: 0.2037, Val Weighted F1 Score: 0.9441
Early stopping counter: 3 out of 5


  with torch.cuda.amp.autocast():
Training Epoch 14/30: 100%|██████████| 141/141 [19:21<00:00,  8.24s/it]


Train Loss: 0.1738, Train Weighted F1 Score: 0.9571


  with torch.cuda.amp.autocast():
Validation: 100%|██████████| 16/16 [02:12<00:00,  8.30s/it]


Val Loss: 0.1957, Val Weighted F1 Score: 0.9441
Early stopping counter: 4 out of 5


  with torch.cuda.amp.autocast():
Training Epoch 15/30: 100%|██████████| 141/141 [19:21<00:00,  8.24s/it]


Train Loss: 0.1779, Train Weighted F1 Score: 0.9539


  with torch.cuda.amp.autocast():
Validation: 100%|██████████| 16/16 [02:13<00:00,  8.33s/it]

Val Loss: 0.2085, Val Weighted F1 Score: 0.9420
Early stopping counter: 5 out of 5
Early stopping!





In [17]:
model.load_state_dict(torch.load("/kaggle/working/best_model_eva_448.pth"))

  model.load_state_dict(torch.load("/kaggle/working/best_model_eva_448.pth"))


<All keys matched successfully>

In [18]:
validate_epoch()

  with torch.cuda.amp.autocast():
Validation: 100%|██████████| 16/16 [02:12<00:00,  8.31s/it]

Val Loss: 0.2214, Val Weighted F1 Score: 0.9421





(0.22144195461273194, 0.9421392159562101)

In [19]:
test_root = "/kaggle/input/deep-learning-practice-image-classification/test"
test_image_paths = [
    os.path.join(test_root, img_name) for img_name in os.listdir(test_root)
]

test_dataset = FloraFaunaDataset(
    test_image_paths, transform=test_transform, is_test=True
)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=8)



In [20]:
model.eval()
predictions = []
image_ids = []

with torch.no_grad():
    for images, img_names in tqdm(test_loader, desc="Predicting"):
        images = images.to(device)
        outputs = model(images)
        preds = torch.argmax(outputs, dim=1)
        predictions.extend(preds.cpu().numpy())
        image_ids.extend([name.split(".")[0] for name in img_names])

Predicting: 100%|██████████| 63/63 [04:25<00:00,  4.21s/it]


In [21]:
submission = pd.DataFrame({"Image_ID": image_ids, "Label": predictions})
submission.to_csv("/kaggle/working/submission.csv", index=False)