In [13]:
import torch
import torchvision

!python -V; printf '\n'
!nvcc -V; printf '\n'
print(f'CUDA is available: {torch.cuda.is_available()}')
print(f'Current device: {torch.cuda.current_device()}')
print(f'Device name: {torch.cuda.get_device_name(0)}')
print(f'CUDNN version: {torch.backends.cudnn.version()}')
print(f'CUDNN enabled: {torch.backends.cudnn.enabled}')
print(f'CUDA version by torch: {torch.version.cuda}')
print(f'Torchvision version: {torchvision.__version__}')
print(f'Torchvision loc: {torchvision.__file__}')

Python 3.11.11

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2024 NVIDIA Corporation
Built on Thu_Jun__6_02:18:23_PDT_2024
Cuda compilation tools, release 12.5, V12.5.82
Build cuda_12.5.r12.5/compiler.34385749_0

CUDA is available: True
Current device: 0
Device name: Tesla T4
CUDNN version: 90300
CUDNN enabled: True
CUDA version by torch: 12.4
Torchvision version: 0.20.1+cu124
Torchvision loc: /usr/local/lib/python3.11/dist-packages/torchvision/__init__.py


In [5]:
import torchvision.models

model = torchvision.models.vit_b_16(pretrained=True)

num_classes = 75
model.heads.head = torch.nn.Linear(model.heads.head.in_features, num_classes)


Downloading: "https://download.pytorch.org/models/vit_b_16-c867db91.pth" to /root/.cache/torch/hub/checkpoints/vit_b_16-c867db91.pth
100%|██████████| 330M/330M [00:01<00:00, 203MB/s] 


In [9]:
from torch.utils.data import Dataset
from PIL import Image
import pandas as pd
import os
from torch.utils.data import random_split
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

class ButterflyDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.annotations = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        self.label_to_idx = {label: idx for idx, label in enumerate(self.annotations['label'].unique())}

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        img_path = os.path.join(self.root_dir, self.annotations.iloc[idx, 0])
        image = Image.open(img_path).convert("RGB")
        label_str = self.annotations.iloc[idx, 1]
        label = self.label_to_idx[label_str]
        if self.transform:
            image = self.transform(image)
        return image, label

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,))  # Adjust channels as needed
])

the_dataset = ButterflyDataset(
    csv_file='/kaggle/input/butterfly-image-classification/Training_set.csv',
    root_dir='/kaggle/input/butterfly-image-classification/train',
    transform=transform
)

train_size = int(0.8 * len(the_dataset))
val_size = len(the_dataset) - train_size

train_dataset, val_dataset = random_split(the_dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [10]:
import torch.nn as nn
import torch.optim as optim

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=3e-5)

In [11]:
import time
from tqdm import tqdm

history = {
    'train_loss': [],
    'train_acc1': [],
    'train_acc5': [],
    'val_loss': [],
    'val_acc1': [],
    'val_acc5': []
}

def evaluate(model, dataloader, device):
    model.eval()
    correct_top1 = 0
    correct_top5 = 0
    total = 0
    running_loss = 0.0

    with torch.no_grad():
        for images, labels in tqdm(dataloader, desc="Validating", leave=False):
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)
            running_loss += loss.item()

            _, preds = outputs.topk(5, dim=1)
            correct = preds.eq(labels.view(-1, 1))

            correct_top1 += correct[:, 0].sum().item()
            correct_top5 += correct.any(dim=1).sum().item()
            total += labels.size(0)

    avg_loss = running_loss / len(dataloader)
    acc1 = correct_top1 / total
    acc5 = correct_top5 / total

    return avg_loss, acc1, acc5

num_epochs = 10

In [None]:
from IPython.display import clear_output, display
import matplotlib.pyplot as plt
from IPython.display import display as ipy_display
import ipywidgets as widgets

plot_output = widgets.Output()
ipy_display(plot_output)

def plot_history(history):
    with plot_output:
        clear_output(wait=True)
        epochs = range(1, len(history['train_loss']) + 1)

        plt.figure(figsize=(18, 5))

        # 1. Loss
        plt.subplot(1, 3, 1)
        plt.plot(epochs, history['train_loss'], label='Train Loss')
        plt.plot(epochs, history['val_loss'], label='Val Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.title('Train vs Val Loss')
        plt.legend()

        # 2. ACC@1
        plt.subplot(1, 3, 2)
        plt.plot(epochs, history['train_acc1'], label='Train ACC@1')
        plt.plot(epochs, history['val_acc1'], label='Val ACC@1')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.title('Train vs Val ACC@1')
        plt.legend()

        # 3. ACC@5
        plt.subplot(1, 3, 3)
        plt.plot(epochs, history['train_acc5'], label='Train ACC@5')
        plt.plot(epochs, history['val_acc5'], label='Val ACC@5')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.title('Train vs Val ACC@5')
        plt.legend()

        plt.tight_layout()
        plt.show()



for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_top1 = 0
    correct_top5 = 0
    total = 0

    start_time = time.time()

    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False)

    for images, labels in progress_bar:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        _, preds = outputs.topk(5, dim=1)
        correct = preds.eq(labels.view(-1, 1))

        correct_top1 += correct[:, 0].sum().item()
        correct_top5 += correct.any(dim=1).sum().item()
        total += labels.size(0)

        acc1_batch = correct[:, 0].sum().item() / labels.size(0)
        acc5_batch = correct.any(dim=1).sum().item() / labels.size(0)
        progress_bar.set_postfix(loss=loss.item(), acc1=acc1_batch, acc5=acc5_batch)

    train_time = time.time() - start_time
    train_loss = running_loss / len(train_loader)
    train_acc1 = correct_top1 / total
    train_acc5 = correct_top5 / total

    # Run validation
    val_loss, val_acc1, val_acc5 = evaluate(model, val_loader, device)

    # Store in history
    history['train_loss'].append(train_loss)
    history['train_acc1'].append(train_acc1)
    history['train_acc5'].append(train_acc5)
    history['val_loss'].append(val_loss)
    history['val_acc1'].append(val_acc1)
    history['val_acc5'].append(val_acc5)

    print(f"Epoch {epoch+1}/{num_epochs} | Time: {train_time:.1f}s")
    print(f"  Train     | Loss: {train_loss:.4f} | ACC@1: {train_acc1:.4f} | ACC@5: {train_acc5:.4f}")
    print(f"  Validation| Loss: {val_loss:.4f}   | ACC@1: {val_acc1:.4f}   | ACC@5: {val_acc5:.4f}")
    plot_history(history)

torch.save(model.state_dict(), 'vit_b_16_butterfly_classification.pth')

In [19]:
model_v2 = torchvision.models.convnext_base(weights=torchvision.models.ConvNeXt_Base_Weights.DEFAULT)
model_v2.classifier[2] = nn.Linear(model_v2.classifier[2].in_features, num_classes)
model_v2 = model_v2.to(device)

In [21]:
criterion_v2 = nn.CrossEntropyLoss()
optimizer_v2 = optim.Adam(model_v2.parameters(), lr=1e-4)

history_v2 = {
    'train_loss': [],
    'train_acc1': [],
    'train_acc5': [],
    'val_loss': [],
    'val_acc1': [],
    'val_acc5': []
}

In [22]:
num_epochs_v2 = 10

for epoch in range(num_epochs_v2):
    start_time = time.time()
    
    # Training
    model_v2.train()
    train_loss_v2 = 0.0
    correct1_train_v2 = 0
    correct5_train_v2 = 0
    total_train_v2 = 0

    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs_v2} [Train]"):
        images, labels = images.to(device), labels.to(device)

        optimizer_v2.zero_grad()
        outputs = model_v2(images)
        loss = criterion_v2(outputs, labels)
        loss.backward()
        optimizer_v2.step()

        train_loss_v2 += loss.item() * images.size(0)
        total_train_v2 += labels.size(0)

        _, preds = outputs.topk(5, dim=1, largest=True, sorted=True)
        correct1_train_v2 += (preds[:, 0] == labels).sum().item()
        correct5_train_v2 += sum([labels[i] in preds[i] for i in range(labels.size(0))])

    train_loss_v2 /= total_train_v2
    acc1_train_v2 = correct1_train_v2 / total_train_v2
    acc5_train_v2 = correct5_train_v2 / total_train_v2

    # Validation
    model_v2.eval()
    val_loss_v2 = 0.0
    correct1_val_v2 = 0
    correct5_val_v2 = 0
    total_val_v2 = 0

    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs_v2} [Val]"):
            images, labels = images.to(device), labels.to(device)
            outputs = model_v2(images)
            loss = criterion_v2(outputs, labels)

            val_loss_v2 += loss.item() * images.size(0)
            total_val_v2 += labels.size(0)

            _, preds = outputs.topk(5, dim=1, largest=True, sorted=True)
            correct1_val_v2 += (preds[:, 0] == labels).sum().item()
            correct5_val_v2 += sum([labels[i] in preds[i] for i in range(labels.size(0))])

    val_loss_v2 /= total_val_v2
    acc1_val_v2 = correct1_val_v2 / total_val_v2
    acc5_val_v2 = correct5_val_v2 / total_val_v2

    # Time
    epoch_time = time.time() - start_time

    # Logging
    print(f"Epoch {epoch+1}/{num_epochs_v2} | Time: {epoch_time:.1f}s")
    print(f"  Train     | Loss: {train_loss_v2:.4f} | ACC@1: {acc1_train_v2:.4f} | ACC@5: {acc5_train_v2:.4f}")
    print(f"  Validation| Loss: {val_loss_v2:.4f}   | ACC@1: {acc1_val_v2:.4f}   | ACC@5: {acc5_val_v2:.4f}")

    # History update
    history_v2['train_loss'].append(train_loss_v2)
    history_v2['train_acc1'].append(acc1_train_v2)
    history_v2['train_acc5'].append(acc5_train_v2)
    history_v2['val_loss'].append(val_loss_v2)
    history_v2['val_acc1'].append(acc1_val_v2)
    history_v2['val_acc5'].append(acc5_val_v2)

    # Plot (uses original `plot_history()` but can be duplicated to `plot_history_v2` if needed)
    plot_history(history_v2)

Epoch 1/10 [Train]: 100%|██████████| 163/163 [03:55<00:00,  1.45s/it]
Epoch 1/10 [Val]: 100%|██████████| 41/41 [00:18<00:00,  2.21it/s]

Epoch 1/10 | Time: 254.2s
  Train     | Loss: 2.3661 | ACC@1: 0.6024 | ACC@5: 0.7884
  Validation| Loss: 0.6507   | ACC@1: 0.8815   | ACC@5: 0.9854



Epoch 2/10 [Train]: 100%|██████████| 163/163 [03:51<00:00,  1.42s/it]
Epoch 2/10 [Val]: 100%|██████████| 41/41 [00:18<00:00,  2.22it/s]

Epoch 2/10 | Time: 250.2s
  Train     | Loss: 0.4279 | ACC@1: 0.9327 | ACC@5: 0.9940
  Validation| Loss: 0.3011   | ACC@1: 0.9315   | ACC@5: 0.9915



Epoch 3/10 [Train]: 100%|██████████| 163/163 [03:53<00:00,  1.44s/it]
Epoch 3/10 [Val]: 100%|██████████| 41/41 [00:18<00:00,  2.25it/s]

Epoch 3/10 | Time: 252.2s
  Train     | Loss: 0.1678 | ACC@1: 0.9711 | ACC@5: 0.9987
  Validation| Loss: 0.2404   | ACC@1: 0.9408   | ACC@5: 0.9931



Epoch 4/10 [Train]: 100%|██████████| 163/163 [03:56<00:00,  1.45s/it]
Epoch 4/10 [Val]: 100%|██████████| 41/41 [00:18<00:00,  2.27it/s]

Epoch 4/10 | Time: 254.9s
  Train     | Loss: 0.0815 | ACC@1: 0.9871 | ACC@5: 0.9998
  Validation| Loss: 0.2411   | ACC@1: 0.9369   | ACC@5: 0.9923



Epoch 5/10 [Train]:  55%|█████▌    | 90/163 [02:10<01:46,  1.45s/it]


KeyboardInterrupt: 