**Mounting Google Drive (where the dataset is stored)**

In [None]:
from google.colab import drive

# Colab-only step: expose Google Drive under the local filesystem so the
# dataset folder can be read like an ordinary directory.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

**Importing all the necessary libraries**

In [None]:
import os
import random
import torch
import torch.nn as nn
import torch.optim as optim

from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split, Subset
from PIL import Image

# Seed every RNG this notebook relies on (Python's `random` for few-shot
# sampling, torch for weight init / shuffling / random_split) so that a
# Restart & Run All reproduces the same numbers.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

# Prefer the GPU when one is available; every model and batch is moved here.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

**Preparing the Dataset**

In [None]:
DATASET_PATH = "/content/drive/MyDrive/cat_species"

# Input resolution fed to the networks and the per-channel statistics used to
# normalise images (the standard ImageNet mean / std).
IMAGE_SIZE = 224
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Training pipeline: random crop / flip / rotation / colour jitter for
# augmentation, then tensor conversion and normalisation.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=20),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# Validation pipeline: deterministic resize only, same normalisation.
val_transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# One sub-folder per class; the folder names become the class labels.
full_dataset = datasets.ImageFolder(root=DATASET_PATH, transform=train_transform)
num_classes = len(full_dataset.classes)

print("Total classes:", num_classes)

**Train-Validation Split**

In [18]:
# 80 / 20 train / validation split, seeded so the partition is reproducible.
SPLIT_SEED = 42
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size

train_dataset, val_split = random_split(
    full_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Both halves returned by random_split wrap the SAME ImageFolder object, so
# assigning `val_split.dataset.transform` would also replace the training
# transform and silently disable augmentation. Instead, open a second view of
# the folder that carries the validation transform and reuse the held-out
# indices on it. ImageFolder enumerates files in sorted order, so the indices
# refer to the same images in both views.
val_base = datasets.ImageFolder(DATASET_PATH, transform=val_transform)
assert len(val_base) == len(full_dataset), "dataset changed between scans"
val_dataset = Subset(val_base, val_split.indices)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

**Channel Attention Module**

In [19]:
class ChannelAttention(nn.Module):
    """Channel-wise attention gate.

    Squeezes the spatial dimensions with both average- and max-pooling, passes
    each descriptor through a shared two-layer 1x1-conv bottleneck, sums the
    two results and applies a sigmoid.

    Args:
        channels: Number of channels of the incoming feature map.
        reduction: Bottleneck reduction ratio. The hidden width is
            ``channels // reduction`` but never less than 1, so the module
            stays usable when ``channels < reduction``.

    Returns (from ``forward``):
        Tensor of shape ``(N, channels, 1, 1)`` with values in [0, 1], meant to
        be multiplied onto the input feature map.
    """

    def __init__(self, channels, reduction=16):
        super().__init__()
        self.avg = nn.AdaptiveAvgPool2d(1)
        self.max = nn.AdaptiveMaxPool2d(1)

        # Guard against a zero-channel bottleneck for narrow feature maps.
        hidden = max(1, channels // reduction)

        self.fc = nn.Sequential(
            nn.Conv2d(channels, hidden, 1),
            nn.ReLU(),
            nn.Conv2d(hidden, channels, 1)
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # The same bottleneck (shared weights) scores both pooled descriptors.
        return self.sigmoid(self.fc(self.avg(x)) + self.fc(self.max(x)))

**Residual Block**

In [20]:
class ResidualBlock(nn.Module):
    """Two 3x3 conv + batch-norm layers with channel attention and a skip path.

    Args:
        in_c: Channels of the input feature map.
        out_c: Channels produced by the block.

    The skip connection is the identity when ``in_c == out_c`` and a 1x1
    convolution otherwise, so both branches always have matching channels.
    Spatial size is preserved (padding 1 on the 3x3 convolutions).
    """

    def __init__(self, in_c, out_c):
        super().__init__()

        main_path = [
            nn.Conv2d(in_c, out_c, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_c),
            nn.ReLU(),
            nn.Conv2d(out_c, out_c, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_c),
        ]
        self.conv = nn.Sequential(*main_path)

        self.att = ChannelAttention(out_c)

        if in_c != out_c:
            # Project the input so it can be added to the main branch.
            self.shortcut = nn.Conv2d(in_c, out_c, kernel_size=1)
        else:
            self.shortcut = nn.Identity()

        self.relu = nn.ReLU()

    def forward(self, x):
        features = self.conv(x)
        # Re-weight each channel by its attention score before the residual sum.
        reweighted = features * self.att(features)
        return self.relu(reweighted + self.shortcut(x))

**Custom CNN Model**

In [21]:
class StrongCustomCNN(nn.Module):
    """Four-stage residual CNN classifier for RGB images.

    Each stage is a ``ResidualBlock`` followed by 2x2 max-pooling; channel
    width grows 3 -> 64 -> 128 -> 256 -> 512. Global average pooling and one
    linear layer produce the class logits.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes):
        super().__init__()

        # layer1 .. layer4, registered under the same attribute names in order.
        widths = (3, 64, 128, 256, 512)
        for stage_idx, (c_in, c_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"layer{stage_idx}", ResidualBlock(c_in, c_out))

        self.pool = nn.MaxPool2d(2)
        self.gap = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(widths[-1], num_classes)

    def forward(self, x):
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = self.pool(stage(x))
        pooled = self.gap(x)
        # (N, 512, 1, 1) -> (N, 512) for the linear classifier.
        flat = torch.flatten(pooled, 1)
        return self.fc(flat)

In [22]:
def evaluate(model, loader):
    """Return the top-1 accuracy (in percent) of ``model`` over ``loader``.

    Args:
        model: Classifier returning logits of shape ``(N, num_classes)``.
        loader: Iterable yielding ``(inputs, labels)`` batches.

    Returns:
        Accuracy as a float in [0, 100]; ``0.0`` when the loader yields no
        samples (instead of raising ``ZeroDivisionError``).

    Note:
        The model is switched to eval mode and left there; callers that keep
        training must call ``model.train()`` again.
    """
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            preds = model(x).argmax(1)
            correct += (preds == y).sum().item()
            total += y.size(0)

    if total == 0:
        return 0.0
    return 100 * correct / total

In [23]:
def train_model(model, train_loader, val_loader, epochs=20, lr=1e-4):
    """Train ``model`` with Adam and report validation accuracy every epoch.

    Args:
        model: Network to optimise in place (already on ``device``).
        train_loader: Batches of ``(inputs, labels)`` used for optimisation.
        val_loader: Batches used to measure accuracy after each epoch.
        epochs: Number of passes over ``train_loader``.
        lr: Initial Adam learning rate.

    Returns:
        Validation accuracy (percent) after the final epoch. If ``epochs`` is
        zero the untrained model is evaluated once so a value is still
        returned.
    """
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    # The monitored quantity is accuracy (higher is better), so the scheduler
    # must run in "max" mode; the default "min" mode would cut the learning
    # rate whenever accuracy stops *decreasing*.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode="max", patience=3, factor=0.5
    )

    val_acc = None
    for epoch in range(epochs):
        # evaluate() leaves the model in eval mode, so re-enable training mode
        # at the start of every epoch.
        model.train()
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            loss = criterion(model(x), y)
            loss.backward()
            optimizer.step()

        val_acc = evaluate(model, val_loader)
        scheduler.step(val_acc)

        print(f"Epoch {epoch+1}/{epochs} | Val Acc: {val_acc:.2f}%")

    if val_acc is None:
        # No epoch ran; avoid returning an unbound name.
        val_acc = evaluate(model, val_loader)

    return val_acc

**Training the model**

In [None]:
# Train the custom CNN from scratch; its final validation accuracy is the
# baseline the other approaches are compared against.
baseline_model = StrongCustomCNN(num_classes=num_classes)
baseline_model = baseline_model.to(device)
baseline_acc = train_model(
    baseline_model,
    train_loader=train_loader,
    val_loader=val_loader,
)

**Zero-Shot Learning**

In [25]:
def zero_shot_accuracy(loader, cat_class_range=(281, 293)):
    """Percentage of images an ImageNet-pretrained ResNet-18 labels as a cat.

    No fine-tuning is done: the stock 1000-way ImageNet head is used and a
    prediction counts as a hit when its class index falls inside
    ``cat_class_range``. This measures "recognised as some feline", not
    per-species accuracy; the dataset labels are ignored.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches.
        cat_class_range: Inclusive ``(first, last)`` ImageNet-1k class indices
            treated as cats. The default covers 281-285 (domestic cats: tabby
            ... Egyptian cat) and 286-293 (wild felines: cougar ... cheetah).

    Returns:
        Hit rate in percent, or ``0.0`` when the loader yields no samples.
    """
    first_cat, last_cat = cat_class_range

    model = models.resnet18(pretrained=True).to(device)
    model.eval()

    correct, total = 0, 0
    with torch.no_grad():
        for x, _ in loader:
            x = x.to(device)
            preds = model(x).argmax(1)
            # Bound the range on both sides: a bare `preds >= 281` would also
            # count every non-feline class from 294 to 999 as a cat.
            is_cat = (preds >= first_cat) & (preds <= last_cat)
            correct += is_cat.sum().item()
            total += x.size(0)

    if total == 0:
        return 0.0
    return 100 * correct / total

zero_shot_acc = zero_shot_accuracy(val_loader)

**Few-Shot Learning**

In [26]:
def _dataset_labels(dataset):
    """Return the label of every sample in ``dataset`` as a list, cheaply if possible."""
    if isinstance(dataset, Subset):
        # Resolve labels on the wrapped dataset, then pick this subset's rows.
        parent_labels = _dataset_labels(dataset.dataset)
        return [parent_labels[i] for i in dataset.indices]

    targets = getattr(dataset, "targets", None)
    if targets is not None:
        # ImageFolder-style datasets expose labels without loading any image.
        return targets.tolist() if hasattr(targets, "tolist") else list(targets)

    # Generic fallback: iterate the samples (loads and transforms each one).
    return [label for _, label in dataset]


def create_few_shot_dataset(dataset, shots=5):
    """Build a subset holding up to ``shots`` random samples per class.

    Args:
        dataset: Dataset of ``(sample, label)`` pairs. Labels are read from a
            ``targets`` attribute when present (also through nested
            ``Subset`` wrappers) so images need not be decoded; otherwise the
            dataset is iterated.
        shots: Samples to draw per class. Classes with fewer samples
            contribute all of them instead of raising ``ValueError``.

    Returns:
        ``Subset`` of ``dataset`` whose indices are grouped by class in
        ascending label order. Sampling uses the global ``random`` module.
    """
    class_map = {}
    for idx, label in enumerate(_dataset_labels(dataset)):
        class_map.setdefault(label, []).append(idx)

    indices = []
    for label in sorted(class_map):
        candidates = class_map[label]
        indices.extend(random.sample(candidates, min(shots, len(candidates))))

    return Subset(dataset, indices)

In [27]:
def few_shot_training(dataset, shots=5, epochs=5):
    """Fit a new linear head on a frozen ImageNet ResNet-18 from a few samples.

    Args:
        dataset: Labelled pool to draw the few-shot samples from. It must not
            contain the validation images, otherwise the reported accuracy is
            inflated.
        shots: Samples drawn per class via ``create_few_shot_dataset``.
        epochs: Passes over the few-shot subset.

    Returns:
        Accuracy (percent) of the adapted model on the global ``val_loader``.
    """
    few_data = create_few_shot_dataset(dataset, shots)
    few_loader = DataLoader(few_data, batch_size=16, shuffle=True)

    model = models.resnet18(pretrained=True)
    for p in model.parameters():
        p.requires_grad = False

    # The replacement head is created after freezing, so it stays trainable.
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    model = model.to(device)

    optimizer = optim.Adam(model.fc.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    # Stay in eval mode while fitting the head: requires_grad=False does not
    # stop BatchNorm from updating its running statistics in train mode, and
    # a handful of tiny batches would overwrite the pretrained statistics.
    # The linear head behaves identically in both modes.
    model.eval()
    for _ in range(epochs):
        for x, y in few_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            loss = criterion(model(x), y)
            loss.backward()
            optimizer.step()

    return evaluate(model, val_loader)

# Sample the shots from the training split only; drawing them from
# full_dataset could put validation images into the training subset.
few_shot_acc = few_shot_training(train_dataset)

**Continual Learning**

In [None]:
def continual_learning(dataset, epochs_first=10, epochs_second=8):
    """Train one model sequentially on two disjoint halves of ``dataset``.

    The data is split at random into two halves; a fresh ``StrongCustomCNN``
    is trained on the first half and then, without resetting its weights, on
    the second half. Each phase gets a new optimizer and scheduler from
    ``train_model``.

    Args:
        dataset: Training pool to split. It must not contain the validation
            images, otherwise the reported accuracy is inflated.
        epochs_first: Epochs spent on the first half.
        epochs_second: Epochs spent on the second half.

    Returns:
        Validation accuracy (percent) on the global ``val_loader`` after the
        second phase.
    """
    mid = len(dataset) // 2
    d1, d2 = random_split(dataset, [mid, len(dataset) - mid])

    l1 = DataLoader(d1, batch_size=32, shuffle=True)
    l2 = DataLoader(d2, batch_size=32, shuffle=True)

    model = StrongCustomCNN(num_classes).to(device)

    train_model(model, l1, val_loader, epochs=epochs_first)
    acc = train_model(model, l2, val_loader, epochs=epochs_second)

    return acc

# Use the training split only: full_dataset also contains the images behind
# val_loader, so training on it would leak validation data into the model.
continual_acc = continual_learning(train_dataset)

**Accuracy**

In [None]:
# Side-by-side summary of the validation accuracy reached by each approach.
summary_lines = (
    f"Custom CNN Accuracy      : {baseline_acc:.2f}%",
    f"Zero-Shot Accuracy       : {zero_shot_acc:.2f}%",
    f"Few-Shot Accuracy        : {few_shot_acc:.2f}%",
    f"Continual Learning Acc   : {continual_acc:.2f}%",
)
for summary_line in summary_lines:
    print(summary_line)