#Mounting Google Drive

In [None]:
# Mount Google Drive at /content/drive so later cells can read files stored there.
from google.colab import drive
drive.mount('/content/drive')


In [None]:
import shutil
import os

src = "/content/drive/MyDrive/cat_species"
dst = "/content/cat_species"

# Check the Drive folder before touching dst: otherwise a missing source
# (e.g. Drive not mounted) would delete the local copy and only then fail.
if not os.path.isdir(src):
    raise FileNotFoundError(f"Source folder not found: {src}")

# copytree() refuses to write into an existing directory, so clear any stale copy.
if os.path.exists(dst):
    shutil.rmtree(dst)

shutil.copytree(src, dst)

print("Folder copied from Drive to Colab.")


#Installing dependencies

In [None]:
# Install ftfy, regex and tqdm, then CLIP itself straight from its GitHub repository.
!pip install -q ftfy regex tqdm
!pip install -q git+https://github.com/openai/CLIP.git

In [None]:
pip install diffusers transformers accelerate torch torchvision pillow


In [None]:
import torch
from diffusers import StableDiffusionPipeline
from PIL import Image
import os

#Loading Diffusion model

In [None]:
# Log in to the Hugging Face Hub from inside the notebook.
from huggingface_hub import notebook_login
notebook_login()

In [None]:
# Hub identifier of the Stable Diffusion checkpoint used for image generation.
model_id = "runwayml/stable-diffusion-v1-5"

# Load the weights in half precision and move the pipeline to the GPU in one
# chained expression (`.to()` returns the pipeline itself).
pipe = StableDiffusionPipeline.from_pretrained(
    model_id, torch_dtype=torch.float16
).to("cuda")

#Generating Synthetic dataset

In [None]:
# Breed names used both as folder names and inside the generation prompts.
cat_species = [
    "Abyssinian", "American Bobtail", "American Curl",
    "American Shorthair", "Balinese", "Bengal",
    "Birman", "Bombay", "British Shorthair",
    "Burmese", "Chartreux", "Cornish Rex",
    "Devon Rex", "Egyptian Mau", "Exotic Shorthair",
    "Himalayan", "Japanese Bobtail", "Korat",
    "LaPerm", "Maine Coon", "Manx",
    "Norwegian Forest", "Ocicat", "Oriental Shorthair",
    "Persian", "Peterbald", "Ragdoll",
    "Russian Blue", "Savannah", "Scottish Fold",
    "Selkirk Rex", "Siamese", "Siberian",
    "Singapura", "Somali", "Sphynx",
    "Tonkinese", "Toyger", "Turkish Angora",
    "Turkish Van", "Snowshoe", "Burmilla",
]
# Sanity check: show how many breeds are listed.
species_count = len(cat_species)
print(species_count)


In [None]:
# Root output folder (relative to the current working directory); created if missing.
base_dir = "cat_species"
os.makedirs(base_dir, exist_ok=True)

print("Parent folder created:", base_dir)


In [None]:
# Create one sub-folder per species under base_dir; existing folders are left untouched.
for species in cat_species:
    folder_path = os.path.join(base_dir, species)
    os.makedirs(folder_path, exist_ok=True)

# Report the actual number of species instead of a hard-coded count, so the
# message stays correct if cat_species is edited.
print(f"All {len(cat_species)} species folders created successfully.")


In [None]:
# Settings for the synthetic-image generation loop.
IMAGES_PER_LABEL = 10  # number of images generated for each species folder
NUM_STEPS = 20  # passed to the pipeline as num_inference_steps
GUIDANCE_SCALE = 6  # passed to the pipeline as guidance_scale


In [None]:
# Fill every species folder under base_dir with IMAGES_PER_LABEL generated images.
for species in os.listdir(base_dir):
    species_path = os.path.join(base_dir, species)

    # Only directories are treated as species; stray files are ignored.
    if os.path.isdir(species_path):
        # Folder names may contain underscores; the prompt uses spaces instead.
        readable_name = species.replace('_', ' ')
        prompt = f"A high quality realistic photo of a {readable_name} cat"

        for index in range(1, IMAGES_PER_LABEL + 1):
            result = pipe(
                prompt,
                num_inference_steps=NUM_STEPS,
                guidance_scale=GUIDANCE_SCALE,
            )
            # Files are numbered from 1: <species>_1.png ... <species>_N.png
            target = os.path.join(species_path, f"{species}_{index}.png")
            result.images[0].save(target)

        print(f"Generated {IMAGES_PER_LABEL} images for {species}")


#Classifying images with a pre-trained ResNet-50 model

In [None]:
import torch
from torchvision import models

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# `pretrained=True` is deprecated in torchvision >= 0.13; request the same
# ImageNet-1k V1 checkpoint explicitly through the weights enum.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
model = model.to(device)
# Inference mode: disables dropout and uses BatchNorm running statistics.
model.eval()


In [None]:
import os
from PIL import Image
from torchvision import transforms

# NOTE(review): this cell looks superseded — the following cell reassigns
# image_dir to "cat_species" and rebuilds an identical transform. Confirm and remove.
image_dir = "cat_species_all_images"

# Resize to 224x224, convert to a tensor, then normalize each channel with the
# mean/std values given below.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])


In [None]:
import os
from PIL import Image
from torchvision import transforms, models
from torchvision.models import ResNet50_Weights

image_dir = "cat_species"

# Preprocessing applied to every image before it is passed to the ResNet-50:
# resize to 224x224, convert to a tensor, normalize per channel.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

imagenet_classes = ResNet50_Weights.IMAGENET1K_V1.meta["categories"]

# ImageNet-1k class indices counted as "cat". Matching exact indices avoids the
# false positives of substring matching on label text, where "cat" also hits
# labels such as "catamaran" and "tiger" also hits "tiger shark".
# Expected labels: tabby, tiger cat, Persian cat, Siamese cat, Egyptian cat,
# lynx — printed below so the mapping can be verified against the weights' metadata.
cat_class_indices = {281, 282, 283, 284, 285, 287}
print("Counting as cat:", [imagenet_classes[i] for i in sorted(cat_class_indices)])

correct = 0
total = 0

for species in os.listdir(image_dir):
    species_path = os.path.join(image_dir, species)

    if not os.path.isdir(species_path):
        continue

    for img_name in os.listdir(species_path):
        if not img_name.lower().endswith((".png", ".jpg", ".jpeg")):
            continue

        img_path = os.path.join(species_path, img_name)
        # Context manager closes the file handle once the pixels are converted.
        with Image.open(img_path) as img:
            image = transform(img.convert("RGB")).unsqueeze(0).to(device)

        with torch.no_grad():
            pred_idx = model(image).argmax(dim=1).item()

        if pred_idx in cat_class_indices:
            correct += 1

        total += 1

# Fail with a clear message instead of a ZeroDivisionError when nothing was scored.
if total == 0:
    raise RuntimeError(f"No .png/.jpg/.jpeg images found under {image_dir!r}")

accuracy = (correct / total) * 100
print(f"Proxy accuracy (cat detection): {accuracy:.2f}%")

#Importing libraries for Custom CNN

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, Subset
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
import numpy as np
from sklearn.metrics import accuracy_score, classification_report
from PIL import Image
import os
from collections import defaultdict
import copy

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import numpy as np
from collections import defaultdict

from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, Subset

from sklearn.metrics import accuracy_score, f1_score, classification_report


#Custom Residual CNN with Squeeze-and-Excitation Attention

In [None]:
import torch.nn.functional as F

class SEBlock(nn.Module):
    """Squeeze-and-Excitation channel attention.

    Averages each channel over its spatial dimensions, passes the result
    through a two-layer bottleneck (ReLU then sigmoid) and rescales the input
    channel-wise with the resulting gates.

    Args:
        channels: Number of channels of the input feature map.
        r: Reduction ratio of the bottleneck. The hidden width is
            ``channels // r`` but never less than 1, so small channel counts
            do not produce an empty (zero-width) bottleneck.
    """

    def __init__(self, channels, r=16):
        super().__init__()
        # Clamp to 1: channels // r is 0 whenever channels < r.
        hidden = max(channels // r, 1)
        self.fc1 = nn.Linear(channels, hidden)
        self.fc2 = nn.Linear(hidden, channels)

    def forward(self, x):
        """Return ``x`` scaled per channel; ``x`` must be 4-D (b, c, h, w)."""
        b, c, _, _ = x.shape
        # Squeeze: mean over the two spatial dimensions -> (b, c).
        y = x.mean((2, 3))
        y = torch.relu(self.fc1(y))
        # Excite: gates in (0, 1), reshaped so they broadcast over h and w.
        y = torch.sigmoid(self.fc2(y)).view(b, c, 1, 1)
        return x * y


class ResidualSEBlock(nn.Module):
    """Residual block whose main branch ends in an SE attention module.

    Main branch: 3x3 conv -> BN -> ReLU -> 3x3 conv -> BN -> SEBlock.
    Shortcut: identity when the shape is preserved, otherwise a strided
    1x1 conv followed by BN. The two branches are summed and passed through ReLU.

    Args:
        in_ch: Number of input channels.
        out_ch: Number of output channels.
        stride: Stride of the first 3x3 convolution (and of the 1x1 shortcut).
    """

    def __init__(self, in_ch, out_ch, stride=1):
        super().__init__()
        main_path = [
            nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(),
            nn.Conv2d(out_ch, out_ch, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(out_ch),
        ]
        self.conv = nn.Sequential(*main_path)
        self.se = SEBlock(out_ch)

        # A projection is needed only when the shortcut would not match the
        # main branch's output shape.
        if in_ch == out_ch and stride == 1:
            self.skip = nn.Identity()
        else:
            self.skip = nn.Sequential(
                nn.Conv2d(in_ch, out_ch, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_ch),
            )

    def forward(self, x):
        """Apply the attended main branch, add the shortcut and return ReLU of the sum."""
        attended = self.se(self.conv(x))
        shortcut = self.skip(x)
        return F.relu(attended + shortcut)


class ResidualSE_CNN(nn.Module):
    """Four-stage residual CNN with SE attention and a linear classifier head.

    Layout: 7x7 stride-2 stem with max-pooling, four ResidualSEBlock stages
    (64 -> 64 -> 128 -> 256 -> 512 channels), global average pooling and a
    fully connected layer producing ``num_classes`` logits.

    Args:
        num_classes: Size of the classifier output.
    """

    def __init__(self, num_classes):
        super().__init__()

        stem_layers = [
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        ]
        self.stem = nn.Sequential(*stem_layers)

        self.l1 = ResidualSEBlock(64, 64)
        self.l2 = ResidualSEBlock(64, 128, stride=2)
        self.l3 = ResidualSEBlock(128, 256, stride=2)
        self.l4 = ResidualSEBlock(256, 512, stride=2)

        self.gap = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(512, num_classes)

    def _pooled(self, x):
        """Run stem and all four stages, then pool to a (batch, 512) tensor."""
        out = self.stem(x)
        for stage in (self.l1, self.l2, self.l3, self.l4):
            out = stage(out)
        return self.gap(out).flatten(1)

    def forward(self, x):
        """Return class logits for a batch of images."""
        return self.fc(self._pooled(x))

    def get_features(self, x):
        """Return L2-normalized pooled features (the input to the classifier head)."""
        return F.normalize(self._pooled(x), dim=1)

#Dataset Setup

In [None]:
def setup_data(data_dir, batch_size=32, img_size=224, seed=None):
    """Build train/validation/test loaders from an ImageFolder directory.

    The images are shuffled once and split 70% / 15% / 15%.

    Args:
        data_dir: Root directory with one sub-folder per class.
        batch_size: Batch size used by all three loaders.
        img_size: Images are resized to ``img_size x img_size``.
        seed: Optional integer. When given, the split is drawn from a private
            ``RandomState(seed)`` so it can be reproduced exactly; when
            ``None`` (default) the global NumPy RNG is used, as before.

    Returns:
        ``(train_loader, val_loader, test_loader, class_names)``.
    """
    transform = transforms.Compose([
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])

    dataset = ImageFolder(data_dir, transform=transform)
    n = len(dataset)

    # A seeded, private generator keeps the split reproducible without
    # disturbing (or depending on) the global NumPy random state.
    if seed is None:
        indices = np.random.permutation(n)
    else:
        indices = np.random.RandomState(seed).permutation(n)

    train_end = int(0.7 * n)
    val_end = int(0.85 * n)
    # Plain Python ints are the index type Subset is documented to take.
    train_idx = indices[:train_end].tolist()
    val_idx = indices[train_end:val_end].tolist()
    test_idx = indices[val_end:].tolist()

    # Only the training loader is shuffled; evaluation order is kept fixed.
    train_loader = DataLoader(Subset(dataset, train_idx),
                              batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(Subset(dataset, val_idx),
                            batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(Subset(dataset, test_idx),
                             batch_size=batch_size, shuffle=False)

    return train_loader, val_loader, test_loader, dataset.classes


#Training the model

In [None]:
def train_model(model, train_loader, val_loader, epochs=30, lr=1e-4):
    """Train ``model`` with Adam and label-smoothed cross-entropy.

    After every epoch the model is evaluated on ``val_loader`` and a summary
    line with loss and accuracy for both splits is printed.

    Args:
        model: Network mapping an input batch to class logits.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches. May be
            empty (e.g. a very small dataset); validation metrics are then
            reported as ``nan`` instead of raising ``ZeroDivisionError``.
        epochs: Number of passes over ``train_loader``.
        lr: Adam learning rate.

    Returns:
        The same ``model`` object, with its weights as of the last epoch.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
    nan = float("nan")

    for epoch in range(epochs):

        # -------- Training --------
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0
        train_batches = 0

        for x, y in train_loader:
            x, y = x.to(device), y.to(device)

            optimizer.zero_grad()
            outputs = model(x)
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_batches += 1
            preds = outputs.argmax(dim=1)
            train_correct += (preds == y).sum().item()
            train_total += y.size(0)

        # Loss is averaged per batch, accuracy per sample; both guard against
        # an empty loader.
        avg_train_loss = train_loss / train_batches if train_batches else nan
        train_acc = 100.0 * train_correct / train_total if train_total else nan

        # -------- Validation --------
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0
        val_batches = 0

        with torch.no_grad():
            for x, y in val_loader:
                x, y = x.to(device), y.to(device)
                outputs = model(x)
                loss = criterion(outputs, y)

                val_loss += loss.item()
                val_batches += 1
                preds = outputs.argmax(dim=1)
                val_correct += (preds == y).sum().item()
                val_total += y.size(0)

        avg_val_loss = val_loss / val_batches if val_batches else nan
        val_acc = 100.0 * val_correct / val_total if val_total else nan

        print(
            f"Epoch [{epoch+1}/{epochs}] | "
            f"Train Loss: {avg_train_loss:.4f}, Train Acc: {train_acc:.2f}% | "
            f"Val Loss: {avg_val_loss:.4f}, Val Acc: {val_acc:.2f}%"
        )

    return model


#Model Evaluation

In [None]:
def evaluate_model(model, test_loader):
    """Score ``model`` on ``test_loader`` and print a summary.

    Prints accuracy (in percent), macro-averaged F1 and scikit-learn's
    classification report.

    Args:
        model: Network mapping an input batch to class logits.
        test_loader: Iterable of ``(inputs, labels)`` batches.

    Returns:
        ``(accuracy, macro_f1)``, accuracy being a fraction in [0, 1].
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    model.to(device)
    model.eval()

    targets = []
    predictions = []

    # No gradients are needed for scoring.
    with torch.no_grad():
        for batch, labels in test_loader:
            logits = model(batch.to(device))
            batch_pred = logits.argmax(1).cpu()
            predictions.extend(batch_pred.numpy())
            targets.extend(labels.numpy())

    acc = accuracy_score(targets, predictions)
    f1 = f1_score(targets, predictions, average="macro")

    print("Test Accuracy:", acc*100)
    print("Macro F1:", f1)
    print(classification_report(targets, predictions))

    return acc, f1


#Few Shot Learning

In [None]:
def few_shot_learning(model, dataset, n_way=5, k_shot=5, n_query=5):
    """Run one N-way K-shot episode with class prototypes.

    ``n_way`` classes are sampled at random; for each, ``k_shot`` support and
    ``n_query`` query images are drawn without replacement. Every query is
    assigned to the class whose mean support feature (prototype) is nearest
    in Euclidean distance.

    Args:
        model: Module exposing ``get_features(batch)``.
        dataset: Indexable dataset returning ``(image_tensor, label)``. If it
            has a ``targets`` attribute (as ``ImageFolder`` does), labels are
            read from it, avoiding decoding every image just to group indices
            by class.
        n_way: Number of classes in the episode.
        k_shot: Support images per class.
        n_query: Query images per class.

    Returns:
        Fraction of query images classified correctly (float in [0, 1]).

    Raises:
        ValueError: Raised by ``np.random.choice`` when fewer than ``n_way``
            classes exist or a sampled class has fewer than
            ``k_shot + n_query`` images.
    """
    device = next(model.parameters()).device
    model.eval()

    # Group sample indices by label, preferring the cheap label list.
    labels = getattr(dataset, "targets", None)
    if labels is None:
        labels = (dataset[i][1] for i in range(len(dataset)))

    class_map = defaultdict(list)
    for idx, y in enumerate(labels):
        class_map[int(y)].append(idx)

    classes = np.random.choice(list(class_map.keys()), n_way, replace=False)

    support, query, q_labels = [], [], []

    with torch.no_grad():
        # Episode labels are the positions 0..n_way-1, not the dataset labels.
        for i, c in enumerate(classes):
            idxs = np.random.choice(class_map[int(c)], k_shot + n_query, replace=False)
            for j in idxs[:k_shot]:
                support.append(model.get_features(dataset[j][0].unsqueeze(0).to(device)))
            for j in idxs[k_shot:]:
                query.append(model.get_features(dataset[j][0].unsqueeze(0).to(device)))
                q_labels.append(i)

    support = torch.cat(support)
    query = torch.cat(query)
    q_labels = torch.tensor(q_labels).to(device)

    # Support features are stored class by class, k_shot rows each.
    prototypes = torch.stack([
        support[i*k_shot:(i+1)*k_shot].mean(0)
        for i in range(n_way)
    ])

    preds = torch.cdist(query, prototypes).argmin(1)
    return (preds == q_labels).float().mean().item()


#Zero Shot Learning

In [None]:
def zero_shot_clip(data_dir):
    """Zero-shot classification accuracy of CLIP ViT-B/32 on a class-per-folder dataset.

    Each sub-folder name of ``data_dir`` becomes the prompt
    ``"a photo of a <name> cat"`` (underscores replaced by spaces). Every image
    is assigned to the prompt with the highest cosine similarity.

    Args:
        data_dir: Root directory with one sub-folder per class.

    Returns:
        Fraction of images whose predicted class equals their folder (float in [0, 1]).

    Raises:
        ValueError: If no image files are found under ``data_dir``.
    """
    import clip
    import os
    from PIL import Image

    image_exts = (".jpg", ".jpeg", ".png", ".ppm", ".bmp", ".pgm",
                  ".tif", ".tiff", ".webp")

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model, preprocess = clip.load("ViT-B/32", device=device)
    model.eval()

    # Only real, non-hidden directories are classes: stray files or notebook
    # checkpoint folders would otherwise become prompts or crash listdir().
    classes = sorted(
        name for name in os.listdir(data_dir)
        if not name.startswith(".") and os.path.isdir(os.path.join(data_dir, name))
    )
    prompts = [f"a photo of a {c.replace('_',' ')} cat" for c in classes]

    correct, total = 0, 0
    with torch.no_grad():
        if prompts:
            text_feat = model.encode_text(clip.tokenize(prompts).to(device))
            # Unit-normalize so the dot product below is a cosine similarity.
            text_feat /= text_feat.norm(dim=1, keepdim=True)

        for i, c in enumerate(classes):
            class_dir = os.path.join(data_dir, c)
            for img in os.listdir(class_dir):
                # Skip anything PIL is not expected to open as an image.
                if not img.lower().endswith(image_exts):
                    continue
                # Close the file handle as soon as the pixels are preprocessed.
                with Image.open(os.path.join(class_dir, img)) as pil_img:
                    image = preprocess(pil_img.convert("RGB")).unsqueeze(0).to(device)
                img_feat = model.encode_image(image)
                img_feat /= img_feat.norm(dim=1, keepdim=True)
                pred = (img_feat @ text_feat.T).argmax().item()
                correct += int(pred == i)
                total += 1

    if total == 0:
        raise ValueError(f"No images found under {data_dir!r}")

    return correct / total


#Continual Learning

In [None]:
def continual_learning_ewc(model, dataset, classes, num_tasks=3, lambda_ewc=2000):
    """Train ``model`` on a sequence of class-incremental tasks with EWC.

    ``classes`` is cut into ``num_tasks`` consecutive groups of
    ``len(classes) // num_tasks`` labels (any remainder is unused). For each
    task the model is trained for 5 epochs with Adam (lr 1e-4); from the
    second task on, the loss includes the penalty
    ``lambda_ewc * sum(F * (theta - theta_old)^2)``. After each task, up to 20
    images of every class seen so far are scored.

    Args:
        model: Classifier to train in place.
        dataset: Indexable dataset returning ``(image_tensor, label)``. If it
            has a ``targets`` attribute, labels are read from it instead of
            decoding every image.
        classes: Sequence of integer class labels, in task order.
        num_tasks: Number of sequential tasks.
        lambda_ewc: Weight of the EWC penalty.

    Returns:
        ``(model, accs)`` where ``accs[t]`` is the accuracy in percent on all
        classes seen up to and including task ``t``. The model's
        train/eval mode is restored to what it was on entry.

    Raises:
        ValueError: If ``classes`` has fewer entries than ``num_tasks``.
    """
    device = next(model.parameters()).device
    was_training = model.training

    task_size = len(classes) // num_tasks
    if task_size == 0:
        raise ValueError(
            f"Cannot split {len(classes)} classes into {num_tasks} tasks"
        )

    # Group sample indices by label, preferring the cheap label list.
    labels = getattr(dataset, "targets", None)
    if labels is None:
        labels = (dataset[i][1] for i in range(len(dataset)))

    class_map = defaultdict(list)
    for idx, y in enumerate(labels):
        class_map[int(y)].append(idx)

    fisher, opt_params = {}, {}
    accs = []

    for t in range(num_tasks):
        task_classes = classes[t*task_size:(t+1)*task_size]
        idxs = [i for c in task_classes for i in class_map[c]]

        loader = DataLoader(Subset(dataset, idxs), batch_size=32, shuffle=True)
        opt = optim.Adam(model.parameters(), lr=1e-4)

        # -------- Train on the current task --------
        model.train()
        for _ in range(5):
            for x, y in loader:
                x, y = x.to(device), y.to(device)
                opt.zero_grad()
                loss = F.cross_entropy(model(x), y)
                if fisher:
                    penalty = sum(
                        (fisher[n] * (p - opt_params[n]).pow(2)).sum()
                        for n, p in model.named_parameters()
                    )
                    loss = loss + lambda_ewc * penalty
                loss.backward()
                opt.step()

        # Anchor weights must be detached constants. A plain clone() stays in
        # the autograd graph of the parameter, so the gradient flowing through
        # the anchor cancels the direct one and the penalty has no effect.
        opt_params = {n: p.detach().clone() for n, p in model.named_parameters()}
        fisher_new = {n: torch.zeros_like(p) for n, p in model.named_parameters()}

        # -------- Estimate parameter importance --------
        # Squared gradients of the batch-mean loss, averaged over batches.
        # Eval mode keeps BatchNorm statistics and Dropout out of the estimate.
        model.eval()
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            model.zero_grad()
            F.cross_entropy(model(x), y).backward()
            for n, p in model.named_parameters():
                if p.grad is not None:
                    fisher_new[n] += p.grad.pow(2)
        model.zero_grad()

        # Importance accumulates across tasks.
        fisher = {n: fisher.get(n, 0) + fisher_new[n] / len(loader) for n in fisher_new}

        # -------- Evaluate on every class seen so far --------
        # Still in eval mode; single-image batches must not go through
        # train-mode BatchNorm, and no graph is needed for scoring.
        correct, total = 0, 0
        with torch.no_grad():
            for c in classes[:(t+1)*task_size]:
                for i in class_map[c][:20]:
                    x, y = dataset[i]
                    pred = model(x.unsqueeze(0).to(device)).argmax(1).item()
                    correct += int(pred == y)
                    total += 1

        accs.append(100 * correct / total)

    model.train(was_training)
    return model, accs


#Main Pipeline

In [None]:
data_dir = "/content/cat_species"

# 1) Supervised baseline: split the data, train the CNN, score it on the test split.
train_loader, val_loader, test_loader, classes = setup_data(data_dir)

model = ResidualSE_CNN(num_classes=len(classes))

model = train_model(model, train_loader, val_loader, epochs=30)

test_acc, test_f1 = evaluate_model(model, test_loader)

# 2) Few-shot / continual experiments use the whole folder (no split), with
#    the same preprocessing as setup_data.
# NOTE(review): this dataset includes the images the CNN was trained on, so the
# few-shot score is not measured on held-out data.
dataset = ImageFolder(data_dir, transform=transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
]))

few_shot_acc = few_shot_learning(model, dataset)
zero_shot_acc = zero_shot_clip(data_dir)

# 3) Continual learning starts from a copy of the trained weights so the
#    supervised model itself is left untouched.
model_cl = ResidualSE_CNN(len(classes))
model_cl.load_state_dict(model.state_dict())
# A freshly constructed module lives on the CPU; move the copy to the trained
# model's device, because continual_learning_ewc runs wherever the model's
# parameters are.
model_cl.to(next(model.parameters()).device)
model_cl, cl_accs = continual_learning_ewc(model_cl, dataset, list(range(len(classes))))

print("\nSUMMARY")
print("Supervised Acc:", test_acc*100)
print("Few-shot Acc:", few_shot_acc*100)
print("Zero-shot Acc:", zero_shot_acc*100)
print("Continual Acc:", sum(cl_accs)/len(cl_accs))


#Results
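The custom Res-SE-CNN trained from scratch achieved 15.87% supervised test accuracy with a macro F1-score of 0.12. Few-shot classification (a single 5-way, 5-shot episode using class prototypes built from the CNN's features) reached 84.0% accuracy, while zero-shot classification with CLIP achieved 42.14% accuracy. These numbers are not directly comparable: the few-shot episode distinguishes only 5 classes (chance level 20%) and samples from the full dataset, including images used for training, whereas the supervised and zero-shot results are measured over all classes. Continual learning using EWC resulted in an average accuracy of 13.29%, highlighting the impact of catastrophic forgetting.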