In [None]:
from __future__ import print_function, division
import os
import time
import copy
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from shutil import copyfile

# Install kagglehub and import it
!pip install kagglehub
import kagglehub

# Download the dataset using kagglehub
path = kagglehub.dataset_download("akash2sharma/tiny-imagenet")
print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/akash2sharma/tiny-imagenet?dataset_version_number=1...


100%|██████████| 474M/474M [00:24<00:00, 20.5MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/akash2sharma/tiny-imagenet/versions/1


In [None]:
import os
import time
import copy
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split, WeightedRandomSampler
from torchvision import datasets, transforms, models
import numpy as np
import torch.nn.functional as F
from shutil import copyfile

# Pick the compute device: the first CUDA GPU when one is visible, else the CPU
use_gpu = torch.cuda.is_available()
device = torch.device("cuda:0") if use_gpu else torch.device("cpu")
print("Using CUDA" if use_gpu else "Not Using CUDA")

# Dataset directories
dataset_path = "/root/.cache/kagglehub/datasets/akash2sharma/tiny-imagenet/versions/1"
# The archive nests the data two levels deep; build that prefix once.
tiny_imagenet_root = os.path.join(dataset_path, 'tiny-imagenet-200', 'tiny-imagenet-200')
train_dir = os.path.join(tiny_imagenet_root, 'train')
val_dir = os.path.join(tiny_imagenet_root, 'val')
val_annotations_file = os.path.join(val_dir, "val_annotations.txt")  # tab-separated: image name, class id, ...
val_images_dir = os.path.join(val_dir, "images")
organized_val_dir = "/content/tiny-imagenet-val-organized/"  # Temporary directory

# Number of classes and training hyperparameters
num_classes = 200  # Tiny ImageNet has 200 classes
batch_size = 128
num_epochs = 12
feature_extract = True  # True: freeze the pretrained backbones, train only the newly added layers
# NOTE: the former `train_model = True` flag was removed. Nothing read it, and
# the `train_model()` function defined later in this cell rebinds the same
# name, so the flag could never be observed by later code.

# Transformations
# The backbones are loaded with ImageNet-pretrained weights and (with
# feature_extract=True) kept frozen, so they cannot adapt to a different input
# distribution. Inputs are therefore normalised with the ImageNet channel
# statistics those weights expect, not with a generic 0.5/0.5 scaling.
# NOTE(review): checkpoints trained with the previous 0.5/0.5 normalisation
# must be evaluated with that same normalisation.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

data_transforms = {
    # Training: random scale/aspect crop to 224x224 plus horizontal flip
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(imagenet_mean, imagenet_std)
    ]),
    # Validation: deterministic resize + centre crop to 224x224
    'val': transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(imagenet_mean, imagenet_std)
    ]),
}

# Organize validation dataset: copy every validation image into a
# sub-directory named after its class id, so the folder can be read with
# datasets.ImageFolder the same way as the training folder.
if not os.path.exists(organized_val_dir):
    os.makedirs(organized_val_dir)
    print(f"Created directory: {organized_val_dir}")

with open(val_annotations_file, "r") as f:
    for line in f:  # stream the file instead of loading it with readlines()
        # Strip the line terminator so a two-field line does not leave "\n"
        # glued to the class id.
        parts = line.rstrip("\r\n").split("\t")
        if len(parts) < 2 or not parts[0] or not parts[1]:
            continue  # blank or malformed line: nothing to place
        img_name, class_id = parts[0], parts[1]
        class_dir = os.path.join(organized_val_dir, class_id)
        os.makedirs(class_dir, exist_ok=True)
        src_path = os.path.join(val_images_dir, img_name)
        dest_path = os.path.join(class_dir, img_name)
        if not os.path.exists(src_path):
            continue  # annotation without a matching image file
        # Re-running the cell should not recopy everything: copy only when
        # the destination is missing or its size differs from the source
        # (e.g. a previously interrupted copy).
        if (not os.path.exists(dest_path)
                or os.path.getsize(dest_path) != os.path.getsize(src_path)):
            copyfile(src_path, dest_path)

# Load datasets
train_dataset = datasets.ImageFolder(train_dir, transform=data_transforms['train'])
val_dataset = datasets.ImageFolder(organized_val_dir, transform=data_transforms['val'])

# Debugging: Ensure labels are in the correct range
print(f"Train dataset classes: {train_dataset.classes}")
print(f"Validation dataset classes: {val_dataset.classes}")
# Each ImageFolder builds its own class -> index mapping from its own folder
# names, so validation labels are only meaningful if both mappings agree.
# Printing the lists does not verify that; assert it explicitly.
assert len(train_dataset.classes) == num_classes, (
    f"Expected {num_classes} training classes, found {len(train_dataset.classes)}"
)
assert val_dataset.class_to_idx == train_dataset.class_to_idx, (
    "Train and validation folders produce different class-to-index mappings"
)

# Weighted Sampler for balancing classes: every sample is weighted by the
# inverse frequency of its class.
train_targets = np.array([label for _, label in train_dataset.samples])
class_counts = np.bincount(train_targets)
class_weights = 1. / class_counts
samples_weights = class_weights[train_targets].tolist()
sampler = WeightedRandomSampler(samples_weights, len(samples_weights), replacement=True)

# Create DataLoaders (the sampler decides the training order, so `shuffle`
# is not passed for the training loader)
train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

dataloaders_dict = {'train': train_loader, 'val': val_loader}

# Hybrid Model Definition
class HybridModel(nn.Module):
    """Two-branch classifier combining VGG19-BN and ViT-B/16.

    Each branch gets a fresh ``nn.Linear`` head producing ``num_classes``
    outputs. The two outputs are concatenated and mapped to the final
    ``num_classes`` logits by one more linear layer.

    Args:
        num_classes: number of output classes.
        use_pretrained: load the ImageNet weights for both backbones when True,
            otherwise start from random initialisation.
        feature_extract: when True, every parameter that exists in a backbone
            at load time is frozen; only the replacement heads and the
            combining classifier (created afterwards) stay trainable.
    """

    def __init__(self, num_classes, use_pretrained=True, feature_extract=True):
        super().__init__()

        # `pretrained=` is deprecated in torchvision; use the `weights=` API
        # like the student model in this notebook does. "IMAGENET1K_V1" is the
        # checkpoint `pretrained=True` used to select.
        weights = "IMAGENET1K_V1" if use_pretrained else None

        # VGG19 (batch-norm variant)
        self.vgg = models.vgg19_bn(weights=weights)
        self.set_parameter_requires_grad(self.vgg, feature_extract)
        # Replace the last classifier layer AFTER freezing so the new head
        # keeps requires_grad=True.
        num_ftrs_vgg = self.vgg.classifier[6].in_features
        self.vgg.classifier[6] = nn.Linear(num_ftrs_vgg, num_classes)

        # Vision Transformer
        self.vit = models.vit_b_16(weights=weights)
        self.set_parameter_requires_grad(self.vit, feature_extract)
        num_ftrs_vit = self.vit.heads.head.in_features
        self.vit.heads.head = nn.Linear(num_ftrs_vit, num_classes)

        # Combined classifier over the concatenated branch outputs
        self.classifier = nn.Linear(num_classes * 2, num_classes)

    def set_parameter_requires_grad(self, model, feature_extracting):
        """Freeze all current parameters of ``model`` when ``feature_extracting`` is true."""
        if feature_extracting:
            for param in model.parameters():
                param.requires_grad = False

    def forward(self, x):
        """Run both branches on ``x`` and classify their concatenated outputs."""
        vgg_out = self.vgg(x)
        vit_out = self.vit(x)
        combined_out = torch.cat((vgg_out, vit_out), dim=1)
        output = self.classifier(combined_out)
        return output

# Model selection and factory
model_name = "hybrid_vgg_vit"
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    """Build the network registered under ``model_name``.

    Only "hybrid_vgg_vit" is known; any other name raises ``ValueError``.

    Returns:
        A ``(model, input_size)`` tuple, where ``input_size`` is 224 for the
        hybrid model.
    """
    # Guard clause: reject unknown names before doing any work.
    if model_name != "hybrid_vgg_vit":
        raise ValueError("Invalid model name")

    hybrid = HybridModel(num_classes, use_pretrained, feature_extract)
    return hybrid, 224

model_ft, input_size = initialize_model(model_name, num_classes, feature_extract, use_pretrained=True)
model_ft = model_ft.to(device)

# Optimizer, Loss Function, Scheduler
# Give the optimizer only the parameters that can actually receive gradients.
# With feature_extract=True the backbones are frozen, so passing them along is
# pure overhead. Materialising a list (instead of the one-shot generator that
# `parameters()` returns) also keeps `params_to_update` usable afterwards.
params_to_update = [param for param in model_ft.parameters() if param.requires_grad]
optimizer_ft = optim.SGD(params_to_update, lr=0.01, momentum=0.9)
criterion = nn.CrossEntropyLoss()
# Multiply the learning rate by 0.1 every 4 scheduler steps
scheduler = optim.lr_scheduler.StepLR(optimizer_ft, step_size=4, gamma=0.1)

# Training function with error checks
def train_model(model, dataloaders, criterion, optimizer, scheduler, num_epochs=25, patience=5):
    """Train ``model`` with a validation pass after every epoch.

    Batches are moved to the module-level ``device``. The weights of the
    epoch with the lowest validation loss are restored before returning.

    Args:
        model: network to train; it is called as ``model(inputs)`` and must
            return a tensor whose dimension 1 indexes the classes.
        dataloaders: mapping with the keys ``'train'`` and ``'val'``, each an
            iterable of ``(inputs, labels)`` batches.
        criterion: callable ``criterion(outputs, labels)`` returning the loss.
        optimizer: optimizer stepped once per training batch.
        scheduler: learning-rate scheduler stepped once per epoch.
        num_epochs: maximum number of epochs.
        patience: stop once this many validation epochs in a row have failed
            to improve on the best validation loss.

    Returns:
        ``model`` itself, loaded with the best weights seen.

    Raises:
        AssertionError: if a batch contains a label outside
            ``[0, number of model outputs)``.
    """
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = float('inf')
    early_stop_counter = 0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            model.train(is_train)  # train(False) is equivalent to eval()

            running_loss = 0.0
            running_corrects = 0
            seen = 0  # samples actually processed in this phase

            for inputs, labels in dataloaders[phase]:
                inputs, labels = inputs.to(device), labels.to(device)

                if is_train:
                    optimizer.zero_grad()

                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)

                    # Validate labels against what the model really outputs
                    # (not a global constant), including negative values,
                    # before they reach the loss.
                    n_outputs = outputs.size(1)
                    labels_ok = bool(((labels >= 0) & (labels < n_outputs)).all())
                    assert labels_ok, (
                        f"Invalid label found: min={labels.min()}, max={labels.max()}"
                    )

                    loss = criterion(outputs, labels)
                    _, preds = torch.max(outputs, 1)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                batch_n = inputs.size(0)
                running_loss += loss.item() * batch_n
                running_corrects += torch.sum(preds == labels.data).item()
                seen += batch_n

            # Average over the samples processed. An empty loader yields NaN
            # instead of crashing; NaN never counts as an improvement.
            epoch_loss = running_loss / seen if seen else float('nan')
            epoch_acc = running_corrects / seen if seen else float('nan')

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Save best model
            if phase == 'val' and epoch_loss < best_loss:
                best_loss = epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())
                early_stop_counter = 0
            elif phase == 'val':
                early_stop_counter += 1

        # Step the scheduler
        scheduler.step()

        if early_stop_counter >= patience:
            print("Early stopping triggered")
            break

    time_elapsed = time.time() - since
    print('Training complete')
    print(f'Elapsed: {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    model.load_state_dict(best_model_wts)
    return model

# Fit the hybrid teacher network; `model_ft` is rebound to the returned model
model_ft = train_model(
    model=model_ft,
    dataloaders=dataloaders_dict,
    criterion=criterion,
    optimizer=optimizer_ft,
    scheduler=scheduler,
    num_epochs=num_epochs,
)


Using CUDA
Created directory: /content/tiny-imagenet-val-organized/
Train dataset classes: ['n01443537', 'n01629819', 'n01641577', 'n01644900', 'n01698640', 'n01742172', 'n01768244', 'n01770393', 'n01774384', 'n01774750', 'n01784675', 'n01855672', 'n01882714', 'n01910747', 'n01917289', 'n01944390', 'n01945685', 'n01950731', 'n01983481', 'n01984695', 'n02002724', 'n02056570', 'n02058221', 'n02074367', 'n02085620', 'n02094433', 'n02099601', 'n02099712', 'n02106662', 'n02113799', 'n02123045', 'n02123394', 'n02124075', 'n02125311', 'n02129165', 'n02132136', 'n02165456', 'n02190166', 'n02206856', 'n02226429', 'n02231487', 'n02233338', 'n02236044', 'n02268443', 'n02279972', 'n02281406', 'n02321529', 'n02364673', 'n02395406', 'n02403003', 'n02410509', 'n02415577', 'n02423022', 'n02437312', 'n02480495', 'n02481823', 'n02486410', 'n02504458', 'n02509815', 'n02666196', 'n02669723', 'n02699494', 'n02730930', 'n02769748', 'n02788148', 'n02791270', 'n02793495', 'n02795169', 'n02802426', 'n02808440'

Downloading: "https://download.pytorch.org/models/vgg19_bn-c79401a0.pth" to /root/.cache/torch/hub/checkpoints/vgg19_bn-c79401a0.pth
100%|██████████| 548M/548M [00:02<00:00, 213MB/s]
Downloading: "https://download.pytorch.org/models/vit_b_16-c867db91.pth" to /root/.cache/torch/hub/checkpoints/vit_b_16-c867db91.pth
100%|██████████| 330M/330M [00:01<00:00, 222MB/s]


Epoch 0/11
----------
train Loss: 2.5200 Acc: 0.4862
val Loss: 0.9400 Acc: 0.7677
Epoch 1/11
----------
train Loss: 1.5467 Acc: 0.6326
val Loss: 0.7599 Acc: 0.8014
Epoch 2/11
----------
train Loss: 1.4274 Acc: 0.6570
val Loss: 0.7087 Acc: 0.8118
Epoch 3/11
----------
train Loss: 1.3697 Acc: 0.6693
val Loss: 0.6876 Acc: 0.8200
Epoch 4/11
----------
train Loss: 1.3289 Acc: 0.6789
val Loss: 0.6728 Acc: 0.8237
Epoch 5/11
----------
train Loss: 1.3232 Acc: 0.6806
val Loss: 0.6675 Acc: 0.8224
Epoch 6/11
----------
train Loss: 1.3085 Acc: 0.6835
val Loss: 0.6643 Acc: 0.8241
Epoch 7/11
----------
train Loss: 1.3066 Acc: 0.6830
val Loss: 0.6623 Acc: 0.8245
Epoch 8/11
----------
train Loss: 1.3013 Acc: 0.6837
val Loss: 0.6618 Acc: 0.8247
Epoch 9/11
----------
train Loss: 1.3047 Acc: 0.6822
val Loss: 0.6608 Acc: 0.8248
Epoch 10/11
----------
train Loss: 1.3024 Acc: 0.6835
val Loss: 0.6605 Acc: 0.8248
Epoch 11/11
----------
train Loss: 1.3016 Acc: 0.6836
val Loss: 0.6602 Acc: 0.8247
Training compl

In [None]:
import os
from google.colab import drive

# Attach Google Drive to this runtime so the checkpoint can be written there
drive.mount('/content/drive')

# Folder in Google Drive that receives the saved model (created on demand)
save_dir = "/content/drive/My Drive/saved_models"
os.makedirs(save_dir, exist_ok=True)

# Persist only the weights (state_dict) of the trained model
model_path = os.path.join(save_dir, "hybrid_vgg_vit_tiny_imagenet.pth")
teacher_state = model_ft.state_dict()
torch.save(teacher_state, model_path)

print(f"Model saved to {model_path}")


Mounted at /content/drive
Model saved to /content/drive/My Drive/saved_models/hybrid_vgg_vit_tiny_imagenet.pth


In [None]:
# Knowledge-distillation objective: soft teacher targets + hard labels
class KnowledgeDistillationLoss(nn.Module):
    """Blend of a temperature-softened KL term and ordinary cross-entropy.

    ``loss = alpha * KL(teacher_T || student_T) * T**2 + (1 - alpha) * CE(student, labels)``

    where the ``_T`` distributions are softmaxes of the logits divided by the
    temperature ``T``, and the KL term uses ``reduction="batchmean"``.
    """

    def __init__(self, temperature=3.0, alpha=0.7):
        super().__init__()
        self.temperature = temperature
        self.alpha = alpha
        self.ce_loss = nn.CrossEntropyLoss()  # supervises against the true labels
        self.kl_loss = nn.KLDivLoss(reduction="batchmean")  # matches the teacher distribution

    def forward(self, student_logits, teacher_logits, labels):
        """Return the combined distillation loss for one batch."""
        temp = self.temperature

        # KLDivLoss expects log-probabilities as input and probabilities as target
        teacher_probs = F.softmax(teacher_logits / temp, dim=1)
        student_log_probs = F.log_softmax(student_logits / temp, dim=1)
        soft_term = self.kl_loss(student_log_probs, teacher_probs)

        hard_term = self.ce_loss(student_logits, labels)

        # The soft term is rescaled by T^2 before blending with the hard term
        return self.alpha * soft_term * (temp ** 2) + (1 - self.alpha) * hard_term

# Student network factory
def initialize_student_model(num_classes):
    """Create the EfficientNet-B0 student, moved to the global ``device``.

    The network starts from the "IMAGENET1K_V1" weights. All parameters under
    ``.features`` are frozen, and ``classifier[1]`` is replaced by a new linear
    layer with ``num_classes`` outputs.
    """
    net = models.efficientnet_b0(weights="IMAGENET1K_V1")

    # Freeze the whole feature extractor
    for feature_param in net.features.parameters():
        feature_param.requires_grad = False

    # Swap the final linear layer for one sized to this task
    head_in_features = net.classifier[1].in_features
    net.classifier[1] = nn.Linear(head_in_features, num_classes)

    return net.to(device)

# Training Knowledge Distillation
def train_knowledge_distillation(teacher_model, student_model, dataloaders, criterion, optimizer, scheduler, num_epochs=12, patience=5):
    """Train ``student_model`` to imitate ``teacher_model`` and fit the labels.

    Batches are moved to the module-level ``device``. The teacher is kept in
    eval mode and only ever run under ``torch.no_grad()``. The student weights
    with the lowest validation loss are restored before returning.

    Args:
        teacher_model: fixed model providing target logits.
        student_model: model being optimised.
        dataloaders: mapping with the keys ``'train'`` and ``'val'``, each an
            iterable of ``(inputs, labels)`` batches.
        criterion: callable ``criterion(student_logits, teacher_logits, labels)``.
        optimizer: optimizer over the student's parameters, stepped per batch.
        scheduler: learning-rate scheduler stepped once per epoch.
        num_epochs: maximum number of epochs.
        patience: stop once this many validation epochs in a row have failed
            to improve on the best validation loss.

    Returns:
        ``student_model`` itself, loaded with the best weights seen.
    """
    # Imported locally: the notebook only imports tqdm in a later cell, so on
    # a fresh kernel (Restart & Run All) the name would otherwise be undefined
    # when this function runs.
    from tqdm import tqdm

    best_model_wts = copy.deepcopy(student_model.state_dict())
    best_loss = float('inf')
    early_stop_counter = 0

    teacher_model.eval()  # Teacher is fixed for the whole run

    for epoch in range(num_epochs):
        print(f"Epoch {epoch}/{num_epochs - 1}")
        print("-" * 10)

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            student_model.train(is_train)  # train(False) is equivalent to eval()

            running_loss = 0.0
            correct = 0
            total = 0

            for inputs, labels in tqdm(dataloaders[phase]):
                inputs, labels = inputs.to(device), labels.to(device)

                with torch.no_grad():
                    teacher_logits = teacher_model(inputs)  # Teacher predictions

                if is_train:
                    optimizer.zero_grad()

                with torch.set_grad_enabled(is_train):
                    student_logits = student_model(inputs)  # Student predictions
                    loss = criterion(student_logits, teacher_logits, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                batch_n = labels.size(0)
                running_loss += loss.item() * batch_n
                _, preds = torch.max(student_logits, 1)
                correct += torch.sum(preds == labels.data).item()
                total += batch_n

            # Average over the samples processed. An empty loader yields NaN
            # instead of crashing; NaN never counts as an improvement.
            epoch_loss = running_loss / total if total else float('nan')
            epoch_acc = correct / total if total else float('nan')

            print(f"{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")

            if phase == 'val' and epoch_loss < best_loss:
                best_loss = epoch_loss
                best_model_wts = copy.deepcopy(student_model.state_dict())
                early_stop_counter = 0
            elif phase == 'val':
                early_stop_counter += 1

        scheduler.step()

        if early_stop_counter >= patience:
            print("Early stopping triggered")
            break

    student_model.load_state_dict(best_model_wts)
    print("Knowledge Distillation complete")
    return student_model

# Distillation run: configure, train the student, save it
if __name__ == "__main__":
    # Settings for this run
    num_classes, num_epochs = 200, 12  # num_classes: update for your dataset
    patience, lr = 5, 0.001

    # Teacher: the already trained hybrid model held in `model_ft`
    teacher_model = model_ft.to(device)

    # Student: the model to be distilled into
    student_model = initialize_student_model(num_classes)

    # Loss, optimizer (Adam) and a step schedule that scales the learning
    # rate by 0.1 every 4 scheduler steps
    distillation_loss = KnowledgeDistillationLoss(temperature=3.0, alpha=0.7)
    optimizer = optim.Adam(student_model.parameters(), lr=lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.1)

    # Run the distillation loop
    trained_student_model = train_knowledge_distillation(
        teacher_model=teacher_model,
        student_model=student_model,
        dataloaders=dataloaders_dict,
        criterion=distillation_loss,
        optimizer=optimizer,
        scheduler=scheduler,
        num_epochs=num_epochs,
        patience=patience,
    )

    # Write the student's weights next to the teacher checkpoint on Drive
    save_dir = "/content/drive/My Drive/saved_models"
    os.makedirs(save_dir, exist_ok=True)
    student_model_path = os.path.join(save_dir, "efficientnet_student_fixed.pth")
    student_state = trained_student_model.state_dict()
    torch.save(student_state, student_model_path)
    print(f"Student model saved to {student_model_path}")


Epoch 0/11
----------


100%|██████████| 782/782 [08:00<00:00,  1.63it/s]


train Loss: 3.4648 Acc: 0.2712


100%|██████████| 79/79 [00:45<00:00,  1.73it/s]


val Loss: 3.0470 Acc: 0.4665
Epoch 1/11
----------


100%|██████████| 782/782 [08:01<00:00,  1.62it/s]


train Loss: 2.8888 Acc: 0.3339


100%|██████████| 79/79 [00:45<00:00,  1.74it/s]


val Loss: 2.7646 Acc: 0.4951
Epoch 2/11
----------


100%|██████████| 782/782 [08:01<00:00,  1.62it/s]


train Loss: 2.8222 Acc: 0.3414


100%|██████████| 79/79 [00:45<00:00,  1.73it/s]


val Loss: 2.6188 Acc: 0.5165
Epoch 3/11
----------


100%|██████████| 782/782 [08:02<00:00,  1.62it/s]


train Loss: 2.7964 Acc: 0.3467


100%|██████████| 79/79 [00:45<00:00,  1.74it/s]


val Loss: 2.6057 Acc: 0.5182
Epoch 4/11
----------


100%|██████████| 782/782 [08:02<00:00,  1.62it/s]


train Loss: 2.7568 Acc: 0.3561


100%|██████████| 79/79 [00:45<00:00,  1.73it/s]


val Loss: 2.5622 Acc: 0.5278
Epoch 5/11
----------


100%|██████████| 782/782 [08:02<00:00,  1.62it/s]


train Loss: 2.7483 Acc: 0.3558


100%|██████████| 79/79 [00:45<00:00,  1.73it/s]


val Loss: 2.5536 Acc: 0.5273
Epoch 6/11
----------


100%|██████████| 782/782 [08:02<00:00,  1.62it/s]


train Loss: 2.7416 Acc: 0.3561


100%|██████████| 79/79 [00:45<00:00,  1.73it/s]


val Loss: 2.5616 Acc: 0.5234
Epoch 7/11
----------


100%|██████████| 782/782 [08:02<00:00,  1.62it/s]


train Loss: 2.7293 Acc: 0.3600


100%|██████████| 79/79 [00:45<00:00,  1.73it/s]


val Loss: 2.5594 Acc: 0.5246
Epoch 8/11
----------


100%|██████████| 782/782 [08:01<00:00,  1.62it/s]


train Loss: 2.7354 Acc: 0.3582


100%|██████████| 79/79 [00:45<00:00,  1.73it/s]


val Loss: 2.5520 Acc: 0.5256
Epoch 9/11
----------


100%|██████████| 782/782 [08:01<00:00,  1.62it/s]


train Loss: 2.7349 Acc: 0.3591


100%|██████████| 79/79 [00:45<00:00,  1.73it/s]


val Loss: 2.5101 Acc: 0.5316
Epoch 10/11
----------


100%|██████████| 782/782 [08:01<00:00,  1.62it/s]


train Loss: 2.7317 Acc: 0.3568


100%|██████████| 79/79 [00:45<00:00,  1.74it/s]


val Loss: 2.5670 Acc: 0.5241
Epoch 11/11
----------


100%|██████████| 782/782 [08:01<00:00,  1.62it/s]


train Loss: 2.7352 Acc: 0.3584


100%|██████████| 79/79 [00:45<00:00,  1.73it/s]

val Loss: 2.5723 Acc: 0.5219
Knowledge Distillation complete
Student model saved to /content/drive/My Drive/saved_models/efficientnet_student_fixed.pth





In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tqdm import tqdm

# Metric computation over a full dataloader
def evaluate_model(model, dataloader, num_classes):
    """Score ``model`` on every batch of ``dataloader``.

    The model is switched to eval mode and run without gradient tracking on
    the global ``device``. ``num_classes`` is accepted but not used by the
    computation.

    Returns:
        ``(accuracy, precision, recall, f1)``; precision, recall and F1 are
        macro-averaged with ``zero_division=0``.
    """
    model.eval()
    predicted, expected = [], []

    with torch.no_grad():
        for inputs, labels in tqdm(dataloader, desc="Evaluating"):
            inputs = inputs.to(device)
            labels = labels.to(device)
            logits = model(inputs)
            batch_preds = torch.max(logits, 1)[1]  # index of the largest logit per sample

            predicted.extend(batch_preds.cpu().numpy())
            expected.extend(labels.cpu().numpy())

    # Shared settings for the three macro-averaged metrics
    macro_kwargs = dict(average="macro", zero_division=0)
    accuracy = accuracy_score(expected, predicted)
    precision = precision_score(expected, predicted, **macro_kwargs)
    recall = recall_score(expected, predicted, **macro_kwargs)
    f1 = f1_score(expected, predicted, **macro_kwargs)

    return accuracy, precision, recall, f1

# Teacher metrics on the validation loader
print("\nEvaluating Teacher Model:")
teacher_metrics = evaluate_model(teacher_model, dataloaders_dict["val"], num_classes)
teacher_accuracy, teacher_precision, teacher_recall, teacher_f1 = teacher_metrics
print(
    f"Teacher Model - Accuracy: {teacher_accuracy:.4f}, Precision: {teacher_precision:.4f}, "
    f"Recall: {teacher_recall:.4f}, F1-Score: {teacher_f1:.4f}"
)

# Student metrics on the same validation loader
print("\nEvaluating Student Model:")
student_metrics = evaluate_model(trained_student_model, dataloaders_dict["val"], num_classes)
student_accuracy, student_precision, student_recall, student_f1 = student_metrics
print(
    f"Student Model - Accuracy: {student_accuracy:.4f}, Precision: {student_precision:.4f}, "
    f"Recall: {student_recall:.4f}, F1-Score: {student_f1:.4f}"
)



Evaluating Teacher Model:


Evaluating: 100%|██████████| 79/79 [00:43<00:00,  1.80it/s]


Teacher Model - Accuracy: 0.8247, Precision: 0.8280, Recall: 0.8247, F1-Score: 0.8247

Evaluating Student Model:


Evaluating: 100%|██████████| 79/79 [00:18<00:00,  4.26it/s]

Student Model - Accuracy: 0.5316, Precision: 0.5385, Recall: 0.5316, F1-Score: 0.5233



