In [68]:
import numpy as np
import pandas as pd
import torch
import torchvision
from torchvision import transforms
import os
from PIL import Image
import matplotlib.pyplot as plt

In [69]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [70]:
from torch.utils.data import Dataset

class NestedImageFolder(Dataset):
    def __init__(self, root, transform=None):
        self.root = root
        self.transform = transform
        self.samples = []
        self.class_to_idx = {}

        classes = sorted(os.listdir(root))
        for idx, class_name in enumerate(classes):
            class_dir = os.path.join(root, class_name, 'images')
            if not os.path.isdir(class_dir):
                continue
            self.class_to_idx[class_name] = idx

            for fname in os.listdir(class_dir):
                if fname.lower().endswith(('.jpg', '.jpeg', '.png')):
                    path = os.path.join(class_dir, fname)
                    self.samples.append((path, idx))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        path, _ = self.samples[idx]  # Ignore label
        img = Image.open(path).convert('RGB')
        if self.transform:
            img = self.transform(img)
        return img  # Return only image


transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

train_dataset = NestedImageFolder(root='/kaggle/input/tiny-imagenet/tiny-imagenet-200/train/', transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=256, shuffle=True, num_workers=2)


In [71]:
class vgg16(torch.nn.Module):
    def __init__(self):
        super().__init__()
        vgg = torchvision.models.vgg16(pretrained=True)
        self.features = vgg.features
        self.avgpool = vgg.avgpool
        self.fc1 = torch.nn.Sequential(*list(vgg.classifier.children())[:2])  # Linear + ReLU

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        return x# (1, 4096) → ReLU

teacher = vgg16().to(device).eval()

In [72]:
student = torch.nn.Sequential(
    torch.nn.Conv2d(in_channels=3,out_channels=32,stride=1,kernel_size=2,padding=1),
    torch.nn.ReLU(inplace=True),
    torch.nn.MaxPool2d(kernel_size=2),# (None,112,112,32)
    
    torch.nn.Conv2d(in_channels=32,out_channels=64,stride=1,kernel_size=2,padding=1),
    torch.nn.ReLU(inplace=True),
    torch.nn.MaxPool2d(kernel_size=2),# (None,56,56,64)
    
    torch.nn.Dropout(0.1),

    torch.nn.Conv2d(in_channels=64,out_channels=128,stride=1,kernel_size=2,padding=1),
    torch.nn.ReLU(inplace=True),
    torch.nn.MaxPool2d(kernel_size=2),# (None,28,28,128)
    
    torch.nn.Dropout(0.1),
    
    torch.nn.Conv2d(in_channels=128,out_channels=256,stride=1,kernel_size=2,padding=1),
    torch.nn.ReLU(inplace=True),
    torch.nn.MaxPool2d(kernel_size=2),# (None,14,14,256)
    
    torch.nn.Conv2d(in_channels=256,out_channels=512,stride=1,kernel_size=2,padding=2),
    torch.nn.ReLU(inplace=True),
    torch.nn.MaxPool2d(kernel_size=2),# (None,8,8,512)

    torch.nn.Conv2d(in_channels=512,out_channels=512,stride=1,kernel_size=2,padding=1),
    torch.nn.ReLU(inplace=True),
    torch.nn.MaxPool2d(kernel_size=2),# (None,4,4,512)

    torch.nn.Flatten(),
    torch.nn.Dropout(0.1),
    torch.nn.Linear(in_features=4*4*512,out_features=4096)
)
student = student.to(device)

In [73]:
optimizer = torch.optim.Adam(student.parameters(), lr=1e-4)
loss_fn = torch.nn.MSELoss()
student.train()

Sequential(
  (0): Conv2d(3, 32, kernel_size=(2, 2), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(32, 64, kernel_size=(2, 2), stride=(1, 1), padding=(1, 1))
  (4): ReLU(inplace=True)
  (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Dropout(p=0.1, inplace=False)
  (7): Conv2d(64, 128, kernel_size=(2, 2), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Dropout(p=0.1, inplace=False)
  (11): Conv2d(128, 256, kernel_size=(2, 2), stride=(1, 1), padding=(1, 1))
  (12): ReLU(inplace=True)
  (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (14): Conv2d(256, 512, kernel_size=(2, 2), stride=(1, 1), padding=(2, 2))
  (15): ReLU(inplace=True)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False

In [74]:
epochs = 50

for epoch in range(epochs):
    for step, images in enumerate(train_loader):
        images = images.to(device)
        student_output = student(images)
        with torch.no_grad():
            teacher_output = teacher(images)

        loss = loss_fn(teacher_output,student_output)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Epoch : {epoch+1}")
    print(f"Loss ; {loss.item():.4f}")

OutOfMemoryError: CUDA out of memory. Tried to allocate 392.00 MiB. GPU 0 has a total capacity of 15.89 GiB of which 177.12 MiB is free. Process 2566 has 15.71 GiB memory in use. Of the allocated memory 14.88 GiB is allocated by PyTorch, and 546.56 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)