In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets

# Define a simple CNN teacher model (larger model)
class TeacherModel(nn.Module):
    """Teacher network: one 3x3 convolution followed by a linear classifier.

    The linear layer is sized for 32 feature maps of 32x32 pixels, so inputs
    must be batches of 3-channel 32x32 images. The output holds 10 raw class
    logits per image.
    """

    def __init__(self):
        super().__init__()
        # A 3x3 kernel with padding=1 leaves the spatial size unchanged.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.fc = nn.Linear(in_features=32 * 32 * 32, out_features=10)

    def forward(self, x):
        """Return class logits of shape (batch, 10) for the image batch ``x``."""
        feature_maps = self.conv1(x)
        # Collapse channel and spatial axes into one feature vector per image.
        flat_features = feature_maps.view(feature_maps.size(0), -1)
        return self.fc(flat_features)

# Define a simpler CNN student model
class StudentModel(nn.Module):
    """Student network: same layout as the teacher but with 16 feature maps.

    Expects batches of 3-channel 32x32 images (the linear layer is sized for
    16 maps of 32x32) and returns 10 raw class logits per image.
    """

    def __init__(self):
        super().__init__()
        # A 3x3 kernel with padding=1 leaves the spatial size unchanged.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
        self.fc = nn.Linear(in_features=16 * 32 * 32, out_features=10)

    def forward(self, x):
        """Return class logits of shape (batch, 10) for the image batch ``x``."""
        feature_maps = self.conv1(x)
        # Collapse channel and spatial axes into one feature vector per image.
        flat_features = feature_maps.view(feature_maps.size(0), -1)
        return self.fc(flat_features)

In [None]:
# CIFAR-10 train/test splits. Each image is converted to a tensor and every
# channel is normalised with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])
# The train split is built first, then the test split (same order as before).
train_dataset, test_dataset = (
    datasets.CIFAR10(root='./data', train=is_train, transform=transform, download=True)
    for is_train in (True, False)
)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
import torch
import torch.nn.functional as F

def custom_kl_div_loss(student_logits, teacher_logits, reduction='batchmean'):
    """KL divergence KL(teacher || student) computed from raw logits.

    Both inputs are turned into distributions with a softmax over ``dim=1``.
    The computation stays in log-space (``log_softmax``) so that very
    confident logits do not underflow to ``log(0) = -inf`` and poison the
    loss and gradients with ``inf``/``nan``.

    Args:
        student_logits: Tensor of student logits; classes along ``dim=1``.
        teacher_logits: Tensor of teacher logits with the same shape.
        reduction: ``'none'`` returns the per-sample divergence (summed over
            ``dim=1``); ``'sum'`` adds the per-sample values; ``'mean'`` and
            ``'batchmean'`` both average the per-sample values.

    Returns:
        The per-sample divergences for ``'none'``, otherwise a scalar tensor.

    Raises:
        ValueError: if ``reduction`` is not one of the supported names, or if
            the two logit tensors do not have the same shape.
    """
    if reduction not in ('none', 'sum', 'mean', 'batchmean'):
        # Previously an unknown name fell through and silently returned None.
        raise ValueError(f"Unsupported reduction: {reduction!r}")
    if student_logits.shape != teacher_logits.shape:
        # Fail loudly instead of broadcasting or raising a cryptic size error.
        raise ValueError(
            "student_logits and teacher_logits must have the same shape, got "
            f"{tuple(student_logits.shape)} and {tuple(teacher_logits.shape)}"
        )

    student_log_probs = F.log_softmax(student_logits, dim=1)
    teacher_log_probs = F.log_softmax(teacher_logits, dim=1)
    teacher_probs = teacher_log_probs.exp()

    # sum_c p_teacher(c) * (log p_teacher(c) - log p_student(c)) per sample
    kl_div = torch.sum(teacher_probs * (teacher_log_probs - student_log_probs), dim=1)

    if reduction == 'none':
        return kl_div
    if reduction == 'sum':
        return torch.sum(kl_div)
    # 'mean' and 'batchmean' coincide: kl_div is already summed over classes.
    return torch.mean(kl_div)

In [None]:
# Fresh teacher and student networks (teacher is constructed first).
teacher_model, student_model = TeacherModel(), StudentModel()

# Hard-label classification loss used by the training loops.
criterion = nn.CrossEntropyLoss()

# One Adam optimizer per network: optimizer1 -> teacher, optimizer2 -> student.
optimizer1, optimizer2 = (
    optim.Adam(network.parameters(), lr=0.001)
    for network in (teacher_model, student_model)
)


In [None]:
# Training loop for the teacher model (cross-entropy on the hard labels only).
num_epochs = 1
batch_size = 64

# Shuffled mini-batches instead of walking the raw dataset one image at a
# time: the DataLoader stacks images into (batch, 3, 32, 32) tensors and
# labels into a (batch,) integer tensor, so no unsqueeze / tensor([...])
# wrapping is needed.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

for epoch in range(num_epochs):
    teacher_model.train()
    for inputs, labels in train_loader:
        optimizer1.zero_grad()

        # Forward pass through the teacher
        teacher_logits = teacher_model(inputs)

        ce_loss = criterion(teacher_logits, labels)
        total_loss = ce_loss

        total_loss.backward()
        optimizer1.step()

In [None]:
# Training loop for the student model: cross-entropy on the hard labels plus
# alpha-weighted KL divergence towards the teacher's temperature-scaled output.
num_epochs = 1
alpha = 0.7  # weight of the distillation term
# NOTE(review): T < 1 sharpens both distributions; distillation setups usually
# use T > 1 and scale the soft loss by T**2. Values kept as-is; confirm intent.
T = 0.5
batch_size = 64

# Shuffled mini-batches instead of iterating the raw dataset sample by sample.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# The teacher is a fixed target during distillation.
teacher_model.eval()

for epoch in range(num_epochs):
    student_model.train()
    for inputs, labels in train_loader:
        optimizer2.zero_grad()

        # Teacher forward pass without autograd: otherwise backward() would
        # also compute and accumulate gradients for the teacher's weights.
        with torch.no_grad():
            teacher_logits = teacher_model(inputs)
        student_logits = student_model(inputs)

        ce_loss = criterion(student_logits, labels)
        distillation_loss = custom_kl_div_loss(student_logits / T, teacher_logits / T)
        total_loss = ce_loss + (alpha * distillation_loss)

        total_loss.backward()
        optimizer2.step()

In [None]:
# Validation loop: top-1 accuracy of the student, one image at a time.
# Note that this pass iterates over test_dataset.
student_model.eval()
correct, total = 0, 0

with torch.no_grad():
    for inputs, labels in test_dataset:
        # inputs[None] adds the leading batch dimension expected by the model
        student_logits = student_model(inputs[None])
        predicted = torch.max(student_logits, 1).indices
        correct += int((predicted == labels).sum())
        total += 1

print(f'Validation Accuracy: {100 * correct / total:.2f}%')

# Persist the student's weights for later use
torch.save(student_model.state_dict(), 'student_model.pth')

Validation Accuracy: 10.00%


In [None]:
# Testing loop: top-1 accuracy of the student over test_dataset.
student_model.eval()
correct = total = 0

with torch.no_grad():
    for inputs, labels in test_dataset:
        single_image_logits = student_model(inputs.unsqueeze(0))
        student_logits = single_image_logits
        # Index 1 of the (values, indices) result is the arg-max class.
        predicted = student_logits.max(1)[1]
        hit_count = (predicted == labels).sum().item()
        total += 1
        correct += hit_count

print(f'Test Accuracy: {100 * correct / total:.2f}%')

Test Accuracy: 10.00%


In [None]:
# Keep the last training sample (image tensor, integer label) for the
# inspection cells that follow. Indexing the dataset directly yields the same
# pair the full loop ended on, without loading and transforming every image.
inputs, labels = train_dataset[len(train_dataset) - 1]


In [None]:
# Inspect the student's raw output for the sample currently held in
# `inputs` / `labels`. Only the final expression (`x`) is echoed by the
# notebook; the bare `inputs.size()` and `labels` lines are evaluated but
# not displayed.
inputs.size()
x=student_model(inputs.unsqueeze(0))  # unsqueeze(0) adds the leading batch dimension
labels
x

tensor([[-0.1038, -0.2898, -0.2397,  0.3103,  0.0420, -0.3298, -0.1199,  0.0911,
         -0.2800, -0.0858]], grad_fn=<AddmmBackward0>)

In [None]:
# Shape of the logits: one row (a batch of one image) by ten class scores.
x.size()

torch.Size([1, 10])

In [None]:
# torch.max along dim 1 returns a (values, indices) pair; `indices` holds the
# arg-max class. Unlike the evaluation loops, `predicted` here keeps the whole
# pair rather than only the indices.
predicted = torch.max(x, 1)
predicted

torch.return_types.max(
values=tensor([0.3103], grad_fn=<MaxBackward0>),
indices=tensor([3]))

# Knowledge distillation from ResNet18

In [None]:
# Rebuild the CIFAR-10 splits for the ResNet-18 distillation section.
# Images become tensors and each channel is normalised with mean 0.5, std 0.5.
preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
]
transform = transforms.Compose(preprocessing_steps)

cifar_options = dict(root='./data', transform=transform, download=True)
train_dataset = datasets.CIFAR10(train=True, **cifar_options)
test_dataset = datasets.CIFAR10(train=False, **cifar_options)

In [None]:
# Start from a fresh student network; the existing teacher_model is reused.
student_model = StudentModel()

# Hard-label classification loss.
criterion = nn.CrossEntropyLoss()

# Adam optimizers bound to the parameters that exist right now:
# optimizer1 -> teacher_model, optimizer2 -> the student created above.
adam_settings = {'lr': 0.001}
optimizer1 = optim.Adam(teacher_model.parameters(), **adam_settings)
optimizer2 = optim.Adam(student_model.parameters(), **adam_settings)

In [None]:
import torch
import torchvision.transforms as transforms
import torchvision.models as models
from PIL import Image
import requests
import torch
import torch.nn as nn

# Load pre-trained ResNet-18 model to act as the teacher.
# NOTE(review): newer torchvision releases deprecate `pretrained=True` in
# favour of the `weights=` argument; confirm against the installed version.
modelteacher = models.resnet18(pretrained=True)
# Switch the teacher to evaluation mode; it is only used for forward passes here.
modelteacher.eval()
# child model
class SimpleStudModel(nn.Module):
    """Compact CNN student: three conv/ReLU/max-pool stages and a linear head.

    The channel widths grow 3 -> 64 -> 128 -> 256. Each stage halves the
    spatial size through its 2x2 max-pool, and a global average pool reduces
    the final maps to one 256-dimensional vector per image.

    Args:
        num_classes: Number of output logits produced by the linear head.
    """

    def __init__(self, num_classes):
        super().__init__()
        stage_layers = []
        in_channels = 3
        for out_channels in (64, 128, 256):
            stage_layers.extend([
                nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2),
            ])
            in_channels = out_channels
        # Same nine layers in the same order, so state_dict keys are unchanged.
        self.features = nn.Sequential(*stage_layers)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(256, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes)."""
        pooled = self.avgpool(self.features(x))
        # (batch, 256, 1, 1) -> (batch, 256)
        embedding = pooled.view(pooled.size(0), -1)
        return self.fc(embedding)

# Preprocessing for images fed to the ResNet teacher: resize to 224x224,
# convert to a tensor, normalise each channel with mean 0.5 and std 0.5.
# Rebinding `transform` here only affects later uses of the name; datasets
# that were already constructed keep the transform object they were given.
resnet_preprocessing = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
]
transform = transforms.Compose(resnet_preprocessing)

# Number of classes for the student's output head; adjust for your task.
num_classes = 10
student_model = SimpleStudModel(num_classes=num_classes)

# Function to perform inference on an image
def inference(image_path):
    """Run the ResNet teacher on one image file and return class probabilities.

    Args:
        image_path: Path of the image file to open with PIL.

    Returns:
        1-D tensor with the softmax of the teacher's output for this image.
    """
    # Image.open keeps the file handle open; the context manager closes it
    # once the pixels have been read by the transform.
    with Image.open(image_path) as image:
        # Force three channels: grayscale, palette or RGBA files would not
        # match the 3-channel Normalize and the model's first convolution.
        image_tensor = transform(image.convert("RGB")).unsqueeze(0)  # Add batch dimension
    with torch.no_grad():
        output = modelteacher(image_tensor)
    probabilities = torch.nn.functional.softmax(output[0], dim=0)
    return probabilities

# Example usage: classify one image with the teacher and print the arg-max index.
image_path = "/content/Sans titre.jpeg"  # Replace with the actual image path

result = inference(image_path)
class_idx = torch.argmax(result).item()

# NOTE(review): the recorded output prints a length of 1000, so `class_idx`
# indexes the teacher's 1000-way output rather than the 10 CIFAR-10 classes.
# Presumably ImageNet class names are needed to interpret it; confirm.

print(f"Predicted class index: {class_idx}")
print(len(result))

# Training loop for the SimpleStudModel student, distilling from the ResNet-18 teacher.
num_epochs = 1
alpha = 0.7  # weight of the distillation term
# NOTE(review): T < 1 sharpens both distributions; distillation setups usually
# use T > 1 and scale the soft loss by T**2. Values kept as-is; confirm intent.
T = 0.5
batch_size = 64

# Rebuild the optimizer for the student that exists now. The previous
# optimizer2 was created for an earlier StudentModel instance, so stepping it
# would never update the SimpleStudModel being trained here.
optimizer2 = optim.Adam(student_model.parameters(), lr=0.001)

# Shuffled mini-batches instead of iterating the raw dataset sample by sample.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# The teacher is a fixed target during distillation.
modelteacher.eval()

for epoch in range(num_epochs):
    student_model.train()
    for inputs, labels in train_loader:
        optimizer2.zero_grad()

        # Teacher forward pass without autograd: no gradients are needed for
        # (or should be accumulated in) the frozen teacher.
        with torch.no_grad():
            teacher_logits = modelteacher(inputs)
        student_logits = student_model(inputs)

        # NOTE(review): the stock ResNet-18 head produces 1000 logits while the
        # student produces `num_classes`. The teacher needs a matching head
        # (e.g. fine-tuned on this dataset) before its output can be distilled.
        # Fail with a clear message instead of a cryptic size error.
        if teacher_logits.shape != student_logits.shape:
            raise ValueError(
                "Teacher and student logits must have the same shape for distillation, got "
                f"{tuple(teacher_logits.shape)} and {tuple(student_logits.shape)}"
            )

        ce_loss = criterion(student_logits, labels)
        distillation_loss = custom_kl_div_loss(student_logits / T, teacher_logits / T)
        total_loss = ce_loss + (alpha * distillation_loss)

        total_loss.backward()
        optimizer2.step()




Predicted class index: 257
1000
