In [5]:
import torch
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
from torchvision.models import ViT_B_16_Weights
from PIL import Image

In [6]:
# Data loading and preprocessing
IMAGE_SIZE = (224, 224)              # every image is resized to this (height, width)
IMAGENET_MEAN = [0.485, 0.456, 0.406]  # per-channel mean used for normalization (standard ImageNet statistics)
IMAGENET_STD = [0.229, 0.224, 0.225]   # per-channel std used for normalization (standard ImageNet statistics)
BATCH_SIZE = 32

# The same preprocessing pipeline is shared by the train split, the test split,
# and single-image prediction further down in the notebook.
transform = transforms.Compose(
    [
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]
)

# CIFAR-10 is downloaded into ./data on first use.
train_dataset = torchvision.datasets.CIFAR10(
    root='./data', train=True, transform=transform, download=True
)
test_dataset = torchvision.datasets.CIFAR10(
    root='./data', train=False, transform=transform, download=True
)

# Only the training split is shuffled; the test split keeps its order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [7]:
# Model loading and modification
weights = ViT_B_16_Weights.IMAGENET1K_V1  # Look at the torchvision documentation for the different weights available
model = torchvision.models.vit_b_16(weights=weights)

# Freeze every pretrained parameter in one call; only the replacement head
# created below will receive gradient updates.
model.requires_grad_(False)

# Replace the classifier head with a fresh linear layer sized for this dataset.
# It is created after the freeze, so its parameters keep requires_grad=True.
num_classes = len(train_dataset.classes)
num_ftrs = model.heads.head.in_features
model.heads.head = torch.nn.Linear(in_features=num_ftrs, out_features=num_classes)

In [8]:
# Training setup
# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model = model.to(device)

LEARNING_RATE = 0.001

criterion = torch.nn.CrossEntropyLoss()
# Only the classifier head's parameters are handed to the optimizer.
optimizer = torch.optim.Adam(model.heads.head.parameters(), lr=LEARNING_RATE)

In [9]:
# Training loop
num_epochs = 1  # Adjust as needed
for epoch in range(num_epochs):
    # ---- one optimization pass over the training split ----
    model.train()
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step before computing new ones.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # ---- accuracy on the test split after each epoch ----
    # Evaluation (you can add more detailed metrics here)
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            # Index of the largest logit per row is the predicted class.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Test Accuracy: {100 * correct / total:.2f}%')

Epoch [1/1], Test Accuracy: 94.86%


In [10]:
# Prediction function
def predict_image(image_path):
    """Classify a single image file with the fine-tuned model.

    The image is loaded, forced to 3-channel RGB, run through the notebook's
    shared ``transform`` pipeline, and passed through ``model`` on ``device``.

    Args:
        image_path: Path (or file-like object) accepted by ``PIL.Image.open``.

    Returns:
        The class name from ``train_dataset.classes`` with the highest logit.

    Raises:
        FileNotFoundError / PIL.UnidentifiedImageError: propagated from
            ``Image.open`` when the file is missing or not a readable image.
    """
    # Open inside a context manager so the underlying file handle is closed,
    # and convert to RGB: grayscale, palette, or RGBA files would otherwise
    # yield a tensor whose channel count does not match the 3-channel
    # Normalize step and the model's expected input.
    with Image.open(image_path) as img:
        image = transform(img.convert("RGB")).unsqueeze(0).to(device)  # Add batch dimension

    # Do not rely on an earlier cell having switched the model to eval mode;
    # set it here and restore whatever mode the model was in afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            output = model(image)
    finally:
        model.train(was_training)

    predicted_class = output.argmax(dim=1)
    return train_dataset.classes[predicted_class.item()]

In [11]:
# Example usage
# Classify one image file and report the predicted class name.
example_image_path = "plane.jpg"
predicted_class = predict_image(example_image_path)
print(f"Predicted class: {predicted_class}")

Predicted class: airplane
