In [1]:
import torch
import torchvision
import torch.nn as nn
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, random_split
import torch.optim as optim
from PIL import Image
import os
from torchvision.datasets import OxfordIIITPet

In [2]:
# Step 1: Set device (GPU if available, else CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [3]:
# Step 2: Define data transforms
# We need to match MobileNet's expected input size (224x224) and normalization
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [4]:
# Step 3: Load and transform the dataset
# Replace the dataset loading section with Oxford-IIIT Pet Dataset
print("Downloading and preparing the Oxford-IIIT Pet Dataset...")
dataset = OxfordIIITPet(
    root='./data',
    split='trainval',
    download=True,
    transform=data_transforms
)

# Split dataset into train and test
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

Downloading and preparing the Oxford-IIIT Pet Dataset...
Downloading https://thor.robots.ox.ac.uk/pets/images.tar.gz to data/oxford-iiit-pet/images.tar.gz


100%|██████████| 792M/792M [00:30<00:00, 25.9MB/s]


Extracting data/oxford-iiit-pet/images.tar.gz to data/oxford-iiit-pet
Downloading https://thor.robots.ox.ac.uk/pets/annotations.tar.gz to data/oxford-iiit-pet/annotations.tar.gz


100%|██████████| 19.2M/19.2M [00:01<00:00, 10.6MB/s]


Extracting data/oxford-iiit-pet/annotations.tar.gz to data/oxford-iiit-pet


In [5]:
# Step 4: Load pre-trained MobileNet model
# We're using MobileNetV2 which is smaller and faster than many other models
model = torchvision.models.mobilenet_v2(pretrained=True)

Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
100%|██████████| 13.6M/13.6M [00:00<00:00, 128MB/s]


In [6]:
# Step 5: Freeze the feature extraction layers
# This prevents the pre-trained weights from being updated during initial training
for param in model.features.parameters():
    param.requires_grad = False

In [7]:
# Step 6: Modify the classifier for our classification task
model.classifier[1] = nn.Linear(model.classifier[1].in_features, 37)  # 37 classes in Oxford-IIIT
model = model.to(device)

In [8]:
# Step 7: Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
# We only optimize the classifier parameters to speed up training
optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)

In [9]:
# Step 8: Training function
def train_model(model, epochs=10):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

        epoch_loss = running_loss / len(train_loader)
        accuracy = 100. * correct / total
        print(f'Epoch {epoch+1}: Loss = {epoch_loss:.4f}, Accuracy = {accuracy:.2f}%')

In [10]:
# Step 9: Testing function
def test_model(model):
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    accuracy = 100. * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')
    return accuracy

# Add new functions for prediction
def load_and_preprocess_image(image_path):
    """Load and preprocess a single image for prediction"""
    image = Image.open(image_path)
    return data_transforms(image).unsqueeze(0)  # Add batch dimension

def get_class_name(idx):
    """Convert class index to pet breed name"""
    class_names = dataset.classes
    return class_names[idx]

def predict_image(model, image_path):
    """Predict the class of a single image"""
    model.eval()
    with torch.no_grad():
        image = load_and_preprocess_image(image_path).to(device)
        outputs = model(image)
        _, predicted = outputs.max(1)
        return predicted.item(), get_class_name(predicted.item())

def load_saved_model():
    """Load the saved model"""
    model = torchvision.models.mobilenet_v2(pretrained=False)
    model.classifier[1] = nn.Linear(model.classifier[1].in_features, 37)
    model.load_state_dict(torch.load('cat_dog_classifier.pth'))
    model = model.to(device)
    return model

In [11]:
# Step 10: Train and test the model
if __name__ == "__main__":
    # Training section
    print("Starting training...")
    train_model(model)
    print("\nTesting the model...")
    test_accuracy = test_model(model)

    # Save the model
    torch.save(model.state_dict(), 'cat_dog_classifier.pth')
    print("\nModel saved successfully!")

    # Prediction section
    print("\nPrediction Demo:")
    # Load the saved model
    loaded_model = load_saved_model()
    loaded_model.eval()

    # Example of predicting images from a test directory
    test_dir = '/content/test_images'  # Create this directory and put some test images in it
    if os.path.exists(test_dir):
        for image_file in os.listdir(test_dir):
            if image_file.lower().endswith(('.png', '.jpg', '.jpeg')):
                image_path = os.path.join(test_dir, image_file)
                _, breed_name = predict_image(loaded_model, image_path)
                print(f"Image: {image_file} -> Predicted: {breed_name}")


Starting training...
Epoch 1: Loss = 1.7612, Accuracy = 59.68%
Epoch 2: Loss = 0.6381, Accuracy = 86.48%
Epoch 3: Loss = 0.4357, Accuracy = 89.50%
Epoch 4: Loss = 0.3411, Accuracy = 92.05%
Epoch 5: Loss = 0.2902, Accuracy = 93.48%
Epoch 6: Loss = 0.2422, Accuracy = 94.36%
Epoch 7: Loss = 0.2096, Accuracy = 95.31%
Epoch 8: Loss = 0.1882, Accuracy = 95.89%
Epoch 9: Loss = 0.1757, Accuracy = 95.89%
Epoch 10: Loss = 0.1645, Accuracy = 96.30%

Testing the model...
Test Accuracy: 89.27%

Model saved successfully!

Prediction Demo:


  model.load_state_dict(torch.load('cat_dog_classifier.pth'))


Image: 3.jpg -> Predicted: Egyptian Mau
Image: 1.jpg -> Predicted: Bengal
Image: 2.jpeg -> Predicted: Egyptian Mau
