# Prepare the workspace
Check the installed torch version and whether CUDA is available (it is only available when a GPU is enabled).


In [1]:
import torch

# First line: installed torch version.
# Second line: CUDA availability (should be True when GPU is enabled).
print(torch.__version__, torch.cuda.is_available(), sep="\n")

2.0.1+cu117
False


# Imports here


In [2]:
import pandas as pd
import numpy as np
import torch
from torch import nn, optim
from torchvision import datasets, transforms, models
import json
from PIL import Image
import matplotlib.pyplot as plt


## Load the data
Here you'll use `torchvision` to load the data.


In [3]:
# Root folder of the dataset; each split lives in its own sub-folder.
data_dir = 'flower_data'
train_dir, valid_dir, test_dir = (
    f'{data_dir}/{split}' for split in ('train', 'valid', 'test')
)


# Define your transforms for the training, validation, and testing sets


In [4]:
data_transforms = {
    # Training: random crop and horizontal flip, then tensor conversion and
    # per-channel normalisation.
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
    # Validation and test share one deterministic recipe (resize, centre
    # crop, tensor conversion, normalisation); each split still gets its
    # own Compose instance.
    **{
        split: transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
        for split in ('valid', 'test')
    },
}


# Load the datasets with ImageFolder


In [5]:
# One ImageFolder per split, each paired with the transform of the same name.
image_datasets = {
    split: datasets.ImageFolder(folder, transform=data_transforms[split])
    for split, folder in (
        ('train', train_dir),
        ('valid', valid_dir),
        ('test', test_dir),
    )
}


# Using the image datasets and the transforms, define the dataloaders


In [6]:
# Only the training loader is shuffled. Evaluation metrics do not depend on
# sample order, so the validation and test loaders keep a fixed order to make
# evaluation runs reproducible.
dataloaders = {
    'train': torch.utils.data.DataLoader(image_datasets['train'], batch_size=32, shuffle=True),
    'valid': torch.utils.data.DataLoader(image_datasets['valid'], batch_size=32, shuffle=False),
    'test': torch.utils.data.DataLoader(image_datasets['test'], batch_size=32, shuffle=False)
}


### Label mapping
You'll also need to load in a mapping from category label to category name. You can find this in the file `cat_to_name.json`.

In [7]:
# Load the category-label -> category-name mapping.
# The encoding is pinned to UTF-8 so the file is read the same way on every
# platform instead of depending on the locale's default encoding.
with open('cat_to_name.json', 'r', encoding='utf-8') as f:
    cat_to_name = json.load(f)


# Building and training the classifier
Now that the data is ready, it's time to build and train the classifier.

In [8]:
def train_network(model, dataloaders, criterion, optimizer, epochs=5):
    """Train ``model`` on ``dataloaders['train']`` and return it.

    The model is moved to CUDA when it is available, otherwise to the CPU.
    After every epoch the mean per-batch loss and the training accuracy are
    printed.

    Args:
        model: Network to optimise; it is moved to the chosen device in place.
        dataloaders: Mapping whose ``'train'`` entry yields
            ``(inputs, labels)`` batches and supports ``len()``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimiser that updates the model's parameters.
        epochs: Number of passes over the training loader.

    Returns:
        The same ``model`` object after training.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    model.to(device)

    train_loader = dataloaders['train']
    for epoch_idx in range(epochs):
        model.train()  # enable training-mode behaviour for this epoch
        loss_sum = 0.0
        n_correct = 0
        n_seen = 0

        for batch, targets in train_loader:
            batch = batch.to(device)
            targets = targets.to(device)

            # Standard step: clear gradients, forward, backward, update.
            optimizer.zero_grad()
            logits = model(batch)
            batch_loss = criterion(logits, targets)
            batch_loss.backward()
            optimizer.step()

            loss_sum += batch_loss.item()
            # The index of the largest output is the predicted class.
            predictions = torch.max(logits, 1)[1]
            n_seen += targets.size(0)
            n_correct += (predictions == targets).sum().item()

        accuracy = n_correct / n_seen
        mean_loss = loss_sum / len(train_loader)
        print(f"Epoch {epoch_idx + 1}/{epochs}")
        print(f"Training Loss: {mean_loss:.4f}, Accuracy: {accuracy:.4f}")

    return model


## Testing your network
It's good practice to test your trained network on test data.

In [9]:
def test_network(model, dataloaders, criterion):
    """Evaluate ``model`` on ``dataloaders['test']`` and return its accuracy.

    The device is selected here (CUDA when available, otherwise CPU) in the
    same way as ``train_network`` does, so the function no longer depends on
    a ``device`` variable existing at module level.

    Args:
        model: Network to evaluate; it is moved to the chosen device and
            switched to evaluation mode.
        dataloaders: Mapping whose ``'test'`` entry yields
            ``(inputs, labels)`` batches and supports ``len()``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.

    Returns:
        Fraction of test samples that were classified correctly.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()  # Set the model to evaluation mode
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for inputs, labels in dataloaders['test']:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    test_accuracy = correct / total
    print(f"Test Loss: {running_loss / len(dataloaders['test']):.4f}, Accuracy: {test_accuracy:.4f}")
    return test_accuracy  # Return test accuracy


# The name matches pytest's ``test_*`` pattern, but this is an evaluation
# helper rather than a unit test; tell pytest not to collect it.
test_network.__test__ = False


## Save the checkpoint
Now that your network is trained, save the model so you can load it later for making predictions.

In [10]:
def save_checkpoint(model, optimizer, epoch, loss, file_path='model_checkpoint.pth'):
    """Write a training checkpoint to ``file_path``.

    The checkpoint is a dict with the keys ``'epoch'``,
    ``'model_state_dict'``, ``'optimizer_state_dict'`` and ``'loss'``.

    Args:
        model: Network whose ``state_dict()`` is stored.
        optimizer: Optimiser whose ``state_dict()`` is stored.
        epoch: Epoch value recorded in the checkpoint.
        loss: Loss value recorded in the checkpoint.
        file_path: Destination passed to ``torch.save``.
    """
    state = dict(
        epoch=epoch,
        model_state_dict=model.state_dict(),
        optimizer_state_dict=optimizer.state_dict(),
        loss=loss,
    )
    torch.save(state, file_path)
    print(f"Checkpoint saved to {file_path}")


## Loading the checkpoint
At this point it's good to write a function that can load a checkpoint and rebuild the model.

In [11]:
def load_model_from_checkpoint(model_class, model_params, checkpoint_path='model_checkpoint.pth'):
    """Rebuild a model and its optimizer from a saved checkpoint.

    Args:
        model_class: Callable that constructs the model.
        model_params: Dict of keyword arguments for ``model_class`` plus two
            optimizer entries: ``'optimizer_class'`` (required) and
            ``'optimizer_params'`` (keyword arguments for the optimizer,
            defaults to none). The optimizer entries are removed before the
            model is constructed; the caller's dict is not modified.
        checkpoint_path: File holding a dict with the keys
            ``'model_state_dict'``, ``'optimizer_state_dict'``, ``'epoch'``
            and ``'loss'``.

    Returns:
        Tuple ``(model, optimizer, epoch, loss)``.

    Raises:
        KeyError: If ``'optimizer_class'`` is missing from ``model_params``.
    """
    # Work on a copy so the optimizer entries can be split off without
    # touching the caller's dict; forwarding them to the model constructor
    # would fail with an unexpected-keyword error.
    model_kwargs = dict(model_params)
    optimizer_class = model_kwargs.pop('optimizer_class')
    optimizer_params = model_kwargs.pop('optimizer_params', None) or {}

    model = model_class(**model_kwargs)
    # Map tensors to the CPU so a checkpoint written on a GPU machine can be
    # loaded on a host without CUDA; load_state_dict copies the values into
    # the freshly built model's parameters.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer = optimizer_class(model.parameters(), **optimizer_params)
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']
    loss = checkpoint['loss']
    print(f"Checkpoint loaded from {checkpoint_path}. Model trained until epoch {epoch} with loss {loss}")
    return model, optimizer, epoch, loss


## Inference for classification
Now you'll write a function to use a trained network for inference.

In [12]:
def predict_class(image_path, model=None, input_size=(224, 224), mean=None, std=None):
    """Classify one image and return its ImageNet label and class index.

    Args:
        image_path: Path forwarded to ``process_image``.
        model: Network used for inference. When ``None``, a pretrained
            ``resnet18`` from torchvision is created. The model is always
            switched to evaluation mode before the forward pass.
        input_size: Forwarded to ``process_image``.
        mean: Forwarded to ``process_image``.
        std: Forwarded to ``process_image``.

    Returns:
        Tuple ``(class_label, class_idx)`` where ``class_idx`` is the index of
        the largest model output and ``class_label`` is the name stored for
        that index in the downloaded ImageNet class index.

    NOTE(review): ``process_image`` is not defined in the visible part of this
    file; it is assumed to return an un-batched image tensor -- confirm.
    NOTE(review): labels always come from the ImageNet index, so a model
    trained on a different label set will get unrelated names -- confirm the
    intended usage.
    """
    # Standard-library HTTP client; the original called ``requests``, which
    # this file never imports.
    import urllib.request

    processed_image = process_image(image_path, input_size, mean, std)
    processed_image = processed_image.unsqueeze(0)  # add the batch dimension
    if model is None:
        model = models.resnet18(pretrained=True)
    # Inference must run in eval mode for caller-supplied models as well.
    model.eval()
    # Run the input on whatever device the model's parameters live on.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        processed_image = processed_image.to(first_param.device)
    with torch.no_grad():
        output = model(processed_image)
    _, predicted_class = torch.max(output, 1)
    # Download the index -> label table once and cache it on the function.
    if not hasattr(predict_class, 'imagenet_class_index'):
        url = 'https://storage.googleapis.com/download.tensorflow.org/data/imagenet_class_index.json'
        with urllib.request.urlopen(url, timeout=30) as response:
            predict_class.imagenet_class_index = json.load(response)
    class_idx = predicted_class.item()
    class_label = predict_class.imagenet_class_index[str(class_idx)][1]
    return class_label, class_idx


## Example usage:
# Image to classify.
image_path = 'assets/Flowers.png'
# Per-channel normalisation constants; the same values are used by the
# Normalize steps in data_transforms.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
# No model is passed, so predict_class falls back to its default model.
predicted_label, predicted_class_idx = predict_class(image_path, mean=mean, std=std)
print(f'Predicted class: {predicted_label} (Class index: {predicted_class_idx})')


## Sanity Checking
Now that you can use a trained model for predictions, check to make sure it makes sense.

In [13]:
def display_image_with_predictions(image_path, top5_labels, probs, image_tensor):
    """Show an image with up to five predicted labels drawn on top of it.

    Args:
        image_path: Not used by this function; kept so existing callers
            continue to work.
        top5_labels: Sequence of predicted label names, best first.
        probs: Sequence of probabilities aligned with ``top5_labels``; each
            value is multiplied by 100 and shown as a percentage.
        image_tensor: Image tensor, optionally with a leading batch
            dimension of size 1, which is squeezed away.

    NOTE(review): if ``image_tensor`` has been normalised, the displayed
    colours will look wrong unless it is un-normalised first -- confirm with
    the caller.
    """
    image_tensor = image_tensor.squeeze(0)
    image = transforms.ToPILImage()(image_tensor)
    plt.figure(figsize=(8, 6))
    plt.imshow(image)
    plt.axis('off')
    plt.title("Top 5 Predictions")
    # zip stops at the shortest input, so fewer than five predictions no
    # longer raise IndexError; range(5) keeps the limit at five lines.
    for row, label, prob in zip(range(5), top5_labels, probs):
        plt.text(0, row * 20, f'{label}: {prob*100:.2f}%', color='white', fontsize=12)
    plt.show()
