In [5]:
# Lab Session: Tuesday
# Demo: 12/12/2024
# Anishka Raina(Ar933), YI GU(yg642), Group 5
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split

# Paths
train_dir = '/home/gu/Desktop/5725/dataset/train'

# Hyperparameters
batch_size = 32
img_size = 224
epochs = 10
learning_rate = 0.001
validation_split = 0.2
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data Transforms
transform = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load Dataset
dataset = datasets.ImageFolder(train_dir, transform=transform)

# Split Dataset
train_size = int((1 - validation_split) * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Data Loaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Load Pretrained MobileNet
model = models.mobilenet_v2(pretrained=True)
model.classifier[1] = nn.Linear(model.last_channel, len(dataset.classes))  # Adjust for your number of classes
model = model.to(device)

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training Loop
for epoch in range(epochs):
    model.train()
    train_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    train_accuracy = 100. * correct / total
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {train_loss / len(train_loader):.4f}, Accuracy: {train_accuracy:.2f}%")

    # Validation
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    val_accuracy = 100. * correct / total
    print(f"Validation Loss: {val_loss / len(val_loader):.4f}, Accuracy: {val_accuracy:.2f}%")

# Save the Model
torch.save(model.state_dict(), "mobilenet_v2.pth")
print("Model saved as mobilenet_v2.pth")


Epoch 1/10, Loss: 0.3815, Accuracy: 90.83%
Validation Loss: 0.1255, Accuracy: 95.36%
Epoch 2/10, Loss: 0.0360, Accuracy: 99.33%
Validation Loss: 0.7231, Accuracy: 66.23%
Epoch 3/10, Loss: 0.0120, Accuracy: 99.83%
Validation Loss: 0.0013, Accuracy: 100.00%
Epoch 4/10, Loss: 0.0144, Accuracy: 99.50%
Validation Loss: 0.0746, Accuracy: 98.01%
Epoch 5/10, Loss: 0.0667, Accuracy: 98.50%
Validation Loss: 0.0574, Accuracy: 98.68%
Epoch 6/10, Loss: 0.0565, Accuracy: 98.00%
Validation Loss: 0.2103, Accuracy: 94.04%
Epoch 7/10, Loss: 0.0695, Accuracy: 98.17%
Validation Loss: 0.1180, Accuracy: 96.69%
Epoch 8/10, Loss: 0.0563, Accuracy: 98.50%
Validation Loss: 0.0821, Accuracy: 97.35%
Epoch 9/10, Loss: 0.0113, Accuracy: 99.83%
Validation Loss: 0.0013, Accuracy: 100.00%
Epoch 10/10, Loss: 0.0140, Accuracy: 99.50%
Validation Loss: 0.0074, Accuracy: 100.00%
Model saved as mobilenet_v2.pth


In [None]:
import matplotlib.pyplot as plt
from torchvision.transforms.functional import to_pil_image

def denormalize(tensor, mean, std):
    """ Denormalize the tensor back to original image values. """
    for t, m, s in zip(tensor, mean, std):
        t.mul_(s).add_(m)
    return tensor

def test_model_with_predictions(model, test_dir, batch_size, transform, device, class_names, mean, std):
    # Load the test dataset
    test_dataset = datasets.ImageFolder(test_dir, transform=transform)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    model.eval()
    correct = 0
    total = 0
    test_loss = 0.0
    criterion = nn.CrossEntropyLoss()

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            test_loss += loss.item()

            # Predictions
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

            # Display predictions: 4 images per row
            for i in range(0, inputs.size(0), 4):
                fig, axes = plt.subplots(1, 4, figsize=(16, 8))
                for j, ax in enumerate(axes):
                    if i + j >= inputs.size(0):
                        break
                    img = inputs[i + j].cpu()

                    # Denormalize the image
                    img = denormalize(img, mean, std)

                    # Convert the tensor to a PIL image
                    img = to_pil_image(img)

                    true_label = class_names[labels[i + j].item()]
                    predicted_label = class_names[predicted[i + j].item()]
                    ax.imshow(img)
                    ax.set_title(f"True: {true_label}\nPredicted: {predicted_label}", fontsize=10)
                    ax.axis('off')
                plt.tight_layout()
                plt.show()

    accuracy = 100. * correct / total
    avg_loss = test_loss / len(test_loader)

    print(f"Test Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")

# Paths
test_dir = '/home/gu/Desktop/5725/dataset/test'

# Class names
class_names = dataset.classes

# Mean and std for denormalization (adjust according to your normalization values)
mean = [0.485, 0.456, 0.406]  # Example mean for ImageNet normalization
std = [0.229, 0.224, 0.225]   # Example std for ImageNet normalization

# Call the test function
test_model_with_predictions(model, test_dir, batch_size, transform, device, class_names, mean, std)


In [4]:
import torch
from torchvision import models, transforms
from PIL import Image

def load_model(model_path, device, num_classes):
    # Load MobileNetV2 architecture
    model = models.mobilenet_v2(pretrained=False)
    
    # Adjust the classifier to match the number of classes
    model.classifier[1] = torch.nn.Linear(model.last_channel, num_classes)
    
    # Load the saved state_dict
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)
    model.eval()
    return model

def predict_single_image(model, image_path, transform, device, class_names):
    # Load and preprocess the image
    image = Image.open(image_path).convert('RGB')
    input_tensor = transform(image).unsqueeze(0)  # Add batch dimension
    input_tensor = input_tensor.to(device)

    with torch.no_grad():
        # Forward pass
        outputs = model(input_tensor)
        _, predicted = outputs.max(1)
        predicted_label = class_names[predicted.item()]

    return predicted_label

# Paths
model_path = 'mobilenet_v2.pth'
image_path = 'dataset/test/egg/IMG_5276.jpg'

# Define device (use CUDA if available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Class names
class_names = ['banana', 'broccoli', 'butter', 'corn','cream', 'egg','garlic','onion','pepper','potato', 'tomato']
num_classes = len(class_names)

# Load the model
model = load_model(model_path, device, num_classes)

# Define the transform (should match the one used during training)
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Adjust based on your model's input size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Predict the label for the given image
predicted_label = predict_single_image(model, image_path, transform, device, class_names)
print(f"Predicted Label: {predicted_label}")


Predicted Label: egg
