![alt](https://research.utm.my/wp-content/uploads/sites/26/2022/06/logo-300x122.png)
# Center for Artificial Intelligence and Robotics
#### Universiti Teknologi Malaysia


### Data Acquisition Training

*Author: Dr. Ibrahim, Azzam, Thaqif & Syahmi*

**ResNet-50**

In [2]:
# Imports
import torch
import torch.nn as nn  # All neural network modules, nn.Linear, nn.Conv2d, BatchNorm, Loss functions
import torch.optim as optim  # For all Optimization algorithms, SGD, Adam, etc.
import torchvision.transforms as transforms  # Transformations we can perform on our dataset
import torchvision
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
from skimage import io
from skimage.transform import rescale, resize, downscale_local_mean
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torchvision.models import resnet50
from torch.utils.data import (
    Dataset,
    DataLoader,
)  # Gives easier dataset management and creates mini batches
from torchvision.datasets import ImageFolder
from torchvision import transforms
import matplotlib.pyplot as plt

In [3]:
# Set device: use the GPU when PyTorch can see one, otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Report the CUDA environment alongside the device that was chosen
print("CUDA available: ", torch.cuda.is_available())
print("CUDA device count: ", torch.cuda.device_count())
if device.type == "cuda":
    print("CUDA device name: ", torch.cuda.get_device_name(0))
print("Device: ", device)

CUDA available:  True
CUDA device count:  1
CUDA device name:  Orin
Device:  cuda


In [4]:
# Define transforms: one preprocessing pipeline per dataset split.
# Both pipelines end with the same per-channel normalization.
data_transforms = dict(
    # Training: random crop + random flip as augmentation
    train=transforms.Compose([
        transforms.RandomResizedCrop(size=224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
    # Validation: deterministic resize + center crop, no augmentation
    val=transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
)

In [None]:
def is_valid_file(path):
    """
    Decide whether a file should be loaded as an image.

    Args:
    - path (str): Path of the candidate file.

    Returns:
    - bool: True when the file has a known image extension (compared
      case-insensitively) and does not live inside a `.ipynb_checkpoints`
      directory; False otherwise.
    """
    # A tuple so it can be handed straight to str.endswith
    valid_extensions = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp')

    # Skip Jupyter checkpoint copies; treat both '/' and '\\' as separators
    if '.ipynb_checkpoints' in path.replace('\\', '/').split('/'):
        return False

    # Lower-case first so that e.g. 'IMG_001.JPG' is accepted as well
    return path.lower().endswith(valid_extensions)

# Directories for datasets -- replace the placeholders with your own directories
train_dir = '___'  # FIX ME # Replace with your training dataset path
val_dir = '___'  # FIX ME # Replace with your validation dataset path

# Training split: augmented transforms, reshuffled batches of 8
train_dataset = ImageFolder(
    root=train_dir,
    transform=data_transforms['train'],
    is_valid_file=is_valid_file,
)
train_loader = DataLoader(dataset=train_dataset, batch_size=8, shuffle=True)

# Validation split: deterministic transforms, fixed order
val_dataset = ImageFolder(
    root=val_dir,
    transform=data_transforms['val'],
    is_valid_file=is_valid_file,
)
val_loader = DataLoader(dataset=val_dataset, batch_size=8, shuffle=False)

In [None]:
# Initialize ResNet50 model
model = ______ # FIX ME # Replace with model name
num_ftrs = model.fc.in_features

# Size the output layer from the training dataset instead of hard-coding it,
# so the number of outputs always matches the label indices the loader yields
num_classes = len(train_dataset.classes)
model.fc = nn.Linear(num_ftrs, num_classes)

# Move the weights to the selected device (GPU if available)
model = model.to(device)

In [None]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()  # multi-class classification loss on raw logits
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Learning-rate schedule: lr is multiplied by gamma every `step_size` calls
# to scheduler.step()
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=7, gamma=0.1)

In [None]:
# Training the model with a scheduler
num_epochs = 15

for epoch in range(num_epochs):
    model.train()  # enable training-mode behaviour of the layers

    # Per-epoch accumulators
    running_loss, correct, total = 0.0, 0, 0

    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous batch
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so the epoch figure is a
        # per-sample average even when the last batch is smaller
        running_loss += loss.item() * inputs.size(0)
        predicted = outputs.max(dim=1).indices
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Advance the learning-rate schedule once per epoch
    scheduler.step()

    epoch_loss = running_loss / len(train_loader.dataset)
    epoch_acc = 100. * correct / total

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%")

In [None]:
# Evaluate the model on the validation set
model.eval()  # switch layers to inference behaviour
correct, total = 0, 0

# No gradients are needed for evaluation
with torch.no_grad():
    for inputs, labels in val_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Predicted class = index of the largest output per sample
        predicted = model(inputs).max(dim=1).indices

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

val_acc = 100. * correct / total
print(f"Validation Accuracy: {val_acc:.2f}%")

In [None]:
# Save the trained model: only the weights (state_dict) are written, to a
# file in the current user's home directory
model_save_path = os.path.expanduser('~/camera_classification.pth')
torch.save(obj=model.state_dict(), f=model_save_path)
print(f"Model saved to {model_save_path}")

In [None]:
# Define class labels -- replace with your own class names.
# predict_image looks a name up by the predicted output index, so the list
# must follow the same order as the label indices used during training
# (NOTE(review): presumably the order of train_dataset.classes -- confirm).
class_names = # FIX ME 

In [None]:
# Prediction function
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt

def predict_image(model, image_path, class_names):
    """
    Predict the class of an image using a trained model and display the image.

    The model is switched to evaluation mode and stays in that mode after
    the call.

    Args:
    - model (torch.nn.Module): Trained model for prediction.
    - image_path (str): Path to the image file.
    - class_names (list): List of class names; entry i names model output i.

    Returns:
    - str: Formatted prediction result.
    - dict: Probabilities for each class.

    Raises:
    - ValueError: If the number of class names differs from the number of
      model outputs.
    """
    # Transform to match the validation preprocessing
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    # Load the image inside a context manager so the file handle is released;
    # convert() returns an independent in-memory copy that outlives the file
    with Image.open(image_path) as source_image:
        image = source_image.convert('RGB')
    image_tensor = transform(image).unsqueeze(0)  # Add batch dimension

    # Run on whichever device the model's weights live on rather than relying
    # on a global; parameter-free models fall back to the CPU
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')
    image_tensor = image_tensor.to(model_device)

    # Make prediction
    model.eval()
    with torch.no_grad():
        output = model(image_tensor)
        # Index the single batch entry instead of squeeze() so the result
        # stays one-dimensional even for a single-output model
        probabilities = F.softmax(output, dim=1)[0].cpu().numpy()

    # A length mismatch would otherwise mislabel or silently drop classes
    if len(class_names) != probabilities.shape[0]:
        raise ValueError(
            f"class_names has {len(class_names)} entries but the model "
            f"produces {probabilities.shape[0]} outputs"
        )

    predicted_idx = int(probabilities.argmax())
    predicted_class = class_names[predicted_idx]
    confidence = probabilities[predicted_idx] * 100
    result = f"This image most likely belongs to {predicted_class} with a {confidence:.2f} percent confidence."

    # Display the image
    plt.imshow(image)
    plt.title(result)
    plt.axis('off')
    plt.show()

    return result, dict(zip(class_names, probabilities))

# Example usage: classify a single image and print the outcome
image_path = '___' # FIX ME # Replace with your image path
result, probabilities = # FIX ME # Call predict_image with the model, the image path and the class names
print(result)
print(f"Probabilities: {probabilities}")