<a href="https://colab.research.google.com/github/SEXYLIULIU/assignment-2-thermal-dog-and-person/blob/main/Assignment2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import os

# List all files in /content to confirm the file name and location
print("Files in /content:")
print(os.listdir('/content'))


Files in /content:
['.config', 'thermal_data', 'sample_data']


**1.Organize extracted file with ideal structure**

In [7]:
import zipfile
import os

# Define the path to the uploaded zip files
dog_zip_path = '/content/Thermal Dogs and People.v5-raw-images_dogclassonly.yolov11.zip'
person_zip_path = '/content/Thermal Dogs and People.v4-raw-images_personclassonly.yolov11.zip'

# Define the extraction directories
dog_extract_dir = '/content/thermal_data/dog'
person_extract_dir = '/content/thermal_data/person'

# Ensure extraction directories exist
os.makedirs(dog_extract_dir, exist_ok=True)
os.makedirs(person_extract_dir, exist_ok=True)

# Unzip the files
with zipfile.ZipFile(dog_zip_path, 'r') as zip_ref:
    zip_ref.extractall(dog_extract_dir)

with zipfile.ZipFile(person_zip_path, 'r') as zip_ref:
    zip_ref.extractall(person_extract_dir)

print("Files extracted successfully.")


Files extracted successfully.


**2.Data Preparation and Preprocessing**

In [8]:
import os
import shutil
from sklearn.model_selection import train_test_split
import glob

# Base directory for the classification dataset
base_dir = '/content/thermal_classification'
os.makedirs(base_dir, exist_ok=True)

# Create train, valid, and test directories for each class
for split in ['train', 'valid', 'test']:
    for class_name in ['dog', 'person']:
        os.makedirs(os.path.join(base_dir, split, class_name), exist_ok=True)

# Gather all images from the extracted directories
dog_images = glob.glob(f"{dog_extract_dir}/**/*.jpg", recursive=True)
person_images = glob.glob(f"{person_extract_dir}/**/*.jpg", recursive=True)

# Split data into train (70%), valid (15%), and test (15%)
dog_train, dog_temp = train_test_split(dog_images, test_size=0.3, random_state=42)
dog_valid, dog_test = train_test_split(dog_temp, test_size=0.5, random_state=42)

person_train, person_temp = train_test_split(person_images, test_size=0.3, random_state=42)
person_valid, person_test = train_test_split(person_temp, test_size=0.5, random_state=42)

# Function to move files to the classification folders
def move_files(file_list, target_dir):
    for file_path in file_list:
        shutil.move(file_path, target_dir)

# Move images to respective directories
move_files(dog_train, os.path.join(base_dir, 'train', 'dog'))
move_files(dog_valid, os.path.join(base_dir, 'valid', 'dog'))
move_files(dog_test, os.path.join(base_dir, 'test', 'dog'))

move_files(person_train, os.path.join(base_dir, 'train', 'person'))
move_files(person_valid, os.path.join(base_dir, 'valid', 'person'))
move_files(person_test, os.path.join(base_dir, 'test', 'person'))

print("Files organized into classification structure.")


Files organized into classification structure.


In [10]:
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

# Define transformations including resizing, normalization, and augmentation
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Load datasets
train_dataset = ImageFolder(root=os.path.join(base_dir, 'train'), transform=transform)
valid_dataset = ImageFolder(root=os.path.join(base_dir, 'valid'), transform=transform)
test_dataset = ImageFolder(root=os.path.join(base_dir, 'test'), transform=transform)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

print("Data loaders created for train, validation, and test sets.")


Data loaders created for train, validation, and test sets.


In [11]:
import torch
import torchvision.models as models

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# AlexNet
alexnet = models.alexnet(pretrained=True)
alexnet.classifier[6] = torch.nn.Linear(alexnet.classifier[6].in_features, 2)  # 2 classes: dog and person
alexnet = alexnet.to(device)


Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100%|██████████| 233M/233M [00:03<00:00, 65.5MB/s]


In [12]:
# ResNet-18
resnet = models.resnet18(pretrained=True)
resnet.fc = torch.nn.Linear(resnet.fc.in_features, 2)
resnet = resnet.to(device)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 149MB/s]


In [13]:
# MobileNet
mobilenet = models.mobilenet_v2(pretrained=True)
mobilenet.classifier[1] = torch.nn.Linear(mobilenet.classifier[1].in_features, 2)
mobilenet = mobilenet.to(device)


Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
100%|██████████| 13.6M/13.6M [00:00<00:00, 129MB/s]


**4.Training and Hyperparameter Tuning**

In [14]:
import torch.optim as optim

# Optimizers for each model
alexnet_optimizer = optim.SGD(alexnet.parameters(), lr=0.001, momentum=0.9)
resnet_optimizer = optim.Adam(resnet.parameters(), lr=0.0001)
mobilenet_optimizer = optim.SGD(mobilenet.parameters(), lr=0.001, momentum=0.9)

# Training function
def train_model(model, optimizer, train_loader, val_loader, num_epochs=5):
    model.train()
    for epoch in range(num_epochs):
        total_loss, correct = 0, 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = torch.nn.CrossEntropyLoss()(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            correct += (outputs.argmax(1) == labels).type(torch.float).sum().item()

        # Validation accuracy
        val_correct = sum((model(images.to(device)).argmax(1) == labels.to(device)).type(torch.float).sum().item() for images, labels in val_loader)
        val_accuracy = val_correct / len(val_loader.dataset)

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss / len(train_loader):.4f}, Val Acc: {val_accuracy:.4f}")

# Train each model
print("Training AlexNet...")
train_model(alexnet, alexnet_optimizer, train_loader, valid_loader)

print("Training ResNet-18...")
train_model(resnet, resnet_optimizer, train_loader, valid_loader)

print("Training MobileNet...")
train_model(mobilenet, mobilenet_optimizer, train_loader, valid_loader)


Training AlexNet...
Epoch 1/5, Loss: 0.7448, Val Acc: 0.4833
Epoch 2/5, Loss: 0.8028, Val Acc: 0.4667
Epoch 3/5, Loss: 0.7420, Val Acc: 0.4667
Epoch 4/5, Loss: 0.7559, Val Acc: 0.5000
Epoch 5/5, Loss: 0.7842, Val Acc: 0.5000
Training ResNet-18...
Epoch 1/5, Loss: 0.8485, Val Acc: 0.5833
Epoch 2/5, Loss: 0.7022, Val Acc: 0.4333
Epoch 3/5, Loss: 0.7255, Val Acc: 0.3667
Epoch 4/5, Loss: 0.7009, Val Acc: 0.4833
Epoch 5/5, Loss: 0.6894, Val Acc: 0.4333
Training MobileNet...
Epoch 1/5, Loss: 0.7167, Val Acc: 0.5833
Epoch 2/5, Loss: 0.7226, Val Acc: 0.4833
Epoch 3/5, Loss: 0.7019, Val Acc: 0.4667
Epoch 4/5, Loss: 0.7138, Val Acc: 0.4667
Epoch 5/5, Loss: 0.7104, Val Acc: 0.4167


**5.Evaluate Models on the Test Set**

In [15]:
from sklearn.metrics import accuracy_score, f1_score

def evaluate_model(model, test_loader):
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            all_preds.extend(outputs.argmax(1).cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Calculate metrics
    accuracy = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='weighted')
    print(f"Test Accuracy: {accuracy:.4f}, F1 Score: {f1:.4f}")

# Evaluate each model
print("\nEvaluating AlexNet on test set:")
evaluate_model(alexnet, test_loader)

print("\nEvaluating ResNet-18 on test set:")
evaluate_model(resnet, test_loader)

print("\nEvaluating MobileNet on test set:")
evaluate_model(mobilenet, test_loader)



Evaluating AlexNet on test set:
Test Accuracy: 0.5000, F1 Score: 0.3333

Evaluating ResNet-18 on test set:
Test Accuracy: 0.2903, F1 Score: 0.2896

Evaluating MobileNet on test set:
Test Accuracy: 0.3387, F1 Score: 0.3245


The results show low test accuracy and F1 scores for each model, which might indicate issues in model performance due to factors such as:

Dataset Size: Limited training data can make it hard for deep models to generalize well, leading to overfitting or underfitting.

Class Imbalance: If there’s an imbalance between the number of "dog" and "person" images, models may struggle to learn equally for both classes.

Insufficient Training Epochs: Five epochs may not be enough for the models to reach optimal performance.

Hyperparameter Settings: The learning rates, batch size, or other settings might need tuning.

Complexity of the Task: Thermal images might be harder to classify directly without specific adjustments, like customized preprocessing or augmentation.

**1.Data Augmentation and Preprocessing Enhancements**

In [16]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),  # New augmentation
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),   # Random crop for more variability
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])


**2. Increase Training Epochs**

In [None]:
# Increase epochs in the training loop
train_model(alexnet, alexnet_optimizer, train_loader, valid_loader, num_epochs=15)
train_model(resnet, resnet_optimizer, train_loader, valid_loader, num_epochs=15)
train_model(mobilenet, mobilenet_optimizer, train_loader, valid_loader, num_epochs=15)


Epoch 1/15, Loss: 0.7008, Val Acc: 0.4500
Epoch 2/15, Loss: 0.6958, Val Acc: 0.4667
Epoch 3/15, Loss: 0.6896, Val Acc: 0.4500
Epoch 4/15, Loss: 0.6974, Val Acc: 0.3167
Epoch 5/15, Loss: 0.6907, Val Acc: 0.4167
Epoch 6/15, Loss: 0.6911, Val Acc: 0.4000
Epoch 7/15, Loss: 0.6928, Val Acc: 0.3833
Epoch 8/15, Loss: 0.6893, Val Acc: 0.3667
Epoch 9/15, Loss: 0.6776, Val Acc: 0.4333
Epoch 10/15, Loss: 0.6864, Val Acc: 0.3833
Epoch 11/15, Loss: 0.6770, Val Acc: 0.4167
Epoch 12/15, Loss: 0.6865, Val Acc: 0.3500
Epoch 13/15, Loss: 0.6890, Val Acc: 0.2667
Epoch 14/15, Loss: 0.6865, Val Acc: 0.4500
Epoch 15/15, Loss: 0.6852, Val Acc: 0.4333
Epoch 1/15, Loss: 0.6687, Val Acc: 0.4333
Epoch 2/15, Loss: 0.6401, Val Acc: 0.4167
Epoch 3/15, Loss: 0.6935, Val Acc: 0.4833
Epoch 4/15, Loss: 0.6485, Val Acc: 0.4667
Epoch 5/15, Loss: 0.6602, Val Acc: 0.4167


**3. Adjust Learning Rates and Optimizers**

In [None]:
# Adjusted learning rates
alexnet_optimizer = optim.SGD(alexnet.parameters(), lr=0.0005, momentum=0.9)
resnet_optimizer = optim.Adam(resnet.parameters(), lr=0.00005)
mobilenet_optimizer = optim.SGD(mobilenet.parameters(), lr=0.0005, momentum=0.9)

# Add a scheduler to reduce the learning rate during training if validation accuracy plateaus
alexnet_scheduler = optim.lr_scheduler.StepLR(alexnet_optimizer, step_size=5, gamma=0.1)
resnet_scheduler = optim.lr_scheduler.StepLR(resnet_optimizer, step_size=5, gamma=0.1)
mobilenet_scheduler = optim.lr_scheduler.StepLR(mobilenet_optimizer, step_size=5, gamma=0.1)


**4. Re-run Training with Modified Settings**

In [9]:
def train_model_with_scheduler(model, optimizer, scheduler, train_loader, val_loader, num_epochs=15):
    model.train()
    for epoch in range(num_epochs):
        total_loss, correct = 0, 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = torch.nn.CrossEntropyLoss()(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            correct += (outputs.argmax(1) == labels).type(torch.float).sum().item()

        # Scheduler step
        scheduler.step()

        # Validation accuracy
        val_correct = sum((model(images.to(device)).argmax(1) == labels.to(device)).type(torch.float).sum().item() for images, labels in val_loader)
        val_accuracy = val_correct / len(val_loader.dataset)

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss / len(train_loader):.4f}, Val Acc: {val_accuracy:.4f}")

# Re-train each model with scheduler
print("Training AlexNet with scheduler...")
train_model_with_scheduler(alexnet, alexnet_optimizer, alexnet_scheduler, train_loader, valid_loader)

print("Training ResNet-18 with scheduler...")
train_model_with_scheduler(resnet, resnet_optimizer, resnet_scheduler, train_loader, valid_loader)

print("Training MobileNet with scheduler...")
train_model_with_scheduler(mobilenet, mobilenet_optimizer, mobilenet_scheduler, train_loader, valid_loader)


Training AlexNet with scheduler...


NameError: name 'alexnet' is not defined