In [1]:
import os
import pandas as pd
from scipy.io import loadmat
from sklearn.model_selection import train_test_split
from torch import nn
import torch
from torch.utils.data import DataLoader, Dataset
from PIL import Image
from torchvision import transforms
from matplotlib import pyplot as plt
from torchviz import make_dot

# modules from YOLOv5
from yolov5.models.common import DetectMultiBackend
from yolov5.models.experimental import attempt_load

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# Paths
mat_file_path = "imagelabels.mat"
image_dir = "images"
model_path = "models/yolov5s-cls.pt"

mat_data = loadmat(mat_file_path)

print("Keys in .mat file:", mat_data.keys())

labels = mat_data['labels'].squeeze()

image_files = sorted(os.listdir(image_dir))

assert len(labels) == len(image_files), "Number of labels and images do not match!"

# create a DataFrame with image names and labels
flowers = pd.DataFrame({
    "Image_Name": image_files,
    "Label": labels
})

Keys in .mat file: dict_keys(['__header__', '__version__', '__globals__', 'labels'])


In [4]:
# Split the dataset into train, validation, and test sets (50%, 25%, 25%)
train_val, test = train_test_split(flowers, test_size=0.25, stratify=flowers['Label'])
train, val = train_test_split(train_val, test_size=0.33, stratify=train_val['Label'])

print(f"Training set: {len(train)}, Validation set: {len(val)}, Test set: {len(test)}")

Training set: 4114, Validation set: 2027, Test set: 2048


In [5]:
# Define transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to fit pre-trained models
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalization for pre-trained models
])


class FlowerDataset(Dataset):
    def __init__(self, dataframe, image_dir, transform):
        self.dataframe = dataframe
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]
        img_path = os.path.join(self.image_dir, row["Image_Name"])
        image = Image.open(img_path).convert("RGB")
        label = torch.tensor(row["Label"] - 1, dtype=torch.long)
        if self.transform:
            image = self.transform(image)
        return image, label


# Create datasets
train_dataset = FlowerDataset(train, image_dir, transform)
val_dataset = FlowerDataset(val, image_dir, transform)
test_dataset = FlowerDataset(test, image_dir, transform)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [6]:
def train_model(model, criterion, optimizer, train_loader, val_loader, num_epochs=10):
    train_losses, val_losses, val_accuracies = [], [], []

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_losses.append(train_loss / len(train_loader))

        # Validation phase
        model.eval()
        val_loss, correct, total = 0.0, 0, 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, preds = torch.max(outputs, 1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)
        val_losses.append(val_loss / len(val_loader))
        val_accuracies.append(correct / total)

        print(f"Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_losses[-1]:.4f}, "
              f"Val Loss: {val_losses[-1]:.4f}, Val Accuracy: {val_accuracies[-1]:.4f}")

    return train_losses, val_losses, val_accuracies

In [7]:
def test_model(model, test_loader, criterion, device):
    model.eval()
    test_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            test_loss += loss.item()

            # Get predictions
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    # Calculate metrics
    accuracy = correct / total
    avg_loss = test_loss / len(test_loader)

    print(f"Test Loss: {avg_loss:.4f}, Test Accuracy: {accuracy:.4f}")
    return avg_loss, accuracy

# YOLO V5

In [10]:
# Load the YOLOv5 classification model
model = DetectMultiBackend(model_path, device=device, dnn=False, data=None, fp16=False)
stride, names, pt = model.stride, model.names, model.pt

# Modify the model's head for your specific number of classes
num_classes = flowers['Label'].nunique()

# Load the model as a PyTorch model
model_pt = attempt_load(model_path, device=device)
# Replace the final layer
in_features = model_pt.model[-1].linear.in_features
model_pt.model[-1].linear = nn.Linear(in_features, num_classes).to(device)
model = model_pt.to(device)

Fusing layers... 
Model summary: 149 layers, 5453480 parameters, 0 gradients, 11.5 GFLOPs
Fusing layers... 
Model summary: 149 layers, 5453480 parameters, 0 gradients, 11.5 GFLOPs


In [12]:
dummy_input = torch.randn(1, 3, 224, 224).to(device)  # Batch size 1, 3 color channels, 224x224 resolution
output = model(dummy_input)
graph = make_dot(output, params=dict(model.named_parameters()))
graph.render("yolo", format="png", cleanup=True)

'yolo.png'

In [20]:
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Train YOLOv5 model
train_losses, val_losses, accuracies = train_model(model, criterion, optimizer, train_loader, val_loader, num_epochs=10)

Epoch 1/10, Train Loss: 4.1585, Val Loss: 3.6745, Val Accuracy: 0.2630
Epoch 2/10, Train Loss: 3.3216, Val Loss: 2.9663, Val Accuracy: 0.4924
Epoch 3/10, Train Loss: 2.6702, Val Loss: 2.4150, Val Accuracy: 0.6576
Epoch 4/10, Train Loss: 2.1655, Val Loss: 1.9949, Val Accuracy: 0.7597
Epoch 5/10, Train Loss: 1.7761, Val Loss: 1.6852, Val Accuracy: 0.7943
Epoch 6/10, Train Loss: 1.4768, Val Loss: 1.4348, Val Accuracy: 0.8323
Epoch 7/10, Train Loss: 1.2527, Val Loss: 1.2603, Val Accuracy: 0.8550
Epoch 8/10, Train Loss: 1.0853, Val Loss: 1.1239, Val Accuracy: 0.8648
Epoch 9/10, Train Loss: 0.9476, Val Loss: 1.0057, Val Accuracy: 0.8747
Epoch 10/10, Train Loss: 0.8417, Val Loss: 0.9150, Val Accuracy: 0.8791


In [21]:
# Plot Accuracy and Cross-Entropy
plt.figure(figsize=(10, 5))
plt.plot(accuracies, label="Accuracy")
plt.title("Validation Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.show()

plt.figure(figsize=(10, 5))
plt.plot(train_losses, label="Train Loss")
plt.plot(val_losses, label="Validation Loss")
plt.title("Cross-Entropy Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.show()

In [22]:
# Test the trained model
test_loss, test_accuracy = test_model(model, test_loader, criterion, device)
print(f"Final Test Loss: {test_loss:.5f}")
print(f"Final Test Accuracy: {test_accuracy * 100:.2f}%")

Test Loss: 0.9324, Test Accuracy: 0.8765
Final Test Loss: 0.93237
Final Test Accuracy: 87.65%
