In [5]:
import itertools
import os

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from sklearn.metrics import confusion_matrix, f1_score
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import transforms
from torchvision.transforms import InterpolationMode


# Custom Dataset that reads images from a root directory structured by class
class CustomDataset(Dataset):
    """Image-classification dataset read from a directory of per-class folders.

    Every immediate sub-directory of ``root_dir`` is one class. Class names
    are sorted and mapped to consecutive integer labels starting at 0. Files
    are selected by (case-insensitive) extension and listed in sorted order so
    that a given index always refers to the same image.
    """

    # Lower-case file extensions that are treated as images.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')

    def __init__(self, root_dir, transform=None):
        """
        Args:
            root_dir (string): Directory with all the class folders.
            transform (callable, optional): Optional transform to be applied on an image.
        """
        self.root_dir = root_dir
        self.transform = transform
        # List and sort the class directories
        self.classes = sorted(
            d for d in os.listdir(root_dir)
            if os.path.isdir(os.path.join(root_dir, d))
        )
        self.class_to_idx = {cls_name: idx for idx, cls_name in enumerate(self.classes)}

        # os.listdir() returns entries in arbitrary order, so sort the file
        # names too: otherwise the index -> sample mapping (and therefore any
        # index-based split) can differ between runs and machines.
        self.samples = []
        for cls in self.classes:
            cls_folder = os.path.join(root_dir, cls)
            label = self.class_to_idx[cls]
            for fname in sorted(os.listdir(cls_folder)):
                if not fname.lower().endswith(self.IMAGE_EXTENSIONS):
                    continue
                path = os.path.join(cls_folder, fname)
                # Skip directories that merely have an image-like name.
                if os.path.isfile(path):
                    self.samples.append((path, label))

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.samples)

    def __getitem__(self, index):
        """Load the image at ``index`` as RGB and return ``(image, label)``.

        The image is passed through ``self.transform`` when one was given.
        """
        img_path, label = self.samples[index]
        # Image.open() is lazy and keeps the file open; the context manager
        # closes it once convert() has produced an independent RGB copy.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image, label

# A simple CNN model with three convolutional blocks
class SimpleCNN(nn.Module):
    """Small CNN classifier: three conv/ReLU/max-pool stages plus a two-layer head.

    The first linear layer expects 32 * 14 * 14 features. The shape comments
    below assume 224x224 inputs, for which the three max pools reduce the
    spatial size 224 -> 56 -> 28 -> 14.
    """

    def __init__(self, num_classes):
        super().__init__()
        # One (in_channels, out_channels, pool_size) triple per stage.
        stage_specs = (
            (3, 16, 4),    # conv: (B, 16, 224, 224), pool: (B, 16, 56, 56)
            (16, 32, 2),   # conv: (B, 32, 56, 56),   pool: (B, 32, 28, 28)
            (32, 32, 2),   # conv: (B, 32, 28, 28),   pool: (B, 32, 14, 14)
        )
        stage_layers = []
        for in_channels, out_channels, pool_size in stage_specs:
            stage_layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
            stage_layers.append(nn.ReLU())
            stage_layers.append(nn.MaxPool2d(pool_size, pool_size))
        # Layers are unpacked flat so they keep the indices 0..8 in the Sequential.
        self.features = nn.Sequential(*stage_layers)

        hidden_units = 128
        self.classifier = nn.Sequential(
            nn.Linear(32 * 14 * 14, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, num_classes),
        )

    def forward(self, x):
        """Return the class logits for a batch of images."""
        feature_maps = self.features(x)
        # Collapse (C, H, W) into one feature vector per sample.
        flattened = feature_maps.view(feature_maps.size(0), -1)
        return self.classifier(flattened)
    
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return the mean per-sample loss.

    With an ``optimizer`` the model is put in train mode and updated after
    every batch; without one it is put in eval mode and gradients are disabled.
    """
    training = optimizer is not None
    model.train(training)
    loss_sum = 0.0
    num_samples = 0
    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            if training:
                optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            if training:
                loss.backward()
                optimizer.step()
            # The criterion yields a batch mean; weight it by the batch size
            # so a shorter final batch does not skew the epoch average.
            batch_count = labels.size(0)
            loss_sum += loss.item() * batch_count
            num_samples += batch_count
    return loss_sum / num_samples


def main(root_dir='archive/Clothes_Dataset', num_epochs=3, batch_size=16,
         learning_rate=0.001, seed=42):
    """Train ``SimpleCNN`` on an image-folder dataset and report validation metrics.

    Prints the per-epoch training/validation loss, then the confusion matrix
    and macro F1 score on the validation split.

    Args:
        root_dir: Directory containing one sub-folder per class.
        num_epochs: Number of training epochs.
        batch_size: Batch size for both data loaders.
        learning_rate: Adam learning rate.
        seed: Seed for the 80/20 train/validation split so the split is
            reproducible; pass ``None`` for an unseeded split. Weight
            initialisation and batch shuffling are not seeded here.

    Raises:
        ValueError: If the dataset is too small to produce a non-empty
            training split.
    """
    # Define transforms: following MobileNetV3 preprocessing
    transform = transforms.Compose([
        transforms.Resize(256, interpolation=InterpolationMode.BILINEAR),
        transforms.CenterCrop(224),
        transforms.ToTensor(),  # scales to [0.0, 1.0]
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    # Create the dataset from the root folder
    dataset = CustomDataset(root_dir=root_dir, transform=transform)
    num_classes = len(dataset.classes)
    print(f"Found {len(dataset)} images belonging to {num_classes} classes: {dataset.classes}")

    # Split dataset into training (80%) and validation (20%) sets
    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size
    if train_size == 0:
        # Fail early with a clear message instead of an opaque sampler error.
        raise ValueError(
            f"Need at least 2 images under {root_dir!r} to build a "
            f"train/validation split, found {len(dataset)}."
        )
    split_kwargs = {}
    if seed is not None:
        # A dedicated generator keeps the split reproducible without touching
        # the global RNG state.
        split_kwargs['generator'] = torch.Generator().manual_seed(seed)
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size], **split_kwargs)

    # Create DataLoaders for training and validation
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # Initialize the model, loss function, and optimizer
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = SimpleCNN(num_classes=num_classes).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Train, validating after every epoch
    for epoch in range(num_epochs):
        avg_train_loss = _run_epoch(model, train_loader, criterion, device, optimizer)
        avg_val_loss = _run_epoch(model, val_loader, criterion, device)

        # Print the aggregated losses in the required format
        print(f"Epoch {epoch+1}: training loss = {avg_train_loss:.4f}, validation loss = {avg_val_loss:.4f}")

    # ----- Compute and Display the Confusion Matrix on Validation Set -----
    all_preds = []
    all_labels = []
    model.eval()
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs.to(device))
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.tolist())

    # Pin the label set so row/column i always corresponds to class index i,
    # even if a class happens to be absent from the validation split.
    cm = confusion_matrix(all_labels, all_preds, labels=list(range(num_classes)))
    print("\nConfusion Matrix:")
    print(cm)
    print(f"F1 Score: {f1_score(all_labels, all_preds, average='macro'):.4f}")

# Entry point: run the experiment only when this code is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
    # Once the run is finished, ask PyTorch to release cached CUDA memory.
    torch.cuda.empty_cache()

Found 7500 images belonging to 15 classes: ['Blazer', 'Celana_Panjang', 'Celana_Pendek', 'Gaun', 'Hoodie', 'Jaket', 'Jaket_Denim', 'Jaket_Olahraga', 'Jeans', 'Kaos', 'Kemeja', 'Mantel', 'Polo', 'Rok', 'Sweter']
Epoch 1: training loss = 2.4088, validation loss = 2.1808
Epoch 2: training loss = 1.9752, validation loss = 1.9010
Epoch 3: training loss = 1.6598, validation loss = 1.7518

Confusion Matrix:
[[39  2  5  1  1  2  4  5  1  2  3 16  5 14  9]
 [ 5 66  4  3  0  3  1  2  4  0  0  5  1  2  3]
 [ 3  2 55  1  0  5  0  0  0  0  1  1  1  9  4]
 [ 4  5  5 19  5  5  0  4  0  0  5 12  5 20 10]
 [ 1  2  0  1 35 14  2  5  0  1  2  6  1  4  8]
 [ 3  0  5  2  6 42  6  4  0  0  5  6  2  4  7]
 [ 7  2  3  0  4 12 55  4  0  0  2  2  1  3  4]
 [ 9  1  4  0  5 18  1 27  1  2  3  6  4  6  5]
 [ 0 41  5  1  3  3  3  1 42  1  0  3  0  4  0]
 [ 1  1  1  3  5  6  4  4  0 44  3  0 18  6  8]
 [ 6  1  4  0  3  9  2  2  0  3 40  7 10 13  8]
 [13  4  2  1  2  6  6  2  0  1  1 50  1  4  6]
 [ 6  3  2  0  5  5 

In [6]:
import os

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
from sklearn.metrics import f1_score
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision.transforms import InterpolationMode


# Custom Dataset for our Clothes_Dataset
class CustomDataset(Dataset):
    """Clothes image dataset laid out as one sub-folder per class.

    Sub-directories of ``root_dir`` are the classes; their sorted names are
    mapped to integer labels 0..N-1. Image files are picked by extension
    (case-insensitive) and indexed in sorted order, so sample indices are
    stable from run to run.
    """

    # Lower-case file extensions that are treated as images.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')

    def __init__(self, root_dir, transform=None):
        """
        Args:
            root_dir (str): Directory with all class folders.
            transform (callable, optional): Transformations to be applied to an image.
        """
        self.root_dir = root_dir
        self.transform = transform
        # Get sorted list of class directories
        self.classes = sorted(
            d for d in os.listdir(root_dir)
            if os.path.isdir(os.path.join(root_dir, d))
        )
        self.class_to_idx = {cls_name: idx for idx, cls_name in enumerate(self.classes)}

        # Sort file names as well: os.listdir() gives no ordering guarantee,
        # and an unstable order makes index-based splits irreproducible.
        self.samples = []
        for cls in self.classes:
            cls_folder = os.path.join(root_dir, cls)
            label = self.class_to_idx[cls]
            for fname in sorted(os.listdir(cls_folder)):
                if not fname.lower().endswith(self.IMAGE_EXTENSIONS):
                    continue
                path = os.path.join(cls_folder, fname)
                # Ignore directories whose name happens to look like an image.
                if os.path.isfile(path):
                    self.samples.append((path, label))

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.samples)

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index``; the image is RGB and transformed if a transform was given."""
        img_path, label = self.samples[index]
        # Close the file as soon as the RGB copy exists; Image.open() alone
        # would leave the handle open until the image object is collected.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image, label

def evaluate_model(model, dataloader, device):
    """Return the macro-averaged F1 score of ``model`` over ``dataloader``.

    The model is switched to eval mode (and left in it); inputs are moved to
    ``device`` and predictions are the arg-max over the model outputs.
    """
    model.eval()
    predicted, expected = [], []
    with torch.no_grad():
        for batch_inputs, batch_labels in dataloader:
            logits = model(batch_inputs.to(device))
            # torch.max over dim 1 returns (values, indices); keep the indices.
            batch_preds = torch.max(logits, 1)[1]
            predicted += batch_preds.cpu().tolist()
            expected += batch_labels.cpu().tolist()
    # Compute macro F1 score
    return f1_score(expected, predicted, average='macro')

def main(root_dir='archive/Clothes_Dataset', num_epochs=3, batch_size=16,
         learning_rate=0.001, seed=42):
    """Fine-tune an ImageNet-pretrained MobileNetV3-small on an image-folder dataset.

    Prints the validation macro F1 before training, the mean training loss of
    every epoch, and the validation macro F1 after fine-tuning.

    Args:
        root_dir: Directory containing one sub-folder per class.
        num_epochs: Number of fine-tuning epochs.
        batch_size: Batch size for both data loaders.
        learning_rate: Adam learning rate.
        seed: Seed for the 80/20 train/validation split so the split is
            reproducible; pass ``None`` for an unseeded split. The new
            classifier head's initialisation and batch shuffling are not
            seeded here.

    Raises:
        ValueError: If the dataset is too small to produce a non-empty
            training split.
    """
    # Define transforms: using MobileNetV3 preprocessing
    transform = transforms.Compose([
        transforms.Resize(256, interpolation=InterpolationMode.BILINEAR),
        transforms.CenterCrop(224),
        transforms.ToTensor(),  # scales pixel values to [0.0, 1.0]
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    # Create the dataset from the root folder
    dataset = CustomDataset(root_dir=root_dir, transform=transform)
    num_classes = len(dataset.classes)
    print(f"Found {len(dataset)} images belonging to {num_classes} classes: {dataset.classes}")

    # Split dataset into training (80%) and validation (20%) sets
    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size
    if train_size == 0:
        # Fail early with a clear message instead of an opaque sampler error.
        raise ValueError(
            f"Need at least 2 images under {root_dir!r} to build a "
            f"train/validation split, found {len(dataset)}."
        )
    split_kwargs = {}
    if seed is not None:
        # A dedicated generator keeps the split reproducible without touching
        # the global RNG state.
        split_kwargs['generator'] = torch.Generator().manual_seed(seed)
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size], **split_kwargs)

    # Create DataLoaders for training and validation
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # Device configuration
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load pre-trained MobileNetV3_small model
    model = models.mobilenet_v3_small(weights='IMAGENET1K_V1')
    # Replace the classifier's final linear layer with one matching our number of classes
    in_features = model.classifier[-1].in_features
    model.classifier[-1] = nn.Linear(in_features, num_classes)
    model = model.to(device)

    # Define loss function and optimizer for fine-tuning
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # ----- Evaluate initial model (without fine-tuning) on validation set -----
    # The replacement head is a freshly created nn.Linear, so this baseline
    # measures the pretrained backbone combined with an untrained classifier.
    initial_f1 = evaluate_model(model, val_loader, device)
    print(f"Initial F1 score on validation set: {initial_f1:.4f}")

    # ----- Fine-tuning -----
    for epoch in range(num_epochs):
        # evaluate_model() leaves the model in eval mode, so re-enable train
        # mode at the start of every epoch.
        model.train()
        loss_sum = 0.0
        num_samples = 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # The criterion yields a batch mean; weight it by the batch size
            # so a shorter final batch does not skew the epoch average.
            batch_count = labels.size(0)
            loss_sum += loss.item() * batch_count
            num_samples += batch_count
        avg_loss = loss_sum / num_samples
        print(f"Epoch {epoch+1}/{num_epochs}: Training Loss = {avg_loss:.4f}")

    # ----- Evaluate the model after fine-tuning -----
    final_f1 = evaluate_model(model, val_loader, device)
    print(f"F1 score on validation set after fine-tuning: {final_f1:.4f}")

# Entry point: run the fine-tuning experiment only when this code is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
    # Once the run is finished, ask PyTorch to release cached CUDA memory.
    torch.cuda.empty_cache()

Found 7500 images belonging to 15 classes: ['Blazer', 'Celana_Panjang', 'Celana_Pendek', 'Gaun', 'Hoodie', 'Jaket', 'Jaket_Denim', 'Jaket_Olahraga', 'Jeans', 'Kaos', 'Kemeja', 'Mantel', 'Polo', 'Rok', 'Sweter']


Downloading: "https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth" to /home/strongich/.cache/torch/hub/checkpoints/mobilenet_v3_small-047dcff4.pth
100%|██████████| 9.83M/9.83M [00:00<00:00, 51.2MB/s]


Initial F1 score on validation set: 0.0313
Epoch 1/3: Training Loss = 1.4874
Epoch 2/3: Training Loss = 0.9925
Epoch 3/3: Training Loss = 0.8193
F1 score on validation set after fine-tuning: 0.6200
