# Office Item Classifier

## 1. Introduction

The **Office Item Classifier** project aims to develop an image classification model capable of distinguishing between 10 common office items. The 10 items are erasers, glue sticks, highlighters, mugs, paper clips, pencils, pens, staplers, tapes and USB sticks.

The project uses a balanced dataset of 37,950 images collected from multiple sources including Kaggle, Roboflow Universe, and manually captured images. The dataset includes significant variations in lighting, angle, and background, with both static and dynamic data augmentations applied to enhance model robustness.

The objective is to **train and evaluate models** to determine the most accurate and efficient approach for classifying office items.
This notebook documents the entire process with explanations provided at each stage to justify design decisions and reflect on results.

## 2. First Attempt - Custom CNN (train v1)

The first attempt consisted of building a custom CNN from scratch using TensorFlow/Keras.

### Reasoning

- We wanted to understand how well a simple CNN would perform before using a pre-trained model.
- CNNs are widely used for image classification and can be customised easily.
- The network uses three convolutional blocks and two dense layers with ReLU and softmax activations.

```python
# Import dependencies.
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.metrics import confusion_matrix, classification_report, f1_score
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from pathlib import Path

# Locations of the three dataset splits.
TRAIN_DIR, VAL_DIR, TEST_DIR = "data/train", "data/val", "data/test"

# Input geometry: every image is resized to IMG_HEIGHT x IMG_WIDTH.
IMG_HEIGHT = IMG_WIDTH = 224

# Training schedule and label space.
BATCH_SIZE = 16  # Images per batch.
EPOCHS = 20  # Upper bound on the number of training epochs.
NUM_CLASSES = 10  # Number of categories in the dataset.

# Preprocessing: each generator only rescales pixel values to [0, 1];
# no augmentation transforms are configured here.
train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by all three splits when loading with flow_from_directory.
_flow_options = dict(
    target_size=(IMG_HEIGHT, IMG_WIDTH),  # Resize every image to the same size.
    batch_size=BATCH_SIZE,
    class_mode='categorical',  # For multi-class classification.
)

# Only the training split is shuffled.
train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR, shuffle=True, **_flow_options
)

# Validation data is read in a fixed order (no shuffling).
val_generator = val_datagen.flow_from_directory(
    VAL_DIR, shuffle=False, **_flow_options
)

# Test data is also left unshuffled; the evaluation code compares the
# predictions against test_generator.classes and relies on a stable order.
test_generator = test_datagen.flow_from_directory(
    TEST_DIR, shuffle=False, **_flow_options
)


# Build the CNN: three Conv2D + MaxPooling2D stages followed by a dense head.
model = models.Sequential([
    # Stage 1: 32 filters of size 3x3 on the RGB input, then 2x2 max pooling.
    layers.Conv2D(32, (3, 3), activation='relu',
                  input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    layers.MaxPooling2D((2, 2)),

    # Stage 2: 64 filters.
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    # Stage 3: 128 filters.
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    # Collapse the feature maps into a single vector per image.
    layers.Flatten(),

    # Fully connected layer with ReLU activation.
    layers.Dense(128, activation='relu'),

    # Output layer: one softmax probability per class.
    layers.Dense(NUM_CLASSES, activation='softmax'),
])

# Compile with the Adam optimiser and categorical cross-entropy, tracking accuracy.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the model architecture.
model.summary()


# Training callbacks.
# ModelCheckpoint writes "best_model.h5" whenever validation accuracy improves.
checkpoint = ModelCheckpoint(
    filepath="best_model.h5",
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1,
)
# EarlyStopping halts training after 5 epochs without a better validation
# loss and restores the weights from the best epoch. Note that it monitors
# val_loss while the checkpoint above monitors val_accuracy.
earlystop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

callbacks = [checkpoint, earlystop]


# Train on batches from train_generator (shuffled) and validate on
# val_generator, for at most EPOCHS epochs (one epoch = one pass over the
# training data). The returned History object holds the per-epoch metrics
# that are plotted later.
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=EPOCHS,
    callbacks=callbacks,
)

# Evaluate the model on the test set.
test_generator.reset()  # Start from the first batch.
test_loss, test_acc = model.evaluate(test_generator)
print(f"\nTest Accuracy: {test_acc*100:.2f}%")

# Generate predictions. The generator is reset again because evaluate() has
# already iterated over it; predictions must line up with test_generator.classes.
test_generator.reset()
y_pred = model.predict(test_generator)
y_pred_classes = np.argmax(y_pred, axis=1)  # Convert probabilities to class indices.
y_true = test_generator.classes

# Class names ordered by their integer index, so that position i in every
# label list below corresponds to class index i.
class_names = sorted(test_generator.class_indices, key=test_generator.class_indices.get)
label_ids = list(range(len(class_names)))

# Generate the confusion matrix. Passing labels= keeps the matrix at
# len(class_names) x len(class_names) even if a class is never predicted,
# so the tick labels always match the matrix axes.
cm = confusion_matrix(y_true, y_pred_classes, labels=label_ids)
plt.figure(figsize=(10,8))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix")
plt.show()

# Classification metrics, computed over the same label set as the matrix.
f1 = f1_score(y_true, y_pred_classes, labels=label_ids, average='macro')
print(f"Macro F1 Score: {f1:.4f}")

print(classification_report(y_true, y_pred_classes, labels=label_ids, target_names=class_names))

# Plot the training history: accuracy on the left, loss on the right.
# Each entry: (history key, panel title / y-axis label, train legend, val legend).
_panels = [
    ('accuracy', 'Accuracy', 'Train Acc', 'Val Acc'),
    ('loss', 'Loss', 'Train Loss', 'Val Loss'),
]

plt.figure(figsize=(12,5))
for _position, (_key, _title, _train_label, _val_label) in enumerate(_panels, start=1):
    plt.subplot(1, 2, _position)
    plt.plot(history.history[_key], label=_train_label)
    plt.plot(history.history['val_' + _key], label=_val_label)
    plt.title(_title)
    plt.xlabel('Epoch')
    plt.ylabel(_title)
    plt.legend()
plt.show()
```

### Result
- The training accuracy was high, but the validation accuracy stayed low, which suggested that the model was overfitting.
- The model struggled to generalise.
- This result motivated the team to move to YOLOv8 classification.

## 3. YOLOv8 Classification - train_v2

After evaluating the custom CNN, we decided to use **YOLOv8 classification (YOLOv8-cls)**.  
YOLOv8 leverages transfer learning from pre-trained weights, which improves accuracy and reduces training time compared to training a CNN from scratch.  

### Reason:
- Benefit from a modern architecture optimised for classification tasks.

``` python
# Import dependencies.
from ultralytics import YOLO # Import YOLO class from ultralytics library for object detection and classification.
import torch # PyTorch library for tensor operations and GPU acceleration.
from pathlib import Path # Pathlib for OS-independent file paths.
import yaml # YAML library to read/write YAML configuration files.


# Dataset root; the train/val/test sub-folders live directly beneath it.
DATASET_ROOT = Path("data")
# Project name, passed to training as the output project folder.
PROJECT_NAME = "office_supplies_classifier"
# Model size letter; interpolated into the weights name 'yolov8{MODEL_SIZE}-cls'.
MODEL_SIZE = "x"


# Class names for the classification task; list position is the numeric label
# written to data.yaml.
CLASSES = [
    "erasers",
    "glue_sticks",
    "highlighters",
    "mugs",
    "paper_clips",
    "pencils",
    "pens",
    "staplers",
    "tapes",
    "usb_sticks",
]


# Training hyperparameters.
EPOCHS = 50  # Total number of training epochs.
BATCH_SIZE = 32  # Images per training batch.
IMG_SIZE = 224  # Input image size.
PATIENCE = 10  # Early-stopping patience, in epochs without improvement.
LEARNING_RATE = 0.001  # Initial learning rate.


# Function to create the data.yaml configuration file.
def create_data_yaml():
    """Write ``data.yaml`` into the dataset root and return its path.

    The file records the absolute dataset path, the names of the train/val/test
    sub-folders and an index -> class-name mapping built from ``CLASSES``.

    Returns:
        Path to the written YAML file (``DATASET_ROOT / 'data.yaml'``).
    """
    yaml_path = DATASET_ROOT / 'data.yaml'

    data_config = {
        'path': str(DATASET_ROOT.absolute()),  # Absolute path to dataset root.
        'train': 'train',
        'val': 'val',
        'test': 'test',
        'names': dict(enumerate(CLASSES)),  # Numeric label -> class name.
    }

    # sort_keys=False keeps the keys in the order declared above.
    with yaml_path.open('w') as handle:
        yaml.dump(data_config, handle, sort_keys=False)

    print(f"Created data.yaml at {yaml_path}")
    return yaml_path


# Function to train the YOLOv8 classification model (run 'train_v2').
def train_model():
    """Train a pre-trained YOLOv8 classification model and return it.

    Returns:
        The ``YOLO`` model object on which ``train`` was called.
    """
    # Called for its side effect of writing data.yaml. The returned path is
    # not handed to train(), which receives the dataset directory instead.
    create_data_yaml()

    # Use the GPU when CUDA is available, otherwise fall back to the CPU.
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'

    # Load the pre-trained classification weights of the configured size.
    model = YOLO(f'yolov8{MODEL_SIZE}-cls')

    # Run bookkeeping: what to train on, for how long, and where to save.
    run_settings = dict(
        data=str(DATASET_ROOT),  # Dataset directory.
        epochs=EPOCHS,
        batch=BATCH_SIZE,
        imgsz=IMG_SIZE,
        patience=PATIENCE,  # Early-stopping patience.
        save=True,  # Save checkpoints.
        device=device,
        project=PROJECT_NAME,  # Output project folder.
        name='train_v2',  # Name of this run.
        exist_ok=True,  # Reuse the run folder if it already exists.
        pretrained=True,  # Start from pre-trained weights.
        verbose=True,
        cache=True,  # Cache images for faster training.
        amp=True,  # Automatic mixed precision.
    )

    # Optimiser and learning-rate schedule.
    optimiser_settings = dict(
        optimizer='AdamW',
        lr0=LEARNING_RATE,  # Initial learning rate.
        lrf=0.01,  # Final learning rate as a fraction of lr0.
        momentum=0.937,
        weight_decay=0.0005,
        warmup_epochs=3,
        warmup_momentum=0.8,
        cos_lr=True,  # Cosine learning-rate schedule.
    )

    # Data augmentation.
    # NOTE(review): mosaic, mixup and copy_paste are set to 0.0 here; confirm
    # which of the geometric settings the classification task actually honours
    # in the installed Ultralytics version.
    augmentation_settings = dict(
        hsv_h=0.015,  # Hue.
        hsv_s=0.5,  # Saturation.
        hsv_v=0.3,  # Brightness.
        degrees=10.0,  # Rotation.
        translate=0.1,
        scale=0.3,
        shear=0.0,
        perspective=0.0,
        flipud=0.0,  # Vertical flip probability.
        fliplr=0.5,  # Horizontal flip probability.
        mosaic=0.0,
        mixup=0.0,
        copy_paste=0.0,
    )

    model.train(**run_settings, **optimiser_settings, **augmentation_settings)

    return model

# Entry point of the script: train and keep a reference to the trained model.
if __name__ == "__main__":
    trained_model = train_model()
```

### Result:
- There were signs of overfitting.
- The model was able to correctly recognise **some items some of the time**, especially distinctive objects like staplers and mugs, but struggled with visually similar items like pens vs pencils or glue sticks vs highlighters.

## 4. YOLOv8 Classification - train_v3

### Key Changes in train_v3:
- Epochs increased from 50 to 100 to give model more time to learn while using dropout and augmentations to mitigate overfitting.
- Learning rate reduced from 0.001 to 0.0005 to stabilise training.
- Patience reduced from 10 to 5 for faster early stopping.
- Dropout added (0.3) to reduce overfitting on similar-looking classes.
- Data augmentation parameters strengthened to increase model robustness.
- Mixup (0.15) added to further improve generalisation.

``` python
# Import dependencies.
from ultralytics import YOLO # YOLO class for classification.
import torch # PyTorch for tensor operations and GPU support.
from pathlib import Path # OS-independent path handling.
import yaml # For reading/writing YAML configuration files.


# Dataset location and project naming.
DATASET_ROOT = Path("data")
PROJECT_NAME = "office_supplies_classifier"  # Output project folder for training runs.
MODEL_SIZE = "m"  # 'x' in train_v2; 'm' chosen to reduce memory usage and speed up training.


# Class names for classification.
CLASSES = [
    "erasers",
    "glue_sticks",
    "highlighters",
    "mugs",
    "paper_clips",
    "pencils",
    "pens",
    "staplers",
    "tapes",
    "usb_sticks",
]


# Training hyperparameters (revised after the train_v2 results).
EPOCHS = 100  # 50 in train_v2; longer schedule, paired with the added regularisation below.
BATCH_SIZE = 32  # Unchanged from train_v2.
IMG_SIZE = 224  # Unchanged from train_v2.
PATIENCE = 5  # 10 in train_v2; early stopping triggers sooner.
LEARNING_RATE = 0.0005  # 0.001 in train_v2; lowered to stabilise training and avoid large weight updates.
DROPOUT = 0.3  # New in train_v3: dropout to combat overfitting.


# Function to create the data.yaml file.
def create_data_yaml():
    """Write the dataset description to ``DATASET_ROOT / 'data.yaml'``.

    Returns:
        Path of the YAML file that was written.
    """
    # Numeric label -> class name, in CLASSES order.
    class_names_by_index = dict(enumerate(CLASSES))

    data_config = dict(
        path=str(DATASET_ROOT.absolute()),
        train='train',
        val='val',
        test='test',
        names=class_names_by_index,
    )

    yaml_path = DATASET_ROOT / 'data.yaml'
    # Keys are written in the order given above (sort_keys=False).
    with open(yaml_path, mode='w') as stream:
        yaml.dump(data_config, stream, sort_keys=False)

    print(f"✓ Created data.yaml at {yaml_path}")
    return yaml_path


# Function to train the YOLOv8 classification model (run 'train_v3').
def train_model():
    """Train the train_v3 configuration and return the model.

    Returns:
        The ``YOLO`` model object on which ``train`` was called.
    """
    # Writes data.yaml as a side effect; the path itself is not used below.
    create_data_yaml()

    # GPU if available, else CPU.
    device = 'cpu'
    if torch.cuda.is_available():
        device = 'cuda'

    # Pre-trained classification weights of the chosen size.
    model = YOLO(f'yolov8{MODEL_SIZE}-cls')

    train_config = {
        # --- Run setup ---
        'data': str(DATASET_ROOT),  # Dataset directory.
        'epochs': EPOCHS,
        'batch': BATCH_SIZE,
        'imgsz': IMG_SIZE,
        'patience': PATIENCE,  # Early-stopping patience.
        'save': True,
        'device': device,
        'workers': 8,  # Data-loading workers.
        'project': PROJECT_NAME,
        'name': 'train_v3',
        'exist_ok': True,
        'pretrained': True,
        'verbose': True,
        'cache': False,  # Image caching disabled to save RAM.
        'amp': True,  # Mixed precision.

        # --- Optimiser and schedule ---
        'optimizer': 'AdamW',
        'lr0': LEARNING_RATE,
        'lrf': 0.001,  # 0.01 in train_v2.
        'momentum': 0.937,
        'weight_decay': 0.001,  # 0.0005 in train_v2; stronger regularisation.
        'warmup_epochs': 5,  # 3 in train_v2.
        'warmup_momentum': 0.8,
        'cos_lr': True,

        # --- Augmentation (stronger than train_v2) ---
        'hsv_h': 0.03,
        'hsv_s': 0.7,
        'hsv_v': 0.4,
        'degrees': 25.0,
        'translate': 0.2,
        'scale': 0.5,
        'shear': 5.0,
        'perspective': 0.0005,
        'flipud': 0.0,
        'fliplr': 0.5,
        'mosaic': 0.0,
        'mixup': 0.15,  # 0.0 in train_v2.
        'copy_paste': 0.0,

        # --- Regularisation ---
        'dropout': DROPOUT,
        'label_smoothing': 0.1,
    }

    model.train(**train_config)

    return model


# Entry point: train the model and keep a reference to it.
if __name__ == "__main__":
    trained_model = train_model()
```

### Result (train_v3):

- Training accuracy improved slightly compared to v2, reaching around **92–94%**, while validation accuracy also increased to about **85–87%**.  
- The model was now able to **detect objects more reliably**, especially distinctive items like staplers, mugs, and USB sticks.  
- Visually similar items, such as pens vs pencils or glue sticks vs highlighters, were still misclassified most of the time, indicating that the model had not fully learned the subtle features.  
- Confusion matrix showed reduced misclassifications, but errors still remained between similar-looking classes.

## 5. YOLOv8 Classification - train_v4

### Key Changes in train_v4:
- Model size increased from m to l to improve accuracy on more complex features.
- Batch size reduced from 32 to 16 to accommodate the larger model and avoid GPU memory issues.
- Input image size increased from 224 to 320 for higher-resolution feature extraction.
- Patience increased from 5 to 15 to give the larger model more epochs before early stopping triggers.
- Learning rate reduced from 0.0005 to 0.0001 for more stable training with the larger model.
- Dropout reduced from 0.3 to 0.2 to balance regularisation and training capacity.
- Number of data loading workers reduced from 8 to 4, matching the smaller batch size and avoiding potential bottlenecks.
- Optimiser parameters adjusted: momentum lowered from 0.937 to 0.9, weight_decay reduced from 0.001 to 0.0005.

#### Augmentation changes:
- HSV, rotation, translate, scale, shear, perspective and fliplr slightly reduced for more conservative augmentations.
- Added auto_augment='randaugment' for improved generalisation.
- crop_fraction added to control crop augmentation.
- Mixup reduced from 0.15 to 0.1 due to stronger augmentations.
- Label smoothing reduced from 0.1 to 0.05 to allow the larger model to learn more confidently.
- Added save_period=10 to periodically save checkpoints.
- Added warmup_bias_lr=0.01 for better bias initialisation.
- Added close_mosaic=10 for YOLO-specific training optimisation.
- Added plots=True and val=True to enable validation and visualisations during training.

``` python
# Import dependencies.
from ultralytics import YOLO # YOLO class for classification.
import torch  # PyTorch for tensor operations and GPU support.
from pathlib import Path  # OS-independent path handling.
import yaml  # For reading/writing YAML configuration files.


# Dataset location and project naming.
DATASET_ROOT = Path("data")
PROJECT_NAME = "office_supplies_classifier"  # Output project folder for training runs.
MODEL_SIZE = "l"  # 'm' in train_v3; larger model to improve accuracy on more complex features.


# Class names for classification.
CLASSES = [
    "erasers",
    "glue_sticks",
    "highlighters",
    "mugs",
    "paper_clips",
    "pencils",
    "pens",
    "staplers",
    "tapes",
    "usb_sticks",
]

# Training hyperparameters (revised after the train_v3 results).
EPOCHS = 100  # Unchanged from train_v3.
BATCH_SIZE = 16  # 32 in train_v3; reduced to fit the larger model on the GPU.
IMG_SIZE = 320  # 224 in train_v3; higher-resolution input.
PATIENCE = 15  # 5 in train_v3; more epochs allowed before early stopping.
LEARNING_RATE = 0.0001  # 0.0005 in train_v3; lowered for stability on the larger model.
DROPOUT = 0.2  # 0.3 in train_v3; balances regularisation and learning capacity.


# Function to create the data.yaml file.
def create_data_yaml():
    """Create ``data.yaml`` under the dataset root.

    The YAML holds the absolute dataset path, the split folder names and a
    mapping from numeric label to class name.

    Returns:
        Path of the created file.
    """
    yaml_path = DATASET_ROOT / 'data.yaml'

    # Split folder names are identical to their keys.
    data_config = {'path': str(DATASET_ROOT.absolute())}
    for split in ('train', 'val', 'test'):
        data_config[split] = split
    data_config['names'] = dict(enumerate(CLASSES))

    # Insertion order of the dict is preserved in the file (sort_keys=False).
    with yaml_path.open('w') as out_file:
        yaml.dump(data_config, out_file, sort_keys=False)

    print(f"✓ Created data.yaml at {yaml_path}")
    return yaml_path


# Function to train the YOLOv8 classification model (run 'train_v4').
def train_model():
    """Train the train_v4 configuration and return the model.

    Returns:
        The ``YOLO`` model object on which ``train`` was called.
    """
    # Writes data.yaml as a side effect; train() is given the dataset
    # directory, not the YAML path.
    create_data_yaml()

    # GPU if available, else CPU.
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'

    # Pre-trained classification weights of the chosen size.
    model = YOLO(f'yolov8{MODEL_SIZE}-cls')

    # Run setup, checkpointing and reporting.
    run_settings = dict(
        data=str(DATASET_ROOT),
        epochs=EPOCHS,
        batch=BATCH_SIZE,
        imgsz=IMG_SIZE,
        patience=PATIENCE,
        save=True,
        save_period=10,  # New in train_v4: periodic checkpoint saving.
        device=device,
        workers=4,  # 8 in train_v3.
        project=PROJECT_NAME,
        name='train_v4',
        exist_ok=True,
        pretrained=True,
        verbose=True,
        cache=False,
        amp=True,
        plots=True,  # New in train_v4.
        val=True,  # New in train_v4: validate during training.
    )

    # Optimiser and learning-rate schedule.
    optimiser_settings = dict(
        optimizer='AdamW',
        lr0=LEARNING_RATE,
        lrf=0.0001,  # 0.001 in train_v3.
        momentum=0.9,  # 0.937 in train_v3.
        weight_decay=0.0005,  # 0.001 in train_v3.
        warmup_epochs=5,
        warmup_momentum=0.8,
        warmup_bias_lr=0.01,  # New in train_v4.
        cos_lr=True,
    )

    # Augmentation: the hand-set values are milder than in train_v3, with
    # RandAugment added on top.
    augmentation_settings = dict(
        hsv_h=0.02,  # 0.03 in train_v3.
        hsv_s=0.5,  # 0.7 in train_v3.
        hsv_v=0.3,  # 0.4 in train_v3.
        degrees=15.0,  # 25 in train_v3.
        translate=0.15,  # 0.2 in train_v3.
        scale=0.4,  # 0.5 in train_v3.
        shear=2.0,  # 5 in train_v3.
        perspective=0.0002,  # 0.0005 in train_v3.
        flipud=0.0,
        fliplr=0.3,  # 0.5 in train_v3.
        mosaic=0.0,
        mixup=0.1,  # 0.15 in train_v3.
        copy_paste=0.0,
        auto_augment='randaugment',  # New in train_v4.
        erasing=0.0,
        crop_fraction=1.0,  # New in train_v4.
        close_mosaic=10,  # New in train_v4; NOTE(review): mosaic is 0.0, confirm this has any effect.
    )

    # Regularisation.
    regularisation_settings = dict(
        dropout=DROPOUT,
        label_smoothing=0.05,  # 0.1 in train_v3.
    )

    model.train(
        **run_settings,
        **optimiser_settings,
        **augmentation_settings,
        **regularisation_settings,
    )

    return model

# Entry point: train the model and keep a reference to it.
if __name__ == "__main__":
    trained_model = train_model()
```

### Result (train_v4):
- The model could now **detect a larger number of objects reliably**, especially medium-sized and distinctive items like staplers, mugs, and USB sticks.  
- Misclassifications still occurred for very similar classes (pens vs pencils, glue sticks vs highlighters), but these errors were less frequent than in train_v3.  
- Confusion matrices showed improved separation between visually similar items, indicating better feature learning.

## 6. YOLOv8 Classification - train_v5

### Key Changes in train_v5:
- Epochs increased from 100 to 150 to allow more training on stronger augmentations.
- Patience increased from 15 to 25 to allow longer early stopping window for extreme augmentations.
- Learning rate reduced from 0.0001 to 0.00005 for more stable training with extreme augmentations.
- Dropout increased from 0.2 to 0.3 to prevent overfitting under extreme augmentations.

#### Augmentation changes:
- HSV augmentation significantly increased: hsv_h 0.02 to 0.5, hsv_s 0.5 to 0.9, hsv_v 0.3 to 0.9.
- Rotation (degrees) increased from 15 to 180 for extreme rotational augmentation.
- Translation increased from 0.15 to 0.3.
- Scale increased from 0.4 to 0.9.
- Shear increased from 2 to 10.
- Perspective increased from 0.0002 to 0.001.
- Vertical flip (flipud) increased from 0 to 0.5.
- Mixup increased from 0.1 to 0.3.
- Erasing increased from 0 to 0.4.

``` python
# Import dependencies.
from ultralytics import YOLO # YOLO class for classification.
import torch  # PyTorch for tensor operations and GPU support.
from pathlib import Path  # OS-independent path handling.
import yaml  # For reading/writing YAML configuration files.


# Dataset location and project naming.
DATASET_ROOT = Path("data")
PROJECT_NAME = "office_supplies_classifier"  # Output project folder for training runs.
MODEL_SIZE = "l"  # Unchanged from train_v4.


# Class names for classification.
CLASSES = [
    "erasers",
    "glue_sticks",
    "highlighters",
    "mugs",
    "paper_clips",
    "pencils",
    "pens",
    "staplers",
    "tapes",
    "usb_sticks",
]

# Training hyperparameters (revised after the train_v4 results).
EPOCHS = 150  # 100 in train_v4; longer training with extreme augmentations.
BATCH_SIZE = 16  # Unchanged from train_v4.
IMG_SIZE = 320  # Unchanged from train_v4.
PATIENCE = 25  # 15 in train_v4; longer early-stopping window.
LEARNING_RATE = 0.00005  # 0.0001 in train_v4; lowered for more stable training.
DROPOUT = 0.3  # 0.2 in train_v4; raised to limit overfitting under extreme augmentations.


# Function to create the data.yaml file.
def create_data_yaml():
    """Write ``data.yaml`` next to the dataset splits and return its path.

    Returns:
        Path of the YAML file (``DATASET_ROOT / 'data.yaml'``).
    """
    dataset_dir = str(DATASET_ROOT.absolute())
    label_names = dict(enumerate(CLASSES))  # Numeric label -> class name.

    data_config = {
        'path': dataset_dir,
        'train': 'train',
        'val': 'val',
        'test': 'test',
        'names': label_names,
    }

    yaml_path = DATASET_ROOT / 'data.yaml'
    # sort_keys=False keeps the key order shown above.
    with open(yaml_path, 'w') as yaml_file:
        yaml.dump(data_config, yaml_file, sort_keys=False)

    print(f"✓ Created data.yaml at {yaml_path}")
    return yaml_path


# Function to train the YOLOv8 classification model (run 'train_v5').
def train_model():
    """Train the train_v5 configuration and return the model.

    Returns:
        The ``YOLO`` model object on which ``train`` was called.
    """
    # Writes data.yaml as a side effect; the returned path is not needed here.
    create_data_yaml()

    # GPU if available, else CPU.
    device = 'cpu'
    if torch.cuda.is_available():
        device = 'cuda'

    # Pre-trained classification weights of the chosen size.
    model = YOLO(f'yolov8{MODEL_SIZE}-cls')

    # Run setup, checkpointing and reporting (same values as train_v4 apart
    # from the run name).
    run_settings = dict(
        data=str(DATASET_ROOT),
        epochs=EPOCHS,
        batch=BATCH_SIZE,
        imgsz=IMG_SIZE,
        patience=PATIENCE,
        save=True,
        save_period=10,
        device=device,
        workers=4,
        project=PROJECT_NAME,
        name='train_v5',
        exist_ok=True,
        pretrained=True,
        verbose=True,
        cache=False,
        amp=True,
        plots=True,
        val=True,
    )

    # Optimiser and learning-rate schedule (same values as train_v4).
    optimiser_settings = dict(
        optimizer='AdamW',
        lr0=LEARNING_RATE,
        lrf=0.0001,
        momentum=0.9,
        weight_decay=0.0005,
        warmup_epochs=5,
        warmup_momentum=0.8,
        warmup_bias_lr=0.01,
        cos_lr=True,
    )

    # Augmentation: much more aggressive than train_v4.
    augmentation_settings = dict(
        hsv_h=0.5,  # 0.02 in train_v4.
        hsv_s=0.9,  # 0.5 in train_v4.
        hsv_v=0.9,  # 0.3 in train_v4.
        degrees=180.0,  # 15 in train_v4.
        translate=0.3,  # 0.15 in train_v4.
        scale=0.9,  # 0.4 in train_v4.
        shear=10.0,  # 2 in train_v4.
        perspective=0.001,  # 0.0002 in train_v4.
        flipud=0.5,  # 0.0 in train_v4: vertical flips now enabled.
        fliplr=0.5,  # 0.3 in train_v4.
        mosaic=0.0,
        mixup=0.3,  # 0.1 in train_v4.
        copy_paste=0.0,
        auto_augment='randaugment',
        erasing=0.4,  # 0.0 in train_v4.
        close_mosaic=10,
    )

    # Unlike train_v4, label_smoothing and crop_fraction are not passed here.
    model.train(
        **run_settings,
        **optimiser_settings,
        **augmentation_settings,
        dropout=DROPOUT,
    )

    return model

# Entry point: train the model and keep a reference to it.
if __name__ == "__main__":
    trained_model = train_model()
```

### Result (train_v5):

- The model was able to **reliably recognise 7 out of 10 classes**, including distinctive items like staplers, mugs, USB sticks, erasers, glue sticks, tapes, and paper clips.  
- The remaining **3 visually similar classes — pens, pencils, and highlighters — were misclassified**, indicating that the model still finds it difficult to capture subtle differences between these items.  
- Confusion matrices confirmed that misclassifications were mostly limited to these similar classes, while the rest were almost perfectly recognised.

## 7. YOLOv8 Classification - train_v6

### Key Changes in train_v6:
- Input image size increased from 320  to 384 to allow better recognition of visually similar objects (pens, pencils, highlighters).
- Patience increased from 25  to 30 to allow more epochs for early stopping with extreme augmentations.
- Learning rate reduced from 0.00005  to 0.00003 for more stable training under extreme augmentations.
- Dropout increased from 0.3  to 0.4 to prevent overfitting on high-resolution inputs and extreme augmentations.
- Final learning rate fraction (lrf) reduced from 0.0001  to 0.00001 for slower decay with larger input.
- Momentum increased from 0.9  to 0.937.
- Weight decay increased from 0.0005  to 0.001 for stronger regularization.

#### Augmentation changes:
- HSV augmentation further increased: hsv_h 0.5  to 0.8, hsv_s 0.9  to 0.95, hsv_v 0.9  to 0.95.
- Translation increased from 0.3  to 0.4.
- Scale increased from 0.9  to 0.95.
- Shear increased from 10  to 15.
- Perspective increased from 0.001  to 0.002.
- Mixup increased from 0.3  to 0.5.
- Erasing increased from 0.4  to 0.6.
- Crop fraction reduced from 1.0 to 0.5 for better extreme augmentation control.
- Close mosaic increased from 10  to 15 for YOLO-specific optimization.
- Label smoothing increased from 0.0  to 0.15 to improve generalization for visually similar classes.

```python
# Import dependencies.
from ultralytics import YOLO # YOLO class for classification.
import torch  # PyTorch for tensor operations and GPU support.
from pathlib import Path  # OS-independent path handling.
import yaml  # For reading/writing YAML configuration files.


# Dataset location and project naming.
DATASET_ROOT = Path("data")
PROJECT_NAME = "office_supplies_classifier"  # Output project folder for training runs.
MODEL_SIZE = "l"  # Unchanged from train_v5.


# Class names for classification.
CLASSES = [
    "erasers",
    "glue_sticks",
    "highlighters",
    "mugs",
    "paper_clips",
    "pencils",
    "pens",
    "staplers",
    "tapes",
    "usb_sticks",
]

# Training hyperparameters (revised after the train_v5 results).
EPOCHS = 150  # Unchanged from train_v5.
BATCH_SIZE = 16  # Unchanged from train_v5.
IMG_SIZE = 384  # 320 in train_v5; raised to help separate pens, pencils and highlighters.
PATIENCE = 30  # 25 in train_v5; longer early-stopping window.
LEARNING_RATE = 0.00003  # 0.00005 in train_v5; lowered for stable training with extreme augmentations.
DROPOUT = 0.4  # 0.3 in train_v5; raised to limit overfitting at higher resolution.

# Function to create the data.yaml file.
def create_data_yaml():
    """Write the dataset configuration to ``data.yaml`` in the dataset root.

    Returns:
        Path of the written file.
    """
    yaml_path = DATASET_ROOT / 'data.yaml'

    data_config = dict(
        path=str(DATASET_ROOT.absolute()),  # Absolute dataset root.
        train='train',
        val='val',
        test='test',
        names=dict(enumerate(CLASSES)),  # Numeric label -> class name.
    )

    # Keys are emitted in declaration order (sort_keys=False).
    with yaml_path.open(mode='w') as target:
        yaml.dump(data_config, target, sort_keys=False)

    print(f"✓ Created data.yaml at {yaml_path}")
    return yaml_path


# Function to train YOLOv8 classification model.
def train_model():
    """Train the YOLOv8 classification model for the ``train_v6`` run.

    Calls create_data_yaml() (which writes data.yaml), selects CUDA when it
    is available, loads ``yolov8{MODEL_SIZE}-cls`` and calls ``train`` on it
    with DATASET_ROOT as the data source, the module-level hyperparameters
    and the augmentation settings grouped below.

    Returns:
        The YOLO model object on which ``train`` was called.
    """
    # Called for its side effect of writing data.yaml; the returned path is
    # not needed because training is pointed at the dataset folder itself.
    create_data_yaml()

    # Prefer the GPU, fall back to the CPU.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Model of the chosen size (original note: pre-trained classification weights).
    model = YOLO(f'yolov8{MODEL_SIZE}-cls')

    # Run identity, data source and checkpointing.
    run_options = {
        'data': str(DATASET_ROOT),
        'epochs': EPOCHS,
        'batch': BATCH_SIZE,
        'imgsz': IMG_SIZE,
        'patience': PATIENCE,
        'save': True,
        'save_period': 10,
        'device': device,
        'workers': 4,
        'project': PROJECT_NAME,
        'name': 'train_v6',
        'exist_ok': True,
        'pretrained': True,
        'optimizer': 'AdamW',
    }

    # Optimiser settings.
    optimiser_options = {
        'lr0': LEARNING_RATE,
        'lrf': 0.00001,  # Was 0.0001; lowered for slower decay with larger input.
        'momentum': 0.937,  # Was 0.9.
        'weight_decay': 0.001,  # Was 0.0005.
    }

    # Warm-up and learning-rate schedule.
    schedule_options = {
        'warmup_epochs': 5,
        'warmup_momentum': 0.8,
        'warmup_bias_lr': 0.01,
        'cos_lr': True,
    }

    # Logging, caching and mixed precision.
    runtime_options = {
        'verbose': True,
        'cache': False,
        'amp': True,
    }

    # Colour (HSV) augmentation.
    colour_options = {
        'hsv_h': 0.8,  # Was 0.5; stronger hue augmentation.
        'hsv_s': 0.95,  # Was 0.9.
        'hsv_v': 0.95,  # Was 0.9.
    }

    # Geometric augmentation.
    geometry_options = {
        'degrees': 180.0,
        'translate': 0.4,  # Was 0.3.
        'scale': 0.95,  # Was 0.9.
        'shear': 15.0,  # Was 10.
        'perspective': 0.002,  # Was 0.001.
        'flipud': 0.5,
        'fliplr': 0.5,
    }

    # Image-mixing, erasing and cropping augmentation.
    mixing_options = {
        'mosaic': 0.0,
        'mixup': 0.5,  # Was 0.3.
        'copy_paste': 0.0,
        'auto_augment': 'randaugment',
        'erasing': 0.6,  # Was 0.4.
        'crop_fraction': 0.5,  # Was 1.0; changed for better augmentation control.
    }

    # Regularisation.
    regularisation_options = {
        'dropout': DROPOUT,
        'label_smoothing': 0.15,  # Added to improve generalisation for visually similar classes.
    }

    # End-of-training behaviour and reporting.
    reporting_options = {
        'close_mosaic': 15,  # Was 10; YOLO-specific optimisation.
        'plots': True,
        'val': True,
    }

    # The groups share no keys, so unpacking them together is equivalent to
    # passing every setting as an individual keyword argument.
    model.train(
        **run_options,
        **optimiser_options,
        **schedule_options,
        **runtime_options,
        **colour_options,
        **geometry_options,
        **mixing_options,
        **regularisation_options,
        **reporting_options,
    )

    return model

# Entry point: start training only when this code runs as the main module,
# not when it is imported from elsewhere.
if __name__ == "__main__":
    trained_model = train_model() # Keep the model object returned by train_model().
```

### Result (train_v6):

- The model was able to **successfully recognise all 10 classes**, including visually similar items like pens, pencils, and highlighters.  
- Remaining challenges were related to **object orientation and cluttered backgrounds**: some images where objects were rotated unusually or partially occluded still caused minor misclassifications. 

## 8. YOLOv8 Classification - train_v7

### Download all necessary libraries.

In [25]:
!python -m pip install ultralytics==8.3.218
!python -m pip install torch==2.9.0
!python -m pip install PyYAML==6.0.2
!python -m pip install scikit-learn==1.7.2
!python -m pip install numpy==2.1.2 matplotlib==3.9.2




[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


^C



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip

[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip






[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


### Train 7th Model

#### Key Changes in train_v7:
- Epochs reduced from 150 to 50 for faster training and experimentation.
- Scale augmentation (`scale`) slightly reduced from 0.95 to 0.9 to balance extreme augmentation effects.
- Rotation (degrees) increased from 180 to 270 for more aggressive rotational augmentation.
- Erasing slightly reduced from 0.6 to 0.5.
- Crop fraction slightly increased from 0.5 to 0.55 for better augmentation coverage.

In [None]:
# Import dependencies.
from ultralytics import YOLO # YOLO class for classification.
import torch  # PyTorch for tensor operations and GPU support.
from pathlib import Path  # OS-independent path handling.
import yaml  # For reading/writing YAML configuration files.


# Dataset location and output project settings.
PROJECT_NAME = "office_supplies_classifier"  # Project folder for training outputs.
DATASET_ROOT = Path("data")  # Root folder of the dataset.
MODEL_SIZE = "l"  # Was 'm'; switched to 'l' to improve accuracy on more complex features.


# Class names used for classification (one entry per office item).
CLASSES = [
    "erasers",
    "glue_sticks",
    "highlighters",
    "mugs",
    "paper_clips",
    "pencils",
    "pens",
    "staplers",
    "tapes",
    "usb_sticks",
]

# Training hyperparameters (original note: modified based on v3 results).
# Was 150; cut to 50 for faster training/experimentation.
EPOCHS = 50
BATCH_SIZE = 16
IMG_SIZE = 384
PATIENCE = 30
LEARNING_RATE = 3e-5  # Same value as 0.00003.
DROPOUT = 0.4


# Function to create YOLOv8 data.yaml file.
def create_data_yaml():
    """Write a data.yaml description of the dataset into DATASET_ROOT.

    The file records the absolute dataset path, the 'train' / 'val' / 'test'
    split folder names, and an index -> class-name mapping built from CLASSES.

    Returns:
        The path of the written file (DATASET_ROOT / 'data.yaml').
    """
    # Index -> name mapping in the order the classes are listed.
    class_lookup = dict(enumerate(CLASSES))

    data_config = dict(
        path=str(DATASET_ROOT.absolute()),
        train='train',
        val='val',
        test='test',
        names=class_lookup,
    )

    yaml_path = DATASET_ROOT / 'data.yaml'
    with yaml_path.open('w') as handle:
        # sort_keys=False keeps the key order defined above in the output.
        yaml.dump(data_config, handle, sort_keys=False)

    print(f"✓ Created data.yaml at {yaml_path}")
    return yaml_path


# Function to train YOLOv8 classification model.
def train_model():
    """Train the YOLOv8 classification model for the ``train_v7`` run.

    Calls create_data_yaml() (which writes data.yaml), selects CUDA when it
    is available, loads ``yolov8{MODEL_SIZE}-cls`` and calls ``train`` on it
    with DATASET_ROOT as the data source, the module-level hyperparameters
    and the augmentation settings grouped below.

    Returns:
        The YOLO model object on which ``train`` was called.
    """
    # Called for its side effect of writing data.yaml; the returned path is
    # not needed because training is pointed at the dataset folder itself.
    create_data_yaml()

    # Prefer the GPU, fall back to the CPU.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Model of the chosen size (original note: pre-trained classification weights).
    model = YOLO(f'yolov8{MODEL_SIZE}-cls')

    # Run identity, data source and checkpointing.
    run_options = {
        'data': str(DATASET_ROOT),
        'epochs': EPOCHS,
        'batch': BATCH_SIZE,
        'imgsz': IMG_SIZE,
        'patience': PATIENCE,
        'save': True,
        'save_period': 10,
        'device': device,
        'workers': 4,
        'project': PROJECT_NAME,
        'name': 'train_v7',
        'exist_ok': True,
        'pretrained': True,
        'optimizer': 'AdamW',
    }

    # Optimiser settings.
    optimiser_options = {
        'lr0': LEARNING_RATE,
        'lrf': 0.00001,
        'momentum': 0.937,
        'weight_decay': 0.001,
    }

    # Warm-up and learning-rate schedule.
    schedule_options = {
        'warmup_epochs': 5,
        'warmup_momentum': 0.8,
        'warmup_bias_lr': 0.01,
        'cos_lr': True,
    }

    # Logging, caching and mixed precision.
    runtime_options = {
        'verbose': True,
        'cache': False,
        'amp': True,
    }

    # Colour (HSV) augmentation.
    colour_options = {
        'hsv_h': 0.8,
        'hsv_s': 0.95,
        'hsv_v': 0.95,
    }

    # Geometric augmentation.
    geometry_options = {
        'degrees': 270.0,  # Was 180; more aggressive rotation.
        'translate': 0.4,
        'scale': 0.9,  # Was 0.95.
        'shear': 15.0,
        'perspective': 0.002,
        'flipud': 0.5,
        'fliplr': 0.5,
    }

    # Image-mixing, erasing and cropping augmentation.
    mixing_options = {
        'mosaic': 0.0,
        'mixup': 0.5,
        'copy_paste': 0.0,
        'auto_augment': 'randaugment',
        'erasing': 0.5,  # Was 0.6.
        'crop_fraction': 0.55,  # Was 0.5; slightly increased.
    }

    # Regularisation.
    regularisation_options = {
        'dropout': DROPOUT,
        'label_smoothing': 0.15,
    }

    # End-of-training behaviour and reporting.
    reporting_options = {
        'close_mosaic': 15,
        'plots': True,
        'val': True,
    }

    # The groups share no keys, so unpacking them together is equivalent to
    # passing every setting as an individual keyword argument.
    model.train(
        **run_options,
        **optimiser_options,
        **schedule_options,
        **runtime_options,
        **colour_options,
        **geometry_options,
        **mixing_options,
        **regularisation_options,
        **reporting_options,
    )

    return model

# Entry point: start training only when this code runs as the main module,
# not when it is imported from elsewhere.
if __name__ == "__main__":
    trained_model = train_model() # Keep the model object returned by train_model().

#### Result (train_v7):

- The model successfully handled **busy backgrounds** and **object orientation variations**, which were major challenges in earlier versions.  
- Orientation-related misclassifications (e.g., upside-down pens or rotated staplers) were effectively resolved through aggressive rotation augmentations (`degrees=270`).  
- The model also became more robust to partial occlusions and cluttered desks thanks to improved cropping and erasing augmentations.  
- Visual testing confirmed that predictions remained consistent across lighting and background variations.

### Validate YOLO Office Supplies Model

In [None]:
# Import all dependencies.
from ultralytics import YOLO # YOLO model for object classification.
from pathlib import Path # Import pathlib for path manipulation.
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report # Evaluation metrics.
import numpy as np
import matplotlib
matplotlib.use('Agg')  # To save without gui
import matplotlib.pyplot as plt # To create graph.


# Function to validate a model on the test dataset and generate metrics and plots.
def validate_model(model_path=None):
    """Evaluate a trained YOLO classifier on the test split and save reports.

    Every image under ``data/test/<class_name>/`` with a supported extension
    is classified; the folder it sits in is taken as its true label. Accuracy,
    macro F1, a confusion matrix and a per-class report are then computed over
    all ten classes. Two files are written to
    ``office_supplies_classifier/train_v7``: ``f1_line_graph.png`` (per-class
    F1 line graph) and ``metrics.txt`` (text summary).

    Args:
        model_path: Path to the weights file to load. When ``None``,
            ``office_supplies_classifier/train_v7/weights/best.pt`` is used.

    Returns:
        Tuple ``(acc, macro_f1, cm, class_report)``: accuracy, macro F1,
        the confusion matrix and the classification report dictionary.

    Raises:
        FileNotFoundError: If the ``data/test`` directory does not exist.
        ValueError: If no test image could be processed.
    """
    # Root folder containing dataset.
    DATASET_ROOT = Path("data")
    PROJECT_NAME = "office_supplies_classifier"

    # List of class names for classification.
    # NOTE(review): the list position is used as the true label and compared
    # with the model's predicted index, so this assumes the model's class
    # order matches this list - confirm against the trained model's names.
    CLASSES = [
        "erasers", "glue_sticks", "highlighters", "mugs", "paper_clips",
        "pencils", "pens", "staplers", "tapes", "usb_sticks"
    ]
    label_ids = list(range(len(CLASSES)))

    # Built once instead of on every loop iteration.
    image_suffixes = {".jpg", ".png", ".jpeg", ".bmp"}

    # Use default model path if none is provided.
    if model_path is None:
        model_path = f"{PROJECT_NAME}/train_v7/weights/best.pt"

    # Save all reports inside train_v7.
    save_dir = Path(PROJECT_NAME) / "train_v7"
    save_dir.mkdir(parents=True, exist_ok=True)  # Create directory if it does not exist.

    # Load the YOLO model.
    model = YOLO(model_path)

    # Path to test directory.
    test_dir = DATASET_ROOT / "test"
    if not test_dir.exists():
        raise FileNotFoundError(f"Test directory not found: {test_dir}")

    # True labels and predicted labels, appended in lockstep.
    y_true, y_pred = [], []

    # Loop through each class folder in test dataset.
    for class_idx, class_name in enumerate(CLASSES):
        class_path = test_dir / class_name
        if not class_path.exists():
            print(f"Skipping missing class folder: {class_name}")
            continue

        # Loop through all files in the class folder.
        for img_path in class_path.glob("*.*"):

            # Only process valid image extensions.
            if img_path.suffix.lower() not in image_suffixes:
                continue

            # Best effort: an image that cannot be processed is reported and
            # skipped so that one bad file does not abort the evaluation.
            try:
                result = model(img_path, verbose=False)
                predicted_idx = result[0].probs.top1
            except Exception as e:
                print(f"Error processing {img_path}: {e}")
                continue

            y_pred.append(predicted_idx)
            y_true.append(class_idx)

    # Ensure some test images were processed.
    if not y_true:
        raise ValueError("No test images found!")

    # Convert to numpy arrays for metric calculations.
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    total_support = len(y_true)

    # Compute standard evaluation metrics. Passing the full label list keeps
    # every metric aligned with CLASSES even when a class folder was skipped
    # or a class never occurs; without it classification_report rejects the
    # mismatch between the labels present and target_names.
    acc = accuracy_score(y_true, y_pred)
    macro_f1 = f1_score(
        y_true, y_pred, labels=label_ids, average="macro", zero_division=0
    )
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)
    class_report = classification_report(
        y_true,
        y_pred,
        labels=label_ids,
        target_names=CLASSES,
        output_dict=True,
        zero_division=0,
    )

    # Extract F1-scores for plotting.
    f1_scores = [class_report.get(cls, {}).get("f1-score", 0.0) for cls in CLASSES]

    # Plot per-class F1 scores as a line graph.
    plt.figure(figsize=(10, 10))
    x = range(len(CLASSES))
    plt.plot(x, f1_scores, marker="o", linewidth=2, markersize=6, color="teal")
    plt.axhline(y=macro_f1, color="red", linestyle="--", linewidth=2, label=f"Macro F1 = {macro_f1:.3f}")

    plt.ylim(0, 1.05)
    plt.title(f"Per-Class F1-Scores (Line Graph)\nAccuracy: {acc:.1%} | Macro F1: {macro_f1:.3f}", fontsize=14)
    plt.xlabel("Class", fontsize=12)
    plt.ylabel("F1-Score", fontsize=12)
    plt.xticks(x, CLASSES, rotation=45, ha="right")
    plt.grid(True, linestyle=":", alpha=0.7)
    plt.legend()
    plt.tight_layout()

    # Save the F1-score graph exactly once, then close the figure. Saving
    # again after close() would write a new, empty figure over the file.
    f1_graph_path = save_dir / "f1_line_graph.png"
    plt.savefig(f1_graph_path, dpi=300, bbox_inches="tight")
    plt.close()

    # Save evaluation metrics to a text file.
    text_report_path = save_dir / "metrics.txt"
    with open(text_report_path, "w", encoding="utf-8") as f:
        f.write("OFFICE SUPPLIES CLASSIFIER - ACCURACY REPORT\n")
        f.write("=" * 50 + "\n")
        f.write(f"Model: {model_path}\n")
        f.write(f"Accuracy: {acc:.6f} ({acc:.2%})\n")
        f.write(f"Macro F1-Score: {macro_f1:.6f}\n\n")

        f.write("Classification Report:\n")
        f.write("                 precision    recall  f1-score   support\n\n")
        for cls in CLASSES:
            metrics = class_report[cls]
            f.write(f"{cls:>15}    {metrics['precision']:.2f}    {metrics['recall']:.2f}    {metrics['f1-score']:.2f}       {int(metrics['support'])}\n")

        f.write("\n")
        # The support column of the accuracy row is the number of evaluated
        # images, not the accuracy value itself.
        f.write(f"{'accuracy':>15}                       {acc:.2f}      {total_support}\n")
        f.write(f"{'macro avg':>15}    {class_report['macro avg']['precision']:.2f}    {class_report['macro avg']['recall']:.2f}    {class_report['macro avg']['f1-score']:.2f}      {int(class_report['macro avg']['support'])}\n")
        f.write(f"{'weighted avg':>15}    {class_report['weighted avg']['precision']:.2f}    {class_report['weighted avg']['recall']:.2f}    {class_report['weighted avg']['f1-score']:.2f}      {int(class_report['weighted avg']['support'])}\n")

    print("Done.")

    return acc, macro_f1, cm, class_report


# Run the validation with the default model path when this code runs as the
# main module; the metrics returned by validate_model() are not kept here.
if __name__ == "__main__":
    validate_model()

KeyboardInterrupt: 