In [1]:
# Install necessary libraries
!pip install torch torchvision opencv-python matplotlib albumentations efficientnet-pytorch wandb

Collecting efficientnet-pytorch
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: efficientnet-pytorch
  Building wheel for efficientnet-pytorch (setup.py) ... [?25l[?25hdone
  Created wheel for efficientnet-pytorch: filename=efficientnet_pytorch-0.7.1-py3-none-any.whl size=16424 sha256=e06f83b1d40411ca91611dc201f53e8891360be04bca23712b64362780e87a2c
  Stored in directory: /root/.cache/pip/wheels/03/3f/e9/911b1bc46869644912bda90a56bcf7b960f20b5187feea3baf
Successfully built efficientnet-pytorch
Installing collected packages: efficientnet-pytorch
Successfully installed efficientnet-pytorch-0.7.1


In [2]:
import os
import torch
import torchvision
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import albumentations as A
from albumentations.pytorch import ToTensorV2
from efficientnet_pytorch import EfficientNet
import wandb
import numpy as np
from sklearn.metrics import classification_report

  check_for_updates()


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Define Image Transformations

In [4]:
# Weights & Biases
wandb.init(project="children_drawing_analysis")

# the image transformations with Albumentations
transform = A.Compose([
    A.Resize(128, 128),
    A.RandomCrop(120, 120),
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=10),
    A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    A.Normalize(mean=(0.5,), std=(0.5,)),
    ToTensorV2()
])

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Custom Dataset Class

In [5]:
# dataset class to use Albumentations transforms
class AlbumentationsDataset(torch.utils.data.Dataset):
    def __init__(self, dataset, transform=None):
        self.dataset = dataset
        self.transform = transform

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        image, label = self.dataset[idx]
        image = np.array(image)
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']
        return image, label

Load Data

In [6]:
data_dir = '/content/drive/MyDrive/data'
dataset = datasets.ImageFolder(data_dir)

# Split the dataset
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

train_dataset = AlbumentationsDataset(train_dataset, transform=transform)
test_dataset = AlbumentationsDataset(test_dataset, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

Load Pre-trained Model and Modify

In [7]:
model = EfficientNet.from_pretrained('efficientnet-b0')
num_ftrs = model._fc.in_features
model._fc = nn.Linear(num_ftrs, 4)
model.dropout = nn.Dropout(p=0.5)
for param in model.parameters():
    param.requires_grad = True

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b0-355c32eb.pth
100%|██████████| 20.4M/20.4M [00:00<00:00, 80.2MB/s]


Loaded pretrained weights for efficientnet-b0


Define Loss Function, Optimizer, Scheduler,training Loop and evaluating the model.

In [8]:
# the criterion, optimizer, and learning rate scheduler
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.0001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5)

# Track with Weights & Biases
wandb.watch(model, log="all")

# early stopping and model checkpointing
num_epochs = 50
best_val_loss = float('inf')
patience, trials = 30, 0
best_model_path = 'best_model.pt'

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

    val_loss /= len(test_loader)
    running_loss /= len(train_loader)
    print(f"Epoch {epoch+1}, Training Loss: {running_loss}, Validation Loss: {val_loss}")

    scheduler.step(val_loss)
    wandb.log({"training_loss": running_loss, "validation_loss": val_loss})

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        trials = 0
        torch.save(model.state_dict(), best_model_path)
    else:
        trials += 1
        if trials >= patience:
            print("Early stopping triggered.")
            break

print("Training complete.")
model.load_state_dict(torch.load(best_model_path))

# evaluate the model
model.eval()
all_labels = []
all_preds = []
correct_predictions = 0
total_images = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        all_labels.extend(labels.numpy())
        all_preds.extend(preds.numpy())

        for i in range(len(labels)):
            print(f"Image {i+1}:")
            print(f"Ground Truth: {dataset.classes[labels[i]]}")
            print(f"Predicted: {dataset.classes[preds[i]]}")
            print("--------")

        correct_predictions += torch.sum(preds == labels).item()
        total_images += labels.size(0)

accuracy = correct_predictions / total_images
report = classification_report(all_labels, all_preds, target_names=dataset.classes)

print(f"Overall Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:")
print(report)

Epoch 1, Training Loss: 1.375597443845537, Validation Loss: 1.3538201808929444
Epoch 2, Training Loss: 1.2499527467621698, Validation Loss: 1.323795747756958
Epoch 3, Training Loss: 1.13219177391794, Validation Loss: 1.263758659362793
Epoch 4, Training Loss: 1.02785904539956, Validation Loss: 1.2199821829795838
Epoch 5, Training Loss: 0.950058745013343, Validation Loss: 1.1460837841033935
Epoch 6, Training Loss: 0.8629686236381531, Validation Loss: 1.1268176317214966
Epoch 7, Training Loss: 0.7837329275078244, Validation Loss: 1.0841444611549378
Epoch 8, Training Loss: 0.7095690766970316, Validation Loss: 1.0336975812911988
Epoch 9, Training Loss: 0.6420399331384234, Validation Loss: 0.9514254331588745
Epoch 10, Training Loss: 0.5911740511655807, Validation Loss: 0.9686207890510559
Epoch 11, Training Loss: 0.49231422775321537, Validation Loss: 1.0022237181663514
Epoch 12, Training Loss: 0.4974269883500205, Validation Loss: 0.9823102474212646
Epoch 13, Training Loss: 0.4387323442432616,

  model.load_state_dict(torch.load(best_model_path))


Image 1:
Ground Truth: Angry
Predicted: Happy
--------
Image 2:
Ground Truth: Angry
Predicted: Angry
--------
Image 3:
Ground Truth: Happy
Predicted: Happy
--------
Image 4:
Ground Truth: Fear
Predicted: Sad
--------
Image 5:
Ground Truth: Angry
Predicted: Angry
--------
Image 6:
Ground Truth: Fear
Predicted: Angry
--------
Image 7:
Ground Truth: Happy
Predicted: Happy
--------
Image 8:
Ground Truth: Angry
Predicted: Angry
--------
Image 9:
Ground Truth: Happy
Predicted: Happy
--------
Image 10:
Ground Truth: Sad
Predicted: Sad
--------
Image 11:
Ground Truth: Fear
Predicted: Sad
--------
Image 12:
Ground Truth: Angry
Predicted: Angry
--------
Image 13:
Ground Truth: Sad
Predicted: Sad
--------
Image 14:
Ground Truth: Sad
Predicted: Sad
--------
Image 15:
Ground Truth: Happy
Predicted: Happy
--------
Image 16:
Ground Truth: Sad
Predicted: Sad
--------
Image 17:
Ground Truth: Sad
Predicted: Sad
--------
Image 18:
Ground Truth: Fear
Predicted: Angry
--------
Image 19:
Ground Truth: Fear


Children's Drawing Analysis Report

Project Overview

In this project I aimed to classify drawings using a deep learning model. The EfficientNet pre-trained model was fine-tuned on a dataset of images, which were augmented using Albumentations to enhance generalization.

Data Preprocessing


The dataset was preprocessed with various transformations: resizing images to 128x128 pixels, randomly cropping to 120x120 pixels, applying horizontal flips, rotations, color jittering, and normalization to standardize the pixel values.

The dataset was sourced from kaggle(https://www.kaggle.com/datasets/vishmiperera/children-drawings/data), containing a total of len 702 images. The dataset was split into training and test sets, with len 561 images for training and len 141 for testing. The dataset included multiple classes, such as
 Angry, Fear, Happy, Sad.

The EfficientNet-b0 architecture was modified by replacing the final fully connected layer to classify into four classes. Dropout with a probability of 0.5 was added to prevent overfitting. The model was fine-tuned with the entire network's parameters set to be trainable.

The training process involved using CrossEntropyLoss as the criterion for handling multi-class classification. The AdamW optimizer, with a learning rate of 0.0001, was employed due to its adaptability to sparse gradients. A learning rate scheduler, ReduceLROnPlateau, was used to reduce the learning rate by a factor of 0.1 if the validation loss did not improve for five epochs. The model was trained for a maximum of 50 epochs with a batch size of 32. Early stopping was implemented with a patience of 30 trials, ensuring training stopped early if the validation loss did not improve. Model checkpointing was used to save the best model based on validation loss.

Throughout the training process, Weights & Biases was used to log experiments, allowing detailed tracking of model performance and hyperparameters. Performance metrics such as training loss and validation loss were monitored. The lowest validation loss achieved during training, noted as best_val_loss, was recorded.

After training, the model achieved an accuracy of accuracy 61.70% on the test set, indicating a good performance in classifying children's drawings. The classification report provided detailed insights into the model's performance across different categories, including precision, recall, and F1-score for each class.

Loss Function, Optimizer, and Scheduler used here:

CrossEntropyLoss is a commonly used loss function for classification tasks. It combines LogSoftmax and NLLLoss in one single class. The input to CrossEntropyLoss is the raw scores (logits) from the model, and it outputs a loss value which indicates how far the model's predictions are from the true labels

AdamW is an optimization algorithm that adjusts the learning rate of each parameter. It’s a variant of the Adam optimizer with weight decay to improve generalization. AdamW helps in minimizing the loss function by adjusting the model parameters based on their gradients.
Learning Rate: Controls how much to change the model parameters in response to the estimated error each time the model weights are updated. A smaller learning rate like 0.0001 makes the training more stable by making small adjustments.

ReduceLROnPlateau is a learning rate scheduler that adjusts the learning rate based on the validation loss. When the validation loss stops improving, this scheduler reduces the learning rate by a factor of 0.1. This helps in fine-tuning the learning rate, allowing the model to settle into minima during training.

Mode: 'min' mode means the scheduler looks for a decrease in the validation loss to decide whether to reduce the learning rate.

Factor: The factor by which the learning rate will be reduced. Here, it’s set to 0.1.

Patience: The number of epochs with no improvement after which the learning rate will be reduced. It’s set to 5 here.