## Happy vs Sad People CNN classification

### Step 1 — Imports

In [48]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder                      # A dataset class for loading images from a directory structure where each subdirectory represents a class and contains the images for that class
from torch.utils.data import DataLoader, random_split             # DataLoader batches (and optionally shuffles) a dataset; random_split divides a dataset into random, non-overlapping subsets (used here for train / validation)
import os                                                         # Standard-library interface to the operating system (environment variables, filesystem paths)
import matplotlib.pyplot as plt

### Step 2 — Define Dataset Path

In [49]:
# Root directory of the dataset (one sub-directory per class).
# The location can be overridden without editing the notebook by setting the
# HAPPY_SAD_DATASET_DIR environment variable; when it is not set, the original
# local kagglehub cache path is used, so existing behaviour is unchanged.
DEFAULT_DATASET_DIR = "C:/Users/OLIVE/.cache/kagglehub/datasets/mayank07thakur/happy-vs-sad-people-cnn-classification/versions/1"
path = os.environ.get("HAPPY_SAD_DATASET_DIR", DEFAULT_DATASET_DIR)

print("Path to dataset files:", path)


Path to dataset files: C:/Users/OLIVE/.cache/kagglehub/datasets/mayank07thakur/happy-vs-sad-people-cnn-classification/versions/1


### Step 3 — Define Transforms

In [50]:
# Preprocessing pipeline applied to every image when it is loaded.
# transforms.Compose chains the listed steps and runs them in order.
transform = transforms.Compose(
    [
        # 1) Bring every image to the same 128x128 spatial size so batches can
        #    be stacked and the CNN's fully connected layer sees a fixed shape.
        transforms.Resize((128, 128)),
        # 2) Convert the image to a PyTorch tensor; pixel values are rescaled
        #    from [0, 255] to [0.0, 1.0].
        transforms.ToTensor(),
    ]
)


### Step 4 — Load Dataset

In [51]:
# ImageFolder treats each sub-directory of `path` as one class and applies
# `transform` to every image when it is read.
full_dataset = ImageFolder(path, transform=transform)

# Class names come from the sub-directory names found under `path`.
print("Classes:", full_dataset.classes)
# Number of images across all classes.
print("Total images are:", len(full_dataset))


Classes: ['Happy People', 'Sad People']
Total images are: 351


### Step 5 — Split Dataset: 80% train, 20% validation

In [52]:
# Seed for the train/validation split. Without a fixed seed, random_split
# draws a different partition on every run, so the reported accuracies are
# not reproducible (and not comparable between runs).
SPLIT_SEED = 42

# 80% of the images go to training; the remainder goes to validation so the
# two sizes always add up to the full dataset even when 0.8 * N is not an integer.
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size

# A dedicated, seeded generator makes the split deterministic without touching
# the global RNG state used elsewhere (e.g. weight initialisation, shuffling).
train_dataset, val_dataset = random_split(
    full_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Sanity check: every image landed in exactly one of the two subsets.
assert len(train_dataset) + len(val_dataset) == len(full_dataset)

print("Train size:", len(train_dataset))
print("Validation size:", len(val_dataset))


Train size: 280
Validation size: 71


### Step 6 — Create DataLoader objects

In [53]:
# Training batches are reshuffled every epoch; a small batch size (16) is
# sufficient for a dataset of this size.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=16,
    shuffle=True,
)

# Validation batches keep a fixed order: shuffling has no effect on accuracy.
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=16,
    shuffle=False,
)


### Step 7 — Check Image Shape

In [54]:
# Pull a single batch from the training loader to sanity-check tensor shapes.
_batch_iterator = iter(train_loader)
images, labels = next(_batch_iterator)

# Expected layout: [batch, channels, height, width], e.g. [16, 3, 128, 128].
print("Image shape:", images.shape)
# First five integer class labels in the batch (0 = first class, 1 = second class).
print("Labels:", labels[:5])


Image shape: torch.Size([16, 3, 128, 128])
Labels: tensor([1, 0, 1, 1, 0])


### Step 8 — Build a CNN from Scratch

In [55]:
class SimpleCNN(nn.Module):
    """Small two-block convolutional classifier for 3x128x128 RGB images.

    Architecture: (Conv 3x3 -> ReLU -> MaxPool 2x2) twice, then two fully
    connected layers. The convolutions use no padding, so for a 128x128 input
    the spatial size evolves as:

        conv1: 128 -> 126,  pool: 126 -> 63
        conv2:  63 ->  61,  pool:  61 -> 30

    which is why the first fully connected layer expects 32 * 30 * 30 features.
    Inputs with a different spatial size will not match ``fc1``.

    Args:
        num_classes: Number of output logits. Defaults to 2 (happy / sad), so
            ``SimpleCNN()`` behaves exactly as before.
    """

    def __init__(self, num_classes: int = 2):
        super().__init__()

        self.conv1 = nn.Conv2d(3, 16, 3)                # 3 input channels (RGB) -> 16 feature maps, 3x3 kernel
        self.conv2 = nn.Conv2d(16, 32, 3)               # 16 -> 32 feature maps, 3x3 kernel
        self.pool = nn.MaxPool2d(2, 2)                  # 2x2 max-pool with stride 2; shared by both conv blocks

        self.fc1 = nn.Linear(32 * 30 * 30, 128)         # flattened 32x30x30 feature map -> 128 hidden units
        self.fc2 = nn.Linear(128, num_classes)          # hidden units -> one logit per class

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes) for input ``x``.

        ``x`` is expected to be a float tensor of shape (batch, 3, 128, 128).
        No softmax is applied; ``nn.CrossEntropyLoss`` consumes logits directly.
        """
        x = self.pool(torch.relu(self.conv1(x)))       # 128x128 -> 126x126 -> 63x63
        x = self.pool(torch.relu(self.conv2(x)))       # 63x63 -> 61x61 -> 30x30

        # Flatten everything except the batch dimension; unlike .view this
        # does not require the tensor to be contiguous.
        x = torch.flatten(x, 1)

        x = torch.relu(self.fc1(x))                    # (batch, 32*30*30) -> (batch, 128)
        return self.fc2(x)                             # (batch, 128) -> (batch, num_classes)


### Step 9 — Initialize Model and optimizer

In [103]:
# Prefer the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print("Using device:", device)

# Build the network, then move its parameters to the selected device.
model = SimpleCNN()
model = model.to(device)

# Cross-entropy over the raw logits, optimised with Adam at a learning rate of 1e-3.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


Using device: cuda


### Step 10 — Training Loop

In [104]:
epochs = 7

for epoch in range(epochs):
    # Training mode matters for layers such as dropout / batch-norm; this
    # model has none, but setting it keeps the loop correct if they are added.
    model.train()

    # Per-epoch accumulators.
    # NOTE: running_loss is the SUM of the per-batch losses over the epoch,
    # not a per-batch or per-sample average.
    running_loss = 0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Standard optimisation step: clear old gradients, forward pass,
        # compute loss, back-propagate, update the weights.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # The predicted class is the index of the largest logit in each row.
        predicted = torch.max(outputs, 1).indices
        total += labels.size(0)                       # number of samples in this batch
        correct += predicted.eq(labels).sum().item()  # number of correct predictions in this batch

    train_acc = 100 * correct / total

    print(f"Epoch {epoch+1}, Loss: {running_loss:.3f}, Train Accuracy: {train_acc:.2f}%")


Epoch 1, Loss: 13.190, Train Accuracy: 62.86%
Epoch 2, Loss: 8.054, Train Accuracy: 76.43%
Epoch 3, Loss: 6.155, Train Accuracy: 86.79%
Epoch 4, Loss: 4.868, Train Accuracy: 87.50%
Epoch 5, Loss: 3.318, Train Accuracy: 94.29%
Epoch 6, Loss: 1.828, Train Accuracy: 97.50%
Epoch 7, Loss: 1.201, Train Accuracy: 98.21%


### Step 11 — Validation Accuracy

In [105]:
# Evaluation mode: switches layers such as dropout / batch-norm to their
# inference behaviour (none are present in this model, but it is the correct habit).
model.eval()

correct = 0
total = 0

# Gradients are not needed for evaluation, so disable autograd bookkeeping.
with torch.no_grad():
    for images, labels in val_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        # The predicted class is the index of the largest logit in each row.
        predicted = torch.max(outputs, 1).indices

        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

# Percentage of validation images classified correctly.
val_acc = 100 * correct / total
print("Validation Accuracy:", val_acc)


Validation Accuracy: 83.09859154929578


#### Why is validation lower than training?
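Because the model has a high capacity relative to the small dataset (only 280 training images), it overfits: it memorizes the training samples, which pushes training accuracy up (98.21%), but what it has memorized does not fully generalize to the unseen validation images (83.10%).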