Load dataset and visualize some samples

In [1]:
from torchvision import transforms, utils
from Dataset import CustomImageDataset


# Preprocessing applied to every image, in order: resize to 500x500,
# convert to a tensor, then normalise each of the three channels with the
# mean / std values below.
preprocessing_steps = [
    transforms.Resize((500, 500)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
transform = transforms.Compose(preprocessing_steps)

# NOTE(review): the third positional argument (2) is presumably the number of
# classes to load -- confirm against Dataset.CustomImageDataset.
dataset = CustomImageDataset('data/raw-img', transform, 2)

#dataset.visualize(5)

#dataset.visualize_all_classes()

#print(dataset.nr_classes)

In [2]:
#define model

import torch.nn as nn
import torch.nn.functional as F

class ClassificationModel(nn.Module):
    """Three-stage CNN image classifier that returns per-class probabilities.

    Each stage is ``Conv2d(kernel_size=5, padding=2) -> ReLU -> max_pool2d(2)``.
    The convolutions keep height and width unchanged; every pooling step
    halves them (rounding down). The pooled feature map is flattened, passed
    through a 512-unit hidden layer and a final linear layer, and normalised
    with a softmax over the class dimension.

    Because ``forward`` already applies softmax, the output is a probability
    distribution, not logits. Pair it with a loss that expects probabilities
    (for example ``nn.BCELoss`` with one-hot float targets); do not feed it to
    ``nn.CrossEntropyLoss``, which expects raw logits.

    Args:
        num_classes: Number of output classes.
        input_size: Spatial size of the input images, either a single int for
            square images or a ``(height, width)`` pair. Defaults to 500,
            which yields a 128 x 62 x 62 feature map in front of ``fc1``.

    Raises:
        ValueError: If ``input_size`` is too small to survive three 2x2
            pooling steps.
    """

    def __init__(self, num_classes=10, input_size=500):
        super().__init__()

        # Accept either a single side length or an explicit (height, width).
        try:
            height, width = input_size
        except TypeError:
            height = width = input_size

        # Mirror the three max_pool2d(kernel_size=2) steps in forward():
        # each one floors the spatial size to half.
        pooled_h, pooled_w = int(height), int(width)
        for _ in range(3):
            pooled_h, pooled_w = pooled_h // 2, pooled_w // 2
        if pooled_h < 1 or pooled_w < 1:
            raise ValueError(
                f"input_size {input_size!r} is too small for three 2x2 pooling steps"
            )

        # First convolutional layer: 3 input channels (RGB) -> 32 channels.
        # kernel_size=5 with padding=2 preserves height and width.
        self.conv1 = nn.Conv2d(3, 32, kernel_size=5, padding=2)

        # Second convolutional layer: 32 -> 64 channels, same spatial size.
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5, padding=2)

        # Third convolutional layer: 64 -> 128 channels, same spatial size.
        self.conv3 = nn.Conv2d(64, 128, kernel_size=5, padding=2)

        # Fully connected layer over the flattened, pooled feature map
        # (128 * 62 * 62 inputs for the default 500x500 images).
        self.fc1 = nn.Linear(128 * pooled_h * pooled_w, 512)

        # Prediction layer: one score per class.
        self.prediction = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return class probabilities of shape ``(batch_size, num_classes)``.

        Args:
            x: Image batch of shape ``(batch_size, 3, height, width)`` whose
                spatial size matches the ``input_size`` given at construction.
        """
        # Shape comments below assume the default 500x500 input.

        # First conv -> ReLU -> max pooling: (batch_size, 32, 250, 250)
        x = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=2)

        # Second conv -> ReLU -> max pooling: (batch_size, 64, 125, 125)
        x = F.max_pool2d(F.relu(self.conv2(x)), kernel_size=2)

        # Third conv -> ReLU -> max pooling: (batch_size, 128, 62, 62)
        x = F.max_pool2d(F.relu(self.conv3(x)), kernel_size=2)

        # Flatten everything except the batch dimension for the linear layers.
        x = x.view(x.size(0), -1)

        # Fully connected layer -> ReLU
        x = F.relu(self.fc1(x))

        # Class scores -> probabilities. The functional softmax avoids
        # constructing a new nn.Softmax module on every forward pass.
        return F.softmax(self.prediction(x), dim=1)
    



In [3]:
from torchsummary import summary
# Build the two-class model and print a per-layer summary for a single
# 3-channel 500x500 input.
model = ClassificationModel(num_classes=2)

summary(model, input_size=(3, 500, 500))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 500, 500]           2,432
            Conv2d-2         [-1, 64, 250, 250]          51,264
            Conv2d-3        [-1, 128, 125, 125]         204,928
            Linear-4                  [-1, 512]     251,920,896
            Linear-5                    [-1, 2]           1,026
Total params: 252,180,546
Trainable params: 252,180,546
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 2.86
Forward/backward pass size (MB): 106.82
Params size (MB): 961.99
Estimated Total Size (MB): 1071.67
----------------------------------------------------------------


In [4]:
# NOTE(review): this import re-binds the name ClassificationModel, shadowing
# the class defined earlier in this notebook. The `model` trained below was
# already built in an earlier cell -- confirm which definition is intended.
from models.ClassificationModel import ClassificationModel
from train import train
import torch
import torch.optim
from torch.utils.data import DataLoader, random_split
import torch.nn as nn

# Pick the best available accelerator instead of hard-coding 'mps', so the
# cell also runs on machines without Apple Metal support.
if torch.backends.mps.is_available():
    device = 'mps'
elif torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Fixed seed so the train/validation split is identical on every run.
SPLIT_SEED = 42

dataset_size = len(dataset)
train_size = int(0.8 * dataset_size)  # 80% for training
val_size = dataset_size - train_size   # 20% for validation

# Split the dataset reproducibly
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Create DataLoaders for training and validation sets
train_loader = DataLoader(dataset=train_dataset, batch_size=1, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=5, shuffle=False)

# Move the model to the target device before building the optimizer, as the
# torch.optim documentation recommends.
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# BCELoss expects probabilities, which the model's softmax output provides.
# NOTE(review): assumes the dataset yields float targets with the same shape
# as the model output (e.g. one-hot) -- TODO confirm against CustomImageDataset.
# NOTE(review): in the logged run the running loss appears to jump by ~100 for
# each misclassified sample, which suggests the outputs saturate to exactly
# 0/1 after the first step; a lower learning rate or a larger training batch
# may be worth trying.
criterion = nn.BCELoss()

# NOTE(review): the trailing positional arguments (0, 5, "hej", 1232) are
# defined by train.train, which is not visible here -- verify their meaning
# and consider passing them by keyword.
model = train(model, train_loader, val_loader, optimizer, criterion, device, 0 , 5, "hej", 1232)

Starting training on device:  mps

Epoch: 0
Loss: 0.73155868 | Acc: 0.00% (0/1)
Loss: 0.36577934 | Acc: 50.00% (1/2)
Loss: 0.24385289 | Acc: 66.67% (2/3)
Loss: 0.18288967 | Acc: 75.00% (3/4)
Loss: 0.14631174 | Acc: 80.00% (4/5)
Loss: 0.12192645 | Acc: 83.33% (5/6)
Loss: 0.10450838 | Acc: 85.71% (6/7)
Loss: 12.59144484 | Acc: 75.00% (6/8)
Loss: 11.19239541 | Acc: 77.78% (7/9)
Loss: 10.07315587 | Acc: 80.00% (8/10)
Loss: 9.15741443 | Acc: 81.82% (9/11)
Loss: 8.39429656 | Acc: 83.33% (10/12)
Loss: 7.74858144 | Acc: 84.62% (11/13)
Loss: 7.19511133 | Acc: 85.71% (12/14)
Loss: 6.71543725 | Acc: 86.67% (13/15)
Loss: 6.29572242 | Acc: 87.50% (14/16)
Loss: 5.92538580 | Acc: 88.24% (15/17)
Loss: 5.59619770 | Acc: 88.89% (16/18)
Loss: 10.56481888 | Acc: 84.21% (16/19)
Loss: 15.03657793 | Acc: 80.00% (16/20)
Loss: 14.32055041 | Acc: 80.95% (17/21)
Loss: 13.66961630 | Acc: 81.82% (18/22)
Loss: 13.07528516 | Acc: 82.61% (19/23)
Loss: 12.53048161 | Acc: 83.33% (20/24)
Loss: 16.02926235 | Acc: 80.00% 