Load dataset and visualize some samples

In [1]:
from torchvision import transforms, utils
from Dataset import CustomImageDataset


# Target (height, width) every image is resized to before being batched.
IMAGE_SIZE = (500, 500)

# Per-channel mean / std used to normalize the tensors.
# NOTE(review): these values match the widely used ImageNet statistics --
# confirm that is the intended normalization for this dataset.
CHANNEL_MEAN = [0.485, 0.456, 0.406]
CHANNEL_STD = [0.229, 0.224, 0.225]

# Preprocessing pipeline: resize -> convert to tensor -> normalize.
preprocessing_steps = [
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=CHANNEL_MEAN, std=CHANNEL_STD),
]
transform = transforms.Compose(preprocessing_steps)

# NOTE(review): the meaning of the trailing positional arguments (2, 0.01)
# is defined by CustomImageDataset in Dataset.py and is not visible here --
# presumably a class count and a sampling fraction; verify against that file.
dataset = CustomImageDataset('data/raw-img', transform, 2, 0.01)



#dataset.visualize(5)

#dataset.visualize_all_classes()

#print(dataset.nr_classes)

In [2]:
#define model

import torch.nn as nn
import torch.nn.functional as F

class ClassificationModel(nn.Module):
    """Small three-stage CNN classifier that returns per-class probabilities.

    Architecture: three (5x5 conv -> ReLU -> 2x2 max-pool) stages with
    32, 64 and 128 output channels, followed by a 512-unit fully connected
    layer and a linear prediction layer with a softmax over the classes.

    Args:
        num_classes: Number of output classes.
        input_size: Spatial size of the input images, either a single int
            (square images) or a ``(height, width)`` pair. Defaults to 500,
            which reproduces the original hard-coded ``128 * 62 * 62``
            flattened feature size.

    Raises:
        ValueError: If ``input_size`` is too small to survive the three
            2x2 pooling stages (i.e. a side shorter than 8 pixels).

    Note:
        ``forward`` returns softmax probabilities, not raw logits. Pair it
        with a loss that expects probabilities; losses that apply their own
        softmax internally (e.g. ``nn.CrossEntropyLoss``) expect logits.
    """

    # Number of 2x2 max-pool stages applied in ``forward``; each one halves
    # (floor division) both spatial dimensions.
    _NUM_POOL_STAGES = 3

    def __init__(self, num_classes=10, input_size=500):
        super().__init__()

        # Derive the flattened feature size from the input size instead of
        # hard-coding it, so the model can be built for other resolutions.
        if isinstance(input_size, int):
            height, width = input_size, input_size
        else:
            height, width = input_size
        for _ in range(self._NUM_POOL_STAGES):
            height //= 2
            width //= 2
        if height < 1 or width < 1:
            raise ValueError(
                f"input_size={input_size!r} is too small for "
                f"{self._NUM_POOL_STAGES} pooling stages"
            )

        # The convolutions use kernel size 5 with padding 2, so they keep the
        # spatial size unchanged; only the pooling in ``forward`` shrinks it.
        # First convolutional layer: 3 input channels (RGB) -> 32 channels
        self.conv1 = nn.Conv2d(3, 32, kernel_size=5, padding=2)

        # Second convolutional layer: 32 -> 64 channels
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5, padding=2)

        # Third convolutional layer: 64 -> 128 channels
        self.conv3 = nn.Conv2d(64, 128, kernel_size=5, padding=2)

        # Fully connected layer on the flattened (128, height, width) features
        self.fc1 = nn.Linear(128 * height * width, 512)

        # Prediction layer: one score per class
        self.prediction = nn.Linear(512, num_classes)

    def forward(self, x):
        """Compute class probabilities for a batch of images.

        Args:
            x: Tensor of shape (batch_size, 3, H, W), where (H, W) matches the
                ``input_size`` the model was built with. The shape comments
                below use the default 500x500 input.

        Returns:
            Tensor of shape (batch_size, num_classes) whose rows sum to 1.
        """
        # First conv -> ReLU -> Max Pooling
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, kernel_size=2)  # Output: (batch_size, 32, 250, 250)

        # Second conv -> ReLU -> Max Pooling
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, kernel_size=2)  # Output: (batch_size, 64, 125, 125)

        # Third conv -> ReLU -> Max Pooling (125 // 2 == 62)
        x = F.relu(self.conv3(x))
        x = F.max_pool2d(x, kernel_size=2)  # Output: (batch_size, 128, 62, 62)

        # Flatten everything except the batch dimension for the dense layers
        x = x.view(x.size(0), -1)  # Output: (batch_size, 128 * 62 * 62)

        # Fully connected layer -> ReLU
        x = F.relu(self.fc1(x))

        # Class scores -> probabilities. The functional softmax avoids building
        # a new nn.Softmax module on every forward call.
        x = self.prediction(x)
        x = F.softmax(x, dim=1)

        return x
    



In [3]:
from torchsummary import summary
# Number of target classes and the per-sample input shape (channels, H, W)
# used to build the network and to print its layer-by-layer summary.
NUM_CLASSES = 2
SUMMARY_INPUT_SHAPE = (3, 500, 500)

model = ClassificationModel(num_classes=NUM_CLASSES)

# Prints output shapes and parameter counts for each registered layer.
summary(model, SUMMARY_INPUT_SHAPE)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 500, 500]           2,432
            Conv2d-2         [-1, 64, 250, 250]          51,264
            Conv2d-3        [-1, 128, 125, 125]         204,928
            Linear-4                  [-1, 512]     251,920,896
            Linear-5                    [-1, 2]           1,026
Total params: 252,180,546
Trainable params: 252,180,546
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 2.86
Forward/backward pass size (MB): 106.82
Params size (MB): 961.99
Estimated Total Size (MB): 1071.67
----------------------------------------------------------------


: 

In [4]:
from models.ClassificationModel import ClassificationModel
from train import train
import torch.optim
from torch.utils.data import DataLoader, random_split
import torch.nn as nn

# Pick the best available accelerator instead of hard-coding 'mps', which
# fails on machines without Apple's Metal backend.
if torch.backends.mps.is_available():
    device = 'mps'
elif torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

dataset_size = len(dataset)
print("length dataset: ", dataset_size)

# 80/20 train/validation split; the validation size is the remainder so the
# two parts always add up to dataset_size.
train_size = int(0.8 * dataset_size)
val_size = dataset_size - train_size

# Seed the split so the same samples land in train/val on every run
# (Restart & Run All stays reproducible).
SPLIT_SEED = 42
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, val_dataset = random_split(
    dataset, [train_size, val_size], generator=split_generator
)

# Create DataLoaders for training and validation sets
train_loader = DataLoader(dataset=train_dataset, batch_size=1, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=5, shuffle=False)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# NOTE(review): BCELoss expects probabilities and float targets with the same
# shape as the model output -- confirm that train() prepares the targets
# accordingly (e.g. one-hot floats) for the softmax output of the model.
criterion = nn.BCELoss()

model.to(device)

# NOTE(review): the trailing positional arguments (0, 5, "hej", 1232) are
# defined by train() in train.py and are not visible here -- presumably a
# start epoch, an epoch count, a run name and an id/seed; verify there.
model = train(model, train_loader, val_loader, optimizer, criterion, device, 0 , 5, "hej", 1232)





length dataset:  79
Starting training on device:  mps

Epoch: 0
Loss: 0.72717422 | Acc: 0.00% (0/1)
Loss: 0.36358711 | Acc: 50.00% (1/2)
Loss: 0.24239141 | Acc: 66.67% (2/3)
Loss: 0.18179356 | Acc: 75.00% (3/4)
Loss: 0.14543484 | Acc: 80.00% (4/5)
Loss: 0.12119570 | Acc: 83.33% (5/6)
Loss: 14.38959632 | Acc: 71.43% (5/7)
Loss: 12.59089678 | Acc: 75.00% (6/8)
Loss: 22.30301936 | Acc: 66.67% (6/9)
Loss: 20.07271742 | Acc: 70.00% (7/10)
Loss: 18.24792493 | Acc: 72.73% (8/11)
Loss: 16.72726452 | Acc: 75.00% (9/12)
Loss: 15.44055186 | Acc: 76.92% (10/13)
Loss: 14.33765530 | Acc: 78.57% (11/14)
Loss: 13.38181161 | Acc: 80.00% (12/15)
Loss: 12.54544839 | Acc: 81.25% (13/16)
Loss: 11.80748084 | Acc: 82.35% (14/17)
Loss: 11.15150968 | Acc: 83.33% (15/18)
Loss: 15.82774601 | Acc: 78.95% (15/19)
Loss: 20.03635871 | Acc: 75.00% (15/20)
Loss: 19.08224639 | Acc: 76.19% (16/21)
Loss: 18.21487156 | Acc: 77.27% (17/22)
Loss: 17.42292062 | Acc: 78.26% (18/23)
Loss: 16.69696559 | Acc: 79.17% (19/24)
Loss

  val_inputs, val_targets = torch.tensor(val_inputs, requires_grad=True), torch.tensor(val_targets).long()
