Load dataset and visualize some samples

In [None]:
from torchvision import transforms, utils
from Dataset import CustomImageDataset


# Per-channel normalisation constants. The values match the ImageNet
# statistics commonly quoted in the torchvision documentation.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Preprocessing applied to every image: resize to 64x64, convert to a tensor,
# then normalise each channel.
_preprocessing_steps = [
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
transform = transforms.Compose(_preprocessing_steps)

# NOTE(review): the meaning of the trailing positional arguments (2, 0.1) is
# defined by CustomImageDataset, which is not visible here -- presumably a
# class count and a subsampling fraction; confirm against Dataset.py.
dataset = CustomImageDataset('data/raw-img', transform, 2, 0.1)

# Show a handful of samples, then an overview across all classes.
dataset.visualize(5)
dataset.visualize_all_classes()

# Debug aid (disabled): print(dataset.nr_classes)

In [14]:
#define model

import torch.nn as nn
import torch.nn.functional as F

class ClassificationModel(nn.Module):
    """Small three-stage CNN image classifier that outputs class probabilities.

    Each stage is a 5x5 convolution with padding 2 (spatial size preserved),
    followed by ReLU and 2x2 max pooling (height and width halved, rounding
    down). The flattened features pass through one hidden fully connected
    layer and a prediction layer whose scores are normalised with softmax.

    Note:
        ``forward`` returns probabilities, not raw logits. Do not combine the
        output with a loss that applies (log-)softmax itself, such as
        ``nn.CrossEntropyLoss``.

    Args:
        num_classes: Number of output classes.
        input_size: Spatial size of the input images, either a single int for
            square images or a ``(height, width)`` pair. The default of 64
            gives the original ``128 * 8 * 8`` flattened feature size.

    Raises:
        ValueError: If either spatial dimension is smaller than 8, because
            three rounds of 2x2 pooling would leave no features.
    """

    def __init__(self, num_classes=10, input_size=64):
        super().__init__()

        # Accept either a single int (square input) or a (height, width) pair.
        try:
            height, width = input_size
        except TypeError:
            height = width = input_size

        # Three 2x2 max-pool layers each floor-halve the spatial dimensions,
        # which is equivalent to one floor division by 8.
        feat_h, feat_w = int(height) // 8, int(width) // 8
        if feat_h < 1 or feat_w < 1:
            raise ValueError(
                f"input_size must be at least 8 in each dimension, got {input_size!r}"
            )

        # First convolutional layer: 3 input channels (RGB) -> 32 channels.
        self.conv1 = nn.Conv2d(3, 32, kernel_size=5, padding=2)

        # Second convolutional layer: 32 -> 64 channels.
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5, padding=2)

        # Third convolutional layer: 64 -> 128 channels.
        self.conv3 = nn.Conv2d(64, 128, kernel_size=5, padding=2)

        # Fully connected layer on the flattened conv features.
        self.fc1 = nn.Linear(128 * feat_h * feat_w, 512)

        # Prediction layer: one score per class.
        self.prediction = nn.Linear(512, num_classes)

    def forward(self, x):
        """Compute class probabilities for a batch of images.

        Args:
            x: Tensor of shape ``(batch_size, 3, H, W)`` where ``(H, W)``
                matches the ``input_size`` given at construction. The shape
                comments below assume the default 64x64 input.

        Returns:
            Tensor of shape ``(batch_size, num_classes)``; each row is a
            softmax distribution and sums to 1.

        Raises:
            RuntimeError: If the input's spatial size does not produce the
                feature count ``fc1`` was built for.
        """
        # First conv -> ReLU -> max pooling: (batch_size, 32, 32, 32)
        x = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=2)

        # Second conv -> ReLU -> max pooling: (batch_size, 64, 16, 16)
        x = F.max_pool2d(F.relu(self.conv2(x)), kernel_size=2)

        # Third conv -> ReLU -> max pooling: (batch_size, 128, 8, 8)
        x = F.max_pool2d(F.relu(self.conv3(x)), kernel_size=2)

        # Flatten for the fully connected layer: (batch_size, 128 * 8 * 8)
        x = x.view(x.size(0), -1)

        # Fully connected layer -> ReLU
        x = F.relu(self.fc1(x))

        # Class scores, normalised over the class dimension (dim=1).
        x = self.prediction(x)
        return F.softmax(x, dim=1)
    



In [None]:
from torchsummary import summary
# Build a two-class model and print a layer-by-layer summary for a single
# 3-channel 64x64 input (the size produced by the Resize transform above).
_summary_input_shape = (3, 64, 64)
model = ClassificationModel(2)
summary(model, _summary_input_shape)

In [None]:
from models.ClassificationModel import ClassificationModel
from train import train
import torch.optim
from torch.utils.data import DataLoader, random_split
import torch.nn as nn

# Pick the compute device. Apple's Metal backend ('mps') stays the first
# choice, as in the original hard-coded setting, but fall back to CUDA or CPU
# so the cell also runs on machines without MPS support.
_mps_backend = getattr(torch.backends, "mps", None)  # absent in old torch builds
if _mps_backend is not None and _mps_backend.is_available():
    device = 'mps'
elif torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Train/validation split. The code previously used a factor of 0.1 while its
# comments documented an 80/20 split; the documented split is applied here.
TRAIN_FRACTION = 0.8
dataset_size = len(dataset)
train_size = int(TRAIN_FRACTION * dataset_size)  # 80% for training
val_size = dataset_size - train_size             # remaining 20% for validation

# Split the dataset (random, unseeded -- the split differs between runs).
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Create DataLoaders for training and validation sets.
train_loader = DataLoader(dataset=train_dataset, batch_size=1, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=5, shuffle=False)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# BCELoss expects probabilities in [0, 1] and targets with the same shape as
# the model output.
# NOTE(review): presumably the dataset yields one-hot float labels -- confirm.
criterion = nn.BCELoss()

model.to(device)

# NOTE(review): the meaning of the trailing arguments (0, 20, "hej", 1232) is
# defined by train(), which is not visible here.
model, loss, acc = train(model, train_loader, val_loader, optimizer, criterion, device, 0, 20, "hej", 1232)

from test_model import test

# NOTE(review): accuracy is measured on the *training* loader, so it says
# nothing about generalisation; consider val_loader or a held-out test set.
accuracy = test(model, train_loader, device, "hej", 1111)

print(accuracy)