### Imports

In [3]:
import torch
import os
import re

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image
from natsort import natsorted
from torchvision import models

### Creating the PyTorch dataset

In [4]:
class LymphomaDataset(Dataset):
    """Dataset that pairs image files with per-image label files.

    Images and labels live in two separate folders. Both listings are
    naturally sorted and paired by position, so the i-th image file is
    matched with the i-th label file.

    Args:
        image_folder: Directory containing the image files.
        label_folder: Directory containing one label file per image; the
            stripped content of each file is converted with ``int``.
        transform: Optional callable applied to the RGB PIL image before it
            is returned.

    Raises:
        ValueError: If the two folders do not contain the same number of
            usable files, since positional pairing would then be wrong.
    """

    def __init__(self, image_folder, label_folder, transform=None):
        self.image_folder = image_folder
        self.label_folder = label_folder
        self.transform = transform

        # Only regular, non-hidden files take part in the pairing; stray
        # entries such as ".DS_Store" or ".ipynb_checkpoints/" would shift
        # the index alignment between images and labels.
        self.image_files = natsorted(self._visible_files(image_folder))
        self.label_files = natsorted(self._visible_files(label_folder))

        if len(self.image_files) != len(self.label_files):
            raise ValueError(
                f"Image/label count mismatch: {len(self.image_files)} images in "
                f"{image_folder!r} vs {len(self.label_files)} labels in {label_folder!r}"
            )

    @staticmethod
    def _visible_files(folder):
        """Return the names of regular, non-hidden files directly inside ``folder``."""
        return [
            name
            for name in os.listdir(folder)
            if not name.startswith(".") and os.path.isfile(os.path.join(folder, name))
        ]

    def __len__(self):
        """Return the number of image files in the dataset."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for position ``idx``.

        The image is opened and converted to RGB, then passed through
        ``self.transform`` when one was given. The label is the integer
        parsed from the matching label file.
        """
        img_name = os.path.join(self.image_folder, self.image_files[idx])
        label_name = os.path.join(self.label_folder, self.label_files[idx])

        image = Image.open(img_name).convert("RGB")
        label = int(self.read_label_file(label_name))

        if self.transform:
            image = self.transform(image)

        return image, label

    def read_label_file(self, label_path):
        """Read a label file and return its content with surrounding whitespace stripped.

        The value is returned as a string; ``__getitem__`` converts it to ``int``.
        """
        with open(label_path, 'r') as file:
            label = file.read().strip()
        return label

# Data is expected under ./data/images and ./data/labels, relative to the
# directory the notebook is run from.
base_folder = os.getcwd()
image_folder, label_folder = (
    os.path.join(base_folder, "data", subdir) for subdir in ("images", "labels")
)

# Preprocessing: resize every image to 500x500, then convert it to a tensor.
transform = transforms.Compose([transforms.Resize((500, 500)), transforms.ToTensor()])

batch_size = 10

# Build the dataset and a shuffling loader on top of it.
dataset = LymphomaDataset(image_folder, label_folder, transform=transform)
dataloader = DataLoader(dataset, shuffle=True, batch_size=batch_size)

# Sanity check: fetch one sample and report its tensor size and label.
sample_index = 200
sample_image, sample_label = dataset[sample_index]
print(f"Sample {sample_index} - Image: {sample_image.size()}, Label: {sample_label}")


Sample 200 - Image: torch.Size([3, 500, 500]), Label: 2


### Creating the AlexNet

In [5]:
class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier.

    Three sub-modules are applied in sequence: ``features`` (five
    convolutions with ReLU and three max-pool stages), ``avgpool`` (adaptive
    average pooling to 6x6) and ``classifier`` (three linear layers with
    dropout before the first two).

    Args:
        num_classes: Size of the final linear layer's output.
        dropout: Drop probability used by both dropout layers.
    """

    def __init__(self, num_classes: int = 1000, dropout: float = 0.5) -> None:
        super().__init__()

        # Convolutional trunk; layer order fixes the Sequential indices.
        conv_layers = [
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*conv_layers)

        # Pool to a fixed 6x6 grid so the classifier input is always 256*6*6.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        flat_dim = 256 * 6 * 6
        hidden_dim = 4096
        head_layers = [
            nn.Dropout(p=dropout),
            nn.Linear(flat_dim, hidden_dim),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_dim, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through features, pooling and classifier; return the classifier output."""
        pooled = self.avgpool(self.features(x))
        flat = torch.flatten(pooled, 1)  # keep dim 0, flatten the rest
        return self.classifier(flat)


# Model instance used by the training loop: four output classes.
alexnet = AlexNet(num_classes=4)

### Training loop

In [6]:
# Loss function and optimizer for the classifier defined above.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(alexnet.parameters(), lr=0.001, momentum=0.9)

num_epochs = 10
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

alexnet.to(device)  # move the model to the GPU when one is available

for epoch in range(num_epochs):
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    # Training mode enables dropout.
    alexnet.train()

    for inputs, labels in dataloader:
        # Both tensors must live on the same device as the model; leaving
        # labels on the CPU makes the loss and the accuracy comparison fail
        # with a device-mismatch error as soon as CUDA is used.
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clear gradients accumulated by the previous step.
        optimizer.zero_grad()

        # Forward pass
        outputs = alexnet(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        # Accumulate per-batch statistics for the epoch summary.
        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)  # index of the highest logit per sample
        correct_predictions += (predicted == labels).sum().item()
        total_samples += labels.size(0)

    # Accuracy is per sample; loss is the mean of the per-batch losses.
    train_accuracy = correct_predictions / total_samples
    average_loss = running_loss / len(dataloader)

    print(f'Training - Epoch: {epoch + 1}, Loss: {average_loss:.4f}, Accuracy: {train_accuracy:.4f}')



Training - Epoch: 1, Loss: 1.3677, Accuracy: 0.3717
Training - Epoch: 2, Loss: 1.3318, Accuracy: 0.3717
Training - Epoch: 3, Loss: 1.3003, Accuracy: 0.3717
Training - Epoch: 4, Loss: 1.2732, Accuracy: 0.3717
Training - Epoch: 5, Loss: 1.2425, Accuracy: 0.3717
Training - Epoch: 6, Loss: 1.1879, Accuracy: 0.3717
Training - Epoch: 7, Loss: 1.1272, Accuracy: 0.3503
Training - Epoch: 8, Loss: 1.1117, Accuracy: 0.3342
Training - Epoch: 9, Loss: 1.1120, Accuracy: 0.3262
Training - Epoch: 10, Loss: 1.1077, Accuracy: 0.3449
