In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import cohen_kappa_score

from torchvision import datasets, models, transforms
import os
from torch.utils.tensorboard import SummaryWriter
import numpy as np
from PIL import Image, ImageOps
import numpy as np
import cv2

from torchvision import transforms
from PIL import Image
import numpy as np
import cv2


In [2]:
# Preprocessing pipeline handed to ImageFolder: each image is resized to
# 224x224, randomly mirrored left/right, converted to a tensor and finally
# normalized channel-wise with the mean/std below (presumably the ImageNet
# statistics expected by ImageNet-pretrained backbones).
data_transforms = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)


In [3]:

data_dir = 'Data_histEq'  # Replace with your directory path
SPLIT_SEED = 42  # Fixed seed so the train/val split survives a kernel restart

# Training view of the data: uses the full (augmenting) transform pipeline.
dataset = datasets.ImageFolder(data_dir, transform=data_transforms)

# Validation view of the same files: identical preprocessing, minus the random
# flip, so validation metrics are deterministic and not computed on augmented
# images.
eval_transforms = transforms.Compose([
    step for step in data_transforms.transforms
    if not isinstance(step, transforms.RandomHorizontalFlip)
])
eval_dataset = datasets.ImageFolder(data_dir, transform=eval_transforms)

# Split the dataset into train and validation sets
train_size = int(0.8 * len(dataset))  # 80% for training
val_size = len(dataset) - train_size   # 20% for validation
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, val_split = random_split(
    dataset, [train_size, val_size], generator=split_generator
)
# Re-point the validation indices at the non-augmenting view. Both ImageFolder
# instances scan the same directory, so an index is expected to address the
# same file in each of them.
val_dataset = torch.utils.data.Subset(eval_dataset, val_split.indices)

# Create data loaders for train and validation sets
dataloaders = {
    'train': DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4),
    'val': DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)
}

num_classes = len(dataset.classes)

In [4]:
# Report how many samples ended up in the training and validation splits.
print(train_size, val_size)

1328 333


In [5]:
# Show which integer label ImageFolder assigned to each class folder.
class_lines = [
    f"Class number: {idx}, Class name: {class_name}"
    for idx, class_name in enumerate(dataset.classes)
]
for class_line in class_lines:
    print(class_line)
    

Class number: 0, Class name: FVR
Class number: 1, Class name: Stage 1
Class number: 2, Class name: Stage 2
Class number: 3, Class name: Stage 3
Class number: 4, Class name: TAR


In [7]:
# Start from a pretrained ResNet-18 and replace its final fully connected layer
# with one sized for this dataset's classes.
model = models.resnet18(weights='IMAGENET1K_V1')
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Pick the best available accelerator: CUDA first, then Apple MPS, else CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
model = model.to(device)

# Define loss function and optimizer. The optimizer is created only after the
# model has been moved, so it is built from the on-device parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# TensorBoard writer shared by the training loop and the embedding logger.
writer = SummaryWriter('runs/stage_classification_experiment')

In [8]:
def train_model(model, criterion, optimizer, num_epochs=25, checkpoint_path='ResNet_Model.pth'):
    """Train ``model`` and return it carrying the best validation weights.

    Every epoch runs a training pass followed by a validation pass over the
    notebook-level ``dataloaders`` dict (keys ``'train'`` and ``'val'``).
    Loss and accuracy are printed and written to the notebook-level
    TensorBoard ``writer`` for both phases; Cohen's Kappa is additionally
    reported for validation. Batches are moved to the notebook-level
    ``device``.

    Args:
        model: Network to optimize; expected to already live on ``device``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        num_epochs: Number of train/val epochs to run.
        checkpoint_path: File that receives the ``state_dict`` whenever the
            validation accuracy improves.

    Returns:
        The same ``model`` object. If at least one checkpoint was written
        during this call, its weights are reloaded before returning so the
        returned model matches the saved best checkpoint rather than
        whatever the final epoch produced.
    """
    best_acc = 0.0
    checkpoint_saved = False

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            model.train(is_train)  # train(False) is equivalent to eval()

            running_loss = 0.0
            running_corrects = 0

            all_preds = []
            all_labels = []

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Gradients only need clearing when a backward pass follows.
                if is_train:
                    optimizer.zero_grad()

                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # Collect predictions and labels for Cohen's Kappa
                all_preds.extend(preds.cpu().tolist())
                all_labels.extend(labels.cpu().tolist())

                # Accumulate plain Python numbers so the epoch metrics (and
                # best_acc) are floats instead of device tensors.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels).sum().item()

            num_samples = len(dataloaders[phase].dataset)
            epoch_loss = running_loss / num_samples
            epoch_acc = running_corrects / num_samples

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Log metrics to TensorBoard
            writer.add_scalar(f'{phase} Loss', epoch_loss, epoch)
            writer.add_scalar(f'{phase} Accuracy', epoch_acc, epoch)

            if not is_train:
                # Cohen's Kappa is only reported for the validation phase.
                kappa = cohen_kappa_score(all_preds, all_labels)
                print(f'{phase} Cohen\'s Kappa: {kappa:.4f}')
                writer.add_scalar(f'{phase} Cohen\'s Kappa', kappa, epoch)

                # Keep a checkpoint of the best validation accuracy so far.
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    torch.save(model.state_dict(), checkpoint_path)
                    checkpoint_saved = True

    # Hand back the best weights, not the last epoch's. The flag prevents a
    # stale file from an earlier run being loaded when nothing was saved here.
    if checkpoint_saved:
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))

    return model

In [9]:
# Run the training loop for 50 epochs and keep the model it returns.
model = train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=50,
)

Epoch 0/49
----------
train Loss: 1.3034 Acc: 0.4684
val Loss: 1.1488 Acc: 0.5345
val Cohen's Kappa: 0.3611
Epoch 1/49
----------
train Loss: 1.0139 Acc: 0.6054
val Loss: 1.2179 Acc: 0.4985
val Cohen's Kappa: 0.3283
Epoch 2/49
----------
train Loss: 0.9653 Acc: 0.6054
val Loss: 1.2097 Acc: 0.5616
val Cohen's Kappa: 0.3735
Epoch 3/49
----------
train Loss: 0.8636 Acc: 0.6627
val Loss: 0.8963 Acc: 0.6607
val Cohen's Kappa: 0.5354
Epoch 4/49
----------
train Loss: 0.8023 Acc: 0.6867
val Loss: 0.9559 Acc: 0.6276
val Cohen's Kappa: 0.4872
Epoch 5/49
----------
train Loss: 0.6993 Acc: 0.7410
val Loss: 1.1843 Acc: 0.6276
val Cohen's Kappa: 0.4643
Epoch 6/49
----------
train Loss: 0.7100 Acc: 0.7259
val Loss: 0.9389 Acc: 0.6096
val Cohen's Kappa: 0.4812
Epoch 7/49
----------
train Loss: 0.5718 Acc: 0.7816
val Loss: 1.3995 Acc: 0.5766
val Cohen's Kappa: 0.4275
Epoch 8/49
----------
train Loss: 0.5253 Acc: 0.8110
val Loss: 1.0110 Acc: 0.6577
val Cohen's Kappa: 0.5321
Epoch 9/49
----------
train 

In [11]:
def extract_embeddings(dataloader, model):
    """Collect pre-classifier features for every sample in ``dataloader``.

    The model is switched to eval mode and each batch is pushed through
    ``conv1 -> bn1 -> relu -> maxpool -> layer1..layer4 -> avgpool``, i.e.
    every stage except the final ``fc`` layer, then flattened to
    ``(batch_size, feature_size)``.

    Args:
        dataloader: Iterable yielding ``(inputs, labels)`` tensor batches.
        model: Module exposing the attributes listed above (e.g. a
            torchvision ResNet).

    Returns:
        Tuple ``(embeddings, labels, images)`` of CPU tensors, each
        concatenated over all batches along dimension 0.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    model.eval()

    # Run on whatever device the model lives on instead of relying on a
    # notebook-level global; fall back to CPU for parameter-free modules.
    try:
        model_device = next(model.parameters()).device
    except StopIteration:
        model_device = torch.device('cpu')

    # Everything up to (but excluding) the final fully connected layer.
    backbone_stages = (
        model.conv1, model.bn1, model.relu, model.maxpool,
        model.layer1, model.layer2, model.layer3, model.layer4,
        model.avgpool,
    )

    embeddings = []
    labels_list = []
    image_list = []

    with torch.no_grad():
        for inputs, labels in dataloader:
            features = inputs.to(model_device)
            for stage in backbone_stages:
                features = stage(features)
            features = torch.flatten(features, 1)  # (batch_size, feature_size)

            embeddings.append(features.cpu())
            # Labels are only stored, never fed to the model, so they do not
            # need a round trip through the accelerator.
            labels_list.append(labels.cpu())
            image_list.append(inputs.cpu())  # input tensors, kept on CPU

    if not embeddings:
        raise ValueError('dataloader yielded no batches; nothing to embed')

    return torch.cat(embeddings), torch.cat(labels_list), torch.cat(image_list)


# Compute features, labels and input images for the validation loader.
embeddings, labels, images = extract_embeddings(
    dataloader=dataloaders['val'],
    model=model,
)

# Log embeddings to TensorBoard projector
def log_embeddings_to_projector(writer, embeddings, labels, images, class_names):
    """Write embeddings to the TensorBoard projector and dump the class names.

    Args:
        writer: Object exposing ``add_embedding`` (e.g. a ``SummaryWriter``).
        embeddings: Tensor with one feature row per sample.
        labels: Tensor of per-sample labels; converted with ``.tolist()`` and
            used as projector metadata.
        images: Tensor of per-sample images passed as ``label_img``.
        class_names: Iterable of class names, written one per line to
            ``runs/class_metadata.tsv``.
    """
    # Save class names as a separate metadata file. Create the folder first so
    # the function does not depend on a writer having created ``runs`` already.
    class_metadata_path = os.path.join('runs', 'class_metadata.tsv')
    os.makedirs(os.path.dirname(class_metadata_path), exist_ok=True)
    with open(class_metadata_path, 'w') as f:
        for label in class_names:
            f.write(f'{label}\n')

    # Log the embedding exactly once. Labels go in as a plain list so the
    # metadata holds the label values rather than tensor reprs.
    writer.add_embedding(embeddings, metadata=labels.tolist(), label_img=images)

# Send the validation embeddings (with labels and images) to the projector.
log_embeddings_to_projector(
    writer=writer,
    embeddings=embeddings,
    labels=labels,
    images=images,
    class_names=dataset.classes,
)

# Nothing else is logged after this point, so close the TensorBoard writer.
writer.close()

