# MIL - Multi instance Learning

In [1]:
import os
import datetime
import copy
import re
import yaml
import uuid
import warnings
import time
import inspect

import numpy as np
import pandas as pd
from functools import partial, reduce
from random import shuffle
import random

import torch
from torch import nn, optim
from torch import nn
from torch.nn import functional as F
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import DataLoader
from torchvision.models import resnet
from torchvision.transforms import Compose, ToTensor, Normalize, Resize
from torchvision.models.resnet import ResNet, BasicBlock
from torchvision.datasets import MNIST
from tqdm.autonotebook import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn import metrics as mtx
from sklearn import model_selection as ms
import os
import shutil
import warnings
import torch
import torch.nn as nn
import torchvision.models as models
from sklearn.preprocessing import StandardScaler
import numpy as np
import torch.nn.init as init

warnings.filterwarnings("ignore")

  from tqdm.autonotebook import tqdm


In [2]:
from PIL import Image
from datetime import datetime
import random


def calculate_age(dob_str):
    formats = ["%m/%d/%Y", "%d-%m-%Y"]  # Two different format at the same time!
    for format_string in formats:
        try:
            dob_date = datetime.strptime(dob_str, format_string)
            current_date = datetime.now()
            age = (
                current_date.year
                - dob_date.year
                - (
                    (current_date.month, current_date.day)
                    < (dob_date.month, dob_date.day)
                )
            )
            return age
        except ValueError:
            continue


def encode_gender(gender):
    if gender == "F" or gender == "f":  # Two different gender value F and f ....
        return 0
    elif gender == "M" or gender == "m":
        return 1
    else:
        raise ValueError(
            "Invalid gender value. Expected 'F' or 'M', but received: {}".format(gender)
        )


class DLMICustomDataset(Dataset):
    def __init__(self, data, transform=None, flag="trainset", max_images=80):
        self.data = data
        self.transform = transform
        self.flag = flag
        self.max_images = max_images

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        patient_ID = self.data.iloc[idx, 1]
        img_path_folder = "data/raw/" + self.flag + "/" + str(patient_ID) + "/"

        images = []
        images_loaded = 0
        for filename in os.listdir(img_path_folder):
            if filename.endswith(".jpg"):
                img_path = os.path.join(img_path_folder, filename)
                image = Image.open(img_path).convert("RGB")
                if self.transform:
                    image = self.transform(image)
                images.append(image)
                images_loaded += 1
                if (
                    images_loaded >= self.max_images
                ):  # Stop loading if max_images reached
                    break
        if len(images) == 0:
            images.append(torch.zeros((3, 224, 224)))  # Placeholder image

        while len(images) < self.max_images:
            random_image = random.choice(images)
            images.append(random_image)

        label = torch.tensor(self.data.iloc[idx, 2])
        gender = torch.tensor(encode_gender(self.data.iloc[idx, 3]), dtype=torch.long)
        age = torch.tensor(calculate_age(self.data.iloc[idx, 4]), dtype=torch.float32)
        lymph_count = torch.tensor(self.data.iloc[idx, 5], dtype=torch.float32)
        clinical_data = torch.stack((gender, age, lymph_count))
        # (num_images, channels, height, width)
        return torch.stack(images), clinical_data, label

In [11]:
class MILModel(nn.Module):
    def __init__(self, mlp_input_dim, mlp_hidden_dim):
        super(MILModel, self).__init__()
        self.resnet50 = models.resnet50(pretrained=True)
        for param in self.resnet50.parameters():
            param.requires_grad = False
        num_ftrs = self.resnet50.fc.in_features
        self.resnet50.fc = nn.Linear(num_ftrs, num_ftrs)  # One trainable layers
        self.linear_classifier = nn.Sequential(
            nn.Linear(num_ftrs, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 1),
        )  # One classifier with bias
        self.mlp = MLP(input_dim=mlp_input_dim, hidden_dim=mlp_hidden_dim, output_dim=1)
        self.activation = nn.Sigmoid()

    # Batch imaging forwarding for bags batchsize*imagesnum*channel*width * height
    def forward_images(self, image_data):
        score_output = []
        for batch in image_data:
            batch_encoded_images = []
            for image in batch:
                image = image.unsqueeze(0)
                with torch.no_grad():
                    features = self.resnet50(image)
                    # Mean pooling to do max pooling or LSE pooling
                batch_encoded_images.append(features)
            feature_extracted = torch.stack(batch_encoded_images)
            features = torch.mean(feature_extracted, dim=0)
            score_output.append(self.linear_classifier(features).view(-1))
        return self.activation(torch.stack(score_output))

    def forward(self, image_data, clinical_data):
        batch_size, num_images, channels, height, width = image_data.size()
        image_output = self.forward_images(image_data)
        mlp_output = self.mlp(clinical_data)
        return image_output, mlp_output

    def loss_function(self, image_output, clinical_output, labels):
        image_loss = F.binary_cross_entropy_with_logits(image_output, labels)
        clinical_loss = F.binary_cross_entropy_with_logits(clinical_output, labels)
        return (image_loss + clinical_loss) / 2


class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.LeakyReLU()
        self.activate = nn.Sigmoid()

        # Initialize the weights for the linear layers
        init.xavier_uniform_(self.fc1.weight)
        init.xavier_uniform_(self.fc2.weight)
        init.xavier_uniform_(self.fc3.weight)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return self.activate(x)

Training

In [12]:
def calculate_accuracy(outputs, labels):
    predicted_classes = torch.argmax(outputs, dim=1)
    true_classes = torch.argmax(labels, dim=1)
    accuracy = (predicted_classes == true_classes).float().mean().item()
    return accuracy


def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [13]:
import torchvision.transforms as transforms
from torchvision.datasets import DatasetFolder
from torch.utils.data import random_split
from torch.utils.tensorboard import SummaryWriter
import json


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Trianing is on " + str(device))
# Define hyperparameters
hyperparameters = {
    "mlp_input_dim": 3,
    "mlp_hidden_dim": 300,
    "learning_rate": 0.00001,
    "batch_size": 10,
    "num_epochs": 100,
}

model = MILModel(
    mlp_input_dim=hyperparameters["mlp_input_dim"],
    mlp_hidden_dim=hyperparameters["mlp_hidden_dim"],
).to(device)

with open(f"hyperparameters.json", "w") as json_file:
    json.dump(hyperparameters, json_file)

current_time = time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())
log_dir = os.path.join("logs", current_time)
writer = SummaryWriter(log_dir)

data = pd.read_csv(
    "/home/lujun/local/DLMI-Classification/data/raw/clinical_annotation.csv"
)
train_data = data[data["LABEL"] != -1]
test_data = data[data["LABEL"] == -1]
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# training : validation : test = 130 : 33 : 42
training_dataset = DLMICustomDataset(
    data=train_data, transform=transform, flag="trainset"
)  # 130

test_dataset = DLMICustomDataset(
    data=test_data, transform=transform, flag="testset"
)  # 42

train_size = int(0.8 * len(training_dataset))
val_size = len(training_dataset) - train_size
train_dataset, val_dataset = random_split(training_dataset, [train_size, val_size])
train_loader = DataLoader(
    train_dataset, batch_size=hyperparameters["batch_size"], shuffle=True
)  # 130
val_loader = DataLoader(
    val_dataset, batch_size=hyperparameters["batch_size"], shuffle=False
)  # 33
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)
# Every item: max-images * channel number * width * height

Trianing is on cuda


In [14]:
optimizer = optim.Adam(model.parameters(), lr=hyperparameters["learning_rate"])
num_epochs = hyperparameters["num_epochs"]

best_loss = float("inf")
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    total_correct = 0
    total_samples = 0
    for inputs, clinical, labels in train_loader:
        inputs, clinical, labels = (
            inputs.to(device),
            clinical.to(device),
            labels.to(device),
        )
        optimizer.zero_grad()
        image_output, mlp_output = model(inputs, clinical)
        # labels.requires_grad = True
        labels = labels.view(image_output.shape).float()
        loss = model.loss_function(image_output, mlp_output, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
        combined_output = (image_output + mlp_output) / 2
        predicted_labels = torch.round(combined_output)
        correct = (predicted_labels == labels).sum().item()
        total_correct += correct
        total_samples += labels.size(0)

    epoch_loss = running_loss / len(train_dataset)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")
    accuracy = total_correct / total_samples
    print(
        "Epoch {} Total Loss: {:.4f}, Accuracy: {:.2f}%".format(
            epoch + 1, running_loss / len(train_loader), accuracy * 100
        )
    )
    writer.add_scalar("Loss/train", epoch_loss, epoch)
    writer.add_scalar("Accuracy/train", accuracy, epoch)
    # Validation
    model.eval()  # Set model to evaluation mode
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, clinical, labels in val_loader:
            inputs, clinical, labels = (
                inputs.to(device),
                clinical.to(device),
                labels.to(device),
            )
            image_output, mlp_output = model(inputs, clinical)
            labels = labels.view(image_output.shape).float()
            loss = model.loss_function(image_output, mlp_output, labels)
            val_loss += loss.item() * inputs.size(0)
            combined_output = (image_output + mlp_output) / 2
            predicted_labels = torch.round(combined_output)
            correct += (predicted_labels == labels).sum().item()
            total += labels.size(0)
    val_loss = val_loss / len(val_loader.dataset)
    val_accuracy = correct / total
    writer.add_scalar("Loss/validation", val_loss, epoch)
    writer.add_scalar("Accuracy/validation", val_accuracy, epoch)
    print(
        f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {epoch_loss:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}"
    )

    if epoch_loss < best_loss:
        best_loss = epoch_loss
        unchanged_count = 0
    else:
        unchanged_count += 1

    if unchanged_count >= 5:
        print("Loss has not changed for 5 consecutive epochs. Stopping training.")
        break

    torch.save(
        {
            "epoch": epoch,
            "model_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
            "loss": epoch_loss,
            "hyperparameters": hyperparameters,
        },
        f"trained_model_epoch_{epoch+1}.pth",
    )

Epoch [1/100], Loss: 0.6254
Epoch 1 Total Loss: 1.2508, Accuracy: 53.85%
Epoch [1/100], Train Loss: 0.6254, Val Loss: 0.5854, Val Accuracy: 0.6061
Epoch [2/100], Loss: 0.6264
Epoch 2 Total Loss: 1.2528, Accuracy: 66.92%
Epoch [2/100], Train Loss: 0.6264, Val Loss: 0.5809, Val Accuracy: 0.7273
Epoch [3/100], Loss: 0.6093
Epoch 3 Total Loss: 1.2186, Accuracy: 62.31%
Epoch [3/100], Train Loss: 0.6093, Val Loss: 0.5677, Val Accuracy: 0.7273
Epoch [4/100], Loss: 0.6085
Epoch 4 Total Loss: 1.2171, Accuracy: 60.00%
Epoch [4/100], Train Loss: 0.6085, Val Loss: 0.5713, Val Accuracy: 0.7273
Epoch [5/100], Loss: 0.5976
Epoch 5 Total Loss: 1.1952, Accuracy: 71.54%
Epoch [5/100], Train Loss: 0.5976, Val Loss: 0.6019, Val Accuracy: 0.5152
Epoch [6/100], Loss: 0.6204
Epoch 6 Total Loss: 1.2407, Accuracy: 53.08%
Epoch [6/100], Train Loss: 0.6204, Val Loss: 0.5857, Val Accuracy: 0.5758
Epoch [7/100], Loss: 0.6013
Epoch 7 Total Loss: 1.2027, Accuracy: 63.08%
Epoch [7/100], Train Loss: 0.6013, Val Loss: 

In [16]:
# test
model.eval()  # Set model to evaluation mode
val_loss = 0.0
correct = 0
total = 0
with torch.no_grad():
    predicted_labels_list = []
    for inputs, clinical, labels in test_loader:
        inputs, clinical, labels = (
            inputs.to(device),
            clinical.to(device),
            labels.to(device),
        )
        image_output, mlp_output = model(inputs, clinical)
        labels = labels.view(image_output.shape).float()
        loss = model.loss_function(image_output, mlp_output, labels)
        val_loss += loss.item() * inputs.size(0)
        combined_output = (image_output + mlp_output) / 2
        predicted_labels = torch.round(combined_output)
        predicted_labels_list.append(predicted_labels)

In [18]:
torch.stack(predicted_labels_list)

RuntimeError: stack expects each tensor to be equal size, but got [4, 1] at entry 0 and [2, 1] at entry 10

In [21]:
predicted_labels_list[9].shape

torch.Size([4, 1])

In [23]:
combined_tensor = torch.cat(predicted_labels_list, dim=0)

In [24]:
combined_tensor.shape

torch.Size([42, 1])

In [26]:
combined_array = combined_tensor.cpu().numpy()

In [33]:
ID = test_dataset.data.ID.to_numpy()

In [38]:
ID = ID[:, np.newaxis]

In [40]:
ID.shape

(42, 1)

In [37]:
combined_array.shape

(42, 1)

In [41]:
result_array = np.concatenate((ID, combined_array), axis=1)

In [43]:
result_df = pd.DataFrame(result_array)

In [45]:
result_df.columns = ["ID", "Predicted"]

In [47]:
result_df["Predicted"] = result_df["Predicted"].astype(int)

In [50]:
result_df.to_csv("output2.csv", index=False)