In [27]:
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torchvision.transforms as transforms
from utils import write_log, get_device, process_data
from train import fit, evaluate
import CNN, DNN, MLP
import medmnist
from medmnist import INFO

## We work on the 2D dataset with size 28x28

In [28]:
# Dataset selection and loading options.
BATCH_SIZE = 128
DOWNLOAD = True
DATA_FLAG = "bloodmnist"

# Metadata entry that medmnist's INFO table holds for the selected dataset.
info = INFO[DATA_FLAG]

# Dataset class looked up on the medmnist module by the name stored in the metadata.
DataClass = getattr(medmnist, info["python_class"])

# Values read straight from the metadata entry.
N_CLASSES = len(info["label"])  # one entry in "label" per class
N_CHANNELS = info["n_channels"]
task = info["task"]

## First, we read the MedMNIST data, preprocess it, and wrap it in DataLoader objects.

In [29]:
# Preprocessing: the only transform applied is conversion to a tensor.
data_transform = transforms.Compose([transforms.ToTensor()])

# Build the three splits, in the order train -> test -> val.
train_dataset, test_dataset, validation_dataset = (
    DataClass(split=split_name, transform=data_transform, download=DOWNLOAD)
    for split_name in ("train", "test", "val")
)

# Record the string representation of each split in its own log file.
for _log_file, _split_dataset in (
    ("train_dataset_log.txt", train_dataset),
    ("test_dataset_log.txt", test_dataset),
    ("validation_dataset_log.txt", validation_dataset),
):
    write_log(_log_file, str(_split_dataset))

# Training split once more, this time constructed without a transform argument.
# NOTE(review): the name suggests it yields PIL images -- confirm against medmnist.
pil_dataset = DataClass(split="train", download=DOWNLOAD)


def _eval_loader(dataset):
    """Wrap `dataset` in an unshuffled DataLoader with a batch size of 2 * BATCH_SIZE."""
    return data.DataLoader(dataset=dataset, batch_size=2 * BATCH_SIZE, shuffle=False)


# Shuffled loader for training; unshuffled, double-batch loaders for everything else.
train_loader = data.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
train_loader_at_eval = _eval_loader(train_dataset)
test_loader = _eval_loader(test_dataset)
validation_loader = _eval_loader(validation_dataset)

In [30]:
# Train the selected model, then evaluate it on the validation and test splits.
#
# Inputs from earlier cells: N_CHANNELS, N_CLASSES, BATCH_SIZE and the
# train/validation/test loaders.
# Names bound here: model, optimizer, loss_values_training, trained_model,
# CM_val, f1_val, CM, f1. X and y end up holding the processed test split.

# variables declaration
MODEL_TYPE = "CNN"  # one of "CNN" or "DNN"
N_EPOCHS = 15
N_LAYERS = 1
IMAGE_SIZE = 28  # side length of the square input images (28x28 dataset)
N_INPUTS = N_CHANNELS * IMAGE_SIZE * IMAGE_SIZE
SIZE_HIDDEN_LAYER = (N_INPUTS + N_CLASSES) // 2  # N_CLASSES = n_outputs
loss_function = nn.CrossEntropyLoss()
LEARNING_RATE = 0.001

device = get_device()
write_log("main_log.txt", f"Using device: {device}\n")

# model creation
# The model types differ only in how the model is built and in the keyword
# arguments handed to process_data; everything after this branch is shared.
if MODEL_TYPE == "CNN":
    model = CNN.CNN(N_CHANNELS, N_CLASSES)
    # NOTE(review): flag=False presumably keeps the image layout instead of
    # flattening it -- confirm against utils.process_data.
    process_kwargs = {"flag": False}
elif MODEL_TYPE == "DNN":
    model = DNN.DNN(N_INPUTS, [SIZE_HIDDEN_LAYER] * N_LAYERS, N_CLASSES)
    process_kwargs = {}  # rely on process_data's default for `flag`
elif MODEL_TYPE == "MLP":
    # Fail loudly rather than skipping training: a silent no-op would leave
    # stale kernel variables and result files from an earlier run in place.
    raise NotImplementedError("MODEL_TYPE 'MLP' is not implemented yet")
else:
    raise ValueError(
        f"Unknown MODEL_TYPE {MODEL_TYPE!r}; expected 'CNN', 'DNN' or 'MLP'"
    )

optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# process training data
X, y = process_data(train_loader, **process_kwargs)
write_log(
    "main_log.txt", f"Processed training data shapes: X: {X.shape}, y: {y.shape}\n"
)

# main training loop
loss_values_training, trained_model = fit(
    device,
    X,
    y,
    model,
    loss_function,
    optimizer,
    N_EPOCHS,
    batch_size=BATCH_SIZE,
)
write_log("loss_values_training.txt", str(loss_values_training))

# evaluate data
X, y = process_data(validation_loader, **process_kwargs)
CM_val, f1_val = evaluate(device, X, y, trained_model, batch_size=BATCH_SIZE)
write_log(
    "validation_results.txt", f"Confusion Matrix:\n{CM_val}\nF1 Score: {f1_val}\n"
)

# test data
X, y = process_data(test_loader, **process_kwargs)
CM, f1 = evaluate(device, X, y, trained_model, batch_size=BATCH_SIZE)
write_log("results.txt", f"Confusion Matrix:\n{CM}\nF1 Score: {f1}\n")

Epoch [1/15], Loss: 148.6486
Epoch [2/15], Loss: 91.0126
Epoch [3/15], Loss: 71.0893
Epoch [4/15], Loss: 63.0121
Epoch [5/15], Loss: 56.8830
Epoch [6/15], Loss: 53.8477
Epoch [7/15], Loss: 50.4935
Epoch [8/15], Loss: 47.8459
Epoch [9/15], Loss: 45.1568
Epoch [10/15], Loss: 41.5441
Epoch [11/15], Loss: 38.8114
Epoch [12/15], Loss: 35.7265
Epoch [13/15], Loss: 32.5733
Epoch [14/15], Loss: 31.0399
Epoch [15/15], Loss: 28.8438
