<H1 align="center">Multi Layer Perceptron for Disease Spread Prediction</H1>

<strong>This notebook trains a multi-layer perceptron-based classification model on the training dataset and predicts on the inference dataset.</strong>

## Import Libraries

### Python Libraries

In [None]:
import os
from datetime import datetime


### External Libraries

In [None]:
import torch
from torch.nn import ReLU, BCEWithLogitsLoss
from torch.utils.data import TensorDataset, DataLoader

### Custom Libraries

In [None]:
from utils_classification.models import ModelMultiLayerPerceptron
from utils_classification.models import RunnerMultiLayerPerceptron
from utils_classification.models import TesterMultiLayerPerceptron
from utils_classification.models import TrainerMultiLayerPerceptron
from utils_classification.data import DatasetMultiLayerPerceptron
from utils_torch.data import stratified_random_split
from utils_data import *
from utils_plot import plot_train_eval_loss_accuracy, plot_receiver_operating_characteristics_curve, \
    plot_confusion_matrix

## Set Parameters

### Select Features

In [None]:
# Column names requested from the processed data through the `features=` argument;
# the length of this list also sets the model's input width (`num_features_in`).
features = [
    # 'Normalized_<attribute>' columns
    "Normalized_Age",
    "Normalized_Behaviour",
    "Normalized_Constitution",
    "Normalized_Degree",
    "Normalized_Distance_to_Index_Patient",
    # 'Normalized_Sum_Neighbor_<attribute>' columns
    "Normalized_Sum_Neighbor_Age",
    "Normalized_Sum_Neighbor_Behaviour",
    "Normalized_Sum_Neighbor_Constitution",
    "Normalized_Sum_Neighbor_Degree",
    # 'Normalized_Sum_Population_<attribute>' columns
    "Normalized_Sum_Population_Age",
    "Normalized_Sum_Population_Behaviour",
    "Normalized_Sum_Population_Constitution",
    "Normalized_Sum_Population_Distance_to_Index_Patient",
]

### Set Dataset Parameters

In [None]:
# Fractions handed to stratified_random_split as `ratios`; the split result is
# unpacked as (train, eval, test), so the entries are presumably in that order.
train_eval_test_split = [0.7, 0.15, 0.15]

### Set Dataloader Parameters

In [None]:
# Mini-batch size shared by the train, eval, test and inference DataLoaders.
batch_size = 32

### Set Model Parameters

In [None]:
# Architecture hyperparameters; each one is forwarded under the same keyword to
# ModelMultiLayerPerceptron when the model is created.
num_layers = 3
num_features_in = len(features)  # input width follows the selected feature list
# NOTE(review): three widths for num_layers = 3 — presumably one per layer; confirm
# against ModelMultiLayerPerceptron.
num_features_hidden = [64, 32, 16]
bias = True
activation = ReLU  # the class itself, not an instance
activation_kwargs = None
dropout_p = 0.5
dropout_inplace = False
dropout_first = True
batch_norm = True
batch_norm_momentum = 0.1

### Set Trainer Parameters

In [None]:
num_epochs = 64  # passed to trainer.train(num_epochs=...)
learning_rate = 0.001  # passed to torch.optim.Adam as `lr`

### Set IO Parameters

In [None]:
# All locations are relative paths (they start with "../../"), so they resolve
# against the working directory of the notebook kernel. Directory values end
# with "/"; file values are bare names without a directory part.
data_in_processed_dir = "../../data/processed/"
data_in_processed_train_file = "train.csv"
data_in_processed_inference_file = "test.csv"
data_out_dir = "../../data/out/"
data_out_file = "mlp_predictions.csv"
# model_dir / model_file are the arguments later given to model.save().
model_dir = "../../models/"
model_file = "mlp_model"
plot_dir = "../../plots/"

## Setup Environment

### Set Torch Device

In [None]:
# Pick the compute backend in order of preference: CUDA, then Apple MPS, then CPU.
# `torch.backends.mps` is looked up defensively because PyTorch releases before
# 1.12 do not have that submodule; accessing it directly there raises
# AttributeError on any machine without CUDA instead of falling back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
elif getattr(torch.backends, 'mps', None) is not None and torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'

## Load Data

### Load Processed Training Data

In [None]:
# Build the path from the IO parameters instead of repeating the literal, so that
# editing `data_in_processed_dir` / `data_in_processed_train_file` takes effect here.
# With the configured values this is still '../../data/processed/train.csv'.
train_data = PopulationData().load_processed(
    os.path.join(data_in_processed_dir, data_in_processed_train_file))

### Load Processed Inference Data

In [None]:
# Build the path from the IO parameters instead of repeating the literal, so that
# editing `data_in_processed_dir` / `data_in_processed_inference_file` takes effect
# here. With the configured values this is still '../../data/processed/test.csv'.
inference_data = PopulationData().load_processed(
    os.path.join(data_in_processed_dir, data_in_processed_inference_file))

### Create Train Eval Test Datasets

In [None]:
# Pull the selected feature columns and the labels out of the training data
# (tensors, going by the method name), wrap them in the project dataset class,
# then split that dataset into train / eval / test subsets using the ratios
# from `train_eval_test_split`.
# NOTE(review): the split is presumably stratified on the labels — confirm in
# utils_torch.data.stratified_random_split.
train_features, train_labels = train_data.get_feature_label_tensors(features=features)
train_dataset = DatasetMultiLayerPerceptron(features=train_features, targets=train_labels)
train_subset, eval_subset, test_subset = stratified_random_split(dataset=train_dataset,
                                                                 ratios=train_eval_test_split)

### Create Train Eval Test DataLoaders

In [None]:
# Build the three loaders in one pass with a shared batch size. Only the training
# loader reshuffles its samples; the eval and test loaders keep a fixed order.
dataloader_train, dataloader_eval, dataloader_test = (
    DataLoader(subset, batch_size=batch_size, shuffle=reshuffle)
    for subset, reshuffle in ((train_subset, True),
                              (eval_subset, False),
                              (test_subset, False))
)

### Create Inference Dataset

In [None]:
# Only features are extracted for the inference data (no label tensor is
# requested), and they are wrapped in a plain TensorDataset.
tensor_inference_features = inference_data.get_feature_tensor(features=features)
dataset_inference = TensorDataset(tensor_inference_features)

### Create Inference DataLoader

In [None]:
# shuffle=False keeps the batches in dataset order — presumably so the predictions
# line up with the rows of `inference_data` when they are saved; confirm.
dataloader_inference = DataLoader(dataset_inference, batch_size=batch_size, shuffle=False)

## Create Model

In [None]:
# Gather every architecture hyperparameter plus the target device in one mapping,
# then construct the network from it. Each keyword name matches the variable that
# supplies its value.
model_kwargs = dict(
    num_layers=num_layers,
    num_features_in=num_features_in,
    num_features_hidden=num_features_hidden,
    bias=bias,
    activation=activation,
    activation_kwargs=activation_kwargs,
    dropout_p=dropout_p,
    dropout_inplace=dropout_inplace,
    dropout_first=dropout_first,
    batch_norm=batch_norm,
    batch_norm_momentum=batch_norm_momentum,
    device=device,
)
model = ModelMultiLayerPerceptron(**model_kwargs)

## Create Trainer

### Create Criterion and Optimizer

In [None]:
# BCEWithLogitsLoss applies the sigmoid internally, so it expects raw logits
# rather than probabilities as its input.
criterion = BCEWithLogitsLoss()
# Adam over all model parameters, using the learning rate from the trainer parameters.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

### Create Trainer

In [None]:
# The trainer receives the model, the loss/optimizer pair, and both the training
# and the evaluation loader, all bound to the selected device.
trainer = TrainerMultiLayerPerceptron(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=dataloader_train,
    eval_loader=dataloader_eval,
    device=device,
)

## Create Tester

In [None]:
# The tester shares the model and loss with the trainer but reads from the
# held-out test loader.
tester = TesterMultiLayerPerceptron(
    model=model,
    criterion=criterion,
    test_loader=dataloader_test,
    device=device,
)

## Create Runner

In [None]:
# The runner only needs the model and the device; the loader to predict on is
# supplied later, when predict() is called.
runner = RunnerMultiLayerPerceptron(
    model=model,
    device=device,
)

## Train Model

### Train Model

In [None]:
# Run training for the configured number of epochs; the trainer object keeps the
# per-run results (losses, accuracies, best-epoch values) read in later cells.
trainer.train(num_epochs=num_epochs)

### Get Training Results

In [None]:
# The timestamp is taken when this cell executes, i.e. right after training in a
# top-to-bottom run (re-running this cell alone would move it).
finish_time = datetime.now()
# Copy the best-evaluation summary values off the trainer for printing/plotting.
best_eval_loss_epoch = trainer.best_eval_loss_epoch
best_eval_loss = trainer.best_eval_loss
best_eval_accuracy = trainer.best_eval_accuracy

### Print Training Results

In [None]:
# Emit the training summary with a single print call, one item per line; the
# loss and the accuracy are shown with four decimal places.
print(f'Finish Time: {finish_time}',
      f'Best Eval Loss Epoch: {best_eval_loss_epoch}',
      f'Best Eval Loss: {best_eval_loss:.4f}',
      f'Best Eval Accuracy: {best_eval_accuracy:.4f}',
      sep='\n')

### Plot Training Results

In [None]:
# Plot the train/eval loss and accuracy histories recorded by the trainer.
# `saved_epoch` is given the epoch with the best evaluation loss.
# NOTE(review): presumably that epoch is marked on the plot — confirm in utils_plot.
plot_train_eval_loss_accuracy(train_loss=trainer.train_loss,
                              train_accuracy=trainer.train_accuracy,
                              eval_loss=trainer.eval_loss,
                              eval_accuracy=trainer.eval_accuracy,
                              saved_epoch=best_eval_loss_epoch,
                              show=True)

## Test Model

### Test Model

In [None]:
# Evaluate the model on the test loader; the metrics are read off the tester
# object in the following cells.
tester.test()

### Get Test Results

In [None]:
# Copy the test metrics off the tester, two per line, in the same order in which
# they are printed afterwards.
test_loss, test_accuracy = tester.loss, tester.accuracy_score
test_precision, test_recall = tester.precision_score, tester.recall_score
test_f1, test_auroc = tester.f1_score, tester.auroc_score

### Print Test Results

In [None]:
# Emit all test metrics with a single print call, one metric per line, each
# formatted with four decimal places.
print(f'Test Loss: {test_loss:.4f}',
      f'Test Accuracy: {test_accuracy:.4f}',
      f'Test Precision: {test_precision:.4f}',
      f'Test Recall: {test_recall:.4f}',
      f'Test F1: {test_f1:.4f}',
      f'Test AUROC: {test_auroc:.4f}',
      sep='\n')

### Plot Test Results

#### Plot Receiver Operating Characteristic (ROC) Curve

In [None]:
# tester.roc_curve unpacks into three items; the first two are used as the false
# and true positive rates, the third is discarded (presumably the thresholds).
fpr, tpr, _ = tester.roc_curve
plot_receiver_operating_characteristics_curve(false_positive_rates=fpr,
                                              true_positive_rates=tpr,
                                              auc=tester.auroc_score,
                                              show=True)

#### Plot Confusion Matrix

In [None]:
# Plot the confusion matrix computed by the tester for the binary task.
# NOTE(review): unlike the other plot calls in this notebook, no `show=True` is
# passed here — confirm the function's default displays the figure.
plot_confusion_matrix(confusion_matrix=tester.confusion_matrix_scores,
                      task="binary")

## Save Model

In [None]:
# Persist the model using the directory and base name from the IO parameters.
# NOTE(review): not visible here whether `model` holds the best-eval-loss weights
# or the final-epoch weights at this point — confirm in the trainer.
model.save(model_dir=model_dir, model_name=model_file)

## Predict on Inference Data

In [None]:
# Run the model over the unshuffled inference loader.
# NOTE(review): the result is later saved as `probabilities`, so predict()
# presumably returns probabilities rather than raw logits — confirm.
predictions = runner.predict(dataloader=dataloader_inference)

## Save Predictions

In [None]:
# Write the predictions under `data_out_dir`. Passing only the bare file name
# would put the file in the notebook's working directory and leave the
# configured output directory unused. The directory is created first so a fresh
# checkout without it does not fail at the very last step.
os.makedirs(data_out_dir, exist_ok=True)
inference_data.save_predicted_probabilities(path=os.path.join(data_out_dir, data_out_file),
                                            probabilities=predictions)