## Model long COVID intensity using a Multi-Layer Perceptron (MLP)

In [2]:
import pathlib
from pprint import pprint
import numpy as np
from package.data.utils import *

In [3]:
# Absolute location of the source dataset order; the "results" and
# "enumerations" folders sit side by side under "dataset_order_202201".
data_path = pathlib.Path("/groups/umcg-lifelines/tmp01/projects/ov21_0402/data")
path_to_enum = data_path.joinpath("dataset_order_202201", "enumerations")
path_to_table = path_to_enum.with_name("results")

# Extract folders are resolved three directories above the current working
# directory; the vaccination extract uses one folder for saving and loading.
load_path_merged = pathlib.Path().absolute().parent.parent.parent.joinpath("data", "extract", "merged")
load_path = load_path_merged.with_name("vaccination")
save_path = load_path_merged.with_name("vaccination")

## Read the dataset

In [4]:
from package.data.lifeline_dataset import LifeLineDataSet
from package.data.data_manager import DataManager

In [5]:
# NOTE: this rebinds `data_path` to the directory three levels above the
# current working directory.
data_path = pathlib.Path().absolute().parent.parent.parent
dataset = LifeLineDataSet(
    data_path=data_path.joinpath("data", "extract", "merged"),
    dataset_name="merged_vaccin_only_1_full.csv",
    target_variable="long_covid_intensity",
)
# Called for its effect on `dataset`; the return value is not kept.
dataset.get_encoded_data()

In [6]:
# Split the dataset into train / validation / test subsets, exposed afterwards
# as `data_manager.train_dataset`, `.val_dataset` and `.test_dataset`.
# NOTE(review): judging by the printed shapes, `val_test_prop=0.8` appears to
# give ~80% of the non-training rows to the test set — confirm against DataManager.
data_manager = DataManager()
data_manager.split_data_train_val_test(dataset, train_size=0.7, val_test_prop=0.8)

In [7]:
# Report the feature-matrix shape of each split, in train / val / test order.
for split_name in ("train_dataset", "val_dataset", "test_dataset"):
    print(getattr(data_manager, split_name).features.shape)

(3259, 41)
(279, 41)
(1119, 41)


## Multi-Layer Perceptrons (MLP)

In [9]:
from package.data.scaler import StandardScaler
from package.model.torch_models import TorchModels
from package.model.fully_connected import FullyConnected
from package.model.utils import k_fold, cross_validation, compute_metrics

In [10]:
# The model configuration file is resolved relative to the parent of the
# current working directory.
CONFIG_PATH = pathlib.Path().resolve().parent.joinpath("configurations", "models", "torch_fc.ini")

# Wrap the FullyConnected network using the "FINE_TUNED" configuration section;
# the scaler is passed as a class, not as an instance.
fc_model = TorchModels(
    FullyConnected,
    config_path=CONFIG_PATH,
    config_name="FINE_TUNED",
    name="fully_connected",
    scaler=StandardScaler,
)

In [None]:
# Bare expression: the notebook displays `fc_model.params` as the cell output.
# Presumably these are the settings read from the "FINE_TUNED" section — confirm.
fc_model.params

Train the model

In [None]:
# Fit on the training split, passing the validation split alongside it.
fc_model.train(train_dataset=data_manager.train_dataset, val_dataset=data_manager.val_dataset)

Print the model architecture

In [None]:
# `_model` is a private attribute of the wrapper; printing it presumably lists
# the underlying network's layers — confirm against TorchModels.
print(fc_model._model)

Visualize the convergence of the model

In [None]:
from matplotlib import pyplot as plt

# Draw the recorded training and validation losses on one set of axes.
plt.figure(figsize=(10, 8))
for curve_label, loss_attr in (("train", "train_losses"), ("validation", "val_losses")):
    plt.plot(getattr(fc_model, loss_attr), label=curve_label)
plt.grid()
plt.ylabel("Loss")
plt.xlabel("Epochs")
plt.legend()
plt.show()

Predict using test dataset

In [32]:
# `predict` returns a sequence of arrays; join them into one array.
predictions = np.concatenate(
    fc_model.predict(data_manager.test_dataset, scale_only_features=False)
)

Evaluate and compute metrics

In [None]:
# When the model carries a scaler, pass the test targets through its
# `inverse_transform`; otherwise use them unchanged.
targets = (
    data_manager.test_dataset.targets
    if fc_model._model.scaler is None
    else fc_model._model.scaler.inverse_transform(data_manager.test_dataset.targets)
)
metrics = compute_metrics(targets, predictions, index=-1)
pprint(metrics)

### Cross Validation

In [17]:
# Build per-fold train and test datasets from the full feature and target sets (5 folds).
train_datasets, test_datasets = k_fold(dataset.features, dataset.targets, folds=5)

number of splits: 5


In [None]:
# Cross-validate the FullyConnected model over the prepared folds, with the
# same configuration file and section used for the single-split model.
# `metrics_all` holds one metrics mapping per fold.
trained_models, metrics_all = cross_validation(
    FullyConnected,
    train_datasets,
    test_datasets,
    config_path=CONFIG_PATH,
    config_name="FINE_TUNED",
)

Summarize each metric across the folds by its mean and standard deviation

In [19]:
# Summarise the per-fold results in `metrics_all` as a mean and a standard
# deviation per metric.
#   metrics_values[metric]  -> raw per-fold values, in fold order
#   metrics_summary[metric] -> {"mean": ..., "std": ...}
metrics_list = ["mae", "mse", "mape", "pearson"]
metrics_values = {
    metric: [res[metric] for res in metrics_all] for metric in metrics_list
}
metrics_summary = {}
for metric, fold_values in metrics_values.items():
    # The "pearson" entries are result objects exposing `.statistic` and
    # `.pvalue`. If they are tuple-like (as scipy's pearsonr result is),
    # np.mean/np.std on them would blend the coefficient with its p-value,
    # so only the statistic is summarised. Values without a `.statistic`
    # attribute are used as-is.
    scalars = [getattr(value, "statistic", value) for value in fold_values]
    metrics_summary[metric] = {
        "mean": np.mean(scalars),
        "std": np.std(scalars),
    }

In [None]:
# Bare expression: the notebook displays the per-metric mean/std mapping as the cell output.
metrics_summary

In [None]:
# Average the Pearson statistic and its p-value over every fold in
# `metrics_all`. Iterating over the results avoids hard-coding the fold count,
# so this stays correct if `folds` is changed in the k_fold call.
pearson_tests = [fold_metrics["pearson"] for fold_metrics in metrics_all]
test_value = [result.statistic for result in pearson_tests]
p_values = [result.pvalue for result in pearson_tests]
# NOTE(review): the arithmetic mean of p-values is a descriptive figure only,
# not itself a valid combined p-value.
print(np.mean(test_value))
print(np.mean(p_values))