# Import necessary libraries

In [None]:
import os
import pickle
import pandas as pd

from tqdm import tqdm
from pathlib import Path
from _utils.load_data import fetch_data
from tslearn.utils import to_time_series_dataset
from _utils.utils import visualize_clustering_results, write2excel

# Load the data of the external validation cohort

In [None]:
# Resolve the project root as the parent of the current working directory,
# then derive the input and output locations from it.
parent_folder = Path(os.getcwd()).parent.absolute()
data_path = os.path.join(parent_folder, "Data/FLEMENGHO")
sex_file = os.path.join(parent_folder, "Data/flemengho_clinical_data.xlsx")
save_data_path = os.path.join(parent_folder, "Results/dtw_kmedoids_FLEMENGHO")

# Create the results folder (and any missing parents). exist_ok=True replaces
# the separate os.path.exists() check, which is open to a check-then-create
# race and is redundant with what makedirs can do on its own.
os.makedirs(save_data_path, exist_ok=True)

# Load the cohort; the result is unpacked as male and female CPET data.
male_cpet, female_cpet = fetch_data(data_path, sex_file)

# Variable and sex selection

In [None]:
# Signals used as clustering input and the cohort (by sex) to analyse.
variables = ["HR", "V'O2", "RER", "PETO2", "PETCO2"]
sex = "males"
cpet = {"males": male_cpet, "females": female_cpet}

# For every entry of the selected cohort, keep the chosen variables as a
# NumPy array and record the matching patient identifier.
selected_cohort = cpet[sex]
data = []
patient_ids = []
for idx in tqdm(range(len(selected_cohort))):
    recording = selected_cohort["CPET Data"].iloc[idx]
    data.append(recording[variables].to_numpy())
    patient_ids.append(selected_cohort["Patient IDs"].iloc[idx])

# Convert the list of per-patient arrays with tslearn's dataset helper.
formatted_dataset = to_time_series_dataset(data)

# Load the trained model

In [None]:
# Build the model path from individual components so it works on every
# platform (the previous raw string hard-coded Windows "\" separators).
model_path = os.path.join(
    parent_folder, "Results", "dtw_kmedoids", sex, "5 clusters", "model.pkl"
)

# Open in binary mode: "rb" must be an argument of open(), not of
# os.path.join(), otherwise it becomes a bogus trailing path component and the
# file is opened in text mode. The context manager closes the handle.
# NOTE: pickle can execute arbitrary code on load; only load model files that
# come from a trusted source.
with open(model_path, "rb") as model_file:
    model = pickle.load(model_file)

# Assign each time series of the external cohort to a cluster of the
# previously trained model.
clusters = model.predict(formatted_dataset)

# Visualise clustering results

In [None]:
# Pairs of variables passed to the plotting helper; each tuple is one
# combination to visualise for the clustering results.
combinations = [
    ("V'O2", "HR"),
    ("V'O2", "V'E"),
    ("V'O2", "V'CO2"),
    ("V'CO2", "V'E"),
    ("Time", "Load"),
    ("Time", "V'E"),
    ("Time", "PETO2"),
    ("Time", "PETCO2"),
    ("Time", "RER"),
]

# Relabel the clusters so that cluster 1 is the one with the most favourable
# profile.
cluster_labels = {0: 4, 1: 2, 2: 3, 3: 1, 4: 5}

# Colour per cluster. Key 0 corresponds to cluster 1 and is drawn in green.
cluster_colours = {
    0: "green",
    1: "blue",
    2: "darkorange",
    3: "blueviolet",
    4: "red",
}

# Write the cluster annotations to an .xlsx file, then read the "Cluster"
# column back from that file to obtain the renamed assignments.
patient_id_list = list(cpet[sex]["Patient IDs"])
write2excel(
    clusters,
    patient_id_list,
    sex,
    save_data_path,
    cluster_labels=cluster_labels,
)
assignments_file = os.path.join(save_data_path, f'Clustering_assignments_{sex}.xlsx')
assignments_table = pd.read_excel(assignments_file)
renamed_clusters = assignments_table["Cluster"]

# Produce the plots for the selected cohort using the renamed clusters.
visualize_clustering_results(
    cpet[sex],
    renamed_clusters,
    combinations,
    sex,
    str(save_data_path),
    cluster_colours=cluster_colours,
)