### Notebook used to compare the clusters found in LCTA to the groups found using the T2DM and prediabetes thresholds for fasting glucose

In [84]:
using CairoMakie
using CSV 
using DataFrames
using RData
using Combinatorics
using StatsBase  

In [59]:
# Location of the RData file that holds the LCTA results.
file_path_LCTA = "C:/Users/20192809/OneDrive - TU Eindhoven/TUe/Thesis/R shenanigans/Vehpi data/plot_data.RData"

# The file is read as a collection of named objects; only "plot_data" is used here.
data_LCTA = load(file_path_LCTA)["plot_data"]

# Keep the first row seen for each value of `Simulation`, then take a copy of the
# `Cluster` column of those rows (one cluster label per simulation).
unique_simulations_LCTA = unique(data_LCTA, :Simulation)
clusters_LCTA = copy(unique_simulations_LCTA.Cluster);

In [None]:
# Load the simulation output.
file_path_simulation = "C:/Users/20192809/OneDrive - TU Eindhoven/TUe/Thesis/Synthetic data tests/Vehpi data/simulation_data.csv"

data_simulation = CSV.File(file_path_simulation) |> DataFrame

# Convert one cell to Float64. Text cells use a comma as decimal separator; cells that
# CSV already parsed as numbers (columns without any comma) are only widened to Float64.
# Dispatching on the value type keeps the loop below from failing with a MethodError
# when `replace` would otherwise be called on a number.
parse_decimal(x::AbstractString) = parse(Float64, replace(x, "," => "."))
parse_decimal(x::Real) = Float64(x)

# Convert every column from the third onwards; the first two columns are skipped
# ("simulation" and "State Variable").
# NOTE(review): CSV.File also accepts a `decimal` keyword that may make this loop
# unnecessary — confirm against the file's delimiter before switching.
for col in names(data_simulation)[3:end]
    # `df[!, col] = v` replaces the column, so its element type may change to Float64.
    data_simulation[!, col] = parse_decimal.(data_simulation[!, col])
end

# Split by state variable: 0 = glucose, 1 = insulin.
glucose = filter("State Variable" => ==(0), data_simulation)
insulin = filter("State Variable" => ==(1), data_simulation)

# Convert glucose values to mmol/L (mg/dL ÷ 18.018), in place, for all time columns.
glucose[:, 3:end] .= glucose[:, 3:end] ./ 18.018


In [86]:
# Glucose threshold used to separate the groups (compared against the mmol/L values
# stored in `glucose`).
T2DM_cutoff = 6.9

# Names of the measurement columns: everything from the third column onwards
# (the first two columns are not measurements).
time_columns = names(glucose, 3:ncol(glucose))

"""
    classify_glucose(row; columns=time_columns, cutoff=T2DM_cutoff)

Classify one glucose trajectory by how it moves relative to `cutoff`.

# Arguments
- `row`: any container for which `row[columns]` yields the measurements in the order
  they should be scanned (e.g. one row of `glucose`).

# Keywords
- `columns`: selector for the measurements; defaults to the global `time_columns`.
- `cutoff`: threshold in the same unit as the measurements; defaults to the global
  `T2DM_cutoff`.

# Returns
- `"Non-diabetic"` if no measurement is strictly above `cutoff`;
- `"Intervention-responsive"` if, after the first measurement above `cutoff`, at least
  one later measurement is strictly below it;
- `"Diabetic"` otherwise.

# Throws
- `ArgumentError` if `row[columns]` contains no measurements.
"""
function classify_glucose(row; columns=time_columns, cutoff=T2DM_cutoff)
    glucose_values = collect(row[columns])
    isempty(glucose_values) &&
        throw(ArgumentError("cannot classify a trajectory without measurements"))

    # Index of the first measurement strictly above the cutoff, if there is one.
    first_exceed = findfirst(>(cutoff), glucose_values)
    first_exceed === nothing && return "Non-diabetic"

    # Only measurements after the first exceedance can count as a recovery. A value
    # exactly equal to the cutoff neither exceeds it nor counts as dropping below it.
    recovered = any(<(cutoff), @view glucose_values[(first_exceed + 1):end])
    return recovered ? "Intervention-responsive" : "Diabetic"
end

# One group label per row of `glucose`, in row order.
glucose_groups = [classify_glucose(simulation_row) for simulation_row in eachrow(glucose)];


In [87]:
# Fixed mapping from LCTA cluster label to threshold-based group.
cluster_to_group_mapping = Dict(2 => "Non-diabetic", 1 => "Intervention-responsive", 3 => "Diabetic")

# The two vectors are compared position by position. `zip` stops at the shorter input,
# so a length mismatch would silently drop simulations instead of failing.
# NOTE(review): this also assumes both vectors list the simulations in the same order —
# confirm against how the two input files are produced.
length(clusters_LCTA) == length(glucose_groups) || throw(DimensionMismatch(
    "clusters_LCTA has $(length(clusters_LCTA)) entries but glucose_groups has $(length(glucose_groups))"))

# Per-group counters, keyed by the threshold-based group of each simulation.
accurate_classifications = Dict("Non-diabetic" => 0, "Intervention-responsive" => 0, "Diabetic" => 0)
misclassifications = Dict("Non-diabetic" => 0, "Intervention-responsive" => 0, "Diabetic" => 0)

# A simulation counts as accurate when its cluster maps to its threshold-based group.
for (cluster, group) in zip(clusters_LCTA, glucose_groups)
    tally = cluster_to_group_mapping[cluster] == group ? accurate_classifications : misclassifications
    tally[group] += 1
end

# Display the counts of accurate and misclassified classifications per group
println("Accurate classifications: ", accurate_classifications)
println("Misclassifications: ", misclassifications)


Accurate classifications: Dict("Non-diabetic" => 674, "Intervention-responsive" => 100, "Diabetic" => 195)
Misclassifications: Dict("Non-diabetic" => 226, "Intervention-responsive" => 4, "Diabetic" => 85)


In [94]:
# Fixed mapping from LCTA cluster label to threshold-based group.
cluster_to_group_mapping = Dict(2 => "Non-diabetic", 1 => "Intervention-responsive", 3 => "Diabetic")

# Single source of truth for the group order used by the matrix and the printed table.
group_labels = ["Non-diabetic", "Intervention-responsive", "Diabetic"]

# Position of each group along both axes of the confusion matrix.
group_to_index = Dict(label => i for (i, label) in enumerate(group_labels))

# Confusion matrix: rows are the threshold-based ("actual") group, columns are the group
# implied by the LCTA cluster ("predicted").
conf_matrix = zeros(Int, length(group_labels), length(group_labels))

# Per-group counters, keyed by the threshold-based group of each simulation.
accurate_classifications = Dict("Non-diabetic" => 0, "Intervention-responsive" => 0, "Diabetic" => 0)
misclassifications = Dict("Non-diabetic" => 0, "Intervention-responsive" => 0, "Diabetic" => 0)

# The two vectors are compared position by position. `zip` stops at the shorter input,
# so a length mismatch would silently drop simulations instead of failing.
# NOTE(review): this also assumes both vectors list the simulations in the same order —
# confirm against how the two input files are produced.
length(clusters_LCTA) == length(glucose_groups) || throw(DimensionMismatch(
    "clusters_LCTA has $(length(clusters_LCTA)) entries but glucose_groups has $(length(glucose_groups))"))

for (cluster, group) in zip(clusters_LCTA, glucose_groups)
    predicted_group = cluster_to_group_mapping[cluster]

    # Every simulation lands in exactly one cell, whether or not the two labels agree.
    actual_idx = group_to_index[group]
    predicted_idx = group_to_index[predicted_group]
    conf_matrix[actual_idx, predicted_idx] += 1

    if predicted_group == group
        accurate_classifications[group] += 1
    else
        misclassifications[group] += 1
    end
end

# Display confusion matrix
println("Confusion Matrix:")
println("                   Predicted")
println("Actual   Non-diabetic  Intervention-responsive  Diabetic")
for i in eachindex(group_labels)
    println(group_labels[i], "   ", conf_matrix[i, 1], "                ", conf_matrix[i, 2], "                   ", conf_matrix[i, 3])
end

# Display the counts of accurate and misclassified classifications per group
println("\nAccurate classifications: ", accurate_classifications)
println("Misclassifications: ", misclassifications)


Confusion Matrix:
                   Predicted
Actual   Non-diabetic  Intervention-responsive  Diabetic
Non-diabetic   674                226                   0
Intervention-responsive   0                100                   4
Diabetic   0                85                   195

Accurate classifications: Dict("Non-diabetic" => 674, "Intervention-responsive" => 100, "Diabetic" => 195)
Misclassifications: Dict("Non-diabetic" => 226, "Intervention-responsive" => 4, "Diabetic" => 85)


In [95]:
conf_matrix

3×3 Matrix{Int64}:
 674  226    0
   0  100    4
   0   85  195