## File for fitting EDES to all the sets generated for MIR, LIR and healthy populations

In [None]:
# load packages and files
using DifferentialEquations, SciMLBase, Plots, Random # for solving ODEs
using DataFrames
using DataStructures # for the OrderedDict
using LatinHypercubeSampling # for the LHS
using Trapz # for the trapezoidal rule
using CSV # for reading in the parameter distributions of the Ohashi
using Statistics # for the mean and std functions
using JLD2 # for saving the results
# `Random.seed!(seed)` reseeds the shared global RNG and returns that same
# object, so binding its result to two names gives two aliases of one generator
# (and the second seed overwrites the first). Use two independent generators so
# the test and training streams cannot influence each other.
rng_test = Xoshiro(1234) # testing
rng_training = Xoshiro(2) # training
# Keep the global RNG seeded as before (2 was the last seed applied), so code
# that draws from the default RNG stays reproducible.
Random.seed!(2)
save_boolean = false # for saving the results

include("../EDES/including.jl") # for the EDES model 


PlotMealResponseProgression (generic function with 2 methods)

#### Load Data

Combine the data into a training set and a test set

Add the state as healthy == 2, MIR == 1, LIR == 0

Make new IDs, separately for the training set and the test set

In [None]:


# Read the six OGTT tables (three groups, each with a training and a test file)
healthy_training = CSV.File("./Metadata/Training_OGTT_healthy.csv") |> DataFrame
MIR_training = CSV.File("./Metadata/Training_OGTT_MIR.csv") |> DataFrame
LIR_training = CSV.File("./Metadata/Training_OGTT_LIR.csv") |> DataFrame

healthy_test = CSV.File("./Metadata/Test_OGTT_healthy.csv") |> DataFrame
MIR_test = CSV.File("./Metadata/Test_OGTT_MIR.csv") |> DataFrame
LIR_test = CSV.File("./Metadata/Test_OGTT_LIR.csv") |> DataFrame

# Tag every table with its group label in a `state` column:
# healthy == 2, MIR == 1, LIR == 0
for (group, label) in (
    (healthy_training, 2), (MIR_training, 1), (LIR_training, 0),
    (healthy_test, 2), (MIR_test, 1), (LIR_test, 0),
)
    group.state .= label
end

# Number of consecutive rows that share one patient ID
block_size = 6

"""
    interleave_blocks(LIR, MIR, healthy, block_size)

Combine three data frames into one by taking `block_size` consecutive rows from
`LIR`, then the same rows from `MIR`, then from `healthy`, and repeating this
for every block of `LIR`.

Each copied block gets its own `Patient_ID`: IDs start at 1 and increase by one
per block in the order the blocks are appended (LIR, MIR, healthy, LIR, ...).
The input data frames are not modified, because the blocks are row copies.

# Throws
- `ArgumentError` if `block_size` is not positive, or if any input has a row
  count that is not a multiple of `block_size`.
- `DimensionMismatch` if `MIR` or `healthy` has fewer rows than `LIR`.

A warning is logged when `MIR` or `healthy` has more rows than `LIR`; those
trailing rows are not part of the result.
"""
function interleave_blocks(LIR, MIR, healthy, block_size)
    block_size > 0 ||
        throw(ArgumentError("block_size must be positive, got $block_size"))

    groups = (LIR = LIR, MIR = MIR, healthy = healthy)
    n_rows = nrow(LIR)  # the LIR table drives the iteration

    for (name, group) in pairs(groups)
        n_group = nrow(group)
        n_group % block_size == 0 || throw(ArgumentError(
            "$name has $n_group rows, which is not a multiple of $block_size"))
        if n_group < n_rows
            # Without this check the slicing below fails with a bare BoundsError
            throw(DimensionMismatch(
                "$name has $n_group rows but LIR has $n_rows; cannot interleave"))
        elseif n_group > n_rows
            @warn "$name has more rows than LIR; trailing rows are ignored" n_group n_rows
        end
    end

    combined = DataFrame()
    next_id = 1
    for first_row in 1:block_size:n_rows
        rows = first_row:(first_row + block_size - 1)
        for group in values(groups)
            block = group[rows, :]  # row slice is a copy, the source stays untouched
            block.Patient_ID .= next_id
            append!(combined, block)
            next_id += 1
        end
    end
    return combined
end

# Interleave the groups for the training set and, with IDs restarting at 1,
# for the test set
training_set = interleave_blocks(LIR_training, MIR_training, healthy_training, block_size)
test_set = interleave_blocks(LIR_test, MIR_test, healthy_test, block_size)

# training_set and test_set now contain the data in the desired order, with one ID per block
training_set;

## FIT for training set



In [None]:
using Distributed

# Target number of worker processes: one per entry reported by Sys.cpu_info()
n_cores = length(Sys.cpu_info())

# These values are the same as for the synthetic patients; they need to change
# if the weight or the dose is varied
BW = 70
G_dose::Real = 75000.0
time_G = [0,15,30,60,120,180,240]
time_I = [0,15,30,60,120,180,240]

# Start only as many extra workers as are still missing
if nprocs()-1 < n_cores
    addprocs(n_cores-nprocs()+1, exeflags="--project")
end

# Load the packages and the EDES model on every process
@everywhere begin
    using DifferentialEquations, SciMLBase, Plots, Random # for solving ODEs
    using Optimization, OptimizationOptimJL, LineSearches # for optimization
    using QuasiMonteCarlo # for LHS
    using Trapz
    include("../EDES/including.jl")
end

columnnames = ["Patient_ID","Long_time_points","Health status","k1","k5","k6"]

# One argument list per row of the training set; columns 3:9 and 10:16 are the
# two measurement series passed to ParameterEstimation
# (presumably glucose and insulin - the plotting cell uses them that way)
patients = [[collect(training_set[j,3:9]), collect(training_set[j,10:16]), BW, G_dose, time_G, time_I] for j in 1:nrow(training_set)]

results = try
    pmap(x -> ParameterEstimation(x...), patients)
finally
    # close the parallel pool, also when a fit throws, so no workers are left behind
    rmprocs(procs()[2:end])
end

# Prepend the ID, the long time points and the health status to each fit.
# The health status is read from the `state` column by name rather than by a
# hard-coded position. A new vector is built per row instead of overwriting the
# elements of `results`, so the element type returned by pmap does not matter.
fit_rows = [[row[1], row[2], row.state, fit...] for (row, fit) in zip(eachrow(training_set), results)]

results = DataFrame(permutedims(reduce(hcat, fit_rows)), Symbol.(columnnames))

# save as a CSV file
if save_boolean
    CSV.write("./Metadata/Training_EDES_fits.csv", results)
end



In [None]:
# Keep only the fits whose health status is 1 and summarise their distribution
is_MIR = results[!, "Health status"] .== 1
MIR = results[is_MIR, :]

describe(MIR)

## FIT for test set


In [None]:

# Target number of worker processes: one per entry reported by Sys.cpu_info()
n_cores = length(Sys.cpu_info())

# These values are the same as for the synthetic patients; they need to change
# if the weight or the dose is varied
BW = 70
G_dose::Real = 75000.0
time_G = [0,15,30,60,120,180,240]
time_I = [0,15,30,60,120,180,240]

# Start only as many extra workers as are still missing
if nprocs()-1 < n_cores
    addprocs(n_cores-nprocs()+1, exeflags="--project")
end

# Load the packages and the EDES model on every process
@everywhere begin
    using DifferentialEquations, SciMLBase, Plots, Random # for solving ODEs
    using Optimization, OptimizationOptimJL, LineSearches # for optimization
    using QuasiMonteCarlo # for LHS
    using Trapz
    include("../EDES/including.jl")
end

columnnames = ["Patient_ID","Long_time_points","Health status","k1","k5","k6"]

# One argument list per row of the test set; columns 3:9 and 10:16 are the
# two measurement series passed to ParameterEstimation
# (presumably glucose and insulin - the plotting cell uses them that way)
patients = [[collect(test_set[j,3:9]), collect(test_set[j,10:16]), BW, G_dose, time_G, time_I] for j in 1:nrow(test_set)]

results = try
    pmap(x -> ParameterEstimation(x...), patients)
finally
    # close the parallel pool, also when a fit throws, so no workers are left behind
    rmprocs(procs()[2:end])
end

# Prepend the ID, the long time points and the health status to each fit.
# The health status is read from the `state` column by name rather than by a
# hard-coded position. A new vector is built per row instead of overwriting the
# elements of `results`, so the element type returned by pmap does not matter.
fit_rows = [[row[1], row[2], row.state, fit...] for (row, fit) in zip(eachrow(test_set), results)]

results = DataFrame(permutedims(reduce(hcat, fit_rows)), Symbol.(columnnames))

# save as a CSV file
if save_boolean
    CSV.write("./Metadata/Test_EDES_fits.csv", results)
end


### visualise some fits

In [None]:
include("../EDES/including.jl")

# Row of the MIR fits (and of the MIR training data) to visualise
patient_n = 4

# Fitted parameters of the selected patient, printed one after another
param_names = ["k1", "k5", "k6"]
k1_MIR, k5_MIR, k6_MIR = (MIR[patient_n, col] for col in (:k1, :k5, :k6))
foreach(println, (k1_MIR, k5_MIR))
print(k6_MIR)
parameter_values = [k1_MIR, k5_MIR, k6_MIR]

# Measurements of the same patient: columns 3:9 are used as glucose and
# columns 10:16 as insulin, sampled at the listed time points
patient_row = MIR_training[patient_n, :]
glucose_values = reduce(vcat, patient_row[3:9])
insulin_values = reduce(vcat, patient_row[10:16])
datapoints = (time = [0,15,30,60,120,180,240], glc = glucose_values, ins = insulin_values)

# The first sample of each series is passed on as the fasting value
fasting_glucose = first(datapoints.glc)
fasting_insulin = first(datapoints.ins)

PlotMealResponse(param_names, parameter_values, datapoints, fasting_glucose, fasting_insulin)