## Setup

In [None]:
# Load packages
using DifferentialEquations
using Plots, StatsPlots
using CSV, DataFrames
using Turing
using LaTeXStrings
using XLSX
using Optim
using StatsBase
using Random
using KernelDensity
using ProgressMeter
using Distributions
using Measures

# Use the Computer Modern font family for every figure created below
Plots.default(; fontfamily = "computer modern")

# Seed the global random number generator
Random.seed!(42)

In [None]:
# Source the project files, one after another, in the order listed
for source_file in (
    "model_rhs.jl",                                       # rhs file
    "model_default_param.jl",                             # parameter file
    "model_rhs_treatment.jl",                             # treatment rhs
    "model_calc_VAF.jl",                                  # function for calculating VAF
    "model_infer_dynamics.jl",                            # Turing inference - individual
    "model_infer_dynamics_Hierarchical_Gibbs_DALIAH.jl",  # Turing inference - hierarchical
)
    include(source_file)
end

In [None]:
# Initial conditions for the master curve, in the state order used by u0 below
x00, x10, x20 = 1.0*10^5, 2.5*10^6, 6.4*10^11
y00, y10, y20 = 1, 0, 0
a0, s0 = 8.1*10^2, 1

# State vector handed to the ODE solver
u0 = [x00, x10, x20, y00, y10, y20, a0, s0]

# Integrate model_rhs over 90*365 time units (presumably days - confirm against
# model_rhs.jl), saving the solution at every unit step
tspan = (0.0, 90*365)
prob = ODEProblem(model_rhs, u0, tspan, p)
sol = solve(prob, TRBDF2(); reltol = 1e-10, abstol = 1e-10, saveat = 1)

# Keep the trajectories as a named tuple; VAF is y2 / (x2 + y2)
master_curve = (
    t = sol.t,
    x0 = sol[1, :],
    x1 = sol[2, :],
    x2 = sol[3, :],
    y0 = sol[4, :],
    y1 = sol[5, :],
    y2 = sol[6, :],
    a = sol[7, :],
    s = sol[8, :],
    VAF = sol[6, :] ./ (sol[3, :] .+ sol[6, :]),
)

In [None]:
# Read the exported data set directly into a DataFrame
path = "M:/data_cancer/DALIAH/Trine_DALIAH_5y/DALIAH_5y - Export for Julia.csv"
df_D = CSV.read(path, DataFrame)

# Store the days and RUX columns as Float64
for column in (:days, :RUX)
    df_D[!, column] = convert.(Float64, df_D[:, column])
end

# Show the table
df_D

In [None]:
# Shift every patient's time axis so that day 0 is the earliest row with IFN > 0 or
# JAK >= 0 (rows with a NaN JAK value only count when IFN > 0)
for pID in unique(df_D.patientID)
    # Rows belonging to this patient
    rows = df_D.patientID .== pID
    days_p = df_D.days[rows]

    # Rows that carry either treatment or a JAK measurement
    has_data = (df_D.IFN[rows] .> 0) .|| (df_D.JAK[rows] .>= 0)

    # Subtract the first such day from all of the patient's time points
    df_D.days[rows] = days_p .- minimum(days_p[has_data])
end
df_D

## Optimise multiple models and compare

In [None]:
# Treatment-effect models that are fitted and compared below
model_list = [
    "py0",
    "py1",
    "py0py1",
    "dy0",
    "dy1IFN",
    "dy0dy1",
    "py0dy0",
    "py0dy1",
    "dy0py1",
    "py1dy1",
]

In [None]:
# Fit every treatment-effect model to every patient by maximum likelihood, keep the best of
# ten particle-swarm runs per patient, optionally save a diagnostic figure per patient, and
# write per-model result tables plus the summed AIC/BIC of each model to CSV.

# Setup list of models to compare
model_list = ["py0", "py1", "py0py1", "dy0", "dy1IFN", "dy0dy1", "py0dy0", "py0dy1", "dy0py1", "py1dy1"]

# Setup dataframe for storage (one row per model)
df_AIC_BIC = DataFrame(model = String[], Total_AIC = Float64[], Total_BIC = Float64[])

# Set boolean for plot
saveplot = true

# Optimise all models in loop
@showprogress dt=1 desc="Running MLE optimisation for all models..." for k in eachindex(model_list)
    # Choose treatment effect
    effect = model_list[k]

    # Extract number of parameters: 4 -> (rho1, rho2, initJAK, tau), 3 -> (rho1, initJAK, tau)
    if effect in ("sy0dy1", "py0py1", "dy0dy1", "py0dy0", "py0dy1", "dy0py1", "py1dy1")
        n_param = 4
    elseif effect in ("dy1", "py0", "py1", "dy0", "dy1IFN")
        n_param = 3
    else
        # Fail early with a clear message instead of an UndefVarError further down
        error("Unknown treatment effect \"$(effect)\": the number of parameters is not defined")
    end

    # Build an empty result table whose columns match the number of parameters
    function new_result_table()
        if n_param == 4
            return DataFrame(patientID = Int64[], rho1 = Float64[], rho2 = Float64[], initJAK = Float64[],
                             tau = Float64[], loglik = Float64[], AIC = Float64[], BIC = Float64[])
        end
        return DataFrame(patientID = Int64[], rho1 = Float64[], initJAK = Float64[], tau = Float64[],
                         loglik = Float64[], AIC = Float64[], BIC = Float64[])
    end

    # Storage for the best estimate of every patient
    df_MLE = new_result_table()

    # Optimise in loop
    for pID in unique(df_D.patientID)
        # Extract relevant data
        df_p = df_D[df_D.patientID .== pID, :]

        # Extract VAF - but remove NaN-observations
        observed = .!isnan.(df_p.JAK)
        pVAF = df_p.JAK[observed]

        # Number of data points used in BIC: only the non-NaN observations, since these
        # are the values handed to the model as data
        n = length(pVAF)

        # Choose to estimate initial JAK
        estInitJAK = true

        # Instantiate model
        model = model_infer_dynamics(pVAF,df_p,effect,p,master_curve,estInitJAK)

        # Set seed
        Random.seed!(42)

        # Storage for the individual optimisation runs of this patient
        df_MLE_temp = new_result_table()

        # Generate a MLE estimate - repeat the optimisation and keep every run
        for j = 1:10
            # Optimise likelihood
            mle_estimate = maximum_likelihood(model, ParticleSwarm(); maxiters = 100, allow_f_increases=true)
            est = coef(mle_estimate)

            # Parameter-only table, used to evaluate the log-likelihood at the estimate
            if n_param == 4
                df_temp = DataFrame(rho1 = est[:rho1], rho2 = est[:rho2],
                                    initJAK = est[:initJAK], tau = est[:tau])
            else
                df_temp = DataFrame(rho1 = est[:rho1],
                                    initJAK = est[:initJAK], tau = est[:tau])
            end
            loglik = loglikelihood(model, df_temp)

            # Full result row including the information criteria
            if n_param == 4
                df_temp = DataFrame(patientID = pID, rho1 = est[:rho1], rho2 = est[:rho2],
                                    initJAK = est[:initJAK], tau = est[:tau],
                                    loglik = loglik, AIC = 2*n_param-2*loglik, BIC = n_param*log(n)-2*loglik)
            else
                df_temp = DataFrame(patientID = pID, rho1 = est[:rho1],
                                    initJAK = est[:initJAK], tau = est[:tau],
                                    loglik = loglik, AIC = 2*n_param-2*loglik, BIC = n_param*log(n)-2*loglik)
            end
            append!(df_MLE_temp, df_temp)
        end

        # Save best MLE (run with the largest log-likelihood)
        push!(df_MLE, df_MLE_temp[argmax(df_MLE_temp.loglik),:])

        # Save plots if wanted
        if saveplot
            # Extract latest data point
            maxDays = maximum(df_p.days)

            # Choose time for calculating VAF: a unit-step grid up to 11*365 and the observation times
            pDays = collect(1:11*365)
            pDays2 = df_p.days[observed]

            # Load standard parameters
            include("model_default_param.jl")

            # Extract MLE estimate
            df_MLE_p = df_MLE[df_MLE.patientID .== pID,:]
            if n_param == 4
                rho = [df_MLE_p.rho1[1], df_MLE_p.rho2[1]]
            else
                rho = df_MLE_p.rho1[1]
            end
            initJAK = df_MLE_p.initJAK[1]

            # Calculate VAF using function
            VAF_MLE, sol = model_calc_VAF(rho,df_p,effect,pDays,p,master_curve,initJAK)
            VAF_MLE_points, sol = model_calc_VAF(rho,df_p,effect,pDays2,p,master_curve,initJAK)

            # Calculate RMSE between the fitted and the observed VAF
            RMSE = rmsd(VAF_MLE_points,pVAF)

            # Plot results: solid line up to the last observation, dashed line afterwards
            figVAF = plot(pDays[pDays.<=maxDays]/30.4, VAF_MLE[pDays.<=maxDays]*100, linewidth = 3, label = L"JAK2"*" VAF - MLE", colour= :green1)
            plot!(pDays[pDays.>maxDays]/30.4, VAF_MLE[pDays.>maxDays]*100, linewidth = 3, label = L"JAK2"*" VAF - MLE", colour= :green1, linestyle = :dash)
            scatter!(df_p.days./30.4, df_p.JAK*100, label=L"\textrm{Data}", markercolor=:red, markersize=4)
            # NOTE(review): the rho labels below are the same for every effect - confirm intended
            if n_param == 4
                title!(L"JAK2"*" VAF for Patient $(pID), RMSE = $(round(RMSE; sigdigits=3)),\n"*L"\rho_{p_{y_0}}="
                       *"$(round(df_MLE_p.rho1[1]; sigdigits=3)), " * L"\rho_{p_{y_1}}="*"$(round(df_MLE_p.rho2[1]; sigdigits=3))")
            else
                title!(L"JAK2"*" VAF for Patient $(pID), RMSE = $(round(RMSE; sigdigits=3)),\n"*L"\rho_{p_{y_0}}="
                       *"$(round(df_MLE_p.rho1[1]; sigdigits=3))")
            end
            xlabel!(L"t"*"/months")
            ylabel!(L"JAK2"*" VAF/%")
            t_end = 125
            xlims!(0, t_end)
            ylims!(0,100)
            plot!(titlefont=20,xtickfontsize=15,ytickfontsize=15,xguidefontsize=15,yguidefontsize=15,legendfontsize=10,
                legend = :none)

            # Dose panel: recorded doses as a step line, last dose continued as a dashed line
            figIFN = plot(df_p.days/30.4, df_p.IFN, line =:steppost, linewidth = 3, label = "", colour= :blue1)
            # NOTE(review): the dashed extension stops at 120 while the axis runs to t_end - confirm
            plot!([maxDays/30.4,120], [df_p.IFN[end],df_p.IFN[end]], line =:steppost, linewidth = 3, label = "", colour= :blue1,
                   linestyle = :dash)
            title!("IFN Dose for Patient $(pID)")
            xlabel!(L"t"*"/months")
            ylabel!("IFN/"*L"\mathrm{\frac{\mu g}{day}}")
            xlims!(0, t_end)
            ylims!(0, 20)
            plot!(titlefont=20,xtickfontsize=15,ytickfontsize=15,xguidefontsize=15,yguidefontsize=15,legendfontsize=10)
            figcombined = plot(figVAF,figIFN, layout=grid(2,1, heights=(7/10, 3/10)), link =:x, size = (700, 500), margin=5mm)

            # Save figure - create the model-specific folder first so that savefig cannot
            # fail on a missing directory after the optimisation has already been run
            figpath = "C:/Users/boklund/Documents/Egne artikler m.m/IFN-artikel 2024/Billeder og video/DALIAH_5y Data - Patient Plots/Individual/MLE/$(effect)/"
            mkpath(figpath)
            for ext in ("png", "pdf", "svg")
                savefig(figcombined,figpath*"MLE_$(effect)_p$(pID).$(ext)")
            end
        end
    end

    # Write results to CSV file (the target folder is created if it does not exist)
    path = "C:/Users/boklund/Documents/Egne artikler m.m/IFN-artikel 2024/Excel/DALIAH_5y Data - MLE files/MLE_$(effect).csv"
    mkpath(dirname(path))
    CSV.write(path, df_MLE)

    # Save sum of AICs and BICs in dataframe
    df_temp = DataFrame(model = effect, Total_AIC = sum(df_MLE.AIC), Total_BIC = sum(df_MLE.BIC))
    append!(df_AIC_BIC, df_temp)
end

# Save df_AIC_BIC
path = "C:/Users/boklund/Documents/Egne artikler m.m/IFN-artikel 2024/Excel/DALIAH_5y Data - MLE files/Total_AIC_BIC.csv"
CSV.write(path, df_AIC_BIC)

In [None]:
# Read the summed AIC/BIC table back from disk
path = "C:/Users/boklund/Documents/Egne artikler m.m/IFN-artikel 2024/Excel/DALIAH_5y Data - MLE files/Total_AIC_BIC.csv"
df_AIC_BIC = CSV.read(path, DataFrame)

In [None]:
# Relative likelihood of every model with respect to the smallest summed AIC.
# Caveat kept from the original analysis: this does not make sense for the sum of likelihoods.
df_AIC_BIC.relLik = let best_total = minimum(df_AIC_BIC.Total_AIC)
    [exp((best_total - total) / 2) for total in df_AIC_BIC.Total_AIC]
end
df_AIC_BIC

In [None]:
# Build two wide tables (one for AIC, one for BIC) with one row per patient and one
# column per model, read from the per-model MLE result files.

# Storage - the model columns are added in the loop below
df_AIC_all = DataFrame(patientID = unique(df_D.patientID))
df_BIC_all = DataFrame(patientID = unique(df_D.patientID))

# Extract AIC and BIC of every patient for each model
for k in eachindex(model_list)
    # Choose treatment effect
    effect = model_list[k]

    # Load results
    path = "C:/Users/boklund/Documents/Egne artikler m.m/IFN-artikel 2024/Excel/DALIAH_5y Data - MLE files/MLE_$(effect).csv"
    df_MLE = CSV.read(path, DataFrame)

    # Typed buffers, so that the resulting columns are numeric instead of Vector{Any}
    temp_AIC = eltype(df_MLE.AIC)[]
    temp_BIC = eltype(df_MLE.BIC)[]

    for pID in unique(df_D.patientID)
        # Extract AIC and BIC of this patient
        df_MLE_temp = df_MLE[df_MLE.patientID .== pID,:]

        # Exactly one row per patient is required; otherwise the values collected here
        # would no longer line up with the patientID column of the output tables
        nrow(df_MLE_temp) == 1 ||
            error("Expected exactly one MLE row for patient $(pID) and model $(effect), found $(nrow(df_MLE_temp))")

        append!(temp_AIC,df_MLE_temp.AIC)
        append!(temp_BIC,df_MLE_temp.BIC)
    end

    # Put in dataframe, using the model name as column name
    df_AIC_all[!, "$(effect)"] = temp_AIC
    df_BIC_all[!, "$(effect)"] = temp_BIC
end
df_AIC_all

In [None]:
# Per-patient relative likelihood of every model, exp((best - value) / 2), computed
# separately from the AIC table and from the BIC table.

# Storage - copies keep the patientID column and the model column layout
df_relLik_all = copy(df_AIC_all)
df_relLik_BIC_all = copy(df_BIC_all)

for i in 1:size(df_AIC_all, 1)
    # Smallest criterion value of this patient over the model columns only: column 1
    # holds the patientID and must not take part in the minimum
    best_AIC = minimum(df_AIC_all[i, 2:end])
    best_BIC = minimum(df_BIC_all[i, 2:end])

    for j in 2:size(df_AIC_all, 2)
        # Calculate relative likelihood and save
        df_relLik_all[i,j] = exp((best_AIC-df_AIC_all[i,j])/2)
        df_relLik_BIC_all[i,j] = exp((best_BIC-df_BIC_all[i,j])/2)
    end
end
df_relLik_all

In [None]:
# Column sums of the per-patient relative likelihoods, i.e. one value per model
sum_rel_lik = vec(sum(Matrix(df_relLik_all[:, 2:end]); dims = 1))

# Scatter of the sums with the model names on the x-axis (x-limits are left automatic)
scatter(
    sum_rel_lik;
    markercolor = :red,
    markersize = 4,
    xticks = (1:10, model_list),
    label = "",
    title = "\nSum of relative likelihoods",
    xlabel = "Model",
    ylabel = "Relative likelihoods",
    titlefont = 20,
    xtickfontsize = 15,
    ytickfontsize = 15,
    xguidefontsize = 15,
    yguidefontsize = 15,
    legendfontsize = 10,
    size = (1000, 700),
    margin = 5mm,
)

In [None]:
# Tick labels for the ten models, in the same order as the model columns
model_list_plot = [
    L"p_{y_0}",
    L"p_{y_1}",
    L"p_{y_0}, "*L"p_{y_1}",
    L"d_{y_0}",
    L"d_{y_1}",
    L"d_{y_0}, "*L"d_{y_1}",
    L"p_{y_0}, "*L"d_{y_0}",
    L"p_{y_0}, "*L"d_{y_1}",
    L"d_{y_0}, "*L"p_{y_1}",
    L"p_{y_1}, "*L"d_{y_1}",
]

# Count, per model, the patients whose relative likelihood exceeds the threshold
threshold = 0.2
sum_good_model = vec(sum(Matrix(df_relLik_all[:, 2:end]) .> threshold; dims = 1))

# Scatter of the counts
fig_relLik = scatter(
    sum_good_model;
    markercolor = :red,
    markersize = 4,
    xticks = (1:10, model_list_plot),
    label = "",
    title = "\nNumber of patients for which the model\n has a relative likelihood above 0.2",
    xlabel = "Model",
    ylabel = "Number of patients",
    xlims = (0.5, 10.5),
    ylims = (0, 64),
    titlefont = 20,
    xtickfontsize = 15,
    ytickfontsize = 15,
    xguidefontsize = 15,
    yguidefontsize = 15,
    legendfontsize = 10,
    size = (1000, 700),
    margin = 5mm,
)

# Write the figure in all three formats
figpath = "C:/Users/boklund/Documents/Egne artikler m.m/IFN-artikel 2024/Billeder og video/DALIAH_5y Data - Patient Plots/Individual/MLE/"
for ext in ("png", "pdf", "svg")
    savefig(fig_relLik, figpath*"relLik_comparison.$(ext)")
end

In [None]:
# Number of best models - AIC.
# The relative likelihood is exp((smallest AIC - AIC)/2), so a value of at least 1 marks
# the model with the LOWEST AIC for that patient; the title states this accordingly.
threshold = 1.0
sum_best_model = vec(sum(Matrix(df_relLik_all[:,2:end]).>=threshold,dims=1))

# Scatter of the number of patients per model
scatter(sum_best_model, markercolor=:red, markersize=4, xticks = (1:10, model_list), label = "")
title!("\nNumber of patients for whom the model has the lowest AIC")
xlabel!("Model")
ylabel!("Number of patients")
xlims!(0.5, 10.5)
ylims!(0, 17)
plot!(titlefont=20,xtickfontsize=15,ytickfontsize=15,xguidefontsize=15,yguidefontsize=15,legendfontsize=10,size = (1000, 700),
      margin=5mm)

In [None]:
# Number of best models - BIC.
# The relative likelihood is exp((smallest BIC - BIC)/2), so a value of at least 1 marks
# the model with the LOWEST BIC for that patient; the title states this accordingly.
threshold = 1.0
sum_best_model = vec(sum(Matrix(df_relLik_BIC_all[:,2:end]).>=threshold,dims=1))

# Scatter of the number of patients per model
scatter(sum_best_model, markercolor=:red, markersize=4, xticks = (1:10, model_list), label = "")
title!("\nNumber of patients for which the model has the lowest BIC")
xlabel!("Model")
ylabel!("Number of patients")
xlims!(0.5, 10.5)
ylims!(0, 17)
plot!(titlefont=20,xtickfontsize=15,ytickfontsize=15,xguidefontsize=15,yguidefontsize=15,legendfontsize=10,size = (1000, 700),
      margin=5mm)

In [None]:
# Tick labels for the ten models, in the order of the rows of df_AIC_BIC
model_list_plot = [
    L"p_{y_0}",
    L"p_{y_1}",
    L"p_{y_0}, "*L"p_{y_1}",
    L"d_{y_0}",
    L"d_{y_1}",
    L"d_{y_0}, "*L"d_{y_1}",
    L"p_{y_0}, "*L"d_{y_0}",
    L"p_{y_0}, "*L"d_{y_1}",
    L"d_{y_0}, "*L"p_{y_1}",
    L"p_{y_1}, "*L"d_{y_1}",
]

# Scatter of the summed AIC of every model
fig_AIC = scatter(
    df_AIC_BIC.Total_AIC;
    label = "",
    markercolor = :red,
    markersize = 4,
    xticks = (1:10, model_list_plot),
    title = "Model AIC Comparison",
    xlabel = "Model",
    ylabel = "Sum of AIC",
    xlims = (0.5, 10.5),
    ylims = (-2000, 0),
    titlefont = 20,
    xtickfontsize = 15,
    ytickfontsize = 15,
    xguidefontsize = 15,
    yguidefontsize = 15,
    legendfontsize = 10,
    size = (1000, 700),
    margin = 5mm,
)

# Write the figure in all three formats
figpath = "C:/Users/boklund/Documents/Egne artikler m.m/IFN-artikel 2024/Billeder og video/DALIAH_5y Data - Patient Plots/Individual/MLE/"
for ext in ("png", "pdf", "svg")
    savefig(fig_AIC, figpath*"AIC_comparison.$(ext)")
end

In [None]:
# Tick labels for the ten models, in the order of the rows of df_AIC_BIC
model_list_plot = [
    L"p_{y_0}",
    L"p_{y_1}",
    L"p_{y_0}, "*L"p_{y_1}",
    L"d_{y_0}",
    L"d_{y_1}",
    L"d_{y_0}, "*L"d_{y_1}",
    L"p_{y_0}, "*L"d_{y_0}",
    L"p_{y_0}, "*L"d_{y_1}",
    L"d_{y_0}, "*L"p_{y_1}",
    L"p_{y_1}, "*L"d_{y_1}",
]

# Scatter of the summed BIC of every model
fig_BIC = scatter(
    df_AIC_BIC.Total_BIC;
    label = "",
    markercolor = :red,
    markersize = 4,
    xticks = (1:10, model_list_plot),
    title = "Model BIC Comparison",
    xlabel = "Model",
    ylabel = "Sum of BIC",
    xlims = (0.5, 10.5),
    ylims = (-2000, 0),
    titlefont = 20,
    xtickfontsize = 15,
    ytickfontsize = 15,
    xguidefontsize = 15,
    yguidefontsize = 15,
    legendfontsize = 10,
    size = (1000, 700),
    margin = 5mm,
)

# Write the figure in all three formats
figpath = "C:/Users/boklund/Documents/Egne artikler m.m/IFN-artikel 2024/Billeder og video/DALIAH_5y Data - Patient Plots/Individual/MLE/"
for ext in ("png", "pdf", "svg")
    savefig(fig_BIC, figpath*"BIC_comparison.$(ext)")
end