In [5]:
using Pkg, BenchmarkTools, Random, Statistics
using XLSX, Dates, DataFrames, Statistics
using DifferentialEquations, Plots 

In [6]:
"""
    ODE_sol(ODE_model, ODE_vars, x, params_fix, ICs_fix, param_opt_indices,
            param_fix_indices, IC_opt_indices, IC_fix_indices, param_UBs, IC_UBs,
            f_calc_ICs, N0, t_pred, int_options)

Assemble the parameter vector and initial conditions of an ODE from optimised (scaled)
and fixed values, solve the ODE, and return the solution as a `DataFrame`.

# Arguments
- `ODE_model`: the ODE right-hand side, passed unchanged to `ODEProblem`.
- `ODE_vars`: column names used for the state variables in the returned `DataFrame`.
- `x`: scaled optimisation vector. Its first `length(param_opt_indices)` entries are
  scaled parameters; the remaining entries are scaled initial-condition ratios.
- `params_fix`, `ICs_fix`: fixed values, written to `param_fix_indices` and
  `IC_fix_indices` respectively.
- `param_opt_indices`, `param_fix_indices`: positions of the optimised / fixed entries
  in the full parameter vector.
- `IC_opt_indices`, `IC_fix_indices`: positions of the optimised / fixed entries in the
  vector handed to `f_calc_ICs`.
- `param_UBs`, `IC_UBs`: factors multiplied element-wise with the scaled entries of `x`
  to un-scale them.
- `f_calc_ICs`: function mapping the assembled IC / IC-ratio vector to the ODE initial
  conditions.
- `N0`: scale factor; the initial conditions are divided by `N0` before solving and the
  solution is multiplied by `N0` afterwards.
- `t_pred`: times at which the solution is saved (`saveat`); the integration span runs
  from `t_pred[1]` to `t_pred[end]`.
- `int_options`: collection queried with `get(int_options, key, default)` for the keys
  `:integrator` (default `Tsit5()`), `:rtol` (default `1e-8`), `:atol` (default `1e-10`)
  and `:dt` (default `0.01`).

# Returns
A `DataFrame` whose first column `:t` holds `t_pred` and whose remaining columns are
named by `ODE_vars`.
"""
function ODE_sol(ODE_model, ODE_vars, x, params_fix, ICs_fix, param_opt_indices, param_fix_indices,
                 IC_opt_indices, IC_fix_indices, param_UBs, IC_UBs, f_calc_ICs,
                 N0, t_pred, int_options)

    # Split x into its parameter part and its IC-ratio part, then un-scale both
    n_opt_params = length(param_opt_indices)
    params_opt = x[1:n_opt_params] .* param_UBs
    IC_ratios_opt = x[n_opt_params+1:end] .* IC_UBs

    num_params = n_opt_params + length(param_fix_indices)
    num_ICs_and_IC_ratios = length(IC_opt_indices) + length(IC_fix_indices)

    # Staging vectors for the full parameter set and the ICs / IC ratios
    params_staged = Array{Real}(undef, num_params)
    ICs_and_IC_ratios = Array{Real}(undef, num_ICs_and_IC_ratios)

    # Populate the parameters
    params_staged[param_opt_indices] = params_opt
    params_staged[param_fix_indices] = params_fix

    # Populate the ICs / IC ratios
    ICs_and_IC_ratios[IC_opt_indices] = IC_ratios_opt
    ICs_and_IC_ratios[IC_fix_indices] = ICs_fix

    # A Vector{Real} forces dynamic dispatch on every parameter access inside the model.
    # Broadcasting `identity` narrows the element type when all entries share one type
    # and leaves the values themselves untouched. Narrowing is skipped if any slot was
    # never assigned, so the staging vector is passed on as it was.
    all_assigned = all(i -> isassigned(params_staged, i), eachindex(params_staged))
    ODE_params = all_assigned ? identity.(params_staged) : params_staged

    # Calculate the ICs from the IC ratios, then scale them by N0
    ODE_ICs = f_calc_ICs(ICs_and_IC_ratios)
    ODE_ICs_scaled = ODE_ICs ./ N0

    # Integration span (multiplying by 1.0 promotes integer times to floating point)
    t_span = 1.0 .* [t_pred[1], t_pred[end]]

    # Integrator options, each with a default
    integrator = get(int_options, :integrator, Tsit5())
    rtol = get(int_options, :rtol, 1e-8)
    atol = get(int_options, :atol, 1e-10)
    dt = get(int_options, :dt, 0.01)

    ODE_prob = ODEProblem(ODE_model, ODE_ICs_scaled, t_span, ODE_params)
    sol_scaled = solve(ODE_prob, integrator, reltol = rtol, abstol = atol,
                       saveat = t_pred, dt = dt)

    # Transpose so that columns correspond to ODE_vars, then undo the N0 scaling
    sol = N0 .* DataFrame(sol_scaled', ODE_vars)

    insertcols!(sol, 1, :t => t_pred)
    return sol

end

ODE_sol (generic function with 1 method)

In [7]:
"""
    sort_by_var(soln_dfs)

Regroup a collection of ODE solutions by dependent variable.

# Arguments
- `soln_dfs`: a list of `DataFrame`s, each containing the time series solution data of
  an ODE -- the solution to the *same* ODE solved at the same time values. Every
  `DataFrame` must include a column of the time values named `:t`. The variable names
  and time values are taken from the first element.

# Returns
A `Dict` mapping each variable name (a `Symbol`, `:t` excluded) to a `DataFrame` with a
leading `:t` column followed by one `Float64` column per solution, named by variable and
sample index (e.g. `:S1`, `:S2`, ..., `:S500`). An empty `soln_dfs` yields an empty
`Dict`.
"""
function sort_by_var(soln_dfs)

    # Dictionary that stores one DataFrame per variable
    var_dfs = Dict()

    # Nothing to regroup: there is no first solution to read names and times from
    isempty(soln_dfs) && return var_dfs

    first_soln = soln_dfs[1]

    # Names of the dependent variables ('t' is excluded, it is not sorted)
    vars_list = filter(sym -> sym != :t, Symbol.(names(first_soln)))

    # The number of ODE solutions we've got
    num_solns = length(soln_dfs)

    # Time values shared by all solutions
    t_vals = first_soln.t

    for var in vars_list

        # One column per solution, named by variable & sample (e.g. "S1","S2",...)
        col_names = [Symbol(string(var) * string(i)) for i in 1:num_solns]
        df = DataFrame(Array{Float64,2}(undef, length(t_vals), num_solns), col_names)

        # Copy the column for 'var' out of every solution into its own column of df
        for (j, soln) in enumerate(soln_dfs)
            df[:, j] = soln[:, var]
        end

        # Add a column for the t values
        insertcols!(df, 1, :t => t_vals)

        var_dfs[var] = df
    end

    return var_dfs
end

sort_by_var (generic function with 1 method)

In [8]:
"""
    percentiles_by_row(df::AbstractDataFrame; low = 0.25, med = 0.50, high = 0.75)

Compute three quantiles for *each row* of `df`.

# Arguments
- `df`: a `DataFrame` whose columns are each a time series of values of a variable.
  In the case of ParamEst, the columns are the time series values **of ONE (1)
  particular variable**, obtained from solving an ODE multiple times. For example, `df`
  might have columns named "S_1", "S_2", ..., "S_500", where each column is a solution
  for S (susceptibles) from the same ODE, but with different parameters.

# Keywords
- `low`, `med`, `high`: real numbers satisfying `0.0 <= low < med < high <= 1.0`. These
  are the quantile levels computed for each row.

# Note
It is assumed that `df` does not have a column of time values. If it does, select only
the dependent variable columns first, for example with `select(df, Not(:t))`.

# Returns
An `nrow(df) x 3` `DataFrame` whose column names are the quantile levels converted to
strings (as floating-point numbers, e.g. "0.25"). Its `i`th row holds the low, med and
high quantiles of the `i`th row of `df`.

# Throws
An error if `0.0 <= low < med < high <= 1.0` does not hold.
"""
function percentiles_by_row(df::AbstractDataFrame; low::Real = 0.25, med::Real = 0.50, high::Real = 0.75)

    # Exception handling:
    if !(0.0 <= low < med < high <= 1.0)
        error("Error! 0.0 <= low < med < high <= 1.0 not satisfied.")
    end

    # Quantile levels as floats, so the column names are the same for `1` and `1.0`
    probs = float.([low, med, high])

    # Column names for the DataFrame to be returned
    col_names = string.(probs)

    # Pre-sized result with one column per quantile level
    pctiles_df = DataFrame(Array{Float64}(undef, nrow(df), 3), col_names)

    # Calculate the quantiles for each row of df
    # and store each set of quantiles as a row in pctiles_df
    for i in 1:nrow(df)
        pctiles_df[i, :] = quantile(df[i, :], probs)
    end

    return pctiles_df

end

percentiles_by_row (generic function with 1 method)

In [9]:
"""
    df_from_rows(rows::Array, col_names::Array)

Build a `DataFrame` from a list of rows.

# Arguments
- `rows`: an array whose elements are the rows; each row is pushed onto the result with
  `push!`.
- `col_names`: the column names (symbols or strings), one per entry of a row.

# Returns
A `DataFrame` with one row per element of `rows`. The column element types are taken
from the values of the *first* row, so all values within a column are assumed to be of
the same type. If `rows` is empty, no types can be inferred and an empty `DataFrame`
with columns of element type `Any` is returned.
"""
function df_from_rows(rows::Array, col_names::Array)

    names_sym = Symbol.(col_names)

    # Without a first row there is nothing to infer the column types from
    if isempty(rows)
        return DataFrame([Any[] for _ in names_sym], names_sym)
    end

    # Note: we're assuming that all values within a column are of the same type
    col_types = typeof.(rows[1])

    # Build typed, empty columns explicitly; the `DataFrame(eltypes, names)` constructor
    # is not available in current DataFrames releases
    columns = [T[] for T in col_types]
    df = DataFrame(columns, names_sym)

    for row in rows
        push!(df, row)
    end
    return df
end

df_from_rows (generic function with 1 method)

In [10]:
"""
    data_to_df(; file_path, sheet_name, date_format, date_range, usecols, colnames)

Read a sheet of an Excel workbook into a `DataFrame`, restrict it to a date range, and
add a column `t` of day offsets.

NOTE: The first selected column of the inputted data is expected to hold dates.

# Keywords
- `file_path`: path of the workbook, passed to `XLSX.readxlsx`.
- `sheet_name`: name of the sheet to read.
- `date_format`: format used to parse the two entries of `date_range` with `Date`.
- `date_range`: two date strings; the first and the last date to select (inclusive).
- `usecols`: column selector applied to the full table.
- `colnames`: new names for the selected columns, passed to `rename!`.

# Returns
A `DataFrame` with the rows from the first occurrence of the first date up to the first
occurrence of the last date, with a column `t` inserted at position 2 that holds the
number of days since the first date.

# Throws
An error if either date of `date_range` does not occur in the date column.
"""
function data_to_df(;file_path, sheet_name, date_format, date_range, usecols, colnames)

    xf = XLSX.readxlsx(file_path)
    data_sheet = xf[sheet_name]

    # Put the data in a DataFrame. Depending on the XLSX.jl version, `gettable` returns
    # either a (columns, labels) tuple or a Tables.jl-compatible table; accept both.
    raw_table = XLSX.gettable(data_sheet, header = true)
    data_all_df = raw_table isa Tuple ? DataFrame(raw_table...) : DataFrame(raw_table)

    # Select only the columns we want
    data_df = data_all_df[:, usecols]
    rename!(data_df, colnames)

    # Convert the first column to the "Date" type
    data_df[!, 1] = convert.(Date, data_df[!, 1])
    dates_all = data_df[!, 1]

    # The first and last dates to select
    date_first = Date(date_range[1], date_format)
    date_last = Date(date_range[2], date_format)

    # Locate the first occurrence of each date
    row_first = findfirst(==(date_first), dates_all)
    row_last = findfirst(==(date_last), dates_all)

    if row_first === nothing || row_last === nothing
        error("Error: One or more of the entered dates was not contained in the spreadsheet.")
    end

    # Select the data in the date range
    data_df_new = data_df[row_first:row_last, :]

    # The date range of interest, as an array of Dates
    dates_obs = data_df_new[!, 1]

    # Calculate the Rata Die value of date_first
    t0_rata = Dates.datetime2rata(date_first)

    # Convert the Dates to integers (days since date_first).
    # The element type is kept as Any so the column type seen by callers is unchanged.
    t_obs = Array{Any}(Dates.datetime2rata.(dates_obs) .- t0_rata)

    # Insert t_obs into the DataFrame
    insertcols!(data_df_new, 2, "t" => t_obs)
    return data_df_new
end

data_to_df (generic function with 1 method)