## Code to generate an array of parameter inputs

For the identified high, medium, and low values of the input parameters, this notebook generates the complete set of combinations and batches them. The sets and batches are then written, in that order, to CSV files. These files serve as inputs to the loop simulation, allowing the simulations to be run in parallel.

This notebook also identifies simulations that have not been run yet and regroups them into new batches, isolating the ones that still need to be run. This allowed all simulations to be run.

Last updated May 8, 2024 by Madeline Murphy

In [None]:
# import necessary packages
import Pkg
Pkg.add("CSV")
Pkg.add("DataFrames")
Pkg.add("PyPlot")
Pkg.add("NBInclude")
Pkg.add("Trapz")
Pkg.add("Printf")
Pkg.add("Statistics")
Pkg.add("NBInclude")
Pkg.add("DifferentialEquations")
Pkg.add("Random")

In [None]:
# Load packages
using CSV, DataFrames, Trapz, Printf, NBInclude, DifferentialEquations, Statistics, Random

In [None]:
"""
    AllCombinations(v...)

Build every combination that takes exactly one value from each input collection.

# Arguments
- `v...`: one collection of candidate values per parameter.

# Returns
- A flat vector holding one tuple per combination. Values from the first collection
  vary fastest, values from the last collection vary slowest.
"""
function AllCombinations(v...)
    # Materialize the Cartesian product as an N-dimensional array of tuples ...
    grid = collect(Iterators.product(v...))
    # ... and flatten it to one dimension without changing the element order.
    return reshape(grid, length(grid))
end

In [None]:
"""
    RxnParameters(parameters, BEa)

Compute the activation energies of the three surface reactions for both catalyst states.

State 1 uses `BEa` as given; state 2 uses `BEa + parameters[11]`. For each state the
binding energies of the other two surface species follow from linear scaling with `BEa`,
the reaction energies are differences of those binding energies, and the activation
energies follow from the linear relation `Ea = alpha * deltaG + beta`.

# Arguments
- `parameters`: indexable collection of 11 values, in the order
  `alpha_a, alpha_b, alpha_c, beta_a, beta_b, beta_c, gamma_ab, gamma_ac, delta_ab,
  delta_ac, delBEa`.
- `BEa`: binding energy of A* in catalyst state 1.

# Returns
- `Vector{Float64}` of length 6: the three state-1 activation energies followed by the
  three state-2 activation energies.
"""
function RxnParameters(parameters, BEa)
    delBEa = parameters[11]  # shift of the A* binding energy between the two states

    Ea1 = _state_activation_energies(parameters, BEa)
    Ea2 = _state_activation_energies(parameters, BEa + delBEa)

    return vcat(Ea1, Ea2)
end

# Activation energies of the three surface reactions for one catalyst state, i.e. for one
# value of the A* binding energy `BEa`.
function _state_activation_energies(parameters, BEa)
    alpha_a, alpha_b, alpha_c = parameters[1], parameters[2], parameters[3]
    beta_a, beta_b, beta_c = parameters[4], parameters[5], parameters[6]
    gamma_ab, gamma_ac = parameters[7], parameters[8]
    delta_ab, delta_ac = parameters[9], parameters[10]

    # Binding energies of the other two species from linear scaling relationships
    BEb = gamma_ab * BEa + (1 - gamma_ab) * delta_ab
    BEc = gamma_ac * BEa + (1 - gamma_ac) * delta_ac

    # Reaction energies: (-BE of the species formed) - (-BE of the species consumed).
    # Presumably the steps are A* -> B*, B* -> C*, C* -> A* -- confirm against the model.
    deltaG = Float64[BEa - BEb, BEb - BEc, BEc - BEa]

    # Activation energies from the BEP relationships
    return Float64[
        alpha_a * deltaG[1] + beta_a,
        alpha_b * deltaG[2] + beta_b,
        alpha_c * deltaG[3] + beta_c,
    ]
end

In [None]:
# Candidate (low, medium, high) values for each family of input parameters
alpha  = [0.2, 0.6, 0.9]
beta   = [0.6, 0.9, 1.2]
gamma  = [0.6, 1.4, 1.8]
delta  = [0.5, 1.0, 1.5]
delBEa = [0.3, 0.5, 0.8]

# Binding energy of A* that is passed to RxnParameters when the combinations are screened
BEa = 0.8

# Enumerate every combination. The argument order matches the index order that
# RxnParameters unpacks: three alphas, three betas, two gammas, two deltas, one delBEa.
allcombo = AllCombinations(
    alpha, alpha, alpha,
    beta, beta, beta,
    gamma, gamma,
    delta, delta,
    delBEa,
)

In [None]:
# Screen the combinations and drop every one that yields a negative activation energy for
# any of the six values returned by RxnParameters.
# `findall` sizes its result from the data, so no hard-coded buffer length is needed and
# the cell keeps working if the parameter grid changes size. It returns the matching
# indices in ascending order, which is what `deleteat!` requires.
delete_idx = findall(combo -> any(RxnParameters(combo, BEa) .< 0.0), allcombo)
count = length(delete_idx)
deleteat!(allcombo, delete_idx)
println(count)             # number of combinations removed
println(length(allcombo))  # number of combinations kept

In [None]:
# Split the screened combinations into two sets:
# set1 holds the first 90000 entries, set2 holds everything after them.
set1 = allcombo[begin:90000]

set2 = allcombo[(90000 + 1):lastindex(allcombo)]

In [None]:
"""
    identify_reruns_SimID(allcombo, csv_path; n_batches=1500, jobs_per_batch=60)

Identify the Set 1 parameter combinations that still have to be simulated.

Combination number `ID` (its position in `allcombo`) counts as already simulated when
`ID` appears in the `"Simulation ID"` column of the CSV file at `csv_path`. The number of
rows read from that column is printed to standard output.

# Arguments
- `allcombo`: vector of parameter combinations (one tuple of parameter values each).
- `csv_path`: path to the CSV file listing the simulations that have been run.

# Keywords
- `n_batches`: number of batches in the original submission (default `1500`).
- `jobs_per_batch`: number of jobs per batch in the original submission (default `60`).

Only the first `n_batches * jobs_per_batch` combinations are considered.

# Returns
- A vector with one tuple `(batch, job, parameters...)` per combination whose ID is
  missing from the CSV file, in ascending ID order.
"""
function identify_reruns_SimID(allcombo, csv_path; n_batches=1500, jobs_per_batch=60)
    # Read the CSV file
    data = CSV.File(csv_path) |> DataFrame

    # Simulation IDs that already have results
    sim_ids = data."Simulation ID"
    println(length(sim_ids))

    # Hash-based membership test: one lookup per ID instead of a scan over the whole column
    completed = Set(sim_ids)

    rerun_params = []
    n_ids = min(length(allcombo), n_batches * jobs_per_batch)

    for ID in 1:n_ids
        ID in completed && continue

        # Recover the batch number and job ID from ID = (j - 1) * jobs_per_batch + k
        j = cld(ID, jobs_per_batch)
        k = ID - (j - 1) * jobs_per_batch

        push!(rerun_params, (j, k, allcombo[ID]...))
    end

    return rerun_params
end


In [None]:
"""
    identify_reruns_SimID_2(allcombo, csv_path; n_batches=1406, jobs_per_batch=60,
                            id_offset=90000)

Identify the Set 2 parameter combinations that still have to be simulated.

The combination at position `index` of `allcombo` has simulation ID `index + id_offset`.
It counts as already simulated when that ID appears in the `"Simulation ID"` column of
the CSV file at `csv_path`. The number of rows read from that column is printed to
standard output.

# Arguments
- `allcombo`: vector of parameter combinations (one tuple of parameter values each).
- `csv_path`: path to the CSV file listing the simulations that have been run.

# Keywords
- `n_batches`: number of batches in the original submission (default `1406`).
- `jobs_per_batch`: number of jobs per batch in the original submission (default `60`).
- `id_offset`: amount added to a position in `allcombo` to obtain its simulation ID
  (default `90000`).

Only the first `n_batches * jobs_per_batch` combinations are considered.

# Returns
- A vector with one tuple `(batch, job, parameters...)` per combination whose ID is
  missing from the CSV file, in ascending ID order. Batch and job numbers are relative
  to this set (they do not include `id_offset`).
"""
function identify_reruns_SimID_2(
    allcombo, csv_path; n_batches=1406, jobs_per_batch=60, id_offset=90000
)
    # Read the CSV file
    data = CSV.File(csv_path) |> DataFrame

    # Simulation IDs that already have results
    sim_ids = data."Simulation ID"
    println(length(sim_ids))

    # Hash-based membership test: one lookup per ID instead of a scan over the whole column
    completed = Set(sim_ids)

    rerun_params = []
    n_idx = min(length(allcombo), n_batches * jobs_per_batch)

    for index in 1:n_idx
        ID = index + id_offset
        ID in completed && continue

        # Recover the batch number and job ID from index = (j - 1) * jobs_per_batch + k
        j = cld(index, jobs_per_batch)
        k = index - (j - 1) * jobs_per_batch

        push!(rerun_params, (j, k, allcombo[index]...))
    end

    return rerun_params
end


### Identify reruns and export the Parameters

In [None]:
# Identify the Set 1 combinations that are missing from the collected simulation outputs
csv_path = "/home/dauenha0/murp1677/Cyclic_Dynamics/Results/2024-03-13_ALL_Simulation_outputs.csv"

Rerun_params_set1 = identify_reruns_SimID(set1, csv_path)
# Randomize the order of the reruns before they are grouped into new batches.
# NOTE(review): the shuffle is unseeded, so the grouping differs on every run.
Rerun_params_set1 = shuffle(Rerun_params_set1)
println(length(Rerun_params_set1))
# Preview at most ten entries; `first` does not throw when fewer than ten reruns remain
println(first(Rerun_params_set1, 10))

In [None]:
## SET 1
# Export the Set 1 reruns to CSV: 111 rows per batch file, at most 18 batch files.
n_batches = 18         # number of batch files
jobs_per_batch = 111   # rows written to each batch file
col_names = ["BatchID", "JobID", "alpha_a", "alpha_b", "alpha_c", "beta_a", "beta_b", "beta_c", "gamma_BA", "gamma_CA", "delta_BA", "delta_CA", "delBEa"]; # 13 columns

# Never index past the end of the rerun list (it may be shorter than 18 * 111, or empty)
n_export = min(length(Rerun_params_set1), n_batches * jobs_per_batch)

for idx in 1:n_export
    combo = Rerun_params_set1[idx]
    j = cld(idx, jobs_per_batch)  # batch file that receives this row

    # Create DataFrame to export parameters to CSV
    fpath = "/home/dauenha0/murp1677/Cyclic_Dynamics/Batch_Scripts/Parameters/ReRuns_Round19_Set1/ReRun_Batch$(j).csv"
    df = DataFrame(batch_number=combo[1], jobID=combo[2], alpha_a = combo[3], alpha_b = combo[4],
    alpha_c = combo[5], beta_a = combo[6], beta_b = combo[7], beta_c = combo[8], gamma_BA = combo[9],
    gamma_CA = combo[10], delta_BA = combo[11], delta_CA = combo[12], delBEa = combo[13])

    if isfile(fpath)
        # file already exists: append the row without repeating the header
        CSV.write(fpath, df, append=true)
    else
        # file does NOT exist: write it and include the column names
        CSV.write(fpath, df, header=col_names)
    end
end

if n_export == length(Rerun_params_set1)
    println("Set 1 Exported")
else
    println("Only the first $(n_export) of $(length(Rerun_params_set1)) reruns fit into $(n_batches) batches")
end

# `count` keeps its earlier meaning: one past the number of exported rows
count = n_export + 1
println(count-1)


In [None]:
# Identify the Set 2 combinations that are missing from the collected simulation outputs
csv_path = "/home/dauenha0/murp1677/Cyclic_Dynamics/Results/2024-03-12_ALL_Simulation_outputs.csv"

Rerun_params_set2 = identify_reruns_SimID_2(set2, csv_path)
# Randomize the order of the reruns before they are grouped into new batches.
# NOTE(review): the shuffle is unseeded, so the grouping differs on every run.
Rerun_params_set2 = shuffle(Rerun_params_set2)
println(length(Rerun_params_set2))
# Preview at most ten entries; `first` does not throw when fewer than ten reruns remain
println(first(Rerun_params_set2, 10))

In [None]:
## SET 2
# Export the Set 2 reruns to CSV: 111 rows per batch file, at most 18 batch files.
n_batches = 18         # number of batch files
jobs_per_batch = 111   # rows written to each batch file
col_names = ["BatchID", "JobID", "alpha_a", "alpha_b", "alpha_c", "beta_a", "beta_b", "beta_c", "gamma_BA", "gamma_CA", "delta_BA", "delta_CA", "delBEa"]; # 13 columns

# Never index past the end of the rerun list (it may be shorter than 18 * 111, or empty),
# and include the final entry of the list.
n_export = min(length(Rerun_params_set2), n_batches * jobs_per_batch)

for idx in 1:n_export
    combo = Rerun_params_set2[idx]
    j = cld(idx, jobs_per_batch)  # batch file that receives this row

    # Create DataFrame to export parameters to CSV
    fpath = "/home/dauenha0/murp1677/Cyclic_Dynamics/Batch_Scripts/Parameters/ReRuns_Round15_Set2/ReRun_Batch$(j).csv"
    df = DataFrame(batch_number=combo[1], jobID=combo[2], alpha_a = combo[3], alpha_b = combo[4],
    alpha_c = combo[5], beta_a = combo[6], beta_b = combo[7], beta_c = combo[8], gamma_BA = combo[9],
    gamma_CA = combo[10], delta_BA = combo[11], delta_CA = combo[12], delBEa = combo[13])

    if isfile(fpath)
        # file already exists: append the row without repeating the header
        CSV.write(fpath, df, append=true)
    else
        # file does NOT exist: write it and include the column names
        CSV.write(fpath, df, header=col_names)
    end
end

if n_export == length(Rerun_params_set2)
    println("Set 2 Exported")
else
    println("Only the first $(n_export) of $(length(Rerun_params_set2)) reruns fit into $(n_batches) batches")
end

# `count` is one past the number of exported rows, as in the Set 1 export
count = n_export + 1
println(count-1)

In [None]:
"""
    create_folder(folder_name, path="")

Create the directory `folder_name` inside `path` and report the outcome on standard output.

Failure is reported rather than thrown, so a notebook cell calling this function keeps
running. The message includes the underlying error so the cause (for example, the folder
already exists or the parent directory is missing) is visible.

# Arguments
- `folder_name`: name of the directory to create.
- `path`: directory in which to create it; the default `""` makes `folder_name` relative
  to the current working directory.

# Returns
- `nothing`.
"""
function create_folder(folder_name::AbstractString, path::AbstractString="")
    full_path = joinpath(path, folder_name)
    try
        mkdir(full_path)
        println("Folder '$folder_name' created at: $full_path")
    catch err
        # Best-effort by design: report instead of rethrowing, but keep the cause
        println("Error: Failed to create folder '$folder_name': ", sprint(showerror, err))
    end
    return nothing
end
