#### Aggregating csv files into one file

`aggregate_csv_files` reads all the CSV files in `input_dir` and writes the combined results to `output_file`.

Includes cells to separate steady-state and non-steady-state data and to find parameters to re-run.

Last updated March 13, 2024 by Madeline Murphy

In [10]:
import Pkg

Pkg.add("CSV")
Pkg.add("DataFrames")
Pkg.add("Printf")
Pkg.add("FileIO")
Pkg.add("NBInclude")

[32m[1m    Updating[22m[39m registry at `~/.julia/registries/General.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `/panfs/jay/groups/16/dauenha0/murp1677/.julia/environments/v1.9/Project.toml`
[32m[1m  No Changes[22m[39m to `/panfs/jay/groups/16/dauenha0/murp1677/.julia/environments/v1.9/Manifest.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `/panfs/jay/groups/16/dauenha0/murp1677/.julia/environments/v1.9/Project.toml`
[32m[1m  No Changes[22m[39m to `/panfs/jay/groups/16/dauenha0/murp1677/.julia/environments/v1.9/Manifest.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `/panfs/jay/groups/16/dauenha0/murp1677/.julia/environments/v1.9/Project.toml`
[32m[1m  No Changes[22m[39m to `/panfs/jay/groups/16/dauenha0/murp1677/.julia/environments/v1.9/Manifest.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m

In [1]:
using CSV, DataFrames, Printf, FileIO, NBInclude

In [2]:
@nbinclude("/home/dauenha0/murp1677/Cyclic_Dynamics/Code/Non-Git/RxnParameters.ipynb");

In [3]:
"""
    aggregate_csv_files(input_dir, output_file)

Collect every file in `input_dir` whose name ends in `.csv` and write its rows to
`output_file`.

The first table written creates `output_file` with the 17 fixed column names listed
below; every later table (and every table when `output_file` already exists before the
call) is appended without a header. The input files are left in place.

NOTE(review): `CSV.read` is called with its defaults, which take the first row of each
input file as column names — confirm the simulation output files carry a header row.
"""
function aggregate_csv_files(input_dir, output_file)
    # Column names used only when the output file is created (17 columns)
    header_names = ["Batch ID","Simulation ID", "alpha a", "alpha b", "alpha c", "beta a", "beta b", 
        "beta c", "gamma B-A", "gamma C-A", "delta B-A", "delta C-A", "BEa", "frequency [1/s]", 
        "ΔBEa [eV]", "Loop TOF [1/s]","Steady State Conditon"];

    # Keep only directory entries whose name ends in ".csv"
    is_csv(name) = occursin(r"\.csv$", name)

    for name in filter(is_csv, readdir(input_dir))
        # Read the data from one CSV file
        table = CSV.read(joinpath(input_dir, name), DataFrame)

        if isfile(output_file)
            # Output already exists: add the rows below what is there
            CSV.write(output_file, table, append=true)
        else
            # Output does not exist yet: create it, including the column names
            CSV.write(output_file, table, header=header_names)
        end
    end
end

aggregate_csv_files (generic function with 1 method)

In [6]:
# Aggregate the per-simulation CSV files of Set 1 into a single output file.
# NOTE: aggregate_csv_files appends when the output file already exists, so
# re-running this cell adds the same rows to the existing output again.
input_dir = "/home/dauenha0/murp1677/Cyclic_Dynamics/Results/SimulationOutputSet1"
output_file = "/home/dauenha0/murp1677/Cyclic_Dynamics/Results/Set1_Simulation_outputs.csv"

aggregate_csv_files(input_dir, output_file)
println("set 1 done.")

# Same aggregation for Set 2; input_dir and output_file are reassigned here.
input_dir = "/home/dauenha0/murp1677/Cyclic_Dynamics/Results/SimulationOutputSet2"
output_file = "/home/dauenha0/murp1677/Cyclic_Dynamics/Results/Set2_Simulation_outputs.csv"

aggregate_csv_files(input_dir, output_file)
println("set 2 done.")

set 1 done.
set 2 done.


In [7]:
# Map one raw "Loop TOF" cell (column 16 of an aggregated file) to the pair
# (loopTOF, SS_indicator).
# CSV.read only yields strings for this column when it contains the "Not-defined"
# marker; if every entry of a set is numeric the column is parsed as numbers, and
# `parse(Float64, ::Float64)` would throw a MethodError. Both cases are handled here.
loop_tof_and_flag(raw::Real) = (Float64(raw), 1.0)
function loop_tof_and_flag(raw::AbstractString)
    raw == "Not-defined" && return (0.0, 0.0)  # no loop TOF -> flag 0.0
    return (parse(Float64, raw), 1.0)
end

"""
    fill_combined_rows!(combined, raw, row_offset, id_offset)

Copy every row of the aggregated matrix `raw` into `combined`, starting at row
`row_offset + 1`. Each output row is
`[SimID, raw columns 3:15, loopTOF, SS_indicator]` (16 values), where
`SimID = (batch - 1) * 60 + simulation + id_offset` is built from columns 1 and 2.
Returns `combined`.

NOTE(review): the factor 60 is presumably the number of simulations per batch —
confirm against the simulation driver.
"""
function fill_combined_rows!(combined, raw, row_offset, id_offset)
    for i in axes(raw, 1)
        sim_id = (raw[i, 1] - 1) * 60 + raw[i, 2] + id_offset
        loop_tof, ss_flag = loop_tof_and_flag(raw[i, 16])
        combined[i + row_offset, :] = vcat(sim_id, raw[i, 3:15], loop_tof, ss_flag)
    end
    return combined
end

# Load Set 1
fileName = "/home/dauenha0/murp1677/Cyclic_Dynamics/Results/Set1_Simulation_outputs.csv"
df_set1 = CSV.read(fileName, DataFrame)
df_array_set1 = Matrix(df_set1)

# Load Set 2
fileName = "/home/dauenha0/murp1677/Cyclic_Dynamics/Results/Set2_Simulation_outputs.csv"
df_set2 = CSV.read(fileName, DataFrame)
df_array_set2 = Matrix(df_set2)

# Preallocate the combined array: one row per simulation of either set, 16 columns
len = size(df_array_set1, 1) + size(df_array_set2, 1)
combined_array = Matrix{Float64}(undef, len, 16)

# Set 1 fills the first rows and keeps its simulation IDs as they are
fill_combined_rows!(combined_array, df_array_set1, 0, 0)

# Set 2 follows Set 1; its simulation IDs are shifted by 90000
# NOTE(review): presumably so they cannot collide with Set 1 IDs — confirm
fill_combined_rows!(combined_array, df_array_set2, size(df_array_set1, 1), 90000)

# Get permutation indices based on the first column (simulation ID)
perm_indices = sortperm(combined_array[:, 1])

# Sort the matrix using the permutation indices
sorted_array = combined_array[perm_indices, :]

# The simulation ID stays a Float64: sorted_array is a Matrix{Float64} and cannot hold
# an integer column. (A previous `convert.(Int64, sorted_array[:,1])` here discarded
# its result and therefore had no effect.)

println(sorted_array[1,:])

[1.0, 0.2, 0.2, 0.2, 0.6, 0.6, 0.6, 0.6, 0.6, 0.5, 0.5, 0.8, 50.0, 0.3, 0.05797767833709657, 1.0]


In [10]:
"""
    delete_duplicates(matrix)

Return a copy of `matrix` containing only the first row for each distinct value in
column 1. The relative order of the kept rows is unchanged, and the element type of the
result equals `eltype(matrix)`. An input with zero rows yields a result with zero rows
and the same number of columns.
"""
function delete_duplicates(matrix)
    seen_ids = Set{eltype(matrix)}()  # IDs (column 1) encountered so far
    keep_rows = Int[]                 # row indices of first occurrences

    for i in axes(matrix, 1)
        id = matrix[i, 1]
        if !(id in seen_ids)
            push!(seen_ids, id)
            push!(keep_rows, i)
        end
    end

    # Slice once at the end: growing the result with `vcat` inside the loop copies the
    # whole matrix for every kept row, which is quadratic in the number of rows.
    return matrix[keep_rows, :]
end


delete_duplicates_new (generic function with 1 method)

In [11]:
# Remove rows with a repeated simulation ID from the sorted, combined array.
# `sorted_array` is the matrix produced by the combine-and-sort cell; the previous
# argument `unique_matrix` is not defined by any cell in this notebook, so running the
# notebook top to bottom failed here with an UndefVarError.
unique_matrix_refined = delete_duplicates(sorted_array)
println(size(unique_matrix_refined))

# Preview the first (up to) 10 simulation IDs; the bound avoids a BoundsError when
# fewer than 10 rows remain.
println(unique_matrix_refined[1:min(10, end),1])

(174312, 16)
[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]


In [12]:
# Wrap the de-duplicated matrix in a DataFrame; `:auto` generates the column names,
# the descriptive names below are only used as the header of the CSV file.
unique_df = DataFrame(unique_matrix_refined, :auto)

fpathSS = "/home/dauenha0/murp1677/Cyclic_Dynamics/Results/2024-03-13_ALL_Simulation_outputs.csv"
if isfile(fpathSS)
    # File already exists: append the rows without writing a header
    CSV.write(fpathSS, unique_df, append=true)
else
    # File does NOT exist: create it and include the column names
    col_names = ["Simulation ID", "alpha a", "alpha b", "alpha c", "beta a", "beta b", "beta c", "gamma B-A", "gamma C-A", "delta B-A", "delta C-A", "BEa", "frequency [1/s]", "ΔBEa [eV]", "Loop TOF [1/s]","Steady State Conditon"];
    CSV.write(fpathSS, unique_df, header=col_names)
end

"/home/dauenha0/murp1677/Cyclic_Dynamics/Results/2024-03-13_ALL_Simulation_outputs.csv"

### Export results with rate constants as parameters

In [13]:
# Split unique_df into named parameter arrays. Columns are addressed by position:
#   1 simulation ID | 2-4 alpha | 5-7 beta | 8-9 gamma | 10-11 delta
#   12 BEa | 13 frequency | 14 ΔBEa | 15 loop TOF | 16 steady-state flag
SimID = unique_df[:, 1];
BEa = unique_df[:, 12];
freq = unique_df[:, 13];
delBEa = unique_df[:, 14];
loopTOF = unique_df[:, 15];
SS_cond = unique_df[:, 16];
len = length(loopTOF)

# One row per simulation, one column per parameter component
alpha = Float64[unique_df[:, 2] unique_df[:, 3] unique_df[:, 4]]
beta = Float64[unique_df[:, 5] unique_df[:, 6] unique_df[:, 7]]
gamma = Float64[unique_df[:, 8] unique_df[:, 9]]
delta = Float64[unique_df[:, 10] unique_df[:, 11]]

# Rate constants of each state: columns 1-3 forward, columns 4-6 reverse
k1 = Matrix{Float64}(undef, len, 6)
k2 = Matrix{Float64}(undef, len, 6)

for i in 1:len
    # State 1: evaluated at BEa
    kf1, kr1, BE1 = RxnParametersArray(BEa[i], gamma[i, :], delta[i, :], alpha[i, :], beta[i, :]);
    k1[i, :] = [kf1; kr1]

    # State 2: evaluated at BEa shifted by ΔBEa
    kf2, kr2, BE2 = RxnParametersArray((BEa[i] + delBEa[i]), gamma[i, :], delta[i, :], alpha[i, :], beta[i, :]);
    k2[i, :] = [kf2; kr2]
end

rate_df = DataFrame(
    SimID = SimID,
    kf1_1 = k1[:, 1], kf2_1 = k1[:, 2], kf3_1 = k1[:, 3],
    kr1_1 = k1[:, 4], kr2_1 = k1[:, 5], kr3_1 = k1[:, 6],
    kf1_2 = k2[:, 1], kf2_2 = k2[:, 2], kf3_2 = k2[:, 3],
    kr1_2 = k2[:, 4], kr2_2 = k2[:, 5], kr3_2 = k2[:, 6],
    frequency = freq,
    ΔBEa = delBEa,
    Loop_TOF = loopTOF,
    Steady_State = SS_cond,
)

fpath = "/home/dauenha0/murp1677/Cyclic_Dynamics/Results/2024-03-13_ALL_Simulation_outputs_RATECONSTANTS.csv"
if isfile(fpath)
    # File already exists: append the rows without writing a header
    CSV.write(fpath, rate_df, append=true)
else
    # File does NOT exist: create it and include the column names
    col_names = ["Simulation ID", "k1 state 1", "k2 state 1", "k3 state 1", "k-1 state 1", "k-2 state 1", 
        "k-3 state 1", "k1 state 2", "k2 state 2", "k3 state 2", "k-1 state 2", "k-2 state 2", 
        "k-3 state 2", "frequency [1/s]", "ΔBEa [eV]", "Loop TOF [1/s]","Steady State Conditon"];
    CSV.write(fpath, rate_df, header=col_names)
end

"/home/dauenha0/murp1677/Cyclic_Dynamics/Results/2024-03-13_ALL_Simulation_outputs_RATECONSTANTS.csv"