In [1]:
using Unitful #https://painterqubits.github.io/Unitful.jl/stable/
#quantity * @u_str("unit abbreviation") 
using Symbolics #https://symbolics.juliasymbolics.org/dev/
#cite https://doi.org/10.48550/arXiv.2105.03949
using Latexify
using Test
#1 * @u_str("mA") is 1 milliamp
using CSV, DataFrames
#using Plots
using PlotlyJS
using Printf
using SymPy
using PDFIO
using Unzip
using Interpolations
using Plots
#plotlyjs()

In [2]:
function check_line(line, start)
    if length(line) > length(start) - 1
        return (line[1:length(start)] == start)
    end
    return false
end

function make_spacing_dict(line1, line2)
    spacing_ends = [collect(out)[1] for out in findall(">", line2)]
    spacing_starts = append!([1], [collect(out)[1] for out in findall("<", line2)])
    #Find where some of the spacings begin and end based on the arrows
    if length(spacing_ends) != length(spacing_starts)
        print("Error: improper formatting")
        return 
    end
    indices_with_gaps = [index for index in  1:length(spacing_ends)-1 if 
                    spacing_ends[index] != spacing_starts[index+1]-1]
    #Some of the spacings are instead denoted by the letter o instead of arrows
    missing_spacings = [spacing_ends[index]+1:spacing_starts[index+1]-1 for index in indices_with_gaps]
    spacings = append!(missing_spacings, 
    [spacing_starts[i]:spacing_ends[i] for i in 1:length(spacing_ends)])
    spacing_dict = Dict([])
    #Make a dictionary where the keys are name for each column and the values are the indices of the columns
    spacing_names = [spacing_dict[strip(line1[spacing], [' ', '#'])] = spacing for spacing in spacings]
    return spacing_dict
end

function read_datum(datum)
    #Reads a single datum from a line of data
    datum = strip(datum, [' '])
    out = tryparse(Float64, datum)
    if out == nothing
        return datum 
    end
    return out
end

function read_data(index0, file_as_array)
    spacing_dict = make_spacing_dict(file_as_array[index0], file_as_array[index0 + 1])
    lines_of_data = tryparse(Int64, split(file_as_array[index0 - 1], [' '])[end])
    data_dict = Dict([])
    out = [data_dict[key] =  [read_datum(line[spacing_dict[key]])
        for line in file_as_array[index0 + 2 : index0 + 1 + lines_of_data]]
            for key in keys(spacing_dict)]        
    return data_dict
end

read_data (generic function with 1 method)

In [8]:
data_dir = "C:\\Cross-Section-Data\\EXFOR\\"
file_path = data_dir * "gammas\\001_H_001.c4"
file_as_array = split(open(f->read(f, String), file_path), "\n")
spacing_specifiers = [index for index in 1:length(file_as_array) 
                    if check_line(file_as_array[index], "# Prj")]

13-element Vector{Int64}:
  26
  67
 110
 206
 302
 340
 376
 411
 455
 509
 557
 601
 649

In [28]:
df_list = [DataFrame(read_data(spacing_specifier, file_as_array))
            for spacing_specifier in spacing_specifiers]

13-element Vector{DataFrame}:
 [1m8×18 DataFrame[0m
[1m Row [0m│[1m Cos/LO    [0m[1m Data    [0m[1m ELV/HL    [0m[1m Energy    [0m[1m Entry     [0m[1m I78       [0m[1m M      [0m ⋯
[1m     [0m│[90m SubStrin… [0m[90m Float64 [0m[90m SubStrin… [0m[90m SubStrin… [0m[90m SubStrin… [0m[90m SubStrin… [0m[90m SubStri[0m ⋯
─────┼──────────────────────────────────────────────────────────────────────────
   1 │            0.01876             3.3000+8   K2066                         ⋯
   2 │            0.01906             4.1000+8   K2066
   3 │            0.01925             5.2000+8   K2066
   4 │            0.01932             6.0000+8   K2066
   5 │            0.01932             6.8000+8   K2066                         ⋯
   6 │            0.01952             7.5000+8   K2066
   7 │            0.01961             8.3000+8   K2066
   8 │            0.01972             9.1000+8   K2066
[36m                                                              12 column

In [61]:
list_of_df_rows = append!([0],[[s for s in size(df)][1] for df in df_list])
n_rows(df_list) = sum(list_of_df_rows(df_list))
n_cols = size(df_list[1])[2]
n_rows(df_list), n_cols
combined_df = DataFrame([Vector{Union{Missing, Float64}}(missing, n_rows(df_list)) 
                for _ in 1:n_cols], names(df_list[1]))

ErrorException: invalid redefinition of constant list_of_df_rows

In [26]:
list_of_df_rows = append([0], list_of_df_rows(df_list))

Unnamed: 0_level_0,MT,PXC,Prj,Refer (YY),Sub,Targ,dCos/LO
Unnamed: 0_level_1,Float64,SubStrin…,Float64,SubStrin…,Float64,Float64,SubStrin…
1,1.0,,0.0,"S.Homma,ET.AL. (74)",7.0,1001.0,
2,1.0,,0.0,"S.Homma,ET.AL. (74)",7.0,1001.0,
3,1.0,,0.0,"S.Homma,ET.AL. (74)",7.0,1001.0,
4,1.0,,0.0,"S.Homma,ET.AL. (74)",7.0,1001.0,
5,1.0,,0.0,"S.Homma,ET.AL. (74)",7.0,1001.0,
6,1.0,,0.0,"S.Homma,ET.AL. (74)",7.0,1001.0,
7,1.0,,0.0,"S.Homma,ET.AL. (74)",7.0,1001.0,
8,1.0,,0.0,"S.Homma,ET.AL. (74)",7.0,1001.0,
9,2.0,C,0.0,"A.Hunger,ET.AL. (97)",3.0,1001.0,
10,2.0,C,0.0,"A.Hunger,ET.AL. (97)",3.0,1001.0,


In [56]:
n_rows

n_rows (generic function with 1 method)

In [57]:
DataFrame([Vector{Union{Missing, Float64}}(missing, n_rows(df_list)) for _ in 1:n_cols], names(df_list[1]))

Unnamed: 0_level_0,Cos/LO,Data,ELV/HL,Energy,Entry,I78,M,MF
Unnamed: 0_level_1,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?
1,missing,missing,missing,missing,missing,missing,missing,missing
2,missing,missing,missing,missing,missing,missing,missing,missing
3,missing,missing,missing,missing,missing,missing,missing,missing
4,missing,missing,missing,missing,missing,missing,missing,missing
5,missing,missing,missing,missing,missing,missing,missing,missing
6,missing,missing,missing,missing,missing,missing,missing,missing
7,missing,missing,missing,missing,missing,missing,missing,missing
8,missing,missing,missing,missing,missing,missing,missing,missing
9,missing,missing,missing,missing,missing,missing,missing,missing
10,missing,missing,missing,missing,missing,missing,missing,missing
