In [1]:
using Unitful #https://painterqubits.github.io/Unitful.jl/stable/
#quantity * @u_str("unit abbreviation") 
using Symbolics #https://symbolics.juliasymbolics.org/dev/
#cite https://doi.org/10.48550/arXiv.2105.03949
using Latexify
using Test
#1 * @u_str("mA") is 1 milliamp
using CSV, DataFrames
#using Plots
using PlotlyJS
using Printf
using SymPy
using PDFIO
using Unzip
using Interpolations
using Plots
#plotlyjs()

In [None]:
function check_line(line, start)
    if length(line) > length(start) - 1
        return (line[1:length(start)] == start)
    end
    return false
end

function make_spacing_dict(line1, line2)
    spacing_ends = [collect(out)[1] for out in findall(">", line2)]
    spacing_starts = append!([1], [collect(out)[1] for out in findall("<", line2)])
    #Find where some of the spacings begin and end based on the arrows
    if length(spacing_ends) != length(spacing_starts)
        print("Error: improper formatting")
        return 
    end
    indices_with_gaps = [index for index in  1:length(spacing_ends)-1 if 
                    spacing_ends[index] != spacing_starts[index+1]-1]
    #Some of the spacings are instead denoted by the letter o instead of arrows
    missing_spacings = [spacing_ends[index]+1:spacing_starts[index+1]-1 for index in indices_with_gaps]
    spacings = append!(missing_spacings, 
    [spacing_starts[i]:spacing_ends[i] for i in 1:length(spacing_ends)])
    spacing_dict = Dict([])
    #Make a dictionary where the keys are name for each column and the values are the indices of the columns
    spacing_names = [spacing_dict[strip(line1[spacing], [' ', '#'])] = spacing for spacing in spacings]
    return spacing_dict
end

function read_datum(datum)
    #Reads a single datum from a line of data
    datum = strip(datum, [' '])
    out = tryparse(Float64, datum)
    if out == nothing
        return datum 
    end
    return out
end

function read_data(index0, file_as_array)
    spacing_dict = make_spacing_dict(file_as_array[index0], file_as_array[index0 + 1])
    lines_of_data = tryparse(Int64, split(file_as_array[index0 - 1], [' '])[end])
    data_dict = Dict([])
    out = [data_dict[key] =  [read_datum(line[spacing_dict[key]])
        for line in file_as_array[index0 + 2 : index0 + 1 + lines_of_data]]
            for key in keys(spacing_dict)]        
    return data_dict
end

In [87]:
data_dir = "C:\\Cross-Section-Data\\EXFOR\\"
file_as_array = split(open(f->read(f, String), path), "\n")
spacing_specifiers = [index for index in 1:length(file_as_array) 
                    if check_line(file_as_array[index], "# Prj")]

13-element Vector{Int64}:
  26
  67
 110
 206
 302
 340
 376
 411
 455
 509
 557
 601
 649

In [161]:
DataFrame(read_data(spacing_specifiers[1], file_as_array))

Dict{Any, Any} with 18 entries:
  "Targ"       => [1001.0, 1001.0, 1001.0, 1001.0, 1001.0, 1001.0, 1001.0, 1001…
  "Entry"      => SubString{String}["K2066", "K2066", "K2066", "K2066", "K2066"…
  "dCos/LO"    => SubString{String}["", "", "", "", "", "", "", ""]
  "I78"        => SubString{String}["", "", "", "", "", "", "", ""]
  "dEnergy"    => SubString{String}["4.0000+7", "4.0000+7", "4.0000+7", "4.0000…
  "dELV/HL"    => SubString{String}["", "", "", "", "", "", "", ""]
  "M"          => SubString{String}["", "", "", "", "", "", "", ""]
  "Refer (YY)" => SubString{String}["S.Homma,ET.AL. (74)", "S.Homma,ET.AL. (74)…
  "Prj"        => [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
  "Energy"     => SubString{String}["3.3000+8", "4.1000+8", "5.2000+8", "6.0000…
  "Cos/LO"     => SubString{String}["", "", "", "", "", "", "", ""]
  "ELV/HL"     => SubString{String}["", "", "", "", "", "", "", ""]
  "MF"         => [3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0]
  "dData"      => SubString{String}["

In [84]:
line1 = "# Prj Targ M MF MT PXC  Energy  dEnergy  Data      dData   Cos/LO   dCos/LO   ELV/HL  dELV/HL I78 Refer (YY)              EntrySubP"
line2 = "#---><---->o<-><-->ooo<-------><-------><-------><-------><-------><-------><-------><-------><-><-----------------------><---><->o"
make_spacing_dict(line1, line2)

Dict{Any, Any} with 19 entries:
  "Targ"       => 6:11
  "Entry"      => 123:127
  "P"          => 131:131
  "dCos/LO"    => 68:76
  "I78"        => 95:97
  "dEnergy"    => 32:40
  "dELV/HL"    => 86:94
  "M"          => 12:12
  "Refer (YY)" => 98:122
  "Prj"        => 1:5
  "Energy"     => 23:31
  "Cos/LO"     => 59:67
  "ELV/HL"     => 77:85
  "MF"         => 13:15
  "dData"      => 50:58
  "Data"       => 41:49
  "Sub"        => 128:130
  "PXC"        => 20:22
  "MT"         => 16:19