In [1]:
import re
import pathlib
import io

import numpy as np
import pandas as pd

In [2]:
# Maps the short interaction tag found in the source file names
# (e.g. the "compton" in "xcom_compton.data") to the name used for the
# exported CSV file.
interaction_map = dict([
    ('compton', 'compton-scattering'),
    ('pair', 'pair-production'),
    ('photo', 'photoelectric'),
    ('rayleigh', 'rayleigh-scattering'),
    ('triplet', 'triplet-production'),
])

In [3]:
# Destination directory for the converted CSV tables (relative to the
# notebook's working directory). Created up front, including any missing
# parents; an already existing directory is not an error.
new_data_directory = pathlib.Path('../pyegsnrc/data/xcom')
new_data_directory.mkdir(parents=True, exist_ok=True)

In [4]:
# Collect every "*.data" file in the local "xcom" directory and pick the one
# to convert in this run.
# NOTE(review): glob() does not promise any particular ordering, so index 1
# may select a different file on another machine or filesystem -- consider
# sorting the candidates or selecting the file by name.
file_options = [*pathlib.Path('xcom').glob('*.data')]
xcom_filepath = file_options[1]
xcom_filepath

PosixPath('xcom/xcom_compton.data')

In [5]:
# The interaction tag is the text between the last underscore and the first
# following dot of the path string, e.g. "xcom/xcom_compton.data" -> "compton".
xcom_path_string = str(xcom_filepath)
interaction_tag = xcom_path_string.split('_')[-1].split('.')[0]
interaction_name = interaction_map[interaction_tag]

# The CSV is named after the mapped interaction name.
csv_filepath = new_data_directory / f"{interaction_name}.csv"

# Read the whole source file, one list entry per line (newlines kept).
with open(xcom_filepath) as f:
    raw_data = list(f)

In [6]:
# A line made up of optional leading whitespace followed by digits only.
# These lines separate the per-element data blocks in the raw file.
single_integer_pattern = re.compile(r'^\s*\d+$')

# Boolean flag per raw line: True where the line is a lone integer.
row_with_single_integer = [
    single_integer_pattern.match(item) is not None for item in raw_data
]

# Positions (line indices) of the separator lines.
single_integer_indices = np.flatnonzero(row_with_single_integer)

single_integer_indices

array([   0,   39,   78,  117,  156,  195,  234,  273,  312,  351,  390,
        429,  468,  507,  546,  585,  624,  663,  702,  741,  780,  819,
        858,  897,  936,  975, 1014, 1053, 1092, 1131, 1170, 1209, 1248,
       1287, 1326, 1365, 1404, 1443, 1482, 1521, 1560, 1599, 1638, 1677,
       1716, 1755, 1794, 1833, 1872, 1911, 1950, 1989, 2028, 2067, 2106,
       2145, 2184, 2223, 2262, 2301, 2340, 2379, 2418, 2457, 2496, 2535,
       2574, 2613, 2652, 2691, 2730, 2769, 2808, 2847, 2886, 2925, 2964,
       3003, 3042, 3081, 3120, 3159, 3198, 3237, 3276, 3315, 3354, 3393,
       3432, 3471, 3510, 3549, 3588, 3627, 3666, 3705, 3744, 3783, 3822,
       3861])

In [7]:
# Each data block starts on the line right after a separator line ...
lower_data_bounds_inclusive = single_integer_indices + 1
# ... and stops just before the next separator line; the final block runs to
# the end of the file.
upper_data_bounds_non_inclusive = [*single_integer_indices[1:], len(raw_data)]

# One list of raw text lines per data block, in file order.
raw_data_by_z = []
for block_start, block_stop in zip(
    lower_data_bounds_inclusive, upper_data_bounds_non_inclusive
):
    raw_data_by_z.append(raw_data[block_start:block_stop])

# raw_data_by_z

In [8]:
# raw_data

In [9]:
def get_mev_and_barn_from_raw_z_data(raw_z_data):
    """Convert the raw text lines of one data block into two value arrays.

    The lines are concatenated and read as one flat sequence of
    whitespace-separated numbers. Values alternate between the two
    quantities: even positions (0, 2, 4, ...) feed the first array and odd
    positions (1, 3, 5, ...) the second. Every value is passed through
    ``np.exp``, i.e. the input is treated as natural logarithms.

    Parameters
    ----------
    raw_z_data : iterable of str
        Text lines of a single data block; trailing newlines are allowed.

    Returns
    -------
    mev : numpy.ndarray
        Exponentiated even-position values (named for energies in MeV).
    barn : numpy.ndarray
        Exponentiated odd-position values (named for cross-sections in barn).
        Always the same length as ``mev``.

    Raises
    ------
    ValueError
        If a token cannot be parsed as a float, or if the block holds an odd
        number of values and therefore cannot be split into pairs.
    """
    # str.split() without arguments splits on any run of whitespace
    # (newlines included) and never yields empty tokens, so an empty or
    # whitespace-only block becomes an empty array instead of failing on ''.
    tokens = ' '.join(raw_z_data).split()
    log_values = np.array(tokens, dtype=float)

    # An odd count would silently give arrays of different lengths.
    if log_values.size % 2 != 0:
        raise ValueError(
            f"expected an even number of values, got {log_values.size}"
        )

    mev = np.exp(log_values[0::2])
    barn = np.exp(log_values[1::2])

    return mev, barn

In [10]:
# Parse every data block and stack the (mev, barn) pairs into one array;
# axis 0 indexes the block, axis 1 selects mev (0) or barn (1).
data = np.array(
    list(map(get_mev_and_barn_from_raw_z_data, raw_data_by_z))
)

data.shape

(100, 2, 150)

In [11]:
# Every block must share exactly the same energy grid as the first block;
# only then can a single grid label the table columns.
energy_grids = data[:, 0, :]
assert np.all(energy_grids == energy_grids[0])
mev = energy_grids[0]

# Keep only the second quantity (barn) of each block: one row per block.
data_without_mev = data[:, 1, :]

# Rows are labelled 1..100 (presumably the atomic number Z -- confirm),
# columns by the shared energy grid.
table = pd.DataFrame(
    data_without_mev,
    index=np.arange(1, 101),
    columns=mev,
)

In [12]:
# Original file units: natural log of MeV vs. natural log of barn, i.e. ln(MeV) and ln(barn)

In [13]:
# Output units: MeV and barn (the logarithms are undone with np.exp)

In [14]:


# Write the finished table to the CSV path chosen for this interaction.
table.to_csv(path_or_buf=csv_filepath)