## **Jupyter Notebook for exporting data into an HDF5 file (EDX, XRD and MOKE only)**
- version: XRD-release-1.4 <br>
- author: William Rigaut <br>
- date: 5.06.2024  <br>

In [None]:
import h5py;
import os;
import re;
import xml.etree.ElementTree as ET;
import numpy as np;
from tqdm.notebook import tqdm;

def is_excluded(line):
    """Tell whether a line starts with one of the ignored '*'-prefixed keys.

    Returns True when ``line`` begins with '*' immediately followed by one of
    the excluded key prefixes, False otherwise.
    """
    ignored_markers = (
        '*RAS',
        '*DISP',
        '*FILE',
        '*MEAS_COND_AXIS_NAME_INTERNAL',
        '*MEAS_COND_AXIS_NAME_MAGICNO',
        '*MEAS_COND_AXIS_UNIT',
    )
    # str.startswith accepts a tuple and checks every candidate in one call.
    return line.startswith(ignored_markers)

def readMokeDataFile(file):
    """Collect the numeric rows of an iterable of text lines.

    Lines starting with 'D', 'P' or 'S' are ignored. Every other line is split
    on whitespace and each token converted to float; lines that yield no token
    (blank lines) are dropped. A non-numeric token raises ValueError.

    Returns a list of lists of floats, one inner list per kept line.
    """
    rows = []

    for raw_line in file:
        if raw_line.startswith(('D', 'P', 'S')):
            continue
        values = [float(token) for token in raw_line.split()]
        if values:
            rows.append(values)

    return rows

def convertFloat(item):
    """Return ``item`` converted to float when possible, else ``item`` itself.

    Values that float() rejects with ValueError or TypeError (non-numeric
    strings, None, lists, ...) are handed back unchanged.
    """
    try:
        return float(item)
    except (ValueError, TypeError):
        return item

def writeHDF5_EDX(HDF5_path, filepath, x_pos, y_pos):
    """Append the content of one EDX spectrum XML file to the HDF5 file.

    The XML tree is walked in document order until the ``ClassInstance``
    element named ``Results`` is reached:

    - every element whose tag is not in ``excluded_base`` is kept as a header
      entry (only the first occurrence of a tag that has a text value is
      written),
    - the ``Channels`` element gives the comma-separated channel counts; it
      has to appear before the ``Results`` element to be seen,
    - inside ``Results``, each ``Result`` child contributes the values whose
      tags are listed in ``params``, up to the ``ExtResults`` element.

    Everything is written under ``EDX/Spectrum_({x_pos}, {y_pos})`` in three
    sub-groups: ``Data`` (dataset ``Counts`` with one ``(channel index,
    counts)`` row per channel), ``Header`` and ``Results`` (one sub-group
    ``Atomic Number: NN`` per result).

    Parameters:
        HDF5_path: path of the HDF5 file, opened in append mode.
        filepath: path of the XML spectrum file to read.
        x_pos, y_pos: position values used to build the group name.

    Returns:
        0 on success.

    Raises:
        ValueError: if no ``Channels`` element was found; raised before the
            HDF5 file is touched so that no half-written group is left behind.
    """
    # Structural tags, or tags handled separately, that must not end up in the header.
    excluded_base = ['', 'TRTSpectrum', 'RTHeader', 'ClassInstance', 'TRTHeaderedClass', 'ChildClassInstances', 'TRTKnownHeader',
                     'DetectorCount', 'Channels', 'DetLayers', 'PPRTData', 'ReferenceFactor', 'ReferenceFactor2', 'ReferenceStdDev',
                     'ResponseFunction', 'ShiftData', 'Result', 'WindowLayers', 'WindowType', 'Type', 'Version', 'Size', 'Atom']
    params = ['Atom', 'XLine', 'AtomPercent', 'MassPercent', 'NetIntens', 'Background', 'Sigma']
    header_ext = []
    results_ext = []
    edx_spectra = None

    root = ET.parse(filepath).getroot()

    for elm in root.iter():
        if elm.tag == 'ClassInstance' and elm.get('Name') == 'Results':
            for child in elm.iter():
                if child.tag == 'Result':
                    results_ext.append([])
                elif child.tag in params:
                    if child.tag == 'Atom' and int(child.text) < 10:
                        # Zero-pad single-digit atomic numbers so that the
                        # string sort below follows the atomic number.
                        results_ext[-1].append((child.tag, f'0{child.text}'))
                    else:
                        results_ext[-1].append((child.tag, child.text))
                elif child.tag == 'ExtResults':
                    break
            # Nothing after the Results block is exported.
            break

        elif elm.tag not in excluded_base:
            header_ext.append((elm.tag, elm.text))
        elif elm.tag == 'Channels':
            edx_spectra = np.array([(i, int(counts)) for i, counts in enumerate(elm.text.split(','))])

    if edx_spectra is None:
        raise ValueError(f"No 'Channels' element found in '{filepath}'")

    with h5py.File(HDF5_path, "a") as f:
        file_group_path = f"EDX/Spectrum_({x_pos}, {y_pos})"
        f.create_group(file_group_path)

        spectra_data = f.create_group(f"{file_group_path}/Data")
        spectra_header = f.create_group(f"{file_group_path}/Header")
        spectra_results = f.create_group(f"{file_group_path}/Results")

        # The Header group is new, so tracking the names written here is
        # enough to avoid creating the same dataset twice.
        written_tags = set()
        for tag, text in header_ext:
            if text is not None and tag not in written_tags:
                spectra_header.create_dataset(f'{tag}', (1,), data=convertFloat(text.strip()))
                written_tags.add(tag)

        for results in sorted(results_ext):
            if not results:
                # A Result element without any known parameter has nothing to store.
                continue
            # The first collected value of a result is used as its atomic number.
            rgroup = spectra_results.create_group(f'Atomic Number: {results[0][1]}')
            for tag, text in results[1:]:
                rgroup.create_dataset(f'{tag}', (1,), data=convertFloat(text.strip()))

        # Let h5py take the shape from the data instead of assuming 4096 channels.
        spectra_data.create_dataset("Counts", data=edx_spectra)

    return 0

def writeHDF5_XRD(HDF5_path, filename, x_pos, y_pos):
    """Append one XRD pattern and its refinement results to the HDF5 file.

    Two text files (iso-8859-1) are read:

    - ``filename`` (a ``.ras`` file): lines starting with ``*HW`` / ``*MEAS``
      are header entries, lines rejected by ``is_excluded`` are ignored, any
      other ``*`` line triggers a warning, and every remaining line is read
      as a data row (its first two space-separated values),
    - the results file with the same path but a ``.lst`` extension: R
      coefficients (line starting with ``Rp=``), global parameters (lines
      starting with ``Q``) and, for each phase, the attributes listed in
      ``attrib_list`` plus the table of atomic positions.

    Everything is written under ``XRD/Areamap_({x_pos}, {y_pos})`` in three
    sub-groups: ``Data`` (dataset ``Counts``), ``Header`` (sub-groups
    ``Hardware`` and ``Measurement``) and ``Results`` (sub-groups
    ``R coefficients``, ``Global Parameters`` and ``Phases``).

    Parameters:
        HDF5_path: path of the HDF5 file, opened in append mode.
        filename: path of the ``.ras`` file.
        x_pos, y_pos: position values used to build the group name.

    Returns:
        0 on success.
    """
    # Kept as a tuple so it can be passed directly to str.startswith.
    attrib_list = ('SpacegroupNo=', 'HermannMauguin=', 'XrayDensity=', 'Rphase=', 'UNIT=', 'A=', 'B=', 'C=', 'k1=', 'k2=', 'B1=')
    header_ext_HW = []
    header_ext_MEAS = []
    xrd_pattern = []
    results_XRD = [['R coefficients'], ['Global Parameters'], ['Phases']]

    results_path = filename.replace('.ras', '.lst')
    # List collecting the values of the phase being parsed (None before the first phase).
    current_phase_entry = None

    with open(filename, 'r', encoding='iso-8859-1') as file:
        for line in file:
            if is_excluded(line):
                continue
            if line.startswith('*'):
                # Keep the key and the first quoted value of the line.
                formatted_line = line.strip().split('"')[0:2]
                if line.startswith('*HW'):
                    header_ext_HW.append(formatted_line)
                elif line.startswith('*MEAS'):
                    header_ext_MEAS.append(formatted_line)
                else:
                    print(f"[WARNING] Unsupported header attribut \'{line.split(' ')[0]}\' => Skipping")
            else:
                xrd_pattern.append([float(value) for value in line.split(' ')[0:2]])
    xrd_pattern = np.array(xrd_pattern)

    with open(results_path, 'r', encoding='iso-8859-1') as file:
        for line in file:
            if line.startswith('Rp='):
                for r_factor in line.split():
                    results_XRD[0].append(r_factor.strip().split('='))
            elif line.startswith('Q'):
                results_XRD[1].append(line.strip().split('='))
            elif line.startswith('Local parameters and GOALs for phase'):
                # The last word of the line is the name of the phase whose
                # refined parameters follow.
                current_phase_entry = [line.split()[-1]]
                results_XRD[2].append(current_phase_entry)
            elif current_phase_entry is None:
                # Phase-level lines met before any phase header have no phase
                # to be attached to.
                continue
            elif line.startswith(attrib_list):
                current_phase_entry.append(line.strip().split('='))
            elif line.startswith('Atomic positions for phase'):
                atomic_positions = ['Atomic positions']
                current_phase_entry.append(atomic_positions)
                # Skip the line that follows the title, then read rows until
                # the first blank line (or the end of the file).
                next(file, None)
                new_line = file.readline().split()
                while len(new_line) > 0:
                    atomic_positions.append(new_line)
                    new_line = file.readline().split()

    with h5py.File(HDF5_path, "a") as f:
        file_group_path = f"XRD/Areamap_({x_pos}, {y_pos})"
        f.create_group(file_group_path)

        spectra_data = f.create_group(f"{file_group_path}/Data")
        spectra_header = f.create_group(f"{file_group_path}/Header")
        spectra_results = f.create_group(f"{file_group_path}/Results")

        spectra_header_hw = spectra_header.create_group("Hardware")
        for elm in header_ext_HW:
            # Some hardware values look like "xxx|value": keep the part after
            # the first '|' when there is one.
            attrib = elm[1].split('|')
            elm_1 = attrib[1] if len(attrib) > 1 else attrib[0]
            if elm_1 != '-':
                spectra_header_hw.create_dataset(
                    f'{elm[0].replace("*HW_","").strip()}', (1,), data=convertFloat(elm_1.strip()))

        spectra_header_meas = spectra_header.create_group("Measurement")
        for elm in header_ext_MEAS:
            if elm[1] != '' and elm[1].strip() != '-':
                spectra_header_meas.create_dataset(
                    f'{elm[0].replace("*MEAS_","").strip()}', (1,), data=convertFloat(elm[1].strip()))

        # Let h5py take the shape from the data instead of assuming 5001 points.
        spectra_data.create_dataset("Counts", data=xrd_pattern)

        for section in results_XRD:
            section_name = section[0]
            sub_results = spectra_results.create_group(section_name)
            for entry in section[1:]:
                if section_name == 'Phases':
                    # entry = [phase name, [attribute, value...], ...]
                    sub_phase = sub_results.create_group(entry[0])
                    for attrb in entry[1:]:
                        # 'B1' is stored under the name 'W'.
                        attrb_name = 'W' if attrb[0] == 'B1' else attrb[0]
                        values = attrb[1:]
                        if len(values) > 0:
                            sub_phase.create_dataset(
                                f'{attrb_name}', np.shape(np.array(values)),
                                data=[convertFloat(value) for value in values])
                else:
                    sub_results.create_dataset(f'{entry[0]}', (1,), data=convertFloat(entry[1]))

    return 0

def writeHDF5_MOKE(HDF5_path, MOKE_path, filename, x_pos, y_pos):
    """Append one MOKE scan (header, raw data and results) to the HDF5 file.

    Files read (all iso-8859-1):

    - ``{MOKE_path}/info.txt``: the first two lines give the sample name and
      the date (text after the last '#'), the following lines are
      ``key=value`` entries; the value of the last one is used as the number
      of columns of the data sets,
    - ``filename`` and its siblings where 'magnetization' is replaced by
      'pulse' and 'sum': numeric tables read with ``readMokeDataFile``,
    - ``./results/MOKE/{sample name}_MOKE.dat``: a tab-separated header line
      followed by rows whose first two values are the position; the values of
      the row matching (x_pos, y_pos) are stored as results.

    Everything is written under ``MOKE/Scan_({x_pos}, {y_pos})`` in the
    sub-groups ``Data``, ``Header`` and ``Results``.

    Returns 0.
    """
    info_file = f'{MOKE_path}/info.txt'
    sample_name = MOKE_path.split('/')[-1]
    fit_file = f'./results/MOKE/{sample_name}_MOKE.dat'
    pulse_file = filename.replace('magnetization', 'pulse')
    sum_file = filename.replace('magnetization', 'sum')

    # --- header -------------------------------------------------------------
    header_MOKE = [["Sample name"], ["Date"]]
    with open(info_file, 'r', encoding='iso-8859-1') as info:
        for line_no, text in enumerate(info):
            if line_no in (0, 1):
                header_MOKE[line_no].append(text.split('#')[-1].strip())
            else:
                header_MOKE.append(text.strip().split('='))
    nb_aq = int(header_MOKE[-1][1])

    # --- raw data -----------------------------------------------------------
    with open(filename, 'r', encoding='iso-8859-1') as mag_file, \
            open(pulse_file, 'r', encoding='iso-8859-1') as pul_file, \
            open(sum_file, 'r', encoding='iso-8859-1') as refl_file:
        data_magnetization = readMokeDataFile(mag_file)
        data_pulse = readMokeDataFile(pul_file)
        data_reflectivity = readMokeDataFile(refl_file)

    # --- results for this position --------------------------------------------
    results_MOKE = []
    with open(fit_file, 'r', encoding='iso-8859-1') as fit:
        column_names = next(fit).split('\t')
        for row in fit:
            numbers = [round(float(token), 3) for token in row.split()]
            same_x = int(numbers[0]) == x_pos
            if same_x and int(numbers[1]) == y_pos:
                # Columns 0 and 1 hold the position; pair the others with their title.
                results_MOKE = [(column_names[col].strip(), value)
                                for col, value in enumerate(numbers[2:], start=2)]

    # --- HDF5 output ----------------------------------------------------------
    with h5py.File(HDF5_path, "a") as f:
        scan_path = f"MOKE/Scan_({x_pos}, {y_pos})"
        f.create_group(scan_path)

        data = f.create_group(f"{scan_path}/Data")
        moke_header = f.create_group(f"{scan_path}/Header")
        moke_results = f.create_group(f"{scan_path}/Results")

        for entry in header_MOKE:
            moke_header.create_dataset(entry[0], (1,), data=convertFloat(entry[1].strip()))
        for name, value in results_MOKE:
            moke_results.create_dataset(name, (1,), data=value)

        data.create_dataset("Magnetization", (len(data_magnetization), nb_aq), data=data_magnetization)
        data.create_dataset("Pulse", (len(data_pulse), nb_aq), data=data_pulse)
        data.create_dataset("Reflectivity", (len(data_reflectivity), nb_aq), data=data_reflectivity)

    return 0

def _is_selected(x_pos, y_pos, test, tt_x_pos, tt_y_pos):
    """Tell whether the position (x_pos, y_pos) has to be exported.

    In test mode only the position (tt_x_pos, tt_y_pos) is selected; otherwise
    a position is selected when |x| <= 40, |y| <= 40 and |x| + |y| <= 60.
    """
    if test:
        return x_pos == tt_x_pos and y_pos == tt_y_pos
    return abs(x_pos) + abs(y_pos) <= 60 and abs(x_pos) <= 40 and abs(y_pos) <= 40


def main(test=False, tt_x_pos=-30, tt_y_pos=20,
         HDF5_path='./NdFeB.hdf5',
         EDX_path='./data/EDX/2897_NdFeB',
         XRD_path='./data/XRD/2898_NdFeB_600-10s',
         MOKE_path='./data/MOKE/NdFeB-sq films/2898-NdFeB-10s600'):
    """Build the HDF5 file from the EDX, XRD and MOKE data folders.

    The HDF5 file is (re)created from scratch with the three top-level groups
    ``EDX``, ``XRD`` and ``MOKE``; each data folder is then scanned and every
    file whose position is selected (see ``_is_selected``) is exported with
    the matching ``writeHDF5_*`` function. The names of the top-level groups
    and their number of entries are printed at the end.

    Parameters:
        test: when True, only the position (tt_x_pos, tt_y_pos) is exported.
        tt_x_pos, tt_y_pos: position exported in test mode.
        HDF5_path: output file; an existing file is overwritten.
        EDX_path: folder holding the ``.spx`` files; the indices are read
            from the ``(i,j)`` part of each file name.
        XRD_path: folder holding the ``Areamap*.ras`` files; the indices are
            read from the last ``_``-separated field (first three characters
            for x, the rest for y).
        MOKE_path: folder holding the ``*magnetization.txt`` files; the
            position is read from the ``x...`` and ``y...`` fields of each
            file name.

    Returns:
        0 on success.
    """
    # Mode "w" truncates any existing file so every run starts from a clean file.
    with h5py.File(HDF5_path, "w") as f:
        f.create_group("EDX")
        f.create_group("XRD")
        f.create_group("MOKE")

    # EDX and XRD file names carry 1-based grid indices; they are converted
    # into positions with these steps and origins.
    step_x, step_y = 5, 5
    start_x, start_y = -40, -40

    edx_files = sorted(name for name in os.listdir(EDX_path) if name.endswith('.spx'))
    for elm in tqdm(edx_files):
        filepath = f"{EDX_path}/{elm}"
        x_idx, y_idx = elm.split('.spx')[0].split('(')[-1].split(')')[0].split(',')
        x_pos, y_pos = (int(x_idx)-1)*step_x+start_x, (int(y_idx)-1)*step_y+start_y

        if _is_selected(x_pos, y_pos, test, tt_x_pos, tt_y_pos):
            writeHDF5_EDX(HDF5_path, filepath, x_pos, y_pos)

    xrd_files = sorted(name for name in os.listdir(XRD_path)
                       if name.startswith('Areamap') and name.endswith('.ras'))
    for elm in tqdm(xrd_files):
        filepath = f"{XRD_path}/{elm}"
        indexes = elm.split('.ras')[0].split('_')[-1]
        x_idx, y_idx = indexes[0:3], indexes[3:]
        x_pos, y_pos = (int(x_idx)-1)*step_x+start_x, (int(y_idx)-1)*step_y+start_y

        if _is_selected(x_pos, y_pos, test, tt_x_pos, tt_y_pos):
            writeHDF5_XRD(HDF5_path, filepath, x_pos, y_pos)

    moke_files = sorted(name for name in os.listdir(MOKE_path) if name.endswith('magnetization.txt'))
    for elm in tqdm(moke_files):
        filepath = f"{MOKE_path}/{elm}"
        indexes = elm.split('_')[1:3]
        # MOKE file names already carry the position itself, not grid indices.
        x_pos, y_pos = int(float(indexes[0].split('x')[-1])), int(float(indexes[1].split('y')[-1]))

        if _is_selected(x_pos, y_pos, test, tt_x_pos, tt_y_pos):
            writeHDF5_MOKE(HDF5_path, MOKE_path, filepath, x_pos, y_pos)

    # Quick summary: top-level groups and how many entries each one received.
    with h5py.File(HDF5_path, "r") as f:
        print(f.keys())
        print([len(list(f[key])) for key in f.keys()])
    return 0

# Run the export. The commented call below runs main in test mode instead,
# which only exports the position x=20, y=-15:
#main(True, 20, -15);
main();

**@end-of-notebook**