In [None]:
##Processing XRF MCA files into an HDF5 file
#This is intended to process a collection of point spectra from an Amptek SDD detector.
#The image shape is determined by the file name.
#Matthew Clarke, National Museum of Asian Art, Smithsonian Institution, 2025-12

In [None]:
#Import libraries
import os
import fnmatch
import csv
import h5py
import re
import numpy as np
import matplotlib.pyplot as plt

In [None]:
#Set folder and output file
#The path is a raw string (r"...") so every backslash is taken literally; in a normal
#string, sequences such as \t or \n inside a Windows path would be read as escape codes.
os.chdir(r"D:\Processing-XRF\Datafolder") #set the location of the mca files
exportfilename='OutputFileName.hdf5' #name of the HDF5 file written into that folder
cwd = os.getcwd()
print("Current working directory:", cwd) #this is a check to ensure the directory is selected

In [None]:
#Create the (x, y, channel) data cube and fill it from the MCA point spectra
N_CHANNELS = 2048         #channels per spectrum
FIRST_DATA_LINE = 12      #0-based index of the first count line (line 13 of the file)
SPECTRUM_DTYPE = 'int16'  #16-bit seems fine for single spectrum.


def mca_grid_position(mca_name):
    """Return the 1-based (x, y) grid position encoded in an MCA file name.

    The position is read from the last two groups of digits in the name.

    Raises:
        ValueError: if the name holds fewer than two groups of digits, or if
            either position is smaller than 1 (index -1 would otherwise wrap
            around to the far edge of the image).
    """
    digit_groups = re.findall(r'\d+', mca_name)
    if len(digit_groups) < 2:
        raise ValueError("Cannot read an x/y position from file name: " + mca_name)
    xpos, ypos = int(digit_groups[-2]), int(digit_groups[-1])
    if xpos < 1 or ypos < 1:
        raise ValueError("Positions must start at 1, got (%d, %d) in: %s" % (xpos, ypos, mca_name))
    return xpos, ypos


def read_mca_counts(mca_path):
    """Return the N_CHANNELS integer counts stored in one MCA file.

    Rows before FIRST_DATA_LINE are skipped as header; the first field of each
    of the following N_CHANNELS rows is converted to int.

    Raises:
        ValueError: if the file holds fewer than N_CHANNELS data rows.
    """
    counts = []
    with open(mca_path, newline='') as mca_file:
        for line_index, row in enumerate(csv.reader(mca_file)):
            if line_index < FIRST_DATA_LINE:
                continue
            if line_index >= FIRST_DATA_LINE + N_CHANNELS:
                break
            counts.append(int(row[0]))
    if len(counts) != N_CHANNELS:
        raise ValueError("Expected %d channels but found %d in: %s" % (N_CHANNELS, len(counts), mca_path))
    return counts


#Collect every MCA file once; sorted only to make the processing order repeatable.
mca_files = sorted(name for name in os.listdir('.') if fnmatch.fnmatch(name, '*.mca'))
if not mca_files:
    raise FileNotFoundError("No .mca files found in " + os.getcwd())

#The image size is the largest x and y over ALL files. os.listdir gives no ordering
#guarantee, so the "last" file is not necessarily the far corner of the map.
positions = {name: mca_grid_position(name) for name in mca_files}
xnum = max(pos[0] for pos in positions.values())
ynum = max(pos[1] for pos in positions.values())
print(xnum,ynum)

#now pull XRF data
count_limits = np.iinfo(np.dtype(SPECTRUM_DTYPE))
with h5py.File(exportfilename, "w") as f:
    dset = f.create_dataset("sxrfdata", (xnum,ynum,N_CHANNELS), dtype=SPECTRUM_DTYPE)
    for mca_name, (xcurr, ycurr) in positions.items():
        counts = read_mca_counts(mca_name)
        #Fail loudly instead of letting large counts wrap around in the stored integers.
        if max(counts) > count_limits.max or min(counts) < count_limits.min:
            raise OverflowError("Counts in %s do not fit in %s; use a wider SPECTRUM_DTYPE" % (mca_name, SPECTRUM_DTYPE))
        dset[xcurr-1,ycurr-1,:] = counts #file names are 1-based, array indices 0-based
print('complete')

##Below is for viewing data, or reformatting data for other purposes

In [None]:
#Use spectral module to examine the hdf5 file
%matplotlib qt 
import numpy as np
import matplotlib.pyplot as plt
from spectral import *
import wx
import OpenGL

In [None]:
#Read the whole spectral cube from the HDF5 file into memory
with h5py.File(exportfilename, mode="r") as f:
    img = f["sxrfdata"][...]

In [None]:
#Display three channels of the cube; these channels are close to Pb, Fe, Ca
view = imshow(img, bands=(647, 433, 247))

In [None]:
#Create a sum spectrum text file readable in ARTAX software. Elin and Eabs must be determined for energy calibration.
artaxout='Artax-fileout' #base name of the text file; ".txt" is appended below
#Sum over both position axes for every channel at once. The accumulator is int64 so the
#int16 pixel values cannot overflow; the result is kept as float so values are written
#in the same "123.0" form as before.
sumchannel = img.sum(axis=(0, 1), dtype=np.int64).astype(float)
#newline='' stops Python translating line endings a second time, so each line ends
#with exactly one CRLF on every platform.
with open(artaxout+'.txt', mode='w', newline='') as f:
    f.write('BeginHeader'+'\r\n')
    f.write('Elin=0.0149 Eabs=0.0154'+'\r\n')
    f.write('Fano=0.11 FWHM=151' +'\r\n')
    f.write('EndHeader' +'\r\n')
    for channel_sum in sumchannel: #every channel, including the last one
        f.write(str(channel_sum)+'\r\n')