In [1]:
#This script checks the folder where 4D-STEM data is being saved (user input, e.g. \\e02-storage\data\2018\em19064-2\Merlin). If there
#is a new dataset (time-stamped folders are assumed to appear), it saves the following files into the processing folder of the
#corresponding visit, e.g. \\e02-storage\data\2018\em19064-2\processing:
# full data in hdf5 format
# data binned by 4 (on diff plane) in hdf5 format
# incoherent bright-field image (ibf) TIFF
# sum diffraction pattern image TIFF

# Note that if the time-stamped folder already exists in the Processing path, this code assumes that the conversion has
# already been done!

#TO DO:
#Add peak finding routine
#Ask User if they want ADF / BF images also saved
#How to handle half-written files when automated?


In [1]:
%matplotlib tk
#This won't work with cluster
import hyperspy.api as hs
#import matplotlib 
#%matplotlib qt5
#change to qt5 for the installed version on DLS Linux
import numpy as np
from math import floor
import pylab as plt
import matplotlib.pyplot as plt
import os
import time
import sys
from pathlib import Path



In [2]:
# Ask for the acquisition folder, locate the session (visit) folder among its path
# components and make sure a sibling "processing" folder exists inside that session folder.
acquisition_folder = input("Enter the Medipix acquisition folder path (use time-stamping): ")
print('******************************')
if not os.path.exists(acquisition_folder):
    raise FileNotFoundError("We did not find the folder at, "+str(acquisition_folder))

acquisition_path = Path(acquisition_folder)

# The session folder is the first path component starting with 'em' or 'cm'
# (e.g. em19064-2). Taking the first match keeps a user-named sub-folder further
# down the tree (e.g. "cmos_test") from being mistaken for the session folder.
session_index = next(
    (index for index, part in enumerate(acquisition_path.parts)
     if part.startswith(('em', 'cm'))),
    None,
)
if session_index is None:
    # Fail here with a clear message instead of a NameError on processing_folder later on.
    raise ValueError("No session folder (name starting with 'em' or 'cm') found in: "
                     + str(acquisition_folder))

print('Found session folder: ', acquisition_path.parts[session_index])

# Rebuild the path up to and including the session folder. This also works when the
# session folder is the last component of the acquisition path.
session_path = Path(*acquisition_path.parts[:session_index + 1])

# Kept as a plain string because later cells build paths from it by string operations.
# Joining through pathlib uses the separator of the running platform.
processing_folder = str(session_path / 'processing')

# Create the folder if it is missing; the working directory is deliberately left
# untouched so that a relative acquisition path stays valid in later cells.
os.makedirs(processing_folder, exist_ok=True)

print('Processed data will be saved at: ', processing_folder)
print('******************************')

Enter the Medipix acquisition folder path (use time-stamping): P:\data\2018\em19064-2\Merlin\Medipix
******************************
Found session folder:  em19064-2
Processed data will be saved at:  P:\data\2018\em19064-2\processing
******************************


In [3]:
def reshape_4DSTEM_data(data, plot_sum = False):
    """
    Reshapes the lazy-imported stack of dimensions: (xxxxxx|256, 256) to the correct scan pattern shape: (x, y | 256,256)
    It utilises the over-exposed fly-back frame to identify the start of the lines in the first ~10 scan lines,
    checks line length consistency and finds the number of frames to skip at the beginning (this number is printed out as
    string output).
    Note that this code crops the over-exposed flyback pixel on the LHS of the frame.

    The detector (signal) shape is taken from the data itself, so frames other than 256 x 256 are reshaped as well.

    Parameters
    ----------
    data : hyperspy lazily imported mib file with dimensions of: framenumbers|256, 256
    plot_sum : bool, optional
        If true, plots the per-frame sum intensity of the inspected frames next to the detected peaks.

    Returns
    -------
    data_reshaped : reshaped data (x, y | 256,256)
        If fewer than two fly-back frames are detected, or the detected line lengths are not
        consistent, a message is printed and the input `data` is returned unchanged.
    """
    # Inspect roughly the first 10 scan lines (line length is estimated as sqrt(number of frames),
    # i.e. a square scan is assumed). The built-in int() is used because the np.int alias
    # was removed in NumPy 1.24.
    n_probe_frames = int(10 * np.sqrt(data.axes_manager[0].size))
    data_crop = data.inav[0:n_probe_frames]
    intensity_array = data_crop.T.sum().data #summing over patterns
    intensity_array = intensity_array.compute() #out of lazy

    #Checking for local maxima to be more than 10 times the neighbouring elements
    local_max = (np.r_[True, intensity_array[1:] > 10* intensity_array[:-1]]
            & np.r_[intensity_array[:-1] > 10* intensity_array[1:], True])

    if plot_sum:
        # Imported here so that the function can be used without a plotting backend
        # unless a plot is actually requested.
        import matplotlib.pyplot as plt

        fig1 = plt.figure()
        ax1 = fig1.add_subplot(121)
        ax2 = fig1.add_subplot(122)

        ax1.plot(intensity_array, 'k')
        ax2.plot(local_max, 'b')

        ax1.set_title('sum intensity of first 2pct of frames')
        ax2.set_title('peaks detected')

    peaks = np.ravel(np.where(local_max))
    lines = np.ediff1d(peaks) #Diff between consecutive elements of the array
    if lines.size == 0:
        # Fewer than two fly-back frames were found, so no line length can be measured.
        print('Could not detect the scan line length - data is returned without reshaping!')
        return data

    line_len = int(lines[-1]) # Assuming the last element to be the line length
    check = np.ravel(np.where(lines == line_len)) #Checking line lengths

    # All intervals equal to the line length must be consecutive; otherwise a line was mis-detected.
    if not np.all(np.ediff1d(check) == 1):
        print('Line lengths are not consistent! Soemthing went wrong!')
        return data

    skip_ind = int(peaks[check[0]]) #number of frames to skip at the beginning

    n_lines = (data.data.shape[0] - skip_ind) // line_len #Number of complete lines
    data_skip = data.inav[skip_ind:skip_ind + (n_lines * line_len)] #with the skipped frames removed

    # Split the frame axis into (lines, frames per line) and keep the detector axes as they are.
    detector_shape = tuple(data_skip.data.shape[1:])
    data_skip.data = data_skip.data.reshape((n_lines, line_len) + detector_shape)
    # Add a second navigation axis so that the axes manager matches the new data rank.
    data_skip.axes_manager._axes.insert(0, data_skip.axes_manager[0].copy())
    data_skip.get_dimensions_from_data() #reshaped

    print('Number of frames skipped at the beginning: ', skip_ind)
    # Drop the first navigation column, which holds the over-exposed fly-back frames.
    data_skip = data_skip.inav[1:]
    return data_skip

In [None]:
##converts all mib files in folder to hdf5 and saves binned data and incoherent BF image
# Every sub-folder of the acquisition folder is treated as one dataset. For each .hdr file
# found below it, the data is loaded lazily, reshaped to the scan grid and written to
# <processing_folder>/<dataset folder> as: full hdf5, binned hdf5, ibf TIFF and sum diffraction TIFF.
# All paths are built with os.path.join and passed explicitly, so the working directory is
# never changed and the separators are right on both Windows and Linux.
for dirName in os.listdir(acquisition_folder):
    dataset_dir = os.path.join(acquisition_folder, dirName)
    if not os.path.isdir(dataset_dir):
        # Stray files next to the time-stamped folders are not datasets.
        continue

    time0 = time.time()
    print('Currently active in this directory: %s' % dirName)
    output_dir = os.path.join(processing_folder, dirName)

    for root, dirs, files in os.walk(dataset_dir, topdown = False):
        for file_name in files:
            if not file_name.endswith('.hdr'):
                continue
            print(file_name)
            print(dataset_dir)

            #If the folder already exists assumes the hdf5 is already saved and moves on
            if os.path.exists(output_dir):
                print('\n ******* ' + dirName + ' Folder already exists! *********')
                continue

            try:
                # Load from the folder in which os.walk found the file; it may be a
                # sub-folder of the dataset folder.
                dp = hs.load(os.path.join(root, file_name), lazy = True)
            except ValueError:
                print('Something went wrong during acquisition!!!')
                break

            print('loaded to hyperspy')
            # checks to see if it is a multi-frame data before reshaping
            if not any(dp.axes_manager.navigation_axes):
                continue

            dp = reshape_4DSTEM_data(dp)
            print('Data reshaped to: '+ str(dp.axes_manager.navigation_shape))
            #TODO peak finding + individual DF
            # NOTE: the folder is created before anything is written, so a failed save still
            # marks this dataset as processed on the next run.
            os.mkdir(output_dir)
            base_name = file_name.rpartition('.')[0]
            #Only save if the dp object is from the right file! - Removing the extensions
            if dp.metadata.General.original_filename.rpartition('.')[0] == base_name:
                print('Saving the hdf5 version of '+ file_name)
                dp.save(os.path.join(output_dir, file_name), extension = 'hdf5')
            #Only save dp_bin and ibf if they exist! (not the case for single frame data)
            try:
                print('Writing binned hdf5')
                #making a binned version
                dp_bin = dp.rebin(scale = (1,1,4,4))
                dp_bin.compute()
                dp_bin.save(os.path.join(output_dir, base_name + '_bin'), extension = 'hdf5')
                print('Writing ibf tiff')
                #making an ibf image
                ibf = dp_bin.sum(axis = dp_bin.axes_manager.signal_axes)
                ibf = ibf.T
                ibf.save(os.path.join(output_dir, base_name + '_ibf'), extension = 'tiff')
                print('Writing sum diffraction tiff')
                # sum dp image
                sum_dp = dp.sum(axis=dp.axes_manager.navigation_axes)
                sum_dp.save(os.path.join(output_dir, base_name + '_dp'), extension = 'tiff')
            except(NameError, ValueError):
                print('Something went wrong - perhaps data is a single frame!')

    # Reported once per dataset folder (time0 is reset per folder), not once per walked sub-folder.
    print('Total time elapsed (seconds): ', int(time.time() - time0))


Currently active in this directory: 20180727 095242
A5V33dT6-9cmCL-400kx.hdr
P:\data\2018\em19064-2\Merlin\Medipix\20180727 095242

 ******* 20180727 095242 Folder already exists! *********
Total time elapsed (seconds):  0
Currently active in this directory: 20180727 150054
G1HSA6iT6-9cmCL-4Mx.hdr
P:\data\2018\em19064-2\Merlin\Medipix\20180727 150054
loaded to hyperspy
Number of frames skipped at the beginning:  673
Data reshaped to: (511, 511)
Saving the hdf5 version of G1HSA6iT6-9cmCL-4Mx.hdr
Writing binned hdf5
[########################################] | 100% Completed |  6min  0.4s
Writing ibf tiff
Writing sum diffraction tiff
