# Load and examine a dataset
- sample by HW, measurement by CO
- high-pressure torsion sample, Ni65Cu35

### Acknowledgements

The following was created on the basis of the tutorial provided by the py4DSTEM instructor team:
- Ben Savitzky (bhsavitzky@lbl.gov)
- Steve Zeltmann (steven.zeltmann@berkeley.edu)
- Stephanie Ribet (sribet@u.northwestern.edu)
- Alex Rakowski (arakowski@lbl.gov)
- Colin Ophus (clophus@lbl.gov)

# Set environment: Load the packages
- for details on how to connect to the cluster and set up environments visit paullobpreis.com (pw:4DSTEM)

In [1]:
# Packages: `time` from the standard library and py4DSTEM as the main tool
import time

import py4DSTEM

# Date stamp (YYYYMMDD) used to build simple timestamps for easier version control
timestr = time.strftime("%Y%m%d")

# Leaving the version as the last expression makes the notebook display the version in use
py4DSTEM.__version__

'0.14.15'

# Load the dataset

In [2]:
# Location of the .dm4 file in a workspace on OMNI or XNAS.
# The directory ('dirpath') and the file name are kept separate, which makes it
# easier to add more paths from the same directory later.
import os

dirpath = "/fast/ws-tmp/g031362-data/high_pressure_torsion/lamC/ROI3/"

# os.path.join only inserts a separator when 'dirpath' does not already end
# with one, so the resulting path is valid with or without the trailing slash.
filepath_data = os.path.join(
    dirpath,
    'COPL_Ni65Cu35_C_ROI3_240827_aper_50_conv_1.5_spot_6_CL_47_stepsize_10_r_x_178_r_y_186_GIF_512x512.dm4',
)

In [3]:
# Read the .dm4 file specified above into a datacube.
# `import_file` is the py4DSTEM loader for non-native file formats; files that
# were originally written by py4DSTEM are opened with the `read` function instead.
datacube = py4DSTEM.import_file(filepath_data)

In [4]:
# Evaluating the datacube on its own reveals the 4-dimensional array of real space and diffraction space for the sample

datacube

DataCube( A 4-dimensional array of shape (186, 178, 512, 512) called 'dm_dataset',
          with dimensions:

              Rx = [0.0,0.010716622695326805,0.02143324539065361,...] µm
              Ry = [0.0,0.010716622695326805,0.02143324539065361,...] µm
              Qx = [0.0,0.0072991721332073215,0.014598344266414643,...] A^-1
              Qy = [0.0,0.0072991721332073215,0.014598344266414643,...] A^-1
)

# Calibrate pixel size and unit

In [5]:
# Shape-related properties of the datacube, printed one per line:
# the underlying data array, the datacube itself, `Rshape` and `Qshape`
print(
    datacube.data.shape,
    datacube.shape,
    datacube.Rshape,
    datacube.Qshape,
    sep="\n",
)

(186, 178, 512, 512)
(186, 178, 512, 512)
(186, 178)
(512, 512)


In [6]:
# The datacube includes one calibration vector per dimension, built from any calibrations that could be retrieved from the file

# dimension vectors - shown for the first and the third dimension
for dim_header, dim_index in (('The first dimension:', 0), ('The third dimension:', 2)):
    print(dim_header)
    print(f'  - dimension name: {datacube.dim_names[dim_index]}')
    print(f'  - dimension units: {datacube.dim_units[dim_index]}')
    # the `[:10]` slice limits the display to the first 10 entries
    print(f'  - dim vector: {datacube.dims[dim_index][:10]}')
    print()

# pixel sizes - size and unit for diffraction space (Q) and real space (R)
qpix, qpixunit = (
    datacube.calibration.get_Q_pixel_size(),
    datacube.calibration.get_Q_pixel_units(),
)
rpix, rpixunit = (
    datacube.calibration.get_R_pixel_size(),
    datacube.calibration.get_R_pixel_units(),
)
print()
print(f"The diffraction space pixels are each {qpix:.4f} {qpixunit}")
print(f"The real space pixels are each {rpix:.4f} {rpixunit}")

The first dimension:
  - dimension name: Rx
  - dimension units: µm
  - dim vector: [0.         0.01071662 0.02143325 0.03214987 0.04286649 0.05358311
 0.06429974 0.07501636 0.08573298 0.0964496 ]

The third dimension:
  - dimension name: Qx
  - dimension units: A^-1
  - dim vector: [0.         0.00729917 0.01459834 0.02189752 0.02919669 0.03649586
 0.04379503 0.0510942  0.05839338 0.06569255]


The diffraction space pixels are each 0.0073 A^-1
The real space pixels are each 0.0107 µm


In [7]:
# The complete list of calibrations is located here; the vectors shown above are derived from these values

datacube.calibration

Calibration( A Metadata instance called 'calibration', containing the following fields:

             Q_pixel_size:    0.0072991721332073215
             R_pixel_size:    0.010716623
             Q_pixel_units:   A^-1
             R_pixel_units:   µm
             QR_flip:         False
)

In [8]:
# The real space pixel size can be overridden here if the value extracted from the .dm4 file is missing or wrong.
# NOTE(review): the calibration displayed before this cell already holds R_pixel_size = 0.010716623 µm (about 10.7 nm),
# and the file name contains "stepsize_10". The 5 nm set below looks like a placeholder value - confirm the actual
# distance between beam positions before relying on it.

datacube.calibration.set_R_pixel_size(5)
datacube.calibration.set_R_pixel_units('nm')

# and print the newly calibrated values with:
datacube.calibration

# the values will be automatically updated in the datacube

Calibration( A Metadata instance called 'calibration', containing the following fields:

             Q_pixel_size:    0.0072991721332073215
             R_pixel_size:    5
             Q_pixel_units:   A^-1
             R_pixel_units:   nm
             QR_flip:         False
)

# Save the modified/unmodified data
- Now we set a new filepath consisting of the old path plus a suffix indicating our processing
- from the name alone it should be clear whether we preprocessed/filtered/binned
- additionally, I implemented timestr so the date can be included (optional; see the commented-out line in the cell below)

In [9]:
# The data is saved in the .h5 file format.
# The save path is the input path without its extension, plus a suffix describing the processing state.

from os.path import splitext

# Optional variant that additionally appends the date stamp:
# filepath_save = f"{splitext(filepath_data)[0]}_preprocessed_unfiltered_no_bin_{timestr}.h5"
filepath_save = f"{splitext(filepath_data)[0]}_preprocessed_unfiltered_no_bin.h5"

# show the new filepath
print(filepath_save)

/fast/ws-tmp/g031362-data/high_pressure_torsion/lamC/ROI3/COPL_Ni65Cu35_C_ROI3_240827_aper_50_conv_1.5_spot_6_CL_47_stepsize_10_r_x_178_r_y_186_GIF_512x512_preprocessed_unfiltered_no_bin.h5


In [10]:
# Write the datacube to the .h5 path set above; mode 'o' is the 'overwrite' mode
py4DSTEM.save(filepath_save, datacube, mode='o')

# Inspect the resulting HDF5 file
- first we want to see where the data lives without opening it
- secondly we check which default name was assigned

In [11]:
# 'dm_dataset' and 'dm_dataset_root' are placeholder names; we could assign different names later

# print the tree of the saved HDF5 file
py4DSTEM.print_h5_tree(filepath_save)

# the last expression displays the name currently assigned to the datacube
datacube.name

/
|---dm_dataset_root
    |---dm_dataset




'dm_dataset'