In [107]:
import h5py
import numpy as np
import pyUSID as usid
import matplotlib.pyplot as plt
import sidpy
import re

In [120]:
_end_tags = dict(grid = ':HEADER_END:',
                 scan = ':SCANIT_END:',
                 spec = '[DATA]'
                 )
file_path = '/Users/hunfen/OneDrive/General Files/STM1500_Nanonis_data/2020/2020-10-12/Topography010.sxm'

In [121]:
def header_reader(fname):
    '''
    Read the sxm file header into dictionary. Header entries
    as dict keys, and header contents as dict values.

    Parameters
    ----------------
    fname : .sxm file path.

    Return
    ----------------
    header : header dict file. 
    '''
    header_end = False # file_handler
    key = '' # Header_dict_key
    contents = '' # Header_data_buffer
    header = {} # Header_dict
    #Nanonis_header_regex = ':\w+([_>-]\w+[/ ]?\w+:)?'
    with open(fname, 'rb') as f:
        while not header_end:
            line = f.readline().decode(encoding = 'utf-8', errors = 'replace')
            if re.match(':SCANIT_END:\n',
                        line
                        ):
                header_end = True
            elif re.match(':.+:',
                          line
                          ):
                key = line[1:-2] # Read header_entry
                content = ''     # Clear 
            else:
                content += line
                # remove EOL
                header[key] = content.strip('\n') 
    return header

In [122]:
header_reader(file_path)

{'NANONIS_VERSION': '2',
 'SCANIT_TYPE': '              FLOAT            MSBFIRST',
 'REC_DATE': ' 12.10.2020',
 'REC_TIME': '19:33:52',
 'REC_TEMP': '      290.0000000000',
 'ACQ_TIME': '       511.2',
 'SCAN_PIXELS': '       256       256',
 'SCAN_FILE': 'C:\\Users\\Touru Hirahara\\Desktop\\data\\2020\\2020-10-12\\Topography010.sxm',
 'SCAN_TIME': '             9.984E-1             9.984E-1',
 'SCAN_RANGE': '           5.000000E-8           5.000000E-8',
 'SCAN_OFFSET': '             1.856870E-7         1.155498E-7',
 'SCAN_ANGLE': '            0.000E+0',
 'SCAN_DIR': 'down',
 'BIAS': '            2.000E+0',
 'Z-CONTROLLER': '\tName\ton\tSetpoint\tP-gain\tI-gain\tT-const\n\tlog Current\t1\t1.000E-10 A\t3.500E-12 m\t2.200E-7 m/s\t1.591E-5 s',
 'COMMENT': 'Si 7x7',
 'Scan>Scanfield': '185.687E-9;115.55E-9;50E-9;50E-9;0E+0',
 'Scan>series name': 'Topography',
 'Scan>channels': 'Current (A);Z (m)',
 'Scan>pixels/line': '256',
 'Scan>lines': '256',
 'Scan>speed forw. (m/s)': '50.0801E-9',

In [126]:
# Header reform
def header_reform(header):
    """ 
    Reform the header which is obtained from NANONIS .sxm file.

    Parameter
    ---------
    header : header dict

    Returns
    -------
    header : reformed header dict
    """
    # HEADER_CLASSIFICATION
    trash_bin = ['NANONIS_VERSION',
                 'SCANIT_TYPE',
                 'REC_TEMP',
                 'SCAN_PIXELS',
                 'SCAN_TIME',
                 'SCAN_RANGE',
                 'SCAN_OFFSET',
                 'SCAN_ANGLE',
                 'Scan>channels'
                 ]
    scan_info_str = ['REC_DATE',
                     'REC_TIME',
                     'SCAN_FILE',
                     'SCAN_DIR',
                     'COMMENT'
             ]
    scan_info_float = ['BIAS',
                       'ACQ_TIME',
                       'Scan>pixels/line',
                       'Scan>lines',
                       'Scan>speed forw. (m/s)',
                       'Scan>speed backw. (m/s)'
                       ]
    table = ['Scan>Scanfield',
             'Z-CONTROLLER',
             'DATA_INFO'
             ]
    scan_field_key = ['X_OFFSET',
                      'Y_OFFSET',
                      'X_RANGE',
                      'Y_RANGE',
                      'ANGLE'
                      ]
    # Clear redundant header entries
    for i in range(len(trash_bin)):
        header.pop(trash_bin[i])

    # Clear redundant space in scan_info_str
    for j in range(len(scan_info_str)):
        header[scan_info_str[j]] = header[scan_info_str[j]].strip(' ')
    
    # Transform scan_info_float from str to float
    for k in range(len(scan_info_float)):
        header[scan_info_float[k]] = float(header[scan_info_float[k]])

    # Transform table from str to dict
    # SCAN_FIELD
    scan_field = header['Scan>Scanfield'].split(';')

    #  SCAN_FIELD dict
    SCAN_FIELD = {}
    for k in range(len(scan_field_key)):
        SCAN_FIELD[scan_field_key[k]] = float(scan_field[k])

    # CHANNEL_INFO
    data_info = header['DATA_INFO'].split('\n')
    DATA_INFO = []
    for row in data_info:
        DATA_INFO.append(row.strip('\t').split('\t'))
    # CHANNEL_INFO dict
    key_list = DATA_INFO[0][1:]
    channels = []
    values = []
    CHANNEL_INFO = {}
    for i in range(1, len(DATA_INFO)):
        channels.append(DATA_INFO[i][0])
        values.append(DATA_INFO[i][1:])
    for i in range(len(channels)):
        chan_dict = {}
        for j in range(len(key_list)):
            chan_dict[key_list[j]] = values[i][j]
        CHANNEL_INFO[channels[i]] = chan_dict
    # Z_CONTROLLER_INFO
    Z_Controller = header['Z-CONTROLLER'].split('\n')
    Controller_config = []
    for row in Z_Controller:
        Controller_config.append(row.strip('\t').split('\t'))
    # CONTROLLER_INFO dict
    CONTROLLER_INFO = {}
    for i in range(len(Controller_config[0])):
        CONTROLLER_INFO[Controller_config[0][i]] = Controller_config[1][i]

    # Substitute table dict
    for j in range(len(table)):
        header.pop(table[j])
    header['SCAN_FILED'] = SCAN_FIELD
    header['CONTROLLER_INFO'] = CONTROLLER_INFO
    header['CHANNEL_INFO'] = CHANNEL_INFO
    return header

In [127]:
# Extract file header of .sxm file
sxm_header = header_reform(header_reader(file_path))

In [141]:
sxm_header

{'REC_DATE': '12.10.2020',
 'REC_TIME': '19:33:52',
 'ACQ_TIME': 511.2,
 'SCAN_FILE': 'C:\\Users\\Touru Hirahara\\Desktop\\data\\2020\\2020-10-12\\Topography010.sxm',
 'SCAN_DIR': 'down',
 'BIAS': 2.0,
 'COMMENT': 'Si 7x7',
 'Scan>series name': 'Topography',
 'Scan>pixels/line': 256.0,
 'Scan>lines': 256.0,
 'Scan>speed forw. (m/s)': 5.00801e-08,
 'Scan>speed backw. (m/s)': 5.00801e-08,
 'SCAN_FILED': {'X_OFFSET': 1.85687e-07,
  'Y_OFFSET': 1.1555e-07,
  'X_RANGE': 5e-08,
  'Y_RANGE': 5e-08,
  'ANGLE': 0.0},
 'CONTROLLER_INFO': {'Name': 'log Current',
  'on': '1',
  'Setpoint': '1.000E-10 A',
  'P-gain': '3.500E-12 m',
  'I-gain': '2.200E-7 m/s',
  'T-const': '1.591E-5 s'},
 'CHANNEL_INFO': {'14': {'Name': 'Z',
   'Unit': 'm',
   'Direction': 'both',
   'Calibration': '1.291E-8',
   'Offset': '0.000E+0'},
  '0': {'Name': 'Current',
   'Unit': 'A',
   'Direction': 'both',
   'Calibration': '1.000E-10',
   'Offset': '0.000E+0'}}}

In [136]:
a = sxm_header['CHANNEL_INFO']
a.keys()

dict_keys(['14', '0'])

In [114]:
def data_type(dstr):
    '''
    to determine the type of data, and transform to proper fomation.
    
    Parameter
    --------------
    dstr : string read from file_header.

    Return
    --------------
    data : transformed data.
    '''

    scinot = '[+-]?\d+(?:\.\d+)[eE][+-]?\d{1,2}'
    digits = '[+-]?[0-9]*[.]{0,1}[0-9]+'
    #integer = '(\s\d+\s)'

    # Scientific notation?
    if re.search(scinot, dstr) != None: # if Scientific notation ?
        return re.findall(scinot, dstr)
    elif re.search(digits, dstr) != None: # if digits?
        return re.findall(digits, dstr)
    else:
        return dstr

In [115]:
def channels_counts(channel_info_dict):
    chal_counts = 0
    for chal in channel_info_dict.keys():
        if channel_info_dict[chal]['Direction'] == 'both':
            chal_counts += 2
        else:
            chal_counts += 1
    return chal_counts

In [116]:
num_rows = int(sxm_header['Scan>lines'])
num_cols = int(sxm_header['Scan>pixels/line'])
num_pos = num_cols * num_rows
channel_length = channels_counts(CHANNEL_INFO)

y_qty = 'Y'
y_units = 'm'
y_vec = np.linspace(,num_rows, endpoint = True)

SyntaxError: invalid syntax (<ipython-input-116-3695b047f1b7>, line 8)

In [137]:
a = (2, 2, 256, 256)

In [138]:
print(a)

(2, 2, 256, 256)


In [165]:
def channels_counts(header):
    '''
    Determine the dimensions of raw_data.
    
    Parameter
    ---------
    header : reformed .sxm file header
     
     Return
     ------
     dimension : return dimension of raw_data (tuple)
    '''
    
    channels = header['CHANNEL_INFO']
    keys = channels.keys()
    dir_l = []
    counts = 0
    for i in keys:
        dir_l.append(channels[i]['Direction'])
    if 'both' in dir_l:
        counts = 2
    else:
        counts = 1
    dimension = (len(keys), counts, int(header['Scan>pixels/line']), int(header['Scan>lines']))
    return dimension

In [168]:
a = channels_counts(sxm_header)
print(a)

(2, 2, 256, 256)


2