This notebook performs data augmentation steps in which no cropping takes place. This is qualitatively different from the other data augmentation notebook, which should be executed first.

All FFT descriptors are calculated within the notebook as well.

# Import libraries

In [None]:
import os
import h5py
import matplotlib.pyplot as plt
import numpy as np
import numpy.matlib
from scipy import ndimage
from collections import Counter
import itertools
from scipy.signal import get_window
import cv2
from collections import defaultdict
from scipy.stats import mode
from scipy import stats
from ai4stem.utils.utils_fft import calc_fft
import json

In [None]:
# Specify the directory in which data will be saved; every .npy and .json
# file written further below is placed under this path.
save_path = '.'

# Load all data

In [None]:
# Specify the folders in which h5 files with simulated images can be found.
# Only one folder (the current directory) is configured here; add further
# entries to the list to scan more locations.
folders = ['.']


In [None]:
# Collect the h5 files of interest from all folders.
# `files` holds the bare file names, `images` the corresponding paths
# (folder joined with file name), in the same order.
files = []
images = []
for folder in folders:
    # List each folder only once, and sort the result: os.listdir returns
    # entries in arbitrary order, which would otherwise make the order of
    # the saved arrays depend on the file system.
    matching = sorted(name for name in os.listdir(folder)
                      if 'augmented_images_rotation&shear' in name)
    files.extend(matching)
    images.extend(os.path.join(folder, name) for name in matching)

In [None]:
# Display the collected file names
files

As an example, load one of the images:

In [None]:
# Open the first file read-only and print its top-level keys and the shape
# of the image dataset. The context manager closes the file even if one of
# the lookups fails, and the shape is read from the dataset directly so the
# full image array is not loaded into memory just to inspect it.
with h5py.File(images[0], 'r') as file:
    print(file.keys())
    print(file.get('Image_rotation_and_shear').get('Rotated_and_sheared_images').shape)

# Load rotated and sheared images and calculate FFT

In [None]:
# For every h5 file: read each rotated-and-sheared image, store the image
# and a text label for it, and compute its FFT descriptor.
all_images = []
all_images_fft = []
all_labels = []

# FFT parameters (passed unchanged to calc_fft)
thresholding = True
r_cut = None
sigma = None

for image_path in images:

    print(image_path)

    # Structure name = file name without its last four '_'-separated tokens.
    # os.path.basename honours the platform's path separator; splitting on
    # '/' would keep the folder part for paths built by os.path.join on
    # Windows.
    current_structure = '_'.join(os.path.basename(image_path).split('_')[:-4])

    # The context manager guarantees the file is closed again, also when
    # reading or calc_fft raises.
    with h5py.File(image_path, 'r') as file:

        # Extract datasets for each group
        file_data = file.get("Image_rotation_and_shear")
        img = file_data.get("Rotated_and_sheared_images")

        img_shape = img.shape

        # The last two axes are iterated as rotation and shear index
        for rot in range(img_shape[-2]):
            for shear in range(img_shape[-1]):

                all_images.append(img[:, :, rot, shear])
                all_labels.append(current_structure + '_rot_{}_shear_{}'.format(rot, shear))

                fft_desc = calc_fft(img[:, :, rot, shear],
                                    r_cut=r_cut, thresholding=thresholding,
                                    sigma=sigma)
                all_images_fft.append(fft_desc)

In [None]:
# Report how many FFT descriptors (one per image) were computed
print('# of rotated and sheared images = {}'.format(len(all_images_fft)))

In [None]:
# Write the HAADF image stack to disk
haadf_file = os.path.join(save_path, 'X_haadf_rotation_and_shear.npy')
np.save(haadf_file, np.array(all_images))

# Write the text labels (same ordering as the image stack)
label_file = os.path.join(save_path, 'y_rotation_and_shear.npy')
np.save(label_file, all_labels)

In [None]:
# Sanity check: shape of the stacked image array.
# Note that this builds the full array from the list once more.
print(np.array(all_images).shape)

In [None]:
# Write the FFT descriptors of the HAADF images to disk
fft_file = os.path.join(save_path, 'X_fft_haadf_rotation_and_shear.npy')
np.save(fft_file, np.array(all_images_fft))

### Define relation between labels and int labels

In [None]:
# Reduce every label to its first three '_'-separated tokens:
# `a` holds the token lists, `b` the tokens re-joined into one string.
a = [label.split('_')[:3] for label in all_labels]
b = ['_'.join(tokens) for tokens in a]

In [None]:
# Sorted set of distinct class names found in `b`
unique_labels = np.unique(b)
print(unique_labels)

In [None]:
# Two-way lookup between class names and their integer index, where the
# index is the position of the name in `unique_labels`.
numerical_to_text_labels = {index: name for index, name in enumerate(unique_labels)}
text_to_numerical_labels = {name: index for index, name in enumerate(unique_labels)}
print(numerical_to_text_labels, text_to_numerical_labels)

In [None]:
# Store both label mappings as JSON files in the save directory
for json_name, mapping in (('text_to_numerical_labels.json', text_to_numerical_labels),
                           ('numerical_to_text_labels.json', numerical_to_text_labels)):
    with open(os.path.join(save_path, json_name), 'w') as f:
        json.dump(mapping, f)

In [None]:
# Translate every class name in `b` into its integer label
converted_labels = [text_to_numerical_labels[class_name] for class_name in b]

In [None]:
# save int labels
# NOTE(review): the file name below is spelled 'rotatation' (sic), unlike
# the other 'rotation_and_shear' files. It is left unchanged here because
# other code may load this exact name - confirm before renaming.
np.save(os.path.join(save_path,
                     'y_rotatation_and_shear_int.npy'), np.array(converted_labels))

# Add noise

Consider a subselection of images for the addition of noise. This is done to keep the number of data points on the order of 100k.

In [None]:
from scipy.ndimage import gaussian_filter
from skimage.util import random_noise

In [None]:
# Extract subselection, otherwise may run into memory problems - at least
# if not run on high-performance computing cluster.
# Only every second rotation index and every second shear index is kept.

raw_images = []
raw_labels = []

for image_path in images:

    print(image_path)

    # Structure name = file name without its last four '_'-separated tokens.
    # os.path.basename honours the platform's path separator; splitting on
    # '/' would keep the folder part for paths built by os.path.join on
    # Windows.
    current_structure = '_'.join(os.path.basename(image_path).split('_')[:-4])

    # The context manager guarantees the file is closed again, also when
    # reading raises.
    with h5py.File(image_path, 'r') as file:

        # Extract datasets for each group
        file_data = file.get("Image_rotation_and_shear")
        img = file_data.get("Rotated_and_sheared_images")

        img_shape = img.shape

        # Step of 2 along both of the last two axes
        for rot in range(0, img_shape[-2], 2):
            for shear in range(0, img_shape[-1], 2):

                raw_images.append(img[:, :, rot, shear])
                raw_labels.append(current_structure + '_rot_{}_shear_{}'.format(rot, shear))

In [None]:
# Report the size of the subselection that will receive noise
print('Consider subsect of {} images for addition of random noise'.format(len(raw_images)))

## Poisson noise

In [None]:
# For every image of the subselection: min-max normalise it to [0, 1]
# (float32), keep the normalised image under its original label, and add
# `iterations` Poisson-noise versions of it. random_noise is called without
# a seed argument here.
images_w_poisson = []
labels_w_poisson = []
iterations = 2  # number of Poisson-noise realisations per image

for raw_image, raw_label in zip(raw_images, raw_labels):

    normalized_image = cv2.normalize(raw_image, None,
                                     alpha=0, beta=1,
                                     norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)

    # Noise-free (normalised) image first ...
    images_w_poisson.append(normalized_image)
    labels_w_poisson.append(raw_label)

    # ... followed by its noisy versions, tagged with the iteration index
    for it in range(iterations):
        images_w_poisson.append(random_noise(normalized_image, mode='poisson'))
        labels_w_poisson.append('{}_pois_it_{}'.format(raw_label, it))

In [None]:
print('# Images after introducing Poisson noise = {}'.format(len(images_w_poisson)))

# Add Blurring

In [None]:
# For every image from the Poisson step: keep it as is and add one
# Gaussian-filtered copy per entry of `widths` (used as the filter sigma).
widths = [2, 4]
images_w_poisson_w_gaussian = []
labels_w_poisson_w_gaussian = []

for base_image, base_label in zip(images_w_poisson, labels_w_poisson):

    # Unblurred image
    images_w_poisson_w_gaussian.append(base_image)
    labels_w_poisson_w_gaussian.append(base_label)

    # Blurred copies, tagged with the filter width
    for width in widths:
        images_w_poisson_w_gaussian.append(gaussian_filter(base_image, sigma=width))
        labels_w_poisson_w_gaussian.append(base_label + '_gwidth_' + str(width))

In [None]:
# Report the number of images after the Poisson and blurring steps
print('# Images after introducing Poisson and Blurring noise = {}'.format(len(images_w_poisson_w_gaussian)))

# Add Gaussian noise

In [None]:
# Build the final set of distorted images:
#  * images whose label contains 'pois' or 'gwidth' are carried over as is
#    (images with neither tag are not carried over),
#  * every image without 'pois' in its label additionally gets one
#    Gaussian-noise copy per variance in `var_list`.
var_list = [0.005, 0.01]
images_w_poisson_w_gaussian_w_gnoise = []
labels_w_poisson_w_gaussian_w_gnoise = []

for candidate_image, candidate_label in zip(images_w_poisson_w_gaussian, labels_w_poisson_w_gaussian):

    has_poisson = 'pois' in candidate_label
    has_blur = 'gwidth' in candidate_label

    if has_poisson or has_blur:
        images_w_poisson_w_gaussian_w_gnoise.append(candidate_image)
        labels_w_poisson_w_gaussian_w_gnoise.append(candidate_label)

    # Gaussian noise is not combined with Poisson noise
    if not has_poisson:
        for var in var_list:
            images_w_poisson_w_gaussian_w_gnoise.append(
                random_noise(candidate_image, mode='gaussian', var=var))
            labels_w_poisson_w_gaussian_w_gnoise.append(
                candidate_label + '_gnoisevar_' + str(var))

In [None]:
# Report the number of images after all three distortion steps
print('# Images after introducing Posson, Blurring, and Gaussian noise = {}'.format(len(labels_w_poisson_w_gaussian_w_gnoise)))

# Calculate FFT for distorted images

In [None]:
# FFT parameters (passed unchanged to calc_fft)
r_cut = None
sigma = None
thresholding = True

# One FFT descriptor per distorted image; the labels are copied in the
# same order so that descriptors and labels stay aligned.
ffts_distorted = [
    calc_fft(distorted, r_cut=r_cut, thresholding=thresholding, sigma=sigma)
    for distorted in images_w_poisson_w_gaussian_w_gnoise
]
labels_distorted = list(labels_w_poisson_w_gaussian_w_gnoise)

In [None]:
# Report the number of FFT descriptors computed for the distorted images
print('Total # of HAADF-FFTs = {}'.format(len(ffts_distorted)))

In [None]:
# Save the distorted images, their FFT descriptors and their text labels,
# each as its own .npy file in the save directory.
distorted_image_file = os.path.join(save_path, 'X_haadf_poisson_blurring_gaussian_noise.npy')
np.save(distorted_image_file, np.asarray(images_w_poisson_w_gaussian_w_gnoise))

distorted_fft_file = os.path.join(save_path, 'X_fft_haadf_poisson_blurring_gaussian_noise.npy')
np.save(distorted_fft_file, np.asarray(ffts_distorted))

distorted_label_file = os.path.join(save_path, 'y_poisson_blurring_gaussian_noise.npy')
np.save(distorted_label_file, np.asarray(labels_distorted))

In [None]:
# Map each distorted label to its integer class: the first three
# '_'-separated tokens form the class name, which is looked up in
# text_to_numerical_labels.
a = [label.split('_')[:3] for label in labels_distorted]
b = ['_'.join(tokens) for tokens in a]
converted_labels = [text_to_numerical_labels[class_name] for class_name in b]

# save int labels
int_label_file = os.path.join(save_path, 'y_poisson_blurring_gaussian_noise_int.npy')
np.save(int_label_file, np.asarray(converted_labels))