# Automatic Microscopic Image Analysis by Moving Window Local Fourier Transform and Machine Learning

## Benedykt R. Jany
## benedykt.jany[at]uj.edu.pl


# Date: 11.2019
 

# Institute of Physics Jagiellonian University in Krakow, Poland



## To run this notebook, you first have to install HyperSpy (https://hyperspy.org/)


In [1]:
%matplotlib notebook
import hyperspy.api as hs
import matplotlib.pyplot as plt
import matplotlib.backends.backend_pdf
# Turn off HyperSpy's warning about missing GUI packages and store the
# preference by calling save().
# NOTE(review): save() presumably writes to the user's HyperSpy configuration,
# so the setting persists beyond this notebook - confirm this is intended.
hs.preferences.GUIs.warn_if_guis_are_missing = False
hs.preferences.save()



# Load image for analysis

In [2]:
import imageio # supported file types https://imageio.readthedocs.io/en/stable/formats.html
import os
import sys

In [3]:
# User-adjustable analysis parameters.
filename = "SEM-AuGaSb.tif" # image file to analyse (extension selects the reader)
elementsize = 128 # side length in pixels of the square moving window; 128 is the default for ~2000x2000 pixel images
xstep = 64 # window step applied to the first array axis in the moving-window cell; 64 is the default for ~2000x2000 pixel images
ystep = 64 # window step applied to the second array axis in the moving-window cell; 64 is the default for ~2000x2000 pixel images
NComponents = "auto" # "auto": take the number of NMF components from the PCA scree plot analysis
#NComponents = 3 # alternative: give the number of NMF components explicitly
ImageRescale = False # set to True to rescale the image to a width of 2048 pixels before the analysis

In [4]:
import time
# Wall-clock reference; the elapsed analysis time is printed at the end of the notebook.
start_time = time.time()

In [5]:
# File extension including the leading dot, lower-cased so that upper-case
# extensions such as ".TIF" or ".JPG" are recognised when the reader is selected.
filetype = os.path.splitext(filename)[-1].lower()

In [6]:
# Read the image into the array `imdata`; the reader options depend on the
# file extension, which is compared case-insensitively.
# NOTE(review): `as_gray` is an option of imageio's Pillow-based reader -
# confirm it is supported by the installed imageio version.
filetype = filetype.lower()
if filetype in ('.jpg', '.jpeg', '.png', '.bmp'): #color images, converted to grayscale while reading
    imdata = imageio.imread(filename,as_gray=True)
elif filetype in ('.tif', '.tiff'): #grayscale images, read unchanged
    imdata = imageio.imread(filename)
else:
    # Nothing can be analysed without image data, so stop here.
    print("not supported file type")
    sys.exit()
print(filename, "read")

SEM-AuGaSb.tif read


In [7]:
# Wrap the raw image array in a HyperSpy 2-D signal for plotting and optional rescaling.
im = hs.signals.Signal2D(imdata)

In [8]:
# Use the file name as the signal title.
im.metadata.General.title = filename

## Optionally Rescale Image to 2048 width

In [9]:
# Optionally resample the image so that its width becomes 2048 pixels.
# The same scale factor is applied to both signal axes, so the aspect
# ratio is kept.
if ImageRescale:
    imshape = im.axes_manager.signal_shape
    wscale = imshape[0]/2048. # ratio of the current width to the 2048 px target
    im = im.rebin(scale=[wscale, wscale])
    imdata = im.data # keep the raw array in sync with the rescaled signal

In [10]:
# Show the image that will be analysed, without scale bar and axes.
im.plot(scalebar=False, axes_off=True)

<IPython.core.display.Javascript object>

# Moving Window via NumPy as_strided

## Divide the image into square windows of size elementsize, moving the window in the x and y directions by xstep and ystep

In [11]:
import numpy as np
from numpy.lib.stride_tricks import as_strided

In [12]:
# Template array with the shape of one window (elementsize x elementsize);
# the moving-window cell reads its shape to size the elements on which the
# operation (e.g. Fourier Transform) is performed.
ws = np.arange(elementsize ** 2).reshape((elementsize, elementsize))


In [13]:
# Build a 4-D view of the image: the first two axes index the window position,
# the last two axes hold the pixels of each window. No pixel data are copied.
#
# Number of window positions along an axis of length N with window size w and
# step s is floor((N - w) / s) + 1. The previous int((N - w + 1) / s) dropped
# the last window whenever (N - w) is a multiple of s.
# The last window starts at (n - 1) * s <= N - w, so the view stays inside the image.
#
# NOTE(review): xstep is applied to array axis 0 and ystep to array axis 1; for
# image arrays axis 0 is normally the vertical direction - confirm the naming
# before using unequal steps. imdata is assumed to be 2-D (single channel).
n_win_axis0 = (imdata.shape[0] - ws.shape[0]) // xstep + 1
n_win_axis1 = (imdata.shape[1] - ws.shape[1]) // ystep + 1
imdataW = as_strided(
    imdata,
    shape=(n_win_axis0, n_win_axis1, ws.shape[0], ws.shape[1]),
    strides=(imdata.strides[0]*xstep, imdata.strides[1]*ystep, imdata.strides[0], imdata.strides[1]),
    writeable=False, # overlapping windows share memory; a read-only view prevents accidental writes
)

In [None]:
#imWindow = hs.signals.Signal2D(imdataW)

In [None]:
#imWindow.plot(cmap='plasma', axes_ticks=False, scalebar=False, axes_off=True) #plot divided image

# Compute Hanning Window Power Spectrum (FFT) from Local Window Data

In [14]:
# 1-D Hanning taper with one sample per window pixel.
hanningf = np.hanning(elementsize)
# 2-D taper: square root of the outer product of the 1-D taper with itself.
hanningWindow2d = np.sqrt(hanningf[:, np.newaxis] * hanningf[np.newaxis, :])

In [15]:
# Power spectrum of every tapered window: 2-D FFT over the two window axes,
# squared magnitude, then shift the zero frequency to the centre of each
# spectrum.
imdataWfft = np.fft.fftshift(
    np.square(np.abs(np.fft.fft2(imdataW * hanningWindow2d, axes=(-2, -1)))),
    axes=(-2, -1),
)

In [16]:
# Constant offset so that no value is 0 before the logarithm is taken in the next step.
imdataWfft = imdataWfft+10000 #adding offset to prevent 0

In [17]:
# Natural logarithm of the offset power spectra.
imdataWfft = np.log(np.abs(imdataWfft))

In [18]:
# Wrap the 4-D local FFT data in a HyperSpy 2-D signal; the decomposition below runs on this object.
imWindowFFT = hs.signals.Signal2D(imdataWfft)

In [None]:
#imWindowFFT.plot(cmap='plasma', axes_ticks=False, scalebar=False, axes_off=True) #plot 4D local FFT data

# Now Machine Learning on Local FFT 4D Data

## Perform PCA to determine number of components in the Local FFT data from Scree Plot

### Look for inflection point in Scree Plot

In [19]:
# Decomposition with HyperSpy's default settings; this notebook uses the result
# as the PCA whose explained variance gives the scree plot.
imWindowFFT.decomposition()

In [20]:
#imWindowFFT.plot_explained_variance_ratio(n=30, xaxis_type='number') #plot PCA Scree Plot

In [21]:
# Explained variance ratio of the components (the scree plot data) as a plain array.
screedata = imWindowFFT.get_explained_variance_ratio().data

## Automatically Analyze PCA Scree Plot

### compute gradient on Scree Plot Data

In [22]:
# Numerical gradient of the scree data with respect to the component index.
grad = np.gradient(screedata)

In [None]:
#gradS = hs.signals.Signal1D(grad)

In [None]:
#gradS.plot()

### find local maxima of gradient

In [None]:
from scipy.signal import argrelextrema

In [None]:
# Positions where the gradient is greater than both of its neighbours.
# The result is a tuple of index arrays; element [0] is used for this 1-D input.
gradLocalMaxima0 = argrelextrema(grad, np.greater)

In [None]:
# Turn the 0-based array indices into 1-based component counts (add 1 due to
# the array indexing from 0). Plain Python ints are stored instead of NumPy
# integer scalars so that the printed list and the value later used as the
# number of components are ordinary integers.
gradLocalMaxima = [int(x) + 1 for x in gradLocalMaxima0[0]]

In [None]:
# Report every candidate for the number of components found from the gradient maxima.
print("Estimated candidates for number of components from PCA Scree Plot\n", gradLocalMaxima)

In [None]:
# In automatic mode take the first candidate (first local maximum of the
# scree-plot gradient) as the number of NMF components.
if NComponents == "auto":
    if len(gradLocalMaxima) == 0:
        # No candidate exists; fail with an actionable message instead of a bare IndexError.
        raise ValueError("No local maximum found in the gradient of the PCA Scree Plot; "
                         "set NComponents to an explicit number")
    NComponents = gradLocalMaxima[0]

In [None]:
# Report the number of components that will be used (first local maximum in automatic mode).
print("Taking", NComponents, "components for NMF Decomposition") #estimated number of components (first local maximum)

In [None]:
# Scree plot of the first 30 components; NComponents is passed as the threshold and drawn with a horizontal line.
imWindowFFT.plot_explained_variance_ratio(n=30, xaxis_type='number', threshold=NComponents, hline=True) #plot PCA Scree Plot

## Perform Decomposition on the Local FFT data by NMF

### You have to provide the number of components in output_dimension (e.g. from the PCA Scree Plot)

In [None]:
# NMF decomposition of the local FFT data into NComponents components.
imWindowFFT.decomposition(algorithm="nmf", output_dimension=NComponents)

In [None]:
#imWindowFFT.plot_decomposition_results()

# Nice color plotting

## Loadings

In [None]:
# Loadings of the NMF decomposition as a HyperSpy signal (plotted and exported below).
loadingsS = imWindowFFT.get_decomposition_loadings()

In [None]:
# Show all loadings side by side with the 'viridis' colormap, without scale bars and axes decoration.
hs.plot.plot_images(loadingsS,cmap='viridis', scalebar=None, axes_decor='off')

## Factors

In [None]:
# Factors of the NMF decomposition as a HyperSpy signal (plotted and exported below).
factorsS =  imWindowFFT.get_decomposition_factors()

In [None]:
# Show all factors side by side with the 'plasma' colormap, without scale bars and axes decoration.
hs.plot.plot_images(factorsS,cmap='plasma', scalebar=None, axes_decor='off')

# Export Data

In [None]:
# Base name of all exported files: the input file name without extension,
# followed by the window size and the two step sizes.
filebase = "{}-es{}-xs{}-ys{}".format(os.path.splitext(filename)[0], elementsize, xstep, ystep)

# Mark whether the analysis ran on the rescaled or on the original image.
# A single conditional guarantees that exactly one suffix is always appended.
filebase += "-Rescaled" if ImageRescale else "-Original"

In [None]:
# Export the loadings; an existing file with the same name is overwritten.
loadingsS.save(filebase+"-Loadings_NMF"+str(NComponents)+".tif", overwrite=True) # tif for ImageJ/FIJI open via BioFormats

In [None]:
# Export the factors; an existing file with the same name is overwritten.
factorsS.save(filebase+"-Factors_NMF"+str(NComponents)+".tif", overwrite=True) # tif for ImageJ/FIJI open via BioFormats

In [None]:
#imWindowFFT.save(filebase+"-Loadings_NMF"+str(NComponents)+"Data")

# Create PDF Report File

In [None]:
# Write every currently open matplotlib figure to one multi-page PDF report.
# plt.get_fignums() lists the figures that actually exist, so closed figures
# (gaps in the numbering) are skipped and no empty figure is created.
# The with-block closes the PDF file even if saving a page fails.
with matplotlib.backends.backend_pdf.PdfPages(filebase+"-NMF"+str(NComponents)+"-Report"+".pdf") as pdf:
    for fignum in plt.get_fignums():
        pdf.savefig(fignum)

In [None]:
# Elapsed wall-clock time in seconds since start_time was recorded.
print("Analysis time", "--- %s seconds ---" % (time.time() - start_time))