# NESM Python Part 4 - Advanced Topics

- Deep learning with TensorFlow
- Our image analysis pipeline at a glance
- Dask for out-of-memory computing
- Classical machine learning with `scikit-learn`

In [26]:
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
from mpl_interactions import hyperslicer
%matplotlib widget

## Dask for out of memory computing



In [4]:
# Back-of-the-envelope budget for holding raw images in RAM.
bytes_per_pix = 2       # 16-bit unsigned integers
pixels = 1024 * 1024    # pixels in one 1024 x 1024 image
memory = 8e9            # 8 GB of RAM, in bytes

In [8]:
# Number of images that fit in memory at the same time.
memory / (bytes_per_pix * pixels)

3814.697265625

That seems like a lot, but it is fewer images than a single modest experiment produces:

(20 time points) x (10 positions) x (4 channels) x (5 z-slices) = 4000 images


**Enter Dask Array**

In [9]:
import dask.array as da

In [14]:
# Lazily describe a (10000, 1024, 1024) stack of random floats. Nothing is
# allocated here; dask only records how to generate each chunk on demand.
# A seeded RandomState is used so that the computed values shown further
# down are the same after "Restart Kernel -> Run All".
darr = da.random.RandomState(0).random_sample((10000, 1024, 1024))

In [None]:
# Display the dask array's summary (size, shape, chunking, dtype).
darr

In [16]:
# Build (but do not execute) the mean over the first axis of the stack.
darr.mean(axis=0)

Unnamed: 0,Array,Chunk
Bytes,8.39 MB,131.07 kB
Shape,"(1024, 1024)","(128, 128)"
Count,6016 Tasks,64 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 8.39 MB 131.07 kB Shape (1024, 1024) (128, 128) Count 6016 Tasks 64 Chunks Type float64 numpy.ndarray",1024  1024,

Unnamed: 0,Array,Chunk
Bytes,8.39 MB,131.07 kB
Shape,"(1024, 1024)","(128, 128)"
Count,6016 Tasks,64 Chunks
Type,float64,numpy.ndarray


In [17]:
# Lazy element-wise rescaling: subtract the global minimum, then divide by
# the global standard deviation. Only the task graph is built here.
shifted = darr - darr.min()
shifted / darr.std()

Unnamed: 0,Array,Chunk
Bytes,83.89 GB,32.77 MB
Shape,"(10000, 1024, 1024)","(250, 128, 128)"
Count,13543 Tasks,2560 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 83.89 GB 32.77 MB Shape (10000, 1024, 1024) (250, 128, 128) Count 13543 Tasks 2560 Chunks Type float64 numpy.ndarray",1024  1024  10000,

Unnamed: 0,Array,Chunk
Bytes,83.89 GB,32.77 MB
Shape,"(10000, 1024, 1024)","(250, 128, 128)"
Count,13543 Tasks,2560 Chunks
Type,float64,numpy.ndarray


In [19]:
from dask.distributed import Client

In [21]:
# Create a dask.distributed client. Called with no arguments it appears to
# start a LocalCluster on this machine (see the widget rendered below).
# NOTE(review): re-running this cell creates another client/cluster, which
# is presumably what triggers the "already have a cluster running?" warning.
client = Client()
# Show the cluster widget as the cell output.
client.cluster

Perhaps you already have a cluster running?
Hosting the HTTP server on port 64232 instead


VBox(children=(HTML(value='<h2>LocalCluster</h2>'), HBox(children=(HTML(value='\n<div>\n  <style scoped>\n    …

In [22]:
# Execute the lazy reduction and bring the result back as a concrete array.
out = darr.mean(axis=0).compute()

In [23]:
# Inspect the computed mean image (a NumPy array, as the repr below shows).
out

array([[0.49719658, 0.50459197, 0.50002816, ..., 0.50171562, 0.49793277,
        0.497131  ],
       [0.49980782, 0.50553955, 0.5026381 , ..., 0.50375397, 0.49803832,
        0.49861827],
       [0.50271469, 0.49597537, 0.50582245, ..., 0.50005791, 0.4990012 ,
        0.50259924],
       ...,
       [0.49964408, 0.50411596, 0.50376439, ..., 0.50066666, 0.50224443,
        0.50479198],
       [0.49369245, 0.50180242, 0.50432704, ..., 0.49566215, 0.49789641,
        0.49495108],
       [0.49516304, 0.50064357, 0.5003209 , ..., 0.49884902, 0.50081065,
        0.49663388]])

In [25]:
# Wrap the lazy dask array in an xarray.DataArray with named dimensions.
# The length of the 'T' coordinate is taken from the array itself so it can
# never disagree with the data if the stack size above is changed.
n_timepoints = darr.shape[0]
# presumably minutes over 7 days (1440 min/day) -- TODO confirm the unit
t_max = 7 * 1440
xr.DataArray(
    darr,
    dims=['T', 'Y', 'X'],
    coords={'T': np.linspace(0, t_max, n_timepoints)},
)

Unnamed: 0,Array,Chunk
Bytes,83.89 GB,32.77 MB
Shape,"(10000, 1024, 1024)","(250, 128, 128)"
Count,2560 Tasks,2560 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 83.89 GB 32.77 MB Shape (10000, 1024, 1024) (250, 128, 128) Count 2560 Tasks 2560 Chunks Type float64 numpy.ndarray",1024  1024  10000,

Unnamed: 0,Array,Chunk
Bytes,83.89 GB,32.77 MB
Shape,"(10000, 1024, 1024)","(250, 128, 128)"
Count,2560 Tasks,2560 Chunks
Type,float64,numpy.ndarray


## PCA on Hyperspectral SRS imaging data

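**What is PCA?** Principal component analysis (PCA) finds a small set of orthogonal directions (the principal components) that capture most of the variance in a dataset, so each spectrum can be summarized by a few numbers.

**What is SRS?** Stimulated Raman scattering (SRS) microscopy is a label-free imaging technique that measures a vibrational (Raman) signal at every pixel; scanning the wavenumber gives a full spectrum per pixel, i.e. a hyperspectral image.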



In [27]:
import io
import requests

In [28]:
# Get the dataset directly from GitHub.
# A timeout is set so a stalled connection raises instead of hanging the
# notebook forever (requests waits indefinitely by default).
response = requests.get(
    "https://github.com/jrussell25/data-sharing/raw/master/srs_beads.npy",
    timeout=60,
)
# Fail loudly on HTTP errors rather than trying to parse an error page.
response.raise_for_status()
# The payload comes from the network, so explicitly refuse pickled objects.
beads = np.load(io.BytesIO(response.content), allow_pickle=False)

In [32]:
# Define the coordinates for the xarray as a dict of name: array pairs.
# wavenums = wavenumber axis, the relevant spectroscopic unit (cm^-1)
# X, Y     = physical dimensions of the images in microns, from the
#            microscope metadata
# Axis lengths are read from `beads` instead of being hard-coded, so the
# coordinates always match the data.
# NOTE(review): axis 1 is labelled 'X' and axis 2 'Y'; for a row-major image
# the row axis is usually Y -- confirm the intended orientation.
n_wavenums, n_x, n_y = beads.shape
coords = {'wavenums': np.linspace(2798.65, 3064.95, n_wavenums),
          'X': np.linspace(0, 386.44, n_x),
          'Y': np.linspace(0, 386.44, n_y)}

# Pass the dimension names as a plain list (dict order is insertion order).
x_beads = xr.DataArray(beads, dims=list(coords), coords=coords)

In [34]:
# Open a new figure and browse the hyperspectral stack interactively.
# hyperslicer adds a slider for the 'wavenums' dimension (see widget below);
# the returned controls object is kept in `ctrls`.
plt.figure()
ctrls = hyperslicer(x_beads)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

VBox(children=(HBox(children=(IntSlider(value=0, description='wavenums', max=125, readout=False), Label(value=…

In [35]:
from sklearn.decomposition import PCA

In [56]:
# Keep the first 10 principal components. random_state is fixed because,
# for data this size, scikit-learn may pick a randomized SVD solver, whose
# results otherwise vary slightly from run to run.
pca = PCA(n_components=10, random_state=0)

In [57]:
# Flatten the two image axes and transpose, so each row is one pixel and
# each column is one entry along the first (spectral) axis of `beads`.
spectra = beads.reshape(beads.shape[0], -1).T
# Fit the PCA and project every pixel onto the principal components.
pcs = pca.fit_transform(spectra)

In [70]:
# Check the dimensions of the raw hyperspectral stack.
beads.shape

(126, 512, 512)

In [63]:
# Plot the first three principal components as functions of wavenumber.
# components_ has one row per component, so transpose to get one line each.
plt.figure()
lines = plt.plot(x_beads['wavenums'], pca.components_[:3].T)
# Label the figure so it can be read on its own.
plt.legend(lines, ['PC 1', 'PC 2', 'PC 3'])
plt.xlabel('Wavenumber (cm$^{-1}$)')
plt.ylabel('Component loading')
plt.title('First three principal components')
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [65]:
# Scree plot: fraction of the total variance explained by each component.
plt.figure()
plt.plot(pca.explained_variance_ratio_)
plt.xlabel('Principal component index')
plt.ylabel('Explained variance ratio')
plt.title('Variance explained per component')
# End with plt.show() (as in the previous plot) so the Line2D repr is not
# echoed as cell output.
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x1716f8460>]

In [74]:
# Turn the first three principal-component scores into an RGB image.
# The image size is taken from `beads` instead of a hard-coded 512 x 512,
# so this keeps working for stacks of a different size.
n_rows, n_cols = beads.shape[1:]
rgb = pcs[..., :3].reshape(n_rows, n_cols, 3)
# Rescale each channel independently to [0, 1], the range imshow expects
# for floating-point RGB data.
rgb = rgb - rgb.min(axis=(0, 1))
rgb = rgb / rgb.max(axis=(0, 1))

In [75]:
# Show the PCA false-colour image (PC 1-3 mapped to R, G, B).
plt.figure()
plt.imshow(rgb)
plt.title('First three principal components as RGB')
# plt.show() keeps the AxesImage repr out of the cell output.
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.image.AxesImage at 0x17170e9d0>