In [None]:
# TODO: start with fcc image, then see transition and all these features
# then add more images, Ti for example, and then add Fe bcc as final example.
# maybe precalculate the hidden reps
# ***Add visualization feature where can hover over the images and sees them.***
# Better to provide pre-calculated values and then some code how to do it for 
# new images.

This notebook demonstrates how to use UMAP, a manifold learning algorithm, to inspect the internal neural-network representations of AI-STEM that are learned during training. 

First we import the required packages.

In [None]:
# Install AI-STEM (from GitHub) and the third-party packages used in this notebook.
# NOTE(review): no versions are pinned, so a fresh environment receives the latest
# release of each package.
! pip install 'git+https://github.com/AndreasLeitherer/ai4stem.git'
! pip install tensorflow
! pip install opencv-python
! pip install umap-learn
! pip install bokeh

In [None]:
import os
# tensorflow info/warnings switched off
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

from tensorflow.keras.models import Model

from ai4stem.utils.utils_data import load_pretrained_model, load_example_image
from ai4stem.utils.utils_prediction import predict

from ai4stem.utils.utils_fft import calc_fft
from ai4stem.utils.utils_prediction import localwindow
from ai4stem.utils.utils_nn import decode_preds, predict_with_uncertainty
from ai4stem.utils.utils_data import load_class_dicts

import numpy as np

import umap

import matplotlib
matplotlib.rcParams.update({'font.size': 10})

import matplotlib.pyplot as plt

import pandas as pd

import logging
logger = logging.getLogger()
logger.setLevel(logging.INFO)

Next, we load an example image, here Fe bcc in [100] orientation:

In [None]:
# Name tag used for this example, and the example image itself
image_name = 'Fe_bcc'
image = load_example_image()

# Show the image in grayscale
plt.imshow(image, cmap='gray')
plt.show()

Now we want to investigate how AI-STEM's internal neural-network representations represent local fragments of this image, for which we first perform the fragmentation and descriptor-calculation steps (as done in the quickstart [notebook](https://colab.research.google.com/github/AndreasLeitherer/ai4stem/blob/main/notebooks/Application_of_pretrained_model.ipynb)):

In [None]:
# Relation between pixels and Angstrom for this image
pixel_to_angstrom = 0.1245
# AI-STEM parameters: window size [Angstrom] and stride of the sliding window
window_size = 12.
stride_size = [36, 36]
# Window size converted from Angstrom to pixels (truncated to an integer)
adapted_window_size = int(window_size * (1. / pixel_to_angstrom))

# Step 1: cut the image into local fragments with a sliding window
logger.info('Fragmentation.')
sliced_images, spm_pos, ni, nj = localwindow(
    image,
    stride_size=stride_size,
    pixel_max=adapted_window_size,
)

# Step 2: one FFT-HAADF descriptor per local fragment
logger.info('Calculate FFT-HAADF descriptor.')
fft_descriptors = [
    calc_fft(fragment, sigma=None, thresholding=True)
    for fragment in sliced_images
]

# Step 3: add a trailing axis of length one so that the array
# matches the model input shape
stacked_descriptors = [np.stack([descriptor]) for descriptor in fft_descriptors]
data = np.moveaxis(np.array(stacked_descriptors), 1, -1)
logger.info('Finished.')

Now we want to extract the neural-network representations, where we inspect the last layer before classification is performed (this layer's name is 'Dense_1'). We first load the pretrained model and then truncate it:

In [None]:
# Name of the layer that becomes the new output: the layer right
# before the final classification layer
outpout_layer_name = 'Dense_1'

# Pretrained AI-STEM model
model = load_pretrained_model()

# Truncated model: same input as the full model, but the output is
# taken from the selected hidden layer instead of the classifier
inputs = model.input
outputs = model.get_layer(outpout_layer_name).output
intermediate_layer_model = Model(
    inputs=inputs,
    outputs=outputs,
)
intermediate_layer_model.summary()

Using this truncated model, we have access to the hidden representations (expected execution time ~30 seconds):

In [None]:
# Compute representations: evaluate the truncated model on the FFT descriptors.
# NOTE(review): n_iter=100 is presumably the number of repeated (stochastic)
# forward passes performed by decode_preds — confirm against its implementation.
nn_representations = decode_preds(data, intermediate_layer_model, n_iter=100)

Now we can apply the Uniform Manifold Approximation and Projection (UMAP) algorithm to visualize the hidden space. We explore some of the most important parameters in UMAP and visualize them in the following cell:

In [None]:
# Apply UMAP

# Most important parameter: the number of neighbors employed
# for calculating the low-dimensional (here, 2D) embedding.
# We try a range of values, namely 2%, 5% and 10% of the
# data set size.
neighbor_fractions = np.array([0.02, 0.05, 0.1])
n_neighbors_list = (nn_representations.shape[0] * neighbor_fractions).astype(int)
# UMAP rejects n_neighbors < 2; the integer truncation above can
# produce 0 or 1 for small data sets, so clamp from below.
n_neighbors_list = np.maximum(n_neighbors_list, 2)
# Choose minimum distance (0<min_dist<1.0)
# which controls the spread of the points
# in the low-dimensional embedding (only for improving visualization)
min_dist_list = [0.1, 0.5, 0.9]
# choose Euclidean metric
# for measuring distance between data points
metric = 'euclidean'
# Choose 2 as embedding dimension
n_components = 2
# plotting parameters (used by the plotting cells below)
s = 2.5
edgecolors = 'face'

data_for_fitting = nn_representations

# One embedding per (n_neighbors, min_dist) combination, keyed by that pair
results = dict()

for n_neighbors in n_neighbors_list:
    for min_dist in min_dist_list:
        logger.info('Calculate UMAP embedding for # neighbors = {}, min. distance = {}'.format(n_neighbors, min_dist))
        mapper = umap.UMAP(n_neighbors=n_neighbors,
                           min_dist=min_dist,
                           metric=metric,
                           n_components=n_components).fit(data_for_fitting)
        embedding = mapper.transform(data_for_fitting)

        results[(n_neighbors, min_dist)] = embedding

Now let us visualize the results:

In [None]:
# Grid layout: rows follow n_neighbors (index i), columns follow min_dist
# (index j), matching the axs[i, j] indexing below. squeeze=False keeps
# `axs` two-dimensional even if one of the lists has a single entry.
fig, axs = plt.subplots(len(n_neighbors_list), len(min_dist_list),
                        figsize=(15, 15), squeeze=False)
for i, n_neighbors in enumerate(n_neighbors_list):
    for j, min_dist in enumerate(min_dist_list):
        embedding = results[(n_neighbors, min_dist)]
        im = axs[i, j].scatter(embedding[:, 0], embedding[:, 1],
                               s=s)
        axs[i, j].set_aspect('equal')
        axs[i, j].axis('off')
        axs[i, j].set_title('# Neighbors = {},\n min_dist = {}'.format(n_neighbors, min_dist))

We can see that for small # neighbors, no patterns can be observed, while for larger values, two main clusters emerge. The minimum distance, on the other hand, controls the spread of the points in both clusters.

Now we would like to know what physical meaning these clusters have. For that, we calculate the AI-STEM assignments:

In [None]:
prediction, uncertainty = predict_with_uncertainty(data, model, 
                                                   model_type='classification', 
                                                   n_iter=100)

Let us first check which symmetry is assigned to the main clusters - by choosing the most likely label as the color scale:

In [None]:
# load_class_dicts is also imported in the import cell at the top of the notebook.
from ai4stem.utils.utils_data import load_class_dicts

# Two lookup tables between class labels in numerical and text form.
# Later cells index numerical_to_text_labels with str(<int class index>).
numerical_to_text_labels, text_to_numerical_labels = load_class_dicts()

In [None]:
import seaborn as sns
import pandas as pd

# Index of the largest entry of each prediction, i.e. the most likely class
color_scale = prediction.argmax(axis=-1)
# relation between int class labels and text labels:
numerical_to_text_labels, text_to_numerical_labels = load_class_dicts()
# Text label of every data point; identical for all panels, so computed once
target_labels = [numerical_to_text_labels[str(_)] for _ in color_scale]

# Grid layout: rows follow n_neighbors (index i), columns follow min_dist
# (index j), matching the axs[i, j] indexing below. squeeze=False keeps
# `axs` two-dimensional even if one of the lists has a single entry.
fig, axs = plt.subplots(len(n_neighbors_list), len(min_dist_list),
                        figsize=(15, 15), squeeze=False)
for i, n_neighbors in enumerate(n_neighbors_list):
    for j, min_dist in enumerate(min_dist_list):
        embedding = results[(n_neighbors, min_dist)]
        df = pd.DataFrame({'e1': embedding[:, 0], 'e2': embedding[:, 1],
                           'target': target_labels})
        # seaborn assigns one color per text label and builds the legend entries
        im = sns.scatterplot(x="e1", y="e2", hue="target",
                             data=df, ax=axs[i, j], palette='tab10', s=s)
        im.set(xticks=[])
        im.set(yticks=[])
        im.set(xlabel=None)
        im.set(ylabel=None)

        axs[i, j].set_aspect('equal')
        axs[i, j].legend(loc='lower right')
        axs[i, j].set_title('# Neighbors = {},\n min_dist = {}'.format(n_neighbors, min_dist))

We can see that the two clusters are assigned the same label with some exceptions as one can see in the sub-clusters that appear. Based on the assignments, we can already infer that these correspond to the interface region. To further confirm that, we also use the mutual information as color scale:

In [None]:
import seaborn as sns
import pandas as pd

# Mutual information of every data point, used as a continuous color scale
color_scale = uncertainty['mutual_information']

# Grid layout: rows follow n_neighbors (index i), columns follow min_dist
# (index j), matching the axs[i, j] indexing below. squeeze=False keeps
# `axs` two-dimensional even if one of the lists has a single entry.
fig, axs = plt.subplots(len(n_neighbors_list), len(min_dist_list),
                        figsize=(15, 15), squeeze=False)
for i, n_neighbors in enumerate(n_neighbors_list):
    for j, min_dist in enumerate(min_dist_list):
        embedding = results[(n_neighbors, min_dist)]
        im = axs[i, j].scatter(embedding[:, 0], embedding[:, 1],
                               s=s, c=color_scale, cmap='hot')

        axs[i, j].set_xticks([])
        axs[i, j].set_yticks([])

        axs[i, j].set_aspect('equal')
        axs[i, j].set_title('# Neighbors = {},\n min_dist = {}'.format(n_neighbors, min_dist))
        # one colorbar per panel
        fig.colorbar(im, ax=axs[i, j])

We have thus visualized how the network separates bulk and interface regions.

To add one more tool for visualization, please find the following hover plot:

In [None]:
import cv2
from io import BytesIO
from PIL import Image
import base64
! pip install bokeh
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import HoverTool, ColumnDataSource, CategoricalColorMapper
from bokeh.palettes import Spectral10
output_notebook()

def embeddable_image(data):
    """Encode an image array as a base64 data URI usable in an HTML <img> tag.

    The array is min-max normalized to [0, 1], scaled to 8-bit grayscale,
    JPEG-compressed in memory and base64-encoded.

    Parameters
    ----------
    data : array
        Image to embed (assumed to be a 2D numeric array accepted by
        ``cv2.normalize`` — TODO confirm).

    Returns
    -------
    str
        Data URI of the form ``'data:image/jpeg;base64,<payload>'``.
    """
    # rescale intensities to [0, 1] as 32-bit floats
    data = cv2.normalize(data, None,
                         alpha=0, beta=1,
                         norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)

    # convert to 8-bit and make sure the image is single-channel grayscale
    data = (255 * data).astype(np.uint8)
    image = Image.fromarray(data)
    image = image.convert("L")

    buffer = BytesIO()
    image.save(buffer, format='jpeg')
    for_encoding = buffer.getvalue()
    # the declared MIME type has to match the encoded format (JPEG)
    return 'data:image/jpeg;base64,' + base64.b64encode(for_encoding).decode()


# Per-fragment information shown in the hover plot
predicted_classes = prediction.argmax(axis=-1)
digits = {}
digits['target'] = predicted_classes
digits['target_names'] = [numerical_to_text_labels[str(_)] for _ in predicted_classes]
digits['images'] = sliced_images

#####################################
# Select the embedding explicitly instead of relying on the loop variable
# left over from the plotting cells above (which holds the last entry of
# the parameter grid).
embedding = results[(n_neighbors_list[-1], min_dist_list[-1])]

digits_df = pd.DataFrame(embedding, columns=('x', 'y'))
# The color mapper below is keyed on the text labels, so the 'digit' column
# has to contain the same text labels; otherwise no point matches a factor
# and no point is colored.
digits_df['digit'] = digits['target_names']
digits_df['image'] = list(map(embeddable_image, digits['images']))

datasource = ColumnDataSource(digits_df)
color_mapping = CategoricalColorMapper(factors=sorted(set(digits['target_names'])),
                                       palette=Spectral10)

# `width`/`height` replace `plot_width`/`plot_height`, which were removed in Bokeh 3
plot_figure = figure(
    title='UMAP projection Fe bcc [100] HAADF STEM image',
    width=600,
    height=600,
    tools=('pan, wheel_zoom, reset')
)

# Tooltip: image of the local fragment together with its assigned label
plot_figure.add_tools(HoverTool(tooltips="""
<div>
    <div>
        <img src='@image' style='float: left; margin: 5px 5px 5px 5px'/>
    </div>
    <div>
        <span style='font-size: 16px; color: #224499'>Label:</span>
        <span style='font-size: 18px'>@digit</span>
    </div>
</div>
"""))

# scatter() draws circle markers by default and accepts `size` in screen units
plot_figure.scatter(
    'x',
    'y',
    source=datasource,
    color=dict(field='digit', transform=color_mapping),
    line_alpha=0.6,
    fill_alpha=0.6,
    size=10
)
show(plot_figure)

The question is now: what happens if we consider different interfaces? Will they be assigned the same cluster? Does our choice of training set and optimization routine make it impossible to distinguish different interface types?

The answer is no - we can distinguish different interfaces, and we will demonstrate that in the following - by considering three experimental images:

In [None]:
download_link_fcc = 'https://www.dropbox.com/s/flfy5qe1qxv47t6/Cu_fcc_111.npy?dl=0'
download_link_bcc = 'https://www.dropbox.com/s/ukab367rktmddse/Fe_bcc_100.npy?dl=0'
download_link_hcp = 'https://www.dropbox.com/s/q4rvqcy87u3ath9/Ti_hcp_0001.npy?dl=0'

!wget -q $download_link_fcc -O 'Cu_fcc_100.npy'
!wget -q $download_link_bcc -O 'Fe_bcc_100.npy'
!wget -q $download_link_hcp -O 'Ti_hcp_0001.npy'

images = [np.load('Cu_fcc_100.npy'),
          np.load('Fe_bcc_100.npy'),
          np.load('Ti_hcp_0001.npy')]

image_names = ['Cu_fcc_100',
               'Fe_bcc_100',
               'Ti_hcp_0001']

pixel_to_angstrom= [0.08805239,
                    0.12452489,
                    0.12452489]

adapted_window_sizes = [int(window_size * (1. / ratio)) for ratio in  pixel_to_angstrom]


fig, axs = plt.subplots(1, 3, figsize=(20,20))
axs[0].imshow(images[0], cmap='gray')
axs[0].set_title('Cu fcc [100]')
axs[1].imshow(images[1], cmap='gray')
axs[1].set_title('Fe bcc [100]')
axs[2].imshow(images[2], cmap='gray')
axs[2].set_title('Ti hcp [0001]')
plt.show()

Next, we load precalculated neural-network representations:

In [None]:
# Download links of the precalculated neural-network representations
# (one file per image: Cu fcc, Fe bcc, Ti hcp)
url_nn_rep_fcc = 'https://www.dropbox.com/s/e4ny6a1ird1v7u8/nn_representations_Cu_fcc_100.npy?dl=0'
url_nn_rep_bcc = 'https://www.dropbox.com/s/wbpjgiwyd0iozgm/nn_representations_Fe_bcc_100.npy?dl=0'
url_nn_rep_hcp = 'https://www.dropbox.com/s/l59chdveknm4mq5/nn_representations_Ti_hcp_0001.npy?dl=0'
    
# Store the files locally as 'nn_rep_<image name>.npy', the naming scheme
# expected by the loading cell below
!wget -q $url_nn_rep_fcc -O 'nn_rep_Cu_fcc_100.npy'
!wget -q $url_nn_rep_bcc -O 'nn_rep_Fe_bcc_100.npy'
!wget -q $url_nn_rep_hcp -O 'nn_rep_Ti_hcp_0001.npy'

To calculate these representations, we employed a 12x12 pixels stride for Cu and Ti, while reducing the stride to 6x6 pixels for Fe. A window size of 12 Angstrom is selected (corresponding to 96 pixels for Fe, Ti and 136 for Cu).

Next, we concatenate these representations in order to calculate the embedding into 2D via UMAP:

In [None]:
# One array of representations per image, loaded from 'nn_rep_<image name>.npy'
nn_representations = [np.load(f'nn_rep_{name}.npy') for name in image_names]
# Stack the per-image arrays along the first axis into a single data set
nn_representations_combined = np.concatenate(nn_representations, axis=0)
print(nn_representations_combined.shape)

To be able to conduct a similar analysis as done for the Fe bcc [100] image before, we also load precalculated  assignments and uncertainty estimates:

In [None]:
url_assignments_fcc = 'https://www.dropbox.com/s/e0hav8kkufat57w/assignments_Cu_fcc_100.npy?dl=0'
url_assignments_bcc = 'https://www.dropbox.com/s/ovtadac1whxkm2g/assignments_Fe_bcc_100.npy?dl=0'
url_assignments_hcp = 'https://www.dropbox.com/s/ksrayk5dopizdqa/assignments_Ti_hcp_0001.npy?dl=0'

!wget -q $url_assignments_fcc -O 'assignments_Cu_fcc_100.npy'
!wget -q $url_assignments_bcc -O 'assignments_Fe_bcc_100.npy'
!wget -q $url_assignments_hcp -O 'assignments_Ti_hcp_0001.npy'

assignments = [np.load('assignments_{}.npy'.format(_)) for _ in image_names]

url_uncertainty_fcc = 'https://www.dropbox.com/s/hy1hrr4rq22cqgu/uncertainty_Cu_fcc_100.npy?dl=0'
url_uncertainty_bcc = 'https://www.dropbox.com/s/y9g5r1u0k3h7vvs/uncertainty_Fe_bcc_100.npy?dl=0'
url_uncertainty_hcp = 'https://www.dropbox.com/s/e1mx9rjeyadg4m9/uncertainty_Ti_hcp_0001.npy?dl=0'

!wget -q $url_uncertainty_fcc -O 'uncertainty_Cu_fcc_100.npy'
!wget -q $url_uncertainty_bcc -O 'uncertainty_Fe_bcc_100.npy'
!wget -q $url_uncertainty_hcp -O 'uncertainty_Ti_hcp_0001.npy'

uncertainty = [np.load('uncertainty_{}.npy'.format(_)) for _ in image_names]

assignments_combined = np.concatenate(assignments, axis=0)
uncertainty_combined = np.concatenate(uncertainty, axis=0)
print(assignments_combined.shape, uncertainty_combined.shape)

Now we calculate the UMAP embedding, where we choose a specific number of neighbors and minimum distance value (other values may be easily tested, see above for the code to test different settings of, for instance, number of neighbors and minimum distance):

In [None]:
# UMAP settings for the combined data set
n_neighbors = 500
min_dist = 0.9
metric = 'euclidean'
n_components = 2
# marker size for plotting
s = 1

data_for_fitting = nn_representations_combined

# Fit UMAP on the combined representations and embed the same data
mapper = umap.UMAP(
    n_neighbors=n_neighbors,
    min_dist=min_dist,
    metric=metric,
    n_components=n_components,
)
mapper = mapper.fit(data_for_fitting)
embedding = mapper.transform(data_for_fitting)

# Plain scatter plot of the embedding, without ticks and with equal axis scaling
fig, axs = plt.subplots(figsize=(10, 10))
x_coords, y_coords = embedding[:, 0], embedding[:, 1]
axs.scatter(x_coords, y_coords, s=s)
for set_ticks in (axs.set_xticks, axs.set_yticks):
    set_ticks([])

axs.set_aspect('equal')

Employing assignments and mutual information as color scales, respectively, supports the above claim of AI-STEM being able to separate not only different bulk symmetries or bulk from interface regions - but also different interface types, despite never being explicitly instructed to do so:

In [None]:
fig, axs = plt.subplots(1, 2, figsize=(20, 20), gridspec_kw={'width_ratios': [0.91, 1]})
class_panel, uncertainty_panel = axs[0], axs[1]

# Left panel: embedding colored by the assigned class
df = pd.DataFrame({'e1': embedding[:, 0], 'e2': embedding[:, 1],
                   'target': assignments_combined})
im = sns.scatterplot(x="e1", y="e2", hue="target",
                     data=df, ax=class_panel, palette='tab10', s=s)
# remove ticks and axis labels
for axis_setting in ({'xticks': []}, {'yticks': []}, {'xlabel': None}, {'ylabel': None}):
    im.set(**axis_setting)
class_panel.set_aspect('equal')
class_panel.legend(loc='lower right')
class_panel.set_title('Color scale: most likely class')


# Right panel: embedding colored by the uncertainty estimate
im = uncertainty_panel.scatter(embedding[:, 0], embedding[:, 1],
                               s=s, c=uncertainty_combined, cmap='hot')
uncertainty_panel.set_xticks([])
uncertainty_panel.set_yticks([])
uncertainty_panel.set_aspect('equal')
uncertainty_panel.set_title('Color scale: Bayesian uncertainty (mutual information)')
fig.colorbar(im, ax=uncertainty_panel, fraction=0.05)
plt.show()

# Apply UMAP and visualize results

In [None]:
# define dictionaries for visualizing
#
# At this point `nn_representations` and `uncertainty` are per-image lists
# (they were rebound when the precalculated files were loaded), so indexing
# `uncertainty['mutual_information']` would raise a TypeError. Use the
# concatenated arrays of the combined data set instead.
layer_activations = {'nn_rep': nn_representations_combined}

# The plotting cell below passes the targets to a matplotlib colormap, which
# needs numeric values. If the stored assignments are not numeric, replace
# them by integer codes (one code per distinct label, in sorted label order).
class_targets = np.asarray(assignments_combined)
if not np.issubdtype(class_targets.dtype, np.number):
    class_targets = np.unique(class_targets, return_inverse=True)[1]

targets = {'nn_rep': {'argmax': class_targets, 'mut_info': uncertainty_combined}}

In [None]:
# Apply UMAP

# most important parameter:
# number of neighbors employed
# for calculating low-dimensional (here, 2D)
# embedding
n_neighbors_list = [5, 50, 200]
# choose Euclidean metric
# for measuring distance between data points
metric = 'euclidean'
# Choose 2 as embedding dimension
n_components = 2
# plotting parameters
s = 2.5
edgecolors = 'face'

for n_neighbors in n_neighbors_list:
    logger.info('Apply UMAP for number of neighbors = {}'.format(n_neighbors))

    for key in layer_activations:

        data_for_fitting = layer_activations[key]

        mapper1 = umap.UMAP(n_neighbors=n_neighbors,
                            metric=metric,
                            n_components=n_components).fit(data_for_fitting)
        embedding = mapper1.transform(data_for_fitting)

        # one figure per color scale defined for this set of activations
        for target in targets[key]:
            # 'hot' for the mutual information, 'tab10' for every other target
            cmap = 'hot' if target == 'mut_info' else 'tab10'

            fig, axs = plt.subplots(facecolor='white', figsize=(10, 10))
            df = pd.DataFrame({'e1': embedding[:, 0], 'e2': embedding[:, 1], 'target': targets[key][target]})

            # A colormap needs numeric values: translate text class labels via
            # the lookup table returned by load_class_dicts(). (This replaces a
            # branch that tested for a key that never occurs and referenced an
            # undefined name.)
            if not pd.api.types.is_numeric_dtype(df['target']):
                df['target'] = [text_to_numerical_labels[_] for _ in df['target'].values]

            im = axs.scatter(df['e1'].values, df['e2'].values, c=df['target'], cmap=cmap, s=s)
            axs.set_aspect('equal')
            fig.colorbar(im, ax=axs)
            plt.tight_layout()

            # uncomment if want to save
            #plt.savefig('{}_{}_nn_{}_embedding.png'.format(key, target, n_neighbors), dpi=200)
            #plt.close()
            plt.show()

# Repeat analysis for several images

In [None]:
# Download links of the three experimental images.
# NOTE(review): the remote Cu file is named 'Cu_fcc_111.npy' but is stored
# locally as 'Cu_fcc_100.npy' — confirm which orientation is correct.
download_link_fcc = 'https://www.dropbox.com/s/flfy5qe1qxv47t6/Cu_fcc_111.npy?dl=0'
download_link_bcc = 'https://www.dropbox.com/s/ukab367rktmddse/Fe_bcc_100.npy?dl=0'
download_link_hcp = 'https://www.dropbox.com/s/q4rvqcy87u3ath9/Ti_hcp_0001.npy?dl=0'

# Fetch the files quietly and store them under fixed local names
!wget -q $download_link_fcc -O 'Cu_fcc_100.npy'
!wget -q $download_link_bcc -O 'Fe_bcc_100.npy'
!wget -q $download_link_hcp -O 'Ti_hcp_0001.npy'

In [None]:
# The three experimental images, in the same order as image_names
images = [np.load('Cu_fcc_100.npy'),
          np.load('Fe_bcc_100.npy'),
          np.load('Ti_hcp_0001.npy')]

image_names = ['Cu_fcc_100',
               'Fe_bcc_100',
               'Ti_hcp_0001']

window_size = 12. # units: Angstrom

# Pixel/Angstrom relation of each image
pixel_to_angstrom= [0.08805239,
                    0.12452489,
                    0.12452489]

# Stride of the sliding window for each image. A coarse 36x36 stride is used
# here; the precalculated representations loaded earlier in this notebook were
# obtained with the finer strides [[12, 12], [6, 6], [12, 12]]. (A first
# assignment with those finer values was immediately overwritten and has been
# removed.)
strides = [[36, 36], [36, 36], [36, 36]]

# Window size converted from Angstrom to pixels, one value per image
adapted_window_sizes = [int(window_size * (1. / ratio)) for ratio in  pixel_to_angstrom]
print(adapted_window_sizes)

In [None]:
# Show the three images side by side, each with its own title
panel_titles = ['Cu fcc [100]', 'Fe bcc [100]', 'Ti hcp [0001]']
fig, axs = plt.subplots(1, 3, figsize=(20,20))
for panel, panel_image, panel_title in zip(axs, images, panel_titles):
    panel.imshow(panel_image, cmap='gray')
    panel.set_title(panel_title)
plt.show()

In [None]:
# Descriptors of all images are collected in one flat list, image after image
fft_descriptors = []
per_image_settings = zip(images, strides, adapted_window_sizes, image_names)
for input_image, stride_size, adapted_window_size, image_name in per_image_settings:

    # cut the image into local fragments with a sliding window
    logger.info('Extract local fragments for image {}.'.format(image_name))
    sliced_images, spm_pos, ni, nj = localwindow(
        input_image,
        stride_size=stride_size,
        pixel_max=adapted_window_size,
    )

    # one FFT-HAADF descriptor per local fragment
    logger.info('Calculate FFT-HAADF descriptor for image {}.'.format(image_name))
    fft_descriptors.extend(
        calc_fft(fragment, sigma=None, thresholding=True)
        for fragment in sliced_images
    )

# Add a trailing axis of length one so that the array matches the model input shape.
# NOTE(review): the window size in pixels differs between images; this step
# presumably relies on calc_fft returning descriptors of one common shape — confirm.
stacked_descriptors = [np.stack([descriptor]) for descriptor in fft_descriptors]
data = np.moveaxis(np.array(stacked_descriptors), 1, -1)

In [None]:
# Truncated model: the pretrained model without its last classification layer.
# The layer right before the classifier becomes the new final layer.
outpout_layer_name = 'Dense_1'
inputs = model.input
outputs = model.get_layer(outpout_layer_name).output
intermediate_layer_model = Model(
    inputs=inputs,
    outputs=outputs,
)
intermediate_layer_model.summary()


# Hidden representations from the truncated model
nn_representations = decode_preds(data, intermediate_layer_model, n_iter=10)
# Class predictions and uncertainty estimates from the full model
prediction, uncertainty = predict_with_uncertainty(
    data,
    model,
    model_type='classification',
    n_iter=10,
)

In [None]:
# Dictionaries used by the plotting cell below:
# activations to embed, and the color scales to show for each of them
nn_rep_targets = {
    'argmax': prediction.argmax(axis=-1),
    'mut_info': uncertainty['mutual_information'],
}
layer_activations = {'nn_rep': nn_representations}
targets = {'nn_rep': nn_rep_targets}

In [None]:
# Apply UMAP

# most important parameter:
# number of neighbors employed
# for calculating low-dimensional (here, 2D)
# embedding
n_neighbors_list = [5, 50, 200]
# choose Euclidean metric
# for measuring distance between data points
metric = 'euclidean'
# Choose 2 as embedding dimension
n_components = 2
# plotting parameters
s = 2.5
edgecolors = 'face'

for n_neighbors in n_neighbors_list:
    logger.info('Apply UMAP for number of neighbors = {}'.format(n_neighbors))

    for key in layer_activations:

        data_for_fitting = layer_activations[key]

        mapper1 = umap.UMAP(n_neighbors=n_neighbors,
                            metric=metric,
                            n_components=n_components).fit(data_for_fitting)
        embedding = mapper1.transform(data_for_fitting)

        # one figure per color scale defined for this set of activations
        for target in targets[key]:
            # 'hot' for the mutual information, 'tab10' for every other target
            cmap = 'hot' if target == 'mut_info' else 'tab10'

            fig, axs = plt.subplots(facecolor='white', figsize=(10, 10))
            df = pd.DataFrame({'e1': embedding[:, 0], 'e2': embedding[:, 1], 'target': targets[key][target]})

            # A colormap needs numeric values: translate text class labels via
            # the lookup table returned by load_class_dicts(). (This replaces a
            # branch that tested for a key that never occurs and referenced an
            # undefined name.)
            if not pd.api.types.is_numeric_dtype(df['target']):
                df['target'] = [text_to_numerical_labels[_] for _ in df['target'].values]

            im = axs.scatter(df['e1'].values, df['e2'].values, c=df['target'], cmap=cmap, s=s)
            axs.set_aspect('equal')
            fig.colorbar(im, ax=axs)
            plt.tight_layout()

            # uncomment if want to save
            #plt.savefig('{}_{}_nn_{}_embedding.png'.format(key, target, n_neighbors), dpi=200)
            #plt.close()
            plt.show()

In [None]:
download_link_fcc = 'https://www.dropbox.com/s/flfy5qe1qxv47t6/Cu_fcc_111.npy?dl=0'

!wget -q $download_link_fcc -O 'Cu_fcc_100.npy'

image = np.load('Cu_fcc_100.npy')
image_name = 'Cu_fcc_100'
pixel_to_angstrom = 0.08805239

plt.imshow(image, cmap='gray')
plt.show()

In [None]:
# Class predictions and uncertainty estimates from the full model.
# NOTE(review): `data` is not recomputed in this cell or in the preceding one
# (which only loads and displays the Cu image), so this call presumably still
# operates on the descriptors built earlier — confirm whether the
# fragmentation / FFT-descriptor steps for the Cu image are missing here.
prediction, uncertainty = predict_with_uncertainty(data, model, 
                                                   model_type='classification', 
                                                   n_iter=10)