# Benchmarks and Tests of Maps for Perovskite Oxides

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx

In [None]:
# Plot-style switches used throughout the notebook.
# The current font size is stored before any publication override is applied.
default_fontsize = plt.rcParams['font.size']
publication_fontsize_large = 20
# Set to True to switch the figures to the larger publication font size.
publication = False
if publication:
    plt.rcParams['font.size'] = publication_fontsize_large

## $LaSrCo_2O_6$ Surface

Generate the ASE.Atoms instance using the XYZ read function. 

In [None]:
# Load the structure from the XYZ file into an ASE Atoms object.
from ase.io import read
LaSrCo2O6 = read('La.5Sr.5CoO3.xyz')

In [None]:
# Render the loaded structure with ASE's 'x3d' viewer for a quick visual check.
from ase.visualize import view
view(LaSrCo2O6, viewer='x3d')

In [None]:
# Display the atomic positions of the loaded structure.
LaSrCo2O6.positions

From the `ASE.Cell` and the generated `ASE.Atoms`, create a `MapSy.Grid` and a `MapSy.System`

In [None]:
# Build the MapSy grid from the cell of the loaded structure.
from mapsy.data import Grid
grid: Grid = Grid(cell=LaSrCo2O6.cell)

In [None]:
# Combine the grid and the atoms into a MapSy System declared as 2D.
# NOTE(review): axis=2 presumably selects the z direction as the slab normal — confirm.
from mapsy.data import System
system: System = System(grid, LaSrCo2O6, dimension=2, axis=2)

In this notebook we will consider a simplified contact space composed of points that all lie in the same planes above and below the material. This contact space is generated starting from a smoothly-varying boundary function that is centered on the system's center of mass and varies smoothly from 1 to 0 at the specified `distance` from the center. The smoothness of the transition is controlled by the `spread` parameter. The resolution of the points is controlled by the `cutoff` keyword, with larger values corresponding to finer grids. Given the 2D nature of the system (as specified in the `dimension` and `axis` keywords above), the generated points ideally correspond to two flat regions above and below the material and perpendicular to the z Cartesian direction. The `side` keyword allows one to choose the top (+1) or bottom (-1) region. Setting the `threshold` to a negative value will select the points that have the largest modulus of the gradient of the boundary function; it thus allows us to focus only on the points at the center of the selected transition region. 

In [None]:
from mapsy.io.parser import ContactSpaceGenerator, ContactSpaceModel

# Contact-space options, validated through the ContactSpaceModel below.
contactspace_options = {
    "mode": "system",
    "distance": 5.5,   # distance from the center at which the boundary goes from 1 to 0
    "spread": 1.0,     # smoothness of the transition
    "cutoff": 80,      # resolution of the points (larger means finer)
    "threshold": -1,   # negative: keep the points with the largest gradient modulus
    "side": 1,         # +1 selects the top region, -1 the bottom one
}
contactspacesettings = ContactSpaceModel.parse_obj(contactspace_options)
contactspace = ContactSpaceGenerator(contactspacesettings).generate(system)

Check the generated contact space: each point is associated with a `probability` column that corresponds to the scaled modulus of the gradient of the contact space interface. For each point we also compute the indexes of the neighboring points and the region (topologically disconnected groups of points) to which they belong. Given the slab nature of the substrate, we expect to generate two regions of points on the two opposite faces of the material.

In [None]:
# Inspect the table of generated contact-space points.
contactspace.data

In [None]:
from mapsy.symfunc.input import SymmetryFunctionsModel, SymFuncModel
from mapsy.symfunc.parser import SymmetryFunctionsParser

# Structural descriptors: two "ac" functions with the same radius and order,
# the second one additionally passing radial=False.
symfuncsettings_structural = SymmetryFunctionsModel.parse_obj(
    {
        "functions": [
            SymFuncModel.parse_obj(
                {
                    "type": "ac",
                    "radius": 4.5,
                    "order": 10,
                    "compositional": False,
                    "structural": True,
                }
            ),
            SymFuncModel.parse_obj(
                {
                    "type": "ac",
                    "radius": 4.5,
                    "order": 10,
                    "compositional": False,
                    "structural": True,
                    "radial": False,
                }
            ),
        ]
    }
)
symmetryfunctions_structural = SymmetryFunctionsParser(symfuncsettings_structural).parse()

# Compositional descriptors: same layout with the compositional/structural flags swapped.
symfuncsettings_compositional = SymmetryFunctionsModel.parse_obj(
    {
        "functions": [
            SymFuncModel.parse_obj(
                {
                    "type": "ac",
                    "radius": 4.5,
                    "order": 10,
                    "compositional": True,
                    "structural": False,
                }
            ),
            SymFuncModel.parse_obj(
                {
                    "type": "ac",
                    "radius": 4.5,
                    "order": 10,
                    "compositional": True,
                    "structural": False,
                    "radial": False,
                }
            ),
        ]
    }
)
symmetryfunctions_compositional = SymmetryFunctionsParser(symfuncsettings_compositional).parse()

In [None]:
from mapsy.maps import Maps

# One Maps object per descriptor set, both built on the same system and contact space.
maps_structural = Maps(system, symmetryfunctions_structural, contactspace)
maps_compositional = Maps(system, symmetryfunctions_compositional, contactspace)

In [None]:
# Compute the feature tables for both descriptor sets.
# NOTE(review): presumably evaluates the symmetry functions on the contact-space points — confirm.
data_structural = maps_structural.atcontactspace()
data_compositional = maps_compositional.atcontactspace()

We can visualize features to check how they look using `Maps.plot(feature: str)` or `Maps.plot(index: int)`. 

In [None]:
# Draw the first 20 compositional features, one figure per feature,
# titled with the corresponding feature name.
for index in range(20):
    fig, axes = maps_compositional.plot(
        index=index,
        cmap='Spectral',
        set_aspect='scaled',
        levels=20,
    )
    axes.set_title(f'{maps_compositional.features[index]}')
    axes.set_xlabel('x (Å)')
    axes.set_ylabel('y (Å)')
    plt.show()

## Dimensionality Reduction (PCA)

For visualization and post-processing purposes, perform dimensionality reduction on the generated features. We can first determine how many components we need, by checking the explained variance. 

In [None]:
# PCA on the scaled structural features without fixing the number of
# components; the call returns a figure and two axes.
fig, ax1, ax2 = maps_structural.reduce(scale=True)
if publication:
    ax1.set_title('PCA')
    fig.tight_layout()

In [None]:
# PCA on the scaled compositional features without fixing the number of
# components; the call returns a figure and two axes.
fig, ax1, ax2 = maps_compositional.reduce(scale=True)
if publication:
    ax1.set_title('PCA')
    fig.tight_layout()

For visualization purposes we don't want more than 3 components, but it seems that for this specific system 5 components are enough to explain 99% of the variance, with 6 components able to fully explain the variance in the features. 

In [None]:
# Reduce the scaled structural features to a fixed number of principal components.
npca_structural = 4
maps_structural.reduce(npca_structural, scale=True)

In [None]:
# Reduce the scaled compositional features to a fixed number of principal components.
npca_compositional = 5
maps_compositional.reduce(npca_compositional, scale=True)

We can visually inspect how the PCAs correlate with the Cartesian coordinates of the points (e.g., PCA3 distinguishes between HCP and FCC hollow sites)

In [None]:
# Map every structural principal component onto the x/y plane.
# Columns are named pca0, pca1, ...; titles use 1-based numbering.
for i in range(npca_structural):
    fig, axes = maps_structural.plot(
        feature=f'pca{i}',
        axes=['x', 'y'],
        cmap='Spectral',
        set_aspect='scaled',
        levels=20,
    )
    axes.set_title(f'PCA {i+1}')
    axes.set_xlabel('x (Å)')
    axes.set_ylabel('y (Å)')
    plt.show()

In [None]:
# Map every compositional principal component onto the x/y plane.
# Columns are named pca0, pca1, ...; titles use 1-based numbering.
for i in range(npca_compositional):
    fig, axes = maps_compositional.plot(
        feature=f'pca{i}',
        axes=['x', 'y'],
        cmap='Spectral',
        set_aspect='scaled',
        levels=20,
    )
    axes.set_title(f'PCA {i+1}')
    axes.set_xlabel('x (Å)')
    axes.set_ylabel('y (Å)')
    plt.show()

We can also verify how the contact space is transformed (folded) in the symmetry function space. 

In [None]:
# Scatter plot of the first structural feature over the x/y coordinates.
fig, ax = maps_structural.scatter(
    index=0,
    cmap='Spectral',
    axes=['x', 'y'],
    alpha=1.0,
    set_aspect='scaled',
    s=10,
)
ax.set_xlabel('x (Å)')
ax.set_ylabel('y (Å)')
if publication:
    ax.set_title("")
plt.show()

In [None]:
# Scatter plot of the first compositional feature over the x/y coordinates.
fig, ax = maps_compositional.scatter(
    index=0,
    cmap='Spectral',
    axes=['x', 'y'],
    alpha=1.0,
    set_aspect='scaled',
    s=10,
)
ax.set_xlabel('x (Å)')
ax.set_ylabel('y (Å)')
if publication:
    ax.set_title("")
plt.show()

In [None]:
# Structural data in the PC1/PC2 plane, colored by the first structural feature.
# Gather the plotted quantities first: two PCA coordinates and the color values.
x1m = maps_structural.data['pca0'].values.astype(np.float64)
x2m = maps_structural.data['pca1'].values.astype(np.float64)
fm = maps_structural.data[maps_structural.features[0]].values.astype(np.float64)
fmin = fm.min()
fmax = fm.max()

fig, ax = plt.subplots(1, 1, figsize=(8, 4))
fig.subplots_adjust(hspace=0.3)
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 2')
ax.set_title("")
#ax.set_xlim(-8.5,13)
#ax.set_ylim(-1.5,4.8)
# Low alpha so that overlapping points build up visible density.
scatter = ax.scatter(
    x1m,
    x2m,
    c=fm,
    vmin=fmin,
    vmax=fmax,
    cmap='Spectral',
    alpha=0.05,
    s=60,
    edgecolors='black',
)
ax.axis('on')
plt.show()

In [None]:
# Compositional data in the PC1/PC3 plane, colored by the first compositional feature.
# Gather the plotted quantities first: two PCA coordinates and the color values.
x1m = maps_compositional.data['pca0'].values.astype(np.float64)
x2m = maps_compositional.data['pca2'].values.astype(np.float64)
fm = maps_compositional.data[maps_compositional.features[0]].values.astype(np.float64)
fmin = fm.min()
fmax = fm.max()

fig, ax = plt.subplots(1, 1, figsize=(8, 4))
fig.subplots_adjust(hspace=0.3)
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 3')
ax.set_title("")
#ax.set_xlim(-8.5,13)
#ax.set_ylim(-1.5,4.8)
# Low alpha so that overlapping points build up visible density.
scatter = ax.scatter(
    x1m,
    x2m,
    c=fm,
    vmin=fmin,
    vmax=fmax,
    cmap='Spectral',
    alpha=0.05,
    s=60,
    edgecolors='black',
)
ax.axis('on')
plt.show()

More in general, we can plot any feature in all possible 2D spaces of principal components:

In [None]:
# Grid of scatter plots of the first structural feature over pairs of principal components.
fig, gs = maps_structural.scatter_pca_grid(index=0,cmap='Spectral',set_aspect='equal',s=70, alpha=0.05)
fig.tight_layout()

In [None]:
# Grid of scatter plots of the first compositional feature over pairs of principal components.
fig, gs = maps_compositional.scatter_pca_grid(index=0,cmap='Spectral',set_aspect='equal',s=70, alpha=0.05)
fig.tight_layout()

## Perform Clustering on Generated Features

Use SpectralClustering to find N clusters in the featured data. First, we run the analysis to identify promising values of N in terms of the Silhouette scores and Davies-Bouldin indexes. Local maxima in the Silhouette score and local minima in the Davies-Bouldin index correspond to better clustered data. 

In [None]:
# Screen cluster counts up to 16 for the structural features;
# publication figures use 100 repetitions instead of a single one.
ntries = 100 if publication else 1
fig, ax1, ax2 = maps_structural.cluster(maxclusters=16, ntries=ntries)
if publication:
    for panel in (ax1, ax2):
        panel.set_title('')
plt.show()

The analysis above suggests that $N=3$, $N=6$, and $N=10$ may provide better clusters. We can perform the analysis with one of these values:

In [None]:
# Cluster the structural features with a fixed number of clusters.
# NOTE(review): the preceding text lists N=3, 6 and 10 as candidates, but 11 is
# used here — confirm which value is intended.
nclusters = 11
maps_structural.cluster(nclusters)

Given the clusters, plot the connectivity matrix

In [None]:
# Show the cluster connectivity matrix as an image.
plt.matshow(maps_structural.cluster_edges)
plt.show()

Given the clusters and the connectivity, find the high-symmetry sites

In [None]:
# Determine the sites from the structural clusters and their connectivity.
maps_structural.sites()

Visualize the results

In [None]:
# Structural clusters in real space (x/y), with centroids marked.
fig, ax = maps_structural.scatter(
    feature='Cluster',
    categorical=True,
    s=20,
    alpha=0.8,
    set_aspect='scaled',
    centroids=True,
)
ax.set_xlabel('x (Å)')
ax.set_ylabel('y (Å)')
ax.set_title('Clusters')
if publication:
    ax.set_title('')
    # get_legend() returns None when the axes carry no legend; guard the
    # removal so publication mode does not fail with an AttributeError.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()
plt.show()

We can now visualize the clustering in PCA space, together with the connectivity matrix and centroids

In [None]:
# Structural clusters in the PC1/PC2 plane, overlaid with the cluster
# connectivity graph.
axes = ['pca0', 'pca1']
fig, ax = maps_structural.scatter(
    feature='Cluster',
    categorical=True,
    axes=axes,
    alpha=0.05,
    s=70,
    edgecolors='black',
    set_aspect='on',
)
# Directed graph built from the connectivity matrix; node positions are the
# PCA coordinates of the centroid points.
G = nx.from_numpy_array(maps_structural.cluster_edges, create_using=nx.DiGraph, parallel_edges=False)
pos = maps_structural.data.loc[maps_structural.centroids, axes].values
# Edge weights are divided by 200 to obtain the line widths used for drawing.
weights = [d['weight'] / 200 for (u, v, d) in G.edges(data=True)]
nx.draw(G, pos, node_size=maps_structural.cluster_sizes, width=weights, ax=ax, alpha=0.5)
# Re-enable the axis frame and the ticks after drawing the graph.
limits = ax.axis('on')
ax.tick_params(left=True, bottom=True, labelleft=True, labelbottom=True)
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 2')
ax.set_title('Clusters')
if publication:
    ax.set_title('')
    # get_legend() returns None when the axes carry no legend; guard the
    # removal so publication mode does not fail with an AttributeError.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()
plt.show()

In [None]:
# Screen cluster counts up to 16 for the compositional features;
# publication figures use 100 repetitions instead of a single one.
ntries = 100 if publication else 1
fig, ax1, ax2 = maps_compositional.cluster(maxclusters=16, ntries=ntries)
if publication:
    for panel in (ax1, ax2):
        panel.set_title('')
plt.show()

The analysis above suggests that $N=5$ and $N=13$ may provide better clusters. We can perform the analysis with one of these values:

In [None]:
# Cluster the compositional features with a fixed number of clusters.
nclusters = 13
maps_compositional.cluster(nclusters)

In [None]:
# Determine the sites from the compositional clusters.
maps_compositional.sites()

In [None]:
# Compositional clusters in real space (x/y), with centroids marked.
fig, ax = maps_compositional.scatter(
    feature='Cluster',
    categorical=True,
    s=20,
    alpha=0.8,
    set_aspect='scaled',
    centroids=True,
)
ax.set_xlabel('x (Å)')
ax.set_ylabel('y (Å)')
ax.set_title('Clusters')
if publication:
    ax.set_title('')
    # get_legend() returns None when the axes carry no legend; guard the
    # removal so publication mode does not fail with an AttributeError.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()
plt.show()

In [None]:
# Compositional clusters in the PC1/PC3 plane, overlaid with the cluster
# connectivity graph.
axes = ['pca0', 'pca2']
fig, ax = maps_compositional.scatter(
    feature='Cluster',
    categorical=True,
    axes=axes,
    alpha=0.05,
    s=70,
    edgecolors='black',
    set_aspect='on',
)
# Directed graph built from the connectivity matrix; node positions are the
# PCA coordinates of the centroid points.
G = nx.from_numpy_array(maps_compositional.cluster_edges, create_using=nx.DiGraph, parallel_edges=False)
pos = maps_compositional.data.loc[maps_compositional.centroids, axes].values
# Edge weights are divided by 200 to obtain the line widths used for drawing.
weights = [d['weight'] / 200 for (u, v, d) in G.edges(data=True)]
nx.draw(G, pos, node_size=maps_compositional.cluster_sizes, width=weights, ax=ax, alpha=0.5)
# Re-enable the axis frame and the ticks after drawing the graph.
limits = ax.axis('on')
ax.tick_params(left=True, bottom=True, labelleft=True, labelbottom=True)
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 3')
ax.set_title('Clusters')
if publication:
    ax.set_title('')
    # get_legend() returns None when the axes carry no legend; guard the
    # removal so publication mode does not fail with an AttributeError.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()
plt.show()