# Testing the Effects of the Model's Hyperparameters

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx

In [None]:
# Keep the stock font size around, then optionally enlarge the global
# font for publication-quality figures.
default_fontsize = plt.rcParams['font.size']
publication_fontsize_large = 20
publication = True
if publication:
    plt.rcParams['font.size'] = publication_fontsize_large

## Benchmark Systems Setup

In [None]:
from mapsy.data import Grid
from mapsy.data import System

Pt (111) surface

In [None]:
from ase.build import fcc111
# Build the Pt(111) slab with ASE, then wrap it in a grid and a system
# object (dimension=2, axis=2).
Pt111 = fcc111(
    "Pt",
    size=(4, 4, 3),
    a=3.94,
    orthogonal=True,
    periodic=True,
    vacuum=10,
)
grid111: Grid = Grid(cell=Pt111.cell)
system111: System = System(grid111, Pt111, dimension=2, axis=2)

Pt (100) surface

In [None]:
from ase.build import fcc100
# Same construction as the (111) benchmark, for the (100) facet.
Pt100 = fcc100(
    "Pt",
    size=(4, 4, 3),
    a=3.94,
    orthogonal=True,
    periodic=True,
    vacuum=10,
)
grid100: Grid = Grid(cell=Pt100.cell)
system100: System = System(grid100, Pt100, dimension=2, axis=2)

## Base Hyperparameter Settings

In [None]:
from mapsy.io.parser import ContactSpaceGenerator, ContactSpaceModel
# One set of reference contact-space settings, applied unchanged to both
# benchmark systems.
contactspacesettings = ContactSpaceModel.parse_obj(
    {
        "mode": "system",
        "distance": 3.5,
        "spread": 1.0,
        "cutoff": 80,
        "threshold": -1,
        'side': 1,
    }
)
contactspace_111_base, contactspace_100_base = (
    ContactSpaceGenerator(contactspacesettings).generate(system)
    for system in (system111, system100)
)

In [None]:
from mapsy.symfunc.input import SymmetryFunctionsModel, SymFuncModel
from mapsy.symfunc.parser import SymmetryFunctionsParser
# Reference descriptor set: two "ac" functions sharing radius and order,
# the second one declared with radial=False.
symfuncsettings = SymmetryFunctionsModel.parse_obj(
    {
        "functions": [
            SymFuncModel.parse_obj(
                {"type": "ac", "radius": 4.5, "order": 10, "compositional": False, "structural": True, **extra}
            )
            for extra in ({}, {"radial": False})
        ]
    }
)
symmetryfunctions_base = SymmetryFunctionsParser(symfuncsettings).parse()

In [None]:
from mapsy.maps import Maps
# One Maps object per benchmark surface, both using the reference descriptors.
maps_111_base, maps_100_base = (
    Maps(system, symmetryfunctions_base, contactspace)
    for system, contactspace in (
        (system111, contactspace_111_base),
        (system100, contactspace_100_base),
    )
)

In [None]:
# Evaluate the features on the contact space, then draw the first one in
# the x-y plane.
data = maps_111_base.atcontactspace()
fig, axes = maps_111_base.plot(index=0, cmap='Spectral', set_aspect='scaled', levels=20)
axes.set(
    title=f'{maps_111_base.features[0]}',
    xlabel='x (Å)',
    ylabel='y (Å)',
)
plt.show()

In [None]:
# Reference PCA settings (number of components, scaling flag), reused by
# later sections.
npca_base, scale_base = 2, True
maps_111_base.reduce(npca_base, scale=scale_base)

In [None]:
# One x-y map per retained principal component (titles are 1-based).
for i in range(npca_base):
    fig, axes = maps_111_base.plot(
        feature=f'pca{i}',
        axes=['x', 'y'],
        cmap='Spectral',
        set_aspect='scaled',
        levels=20,
    )
    axes.set(title=f'PCA {i+1}', xlabel='x (Å)', ylabel='y (Å)')
    plt.show()

Clustering

In [None]:
# Reference clustering-screening settings, reused by later sections.
ntries_base, max_clusters_base = 50, 10
fig, ax1, ax2 = maps_111_base.cluster(maxclusters=max_clusters_base, ntries=ntries_base)
# Blank out both panel titles.
for panel in (ax1, ax2):
    panel.set_title('')
plt.show()

# Contact Space Hyperparameters

## Effects of Cutoff

In [None]:
from mapsy.io.parser import ContactSpaceGenerator, ContactSpaceModel
# Cutoff values to compare, coarsest first (labelled "Ry" in the plots below).
# The settings objects are derived from this single list so the models and
# the x-axis values used for plotting cannot drift apart.
cutoffs = [10, 20, 40, 80, 160]
contacspacemodels = [
    ContactSpaceModel.parse_obj(
        {"mode": "system", "distance": 3.5, "spread": 1.0, "cutoff": cutoff, "threshold": -1, 'side': 1}
    )
    for cutoff in cutoffs
]
# Keep the individual named settings available, as before.
(
    cssettings_verycoarse,
    cssettings_coarse,
    cssettings_medium,
    cssettings_fine,
    cssettings_veryfine,
) = contacspacemodels

Generate maps for each contact space model

In [None]:
# Build one Maps object per contact-space model (always on the Pt(111)
# system) and evaluate its features on the contact space right away.
maps_testing = []
for cssettings in contacspacemodels:
    csg = ContactSpaceGenerator(cssettings).generate(system111)
    maps_testing += [Maps(system111, symmetryfunctions_base, csg)]
    data = maps_testing[-1].atcontactspace()

In [None]:
# Display the number of contact-space rows for the first (cutoff = 10) model.
len(maps_testing[0].contactspace.data)

In [None]:
# Size of the contact-space table for every cutoff.
points = list(map(len, (maps.contactspace.data for maps in maps_testing)))
plt.plot(cutoffs, points, marker='o', linestyle='-')
plt.xlabel('Cutoff (Ry)')
plt.title('Number of Contact Space Points')

In [None]:
# z coordinate of the row labelled 0 in each contact space, compared with a
# constant reference line at the slab centre of mass + 3.5.
positions = [
    maps.contactspace.data.loc[0, 'z']
    for maps in maps_testing
]
plt.plot(cutoffs, positions, 'o-')
plt.plot(
    cutoffs,
    [Pt111.get_center_of_mass()[2] + 3.5 for _ in cutoffs],
    'k--',
    label='Center of Mass + 3.5 Å',
)
plt.xlabel('Cutoff (Ry)')
plt.legend(fontsize=14, frameon=False)
plt.title('Position Above the Slab (Å)')

In [None]:
# First feature of every map as a scatter plot, one figure per cutoff.
for maps, cutoff in zip(maps_testing, cutoffs):
    fig, axes = maps.scatter(index=0, cmap='Spectral', set_aspect='scaled')
    axes.set(title=f'Cutoff = {cutoff} Ry', xlabel='x (Å)', ylabel='y (Å)')
plt.show()

In [None]:
# Run the PCA with the reference settings on every map, then show the
# second component ('pca1') in the x-y plane.
for maps, cutoff in zip(maps_testing, cutoffs):
    maps.reduce(npca_base, scale=scale_base)
    fig, axes = maps.plot(
        feature='pca1',
        axes=['x', 'y'],
        cmap='Spectral',
        set_aspect='scaled',
        levels=20,
    )
    axes.set(title=f'PCA 2, Cutoff = {cutoff} Ry', xlabel='x (Å)', ylabel='y (Å)')

In [None]:
# First feature drawn in the plane of the first two principal components.
for maps, cutoff in zip(maps_testing, cutoffs):
    fig, axes = maps.scatter(index=0, axes=['pca0', 'pca1'], cmap='Spectral', set_aspect='scaled')
    axes.set(
        title=f'Cutoff = {cutoff} Ry',
        xlabel='Principal Component 1',
        ylabel='Principal Component 2',
    )

Effects of the cutoff on clustering

In [None]:
# Cluster-number screening for every cutoff, with the reference settings.
for maps, cutoff in zip(maps_testing, cutoffs):
    fig, ax1, ax2 = maps.cluster(maxclusters=max_clusters_base, ntries=ntries_base)
    ax1.set(title=f'Cutoff = {cutoff} Ry')
    ax2.set(title='')
plt.show()

In [None]:
# Cluster every map with the same fixed number of clusters, then derive the sites.
nclusters = 7
for maps in maps_testing:
    maps.cluster(nclusters)
    maps.sites()

In [None]:
# Cluster labels in real space, with centroids marked.
for maps, cutoff in zip(maps_testing, cutoffs):
    fig, ax = maps.scatter(
        feature='Cluster',
        categorical=True,
        s=20,
        alpha=0.8,
        set_aspect='scaled',
        centroids=True,
    )
    ax.set(xlabel='x (Å)', ylabel='y (Å)', title=f'Cutoff = {cutoff} Ry')
plt.show()

In [None]:
# Cluster labels in the plane of the first two principal components, with
# the cluster connectivity graph drawn on top (nodes at the centroids).
axes = ['pca0', 'pca1']
for maps, cutoff in zip(maps_testing, cutoffs):
    fig, ax = maps.scatter(
        feature='Cluster',
        categorical=True,
        axes=axes,
        alpha=0.05,
        s=70,
        edgecolors='black',
        set_aspect='on',
    )
    G = nx.from_numpy_array(maps.cluster_edges, create_using=nx.DiGraph, parallel_edges=False)
    pos = maps.data.loc[maps.centroids, axes].values
    # Edge widths are the edge weights scaled down by 200.
    weights = [w / 200 for _, _, w in G.edges(data='weight')]
    nx.draw(G, pos, node_size=maps.cluster_sizes, width=weights, ax=ax, alpha=0.5)
    # Switch the axis decorations back on after nx.draw.
    limits = ax.axis('on')
    ax.tick_params(left=True, bottom=True, labelleft=True, labelbottom=True)
    ax.set(
        xlabel='Principal Component 1',
        ylabel='Principal Component 2',
        title=f'Cutoff = {cutoff} Ry',
    )
plt.show()

# Features Hyperparameters

## Effects of RCut

In [None]:
# Range of cutoff radii to test.
# Lower bound: (centre of mass + 3.5) minus the highest atomic z, rounded to
# an integer and increased by one step.
# Upper bound (excluded): half of the smallest diagonal entry of the cell.
rcut_step = 0.5
rcut_min = Pt111.get_center_of_mass()[2] + 3.5 - Pt111.positions[:, 2].max()
rcut_min = np.round(rcut_min) + rcut_step
rcut_max = np.array(Pt111.cell).diagonal().min() / 2
rcuts = np.arange(rcut_min, rcut_max, rcut_step)
print(rcuts)

In [None]:
# One descriptor set per cutoff radius; each has the same two "ac" functions
# as the reference set, only the radius changes.
sfmodels = []
for rcut in rcuts:
    symfuncsettings = SymmetryFunctionsModel.parse_obj(
        {
            "functions": [
                SymFuncModel.parse_obj(
                    {"type": "ac", "radius": rcut, "order": 10, "compositional": False, "structural": True, **extra}
                )
                for extra in ({}, {"radial": False})
            ]
        }
    )
    sfmodels.append(SymmetryFunctionsParser(symfuncsettings).parse())

In [None]:
# One Maps object per descriptor set, all on the reference contact space.
maps_testrcut = []
for symmetryfunctions in sfmodels:
    maps_testrcut += [Maps(system111, symmetryfunctions, contactspace_111_base)]
    data = maps_testrcut[-1].atcontactspace()

In [None]:
# First feature of every map as a scatter plot, one figure per radius.
for maps, rcut in zip(maps_testrcut, rcuts):
    fig, axes = maps.scatter(index=0, cmap='Spectral', set_aspect='scaled', s=10)
    axes.set(title=f'RCut = {rcut} Å', xlabel='x (Å)', ylabel='y (Å)')
plt.show()

In [None]:
# Four-component PCA on every map, then the second component ('pca1') in
# the x-y plane.
for maps, rcut in zip(maps_testrcut, rcuts):
    maps.reduce(4, scale=scale_base)
    fig, axes = maps.plot(
        feature='pca1',
        axes=['x', 'y'],
        cmap='Spectral',
        set_aspect='scaled',
        levels=20,
    )
    axes.set(title=f'Princ. Comp. 2, RCut = {rcut} Å', xlabel='x (Å)', ylabel='y (Å)')

In [None]:
# First feature drawn in the plane of the first two principal components.
for maps, rcut in zip(maps_testrcut, rcuts):
    fig, axes = maps.scatter(index=0, axes=['pca0', 'pca1'], cmap='Spectral', set_aspect='scaled')
    axes.set(
        title=f'RCut = {rcut} Å',
        xlabel='Principal Component 1',
        ylabel='Principal Component 2',
    )

In [None]:
# Display the reference number of clustering tries.
ntries_base

In [None]:
# Display the radii from the fourth one onwards (the subset screened below).
rcuts[3:]

In [None]:
# Display the matching Maps objects (same slice as the radii above).
maps_testrcut[3:]

In [None]:
# Cluster-number screening, restricted to the radii from the fourth one onwards.
for maps, rcut in zip(maps_testrcut[3:], rcuts[3:]):
    fig, ax1, ax2 = maps.cluster(maxclusters=max_clusters_base, ntries=ntries_base)
    ax1.set(title=f'RCut = {rcut} Å')
    ax2.set(title='')
plt.show()

In [None]:
import matplotlib.cm as cm
# Compare, for every cutoff radius, the lowest Davies-Bouldin index found at
# each number of clusters during the screening.
# Number of lines
n_lines = len(maps_testrcut)
# Create a colormap with one colour per line. pyplot.get_cmap is used because
# matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9.
cmap = plt.get_cmap('plasma', n_lines)
for i,(maps,rcut) in enumerate(zip(maps_testrcut,rcuts)):
    screening = getattr(maps, 'cluster_screening', None)
    if screening is None:
        # The screening loop only covers maps_testrcut[3:], so the smallest
        # radii may have no screening table; skip them instead of crashing.
        continue
    # Keep, for every number of clusters, the row with the lowest index.
    best_db = screening.loc[screening.groupby('nclusters')['db_index'].idxmin()]
    plt.plot(best_db['nclusters'],best_db['db_index'],label=f'RCut = {rcut} Å',color=cmap(i/n_lines))
plt.legend(frameon=False,fontsize=14,loc='upper left')
plt.title('Davies-Bouldin Index')
plt.show()

In [None]:
# Cluster every map with the same fixed number of clusters, then derive the sites.
nclusters = 7
for maps in maps_testrcut:
    maps.cluster(nclusters)
    maps.sites()

In [None]:
# Cluster labels in real space, with centroids marked.
for maps, rcut in zip(maps_testrcut, rcuts):
    fig, ax = maps.scatter(
        feature='Cluster',
        categorical=True,
        s=20,
        alpha=0.8,
        set_aspect='scaled',
        centroids=True,
    )
    ax.set(xlabel='x (Å)', ylabel='y (Å)', title=f'RCut = {rcut} Å')
plt.show()

In [None]:
# Cluster labels in the plane of the first two principal components, with
# the cluster connectivity graph drawn on top (nodes at the centroids).
axes = ['pca0','pca1']
for maps,rcut in zip(maps_testrcut,rcuts):
    fig, ax = maps.scatter(feature='Cluster', categorical=True, axes=axes, alpha=0.05, s=70, edgecolors='black', set_aspect='on')
    G = nx.from_numpy_array(maps.cluster_edges,create_using=nx.DiGraph,parallel_edges=False)
    pos = maps.data.loc[maps.centroids,axes].values
    # Edge widths are the edge weights scaled down by 200.
    weights = [ d['weight']/200 for (u, v, d) in G.edges(data=True)]
    nx.draw(G, pos, node_size=maps.cluster_sizes, width=weights, ax=ax, alpha=0.5)
    limits=ax.axis('on') # turns on axis
    ax.tick_params(left=True, bottom=True, labelleft=True, labelbottom=True)
    ax.set_xlabel('Principal Component 1')
    # The y axis shows 'pca1', i.e. the second component (was mislabelled as 4).
    ax.set_ylabel('Principal Component 2')
    ax.set_title(f'RCut = {rcut} Å')
plt.show()

## Effects of Chebyshev Order

In [None]:
# Orders to compare: 5, 10, 15, 20 and 25.
orders = range(5,26,5)
# Display the values.
list(orders)

In [None]:
# One descriptor set per order; each holds a single "ac" function with
# radial=True at the reference radius.
sfmodels_order = []
for order in orders:
    symfuncsettings = SymmetryFunctionsModel.parse_obj(
        {
            "functions": [
                SymFuncModel.parse_obj(
                    {
                        "type": "ac",
                        "radius": 4.5,
                        "order": order,
                        "compositional": False,
                        "structural": True,
                        "radial": True,
                    }
                )
            ]
        }
    )
    sfmodels_order.append(SymmetryFunctionsParser(symfuncsettings).parse())

In [None]:
# One Maps object per descriptor set, all on the reference contact space.
maps_testorder = []
for symmetryfunctions in sfmodels_order:
    maps_testorder += [Maps(system111, symmetryfunctions, contactspace_111_base)]
    data = maps_testorder[-1].atcontactspace()

In [None]:
# Draw the first 25 features (by index) of the last, highest-order map.
maps = maps_testorder[-1]
for order in range(25):
    fig, axes = maps.plot(
        index=order,
        axes=['x', 'y'],
        cmap='Spectral',
        set_aspect='scaled',
        levels=20,
    )
    axes.set(xlabel='x (Å)', ylabel='y (Å)')

In [None]:
# Four-component PCA on every map, then the third component ('pca2') in
# the x-y plane.
for maps, order in zip(maps_testorder, orders):
    maps.reduce(4, scale=scale_base)
    fig, axes = maps.plot(
        feature='pca2',
        axes=['x', 'y'],
        cmap='Spectral',
        set_aspect='scaled',
        levels=20,
    )
    axes.set(title=f'Principal Component 3, Order = {order}', xlabel='x (Å)', ylabel='y (Å)')

In [None]:
# First feature drawn in the plane of the first two principal components.
for maps, order in zip(maps_testorder, orders):
    fig, axes = maps.scatter(index=0, axes=['pca0', 'pca1'], cmap='Spectral', set_aspect='scaled')
    axes.set(
        title=f'Order = {order}',
        xlabel='Principal Component 1',
        ylabel='Principal Component 2',
    )

In [None]:
# Cluster-number screening for every order.
# NOTE: this screening uses only 5 tries per cluster count, not ntries_base.
for maps, order in zip(maps_testorder, orders):
    fig, ax1, ax2 = maps.cluster(maxclusters=max_clusters_base, ntries=5)
    ax1.set(title=f'Order = {order}')
    ax2.set(title='')
plt.show()

In [None]:
# Cluster every map with the same fixed number of clusters, then derive the sites.
nclusters = 7
for maps in maps_testorder:
    maps.cluster(nclusters)
    maps.sites()

In [None]:
# Cluster labels in real space, with centroids marked.
for maps, order in zip(maps_testorder, orders):
    fig, ax = maps.scatter(
        feature='Cluster',
        categorical=True,
        s=20,
        alpha=0.8,
        set_aspect='scaled',
        centroids=True,
    )
    ax.set(xlabel='x (Å)', ylabel='y (Å)', title=f'Order = {order}')
plt.show()

In [None]:
# Cluster labels in the plane of the first two principal components, with
# the cluster connectivity graph drawn on top (nodes at the centroids).
axes = ['pca0','pca1']
for maps,order in zip(maps_testorder,orders):
    fig, ax = maps.scatter(feature='Cluster', categorical=True, axes=axes, alpha=0.05, s=70, edgecolors='black', set_aspect='on')
    G = nx.from_numpy_array(maps.cluster_edges,create_using=nx.DiGraph,parallel_edges=False)
    pos = maps.data.loc[maps.centroids,axes].values
    # Edge widths are the edge weights scaled down by 200.
    weights = [ d['weight']/200 for (u, v, d) in G.edges(data=True)]
    nx.draw(G, pos, node_size=maps.cluster_sizes, width=weights, ax=ax, alpha=0.5)
    limits=ax.axis('on') # turns on axis
    ax.tick_params(left=True, bottom=True, labelleft=True, labelbottom=True)
    ax.set_xlabel('Principal Component 1')
    # The y axis shows 'pca1', i.e. the second component (was mislabelled as 4).
    ax.set_ylabel('Principal Component 2')
    ax.set_title(f'Order = {order}')
plt.show()

## Angular, Radial, or Both

In [None]:
# A single order-25 "ac" function declared with radial=False, evaluated on
# the reference contact space.
symfuncsettings = SymmetryFunctionsModel.parse_obj(
    {
        "functions": [
            SymFuncModel.parse_obj(
                {
                    "type": "ac",
                    "radius": 4.5,
                    "order": 25,
                    "compositional": False,
                    "structural": True,
                    "radial": False,
                }
            )
        ]
    }
)
sf_angular = SymmetryFunctionsParser(symfuncsettings).parse()
maps_angular = Maps(system111, sf_angular, contactspace_111_base)
data = maps_angular.atcontactspace()

In [None]:
# Draw the first 25 features (by index) of the angular-only map.
maps = maps_angular
for order in range(25):
    fig, axes = maps.plot(
        index=order,
        axes=['x', 'y'],
        cmap='Spectral',
        set_aspect='scaled',
        levels=20,
    )
    axes.set(xlabel='x (Å)', ylabel='y (Å)')

In [None]:
# Restrict the angular map to its first `order` features, redo the PCA and
# plot the first three principal components in the x-y plane.
for order in orders:
    features = [f'ACSF_AS_r4.5_{i:03.0f}' for i in range(order)]
    maps_angular.features = features
    maps_angular.reduce(4, scale=scale_base)
    for i, pca in enumerate(['pca0', 'pca1', 'pca2']):
        fig, axes = maps_angular.plot(
            feature=pca,
            axes=['x', 'y'],
            cmap='Spectral',
            set_aspect='scaled',
            levels=20,
        )
        axes.set(
            title=f'Principal Component {i+1}, Order = {order}',
            xlabel='x (Å)',
            ylabel='y (Å)',
        )

In [None]:
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
from matplotlib.gridspec import GridSpec
from itertools import combinations
def local_scatter_pca_grid(self, feature = None, index = None, set_aspect='on', **kwargs) -> "tuple[plt.Figure, GridSpec]":
    """Scatter one feature over every pair of principal components.

    Draws an upper-triangular grid of scatter plots, one panel per pair of
    the columns ``pca0`` .. ``pca{npca-1}`` of ``self.data``. Points are
    coloured by the selected feature, and a shared horizontal colour bar is
    placed in the bottom-left cell of the grid.

    Parameters
    ----------
    self : object
        Must expose ``data`` (a table with the ``pca*`` columns and the
        feature column), ``features`` (list of feature names) and ``npca``.
    feature : str, optional
        Name of the column of ``self.data`` used to colour the points.
        Takes precedence over ``index``.
    index : int, optional
        Position of the feature in ``self.features``; used when ``feature``
        is not given.
    set_aspect : {'on', 'off', 'equal', 'scaled'}
        Passed to ``Axes.axis`` for every panel.
    **kwargs
        Forwarded to ``Axes.scatter``.

    Returns
    -------
    fig, gs
        The figure and the ``GridSpec`` that lays out its panels.

    Raises
    ------
    RuntimeError
        If ``self.data`` is None.
    ValueError
        If the feature cannot be resolved (unknown name, index out of
        bounds, or neither given), if principal components are missing or
        fewer than three, or if ``set_aspect`` is not a supported value.
    """
    # Check that contact space maps have been generated
    if self.data is None:
        raise RuntimeError("No contact space data available.")
    # Resolve the column used to colour the points
    if feature is not None:
        if feature not in self.data.columns:
            raise ValueError(f"Feature {feature} not found in maps data.")
    elif index is not None:
        if index >= len(self.features) or index < 0:
            raise ValueError(f"Index {index} out of bounds.")
        feature = self.features[index]
        print(f"Plotting feature {feature}")
    else:
        # Without a feature there is nothing to colour by; fail here rather
        # than on the later data lookup with a None key.
        raise ValueError("Either feature or index must be provided.")
    if self.npca is None:
        raise ValueError("Missing principal components")
    if self.npca < 3:
        # The colour bar lives in a grid cell below the diagonal, which only
        # exists when the grid is at least 2x2.
        raise ValueError("At least three principal components are required.")
    if set_aspect not in ['on','off','equal','scaled']:
        raise ValueError("set_aspect must be one of ['on','off','equal','scaled']")
    # Step 1: Size every row/column proportionally to the range of its component
    pcalabels = [f'pca{i}' for i in range(self.npca)]
    pcaranges = [self.data[label].max() - self.data[label].min() for label in pcalabels]
    maxrange = max(pcaranges)
    pcaproportions = [pcarange / maxrange for pcarange in pcaranges]
    xratios = pcaproportions[1:]   # columns hold components 1 .. npca-1
    yratios = pcaproportions[:-1]  # rows hold components 0 .. npca-2
    y = self.data[feature].values.astype(np.float64)
    # Step 2: Create the (npca-1) x (npca-1) grid
    n_rows = len(yratios)
    n_cols = len(xratios)
    fig = plt.figure(figsize=(12, 12))
    gs = GridSpec(n_rows, n_cols, figure=fig, height_ratios=yratios, width_ratios=xratios)
    # Step 3: Plot each upper triangle component pair
    for i in range(self.npca - 1):
        for j in range(i + 1, self.npca):
            ax = fig.add_subplot(gs[i, j-1])
            X1 = self.data[pcalabels[i]].values.astype(np.float64)
            X2 = self.data[pcalabels[j]].values.astype(np.float64)
            scatter = ax.scatter(X2, X1, c=y, edgecolor='k', **kwargs)
            ax.set_title(f'PC{j + 1} vs PC{i + 1}')
            ax.set_xlim(np.min(X2)-0.5, np.max(X2)+0.5)
            ax.set_ylim(np.min(X1)-0.5, np.max(X1)+0.5)
            ax.axis(set_aspect)
            ax.xaxis.set_major_locator(MultipleLocator(2.5))
            ax.yaxis.set_major_locator(MultipleLocator(2.5))
    # Step 4: Add color bar at the bottom left (built from the last panel drawn)
    cbar_ax = fig.add_subplot(gs[n_rows-1, 0:n_rows-1])
    colorbar = fig.colorbar(scatter, cax=cbar_ax, orientation='horizontal')
    # Draw the bar fully opaque even when the points are translucent
    colorbar.solids.set_alpha(1.0)
    # Step 5: Add Title in the remaining space
    # The resolved feature name is used so the title also works when the
    # caller passed ``feature`` instead of ``index``.
    if n_rows == 2:
        fig.suptitle(f"Maps of {feature} in PC Space", fontsize=22)
    else:
        title_ax = fig.add_subplot(gs[n_rows-2, 0:n_rows-2])
        title_ax.text(0.5, 0.5, f"Maps of {feature}\n\n in PC Space", ha='center', va='center', fontsize=22)
        title_ax.axis('off')
    return fig, gs

In [None]:
# Restrict the angular map to its first `order` features, redo a
# three-component PCA and draw the first feature over all component pairs.
for order in orders:
    features = [ f'ACSF_AS_r4.5_{i:03.0f}' for i in range(order) ]
    maps_angular.features = features
    maps_angular.reduce(3, scale=scale_base)
    fig, gs = local_scatter_pca_grid(maps_angular,index=0,cmap='Spectral', set_aspect='equal',s=70, alpha=0.05)
    # Replace the default figure title with one that also states the order
    # (typo "Oder" fixed).
    fig.suptitle(f"Maps of ACSF_AS_r4.5_000 in PC Space for Order = {order}", fontsize=22)
    fig.tight_layout()

In [None]:
# Three descriptor sets, matching the labels in `types`:
# radial=True only, radial=False only, and the reference pair.
types = ['radial', 'angular', 'both']
sfmodels_type = []
for extras in ([{"radial": True}], [{"radial": False}], [{}, {"radial": False}]):
    symfuncsettings = SymmetryFunctionsModel.parse_obj(
        {
            "functions": [
                SymFuncModel.parse_obj(
                    {"type": "ac", "radius": 4.5, "order": 10, "compositional": False, "structural": True, **extra}
                )
                for extra in extras
            ]
        }
    )
    sfmodels_type.append(SymmetryFunctionsParser(symfuncsettings).parse())

In [None]:
# One Maps object per descriptor set, all on the reference contact space.
maps_testtype = []
for symmetryfunctions in sfmodels_type:
    maps_testtype += [Maps(system111, symmetryfunctions, contactspace_111_base)]
    data = maps_testtype[-1].atcontactspace()

In [None]:
# Four-component PCA on every map, then the third component ('pca2') in
# the x-y plane.
for maps, type_ in zip(maps_testtype, types):
    maps.reduce(4, scale=scale_base)
    fig, axes = maps.plot(
        feature='pca2',
        axes=['x', 'y'],
        cmap='Spectral',
        set_aspect='scaled',
        levels=20,
    )
    axes.set(title=f'Principal Component 3, Type = {type_}', xlabel='x (Å)', ylabel='y (Å)')

In [None]:
# First feature drawn in the plane of the first two principal components.
for maps, type_ in zip(maps_testtype, types):
    fig, axes = maps.scatter(index=0, axes=['pca0', 'pca1'], cmap='Spectral', set_aspect='scaled')
    axes.set(
        title=f'Type = {type_}',
        xlabel='Principal Component 1',
        ylabel='Principal Component 2',
    )

In [None]:
# Cluster-number screening for every descriptor type, with the reference settings.
for maps, type_ in zip(maps_testtype, types):
    fig, ax1, ax2 = maps.cluster(maxclusters=max_clusters_base, ntries=ntries_base)
    ax1.set(title=f'Type = {type_}')
    ax2.set(title='')
plt.show()

In [None]:
# Cluster every map with the same fixed number of clusters, then derive the sites.
nclusters = 8
for maps in maps_testtype:
    maps.cluster(nclusters)
    maps.sites()

In [None]:
# Cluster labels in real space, with centroids marked.
for maps, type_ in zip(maps_testtype, types):
    fig, ax = maps.scatter(
        feature='Cluster',
        categorical=True,
        s=20,
        alpha=0.8,
        set_aspect='scaled',
        centroids=True,
    )
    ax.set(xlabel='x (Å)', ylabel='y (Å)', title=f'Type = {type_}')
plt.show()

In [None]:
# Cluster labels in the plane of the first two principal components, with
# the cluster connectivity graph drawn on top (nodes at the centroids).
axes = ['pca0','pca1']
for maps,type_ in zip(maps_testtype,types):
    fig, ax = maps.scatter(feature='Cluster', categorical=True, axes=axes, alpha=0.05, s=70, edgecolors='black', set_aspect='on')
    G = nx.from_numpy_array(maps.cluster_edges,create_using=nx.DiGraph,parallel_edges=False)
    pos = maps.data.loc[maps.centroids,axes].values
    # Edge widths are the edge weights scaled down by 200.
    weights = [ d['weight']/200 for (u, v, d) in G.edges(data=True)]
    nx.draw(G, pos, node_size=maps.cluster_sizes, width=weights, ax=ax, alpha=0.5)
    limits=ax.axis('on') # turns on axis
    ax.tick_params(left=True, bottom=True, labelleft=True, labelbottom=True)
    ax.set_xlabel('Principal Component 1')
    # The y axis shows 'pca1', i.e. the second component (was mislabelled as 4).
    ax.set_ylabel('Principal Component 2')
    ax.set_title(f'Type = {type_}')
plt.show()

# PCA Hyperparameters

In [None]:
# PCA without a fixed number of components, using the reference scaling flag.
fig, ax1, ax2 = maps_111_base.reduce(scale=scale_base)
if publication:
    ax1.set(title='PCA')
    fig.tight_layout()

In [None]:
# Same PCA call with scaling switched off.
fig, ax1, ax2 = maps_111_base.reduce(scale=False)
if publication:
    ax1.set(title='PCA')
    fig.tight_layout()

In [None]:
# Four-component PCA on the reference map, using the reference scaling flag.
maps_111_base.reduce(4, scale=scale_base)

In [None]:
# One x-y map per principal component (titles are 1-based).
for i in range(4):
    fig, axes = maps_111_base.plot(
        feature=f'pca{i}',
        axes=['x', 'y'],
        cmap='Spectral',
        set_aspect='scaled',
        levels=20,
    )
    axes.set(title=f'PCA {i+1}', xlabel='x (Å)', ylabel='y (Å)')
    plt.show()

In [None]:
# First feature drawn with the map's own PCA-grid scatter method.
fig, gs = maps_111_base.scatter_pca_grid(
    index=0,
    cmap='Spectral',
    set_aspect='equal',
    s=70,
    alpha=0.05,
)
fig.tight_layout()

In [None]:
# Redo the four-component PCA on the reference map with scaling switched off.
maps_111_base.reduce(4, scale=False)

In [None]:
# One x-y map per principal component of the unscaled PCA (titles are 1-based).
for i in range(4):
    fig, axes = maps_111_base.plot(
        feature=f'pca{i}',
        axes=['x', 'y'],
        cmap='Spectral',
        set_aspect='scaled',
        levels=20,
    )
    axes.set(title=f'PCA {i+1}', xlabel='x (Å)', ylabel='y (Å)')
    plt.show()

In [None]:
# First feature drawn with the map's own PCA-grid scatter method, after the
# unscaled PCA.
fig, gs = maps_111_base.scatter_pca_grid(
    index=0,
    cmap='Spectral',
    set_aspect='equal',
    s=70,
    alpha=0.05,
)
fig.tight_layout()

# Clustering Hyperparameters

In [None]:
# Cluster-number screening on the reference map with 200 tries and no
# explicit feature list.
fig, ax1, ax2 = maps_111_base.cluster(maxclusters=max_clusters_base, ntries=200)

In [None]:
# Keep a private copy of the screening table, so later screenings on the same
# map cannot change it.
cluster_screening_base = maps_111_base.cluster_screening.copy()

In [None]:
# Spread of the 'db_index' column per number of clusters: min/max envelope
# (gray), median, mode, and every individual try as a scatter point.
grouped = cluster_screening_base.groupby('nclusters')
lowest, highest = grouped.min(), grouped.max()
min_db = lowest['db_index'].values
max_db = highest['db_index'].values
ax = lowest.plot(y='db_index', color='gray', label='')
highest.plot(y='db_index', ax=ax, color='gray', label='')
ax.fill_between(lowest.index, min_db, max_db, alpha=0.2, color='gray')
grouped.median().plot(y='db_index', ax=ax, label='median')
grouped['db_index'].apply(lambda x: x.mode().iloc[0]).plot(y='db_index', ax=ax, label='mode')
cluster_screening_base.plot.scatter('nclusters', 'db_index', ax=ax)
plt.legend(frameon=False)

In [None]:
# For a fixed number of clusters, pick the random states of the tries with
# the lowest, closest-to-median and highest 'db_index'.
nclusters = 9
cluster_screening_selection = cluster_screening_base.loc[cluster_screening_base['nclusters'] == nclusters]
deviation = (cluster_screening_selection - cluster_screening_selection.median()).abs()
db_values = cluster_screening_selection['db_index']
rs_min = cluster_screening_selection.loc[db_values.idxmin()]['random_state']
rs_median = cluster_screening_selection.loc[deviation.idxmin()['db_index']]['random_state']
rs_max = cluster_screening_selection.loc[db_values.idxmax()]['random_state']
random_states_labels = ['Minimum', 'Median', 'Maximum']
random_states = [rs_min, rs_median, rs_max]

In [None]:
# Re-run the clustering with each selected random state and show the result
# both in real space and in the plane of the first two principal components.
for random_state, label in zip(random_states, random_states_labels):
    maps_111_base.cluster(nclusters, random_state=int(random_state))
    maps_111_base.sites()
    # Real-space view, with centroids marked.
    fig, ax = maps_111_base.scatter(
        feature='Cluster',
        categorical=True,
        s=20,
        alpha=0.8,
        set_aspect='scaled',
        centroids=True,
    )
    ax.set(xlabel='x (Å)', ylabel='y (Å)', title=f'Random State = {label}')
    # PC-space view, with the cluster connectivity graph on top.
    axes = ['pca0', 'pca1']
    fig, ax = maps_111_base.scatter(
        feature='Cluster',
        categorical=True,
        axes=axes,
        alpha=0.05,
        s=70,
        edgecolors='black',
        set_aspect='on',
    )
    G = nx.from_numpy_array(maps_111_base.cluster_edges, create_using=nx.DiGraph, parallel_edges=False)
    pos = maps_111_base.data.loc[maps_111_base.centroids, axes].values
    # Edge widths are the edge weights scaled down by 200.
    weights = [w / 200 for _, _, w in G.edges(data='weight')]
    nx.draw(G, pos, node_size=maps_111_base.cluster_sizes, width=weights, ax=ax, alpha=0.5)
    # Switch the axis decorations back on after nx.draw.
    limits = ax.axis('on')
    ax.tick_params(left=True, bottom=True, labelleft=True, labelbottom=True)
    ax.set(
        xlabel='Principal Component 1',
        ylabel='Principal Component 2',
        title=f'Random State = {label}',
    )

In [None]:
# Cluster-number screening using the four principal components as features.
pca_features = ['pca' + str(i) for i in range(4)]
fig, ax1, ax2 = maps_111_base.cluster(
    features=pca_features,
    maxclusters=max_clusters_base,
    ntries=200,
)

In [None]:
# Keep a private copy of the screening table obtained with the PCA features.
cluster_screening_pca = maps_111_base.cluster_screening.copy()

In [None]:
# Spread of the 'db_index' column per number of clusters: min/max envelope
# (gray), median, mode, and every individual try as a scatter point.
grouped = cluster_screening_pca.groupby('nclusters')
lowest, highest = grouped.min(), grouped.max()
min_db = lowest['db_index'].values
max_db = highest['db_index'].values
ax = lowest.plot(y='db_index', color='gray', label='')
highest.plot(y='db_index', ax=ax, color='gray', label='')
ax.fill_between(lowest.index, min_db, max_db, alpha=0.2, color='gray')
grouped.median().plot(y='db_index', ax=ax, label='median')
grouped['db_index'].apply(lambda x: x.mode().iloc[0]).plot(y='db_index', ax=ax, label='mode')
cluster_screening_pca.plot.scatter('nclusters', 'db_index', ax=ax)
plt.legend(frameon=False)

In [None]:
# For a fixed number of clusters, pick the random states of the tries with
# the lowest, closest-to-median and highest 'db_index'.
nclusters = 9
cluster_screening_selection = cluster_screening_pca.loc[cluster_screening_pca['nclusters'] == nclusters]
deviation = (cluster_screening_selection - cluster_screening_selection.median()).abs()
db_values = cluster_screening_selection['db_index']
rs_min = cluster_screening_selection.loc[db_values.idxmin()]['random_state']
rs_median = cluster_screening_selection.loc[deviation.idxmin()['db_index']]['random_state']
rs_max = cluster_screening_selection.loc[db_values.idxmax()]['random_state']
random_states_labels = ['Minimum', 'Median', 'Maximum']
random_states = [rs_min, rs_median, rs_max]

In [None]:
# Re-run the PCA-feature clustering with each selected random state and show
# the result both in real space and in the plane of the first two components.
for random_state,label in zip(random_states,random_states_labels):
    maps_111_base.cluster(nclusters,features=pca_features,random_state=int(random_state))
    maps_111_base.sites()
    # Real-space view, with centroids marked.
    fig, ax = maps_111_base.scatter(feature='Cluster', categorical=True, s=20, alpha=0.8, set_aspect='scaled', centroids=True)
    ax.set_xlabel('x (Å)')
    ax.set_ylabel('y (Å)')
    ax.set_title(f'Random State = {label}')
    # PC-space view, with the cluster connectivity graph on top.
    axes = ['pca0','pca1']
    fig, ax = maps_111_base.scatter(feature='Cluster', categorical=True, axes=axes, alpha=0.05, s=70, edgecolors='black', set_aspect='on')
    G = nx.from_numpy_array(maps_111_base.cluster_edges,create_using=nx.DiGraph,parallel_edges=False)
    pos = maps_111_base.data.loc[maps_111_base.centroids,axes].values
    # Edge widths are the edge weights scaled down by 200.
    weights = [ d['weight']/200 for (u, v, d) in G.edges(data=True)]
    nx.draw(G, pos, node_size=maps_111_base.cluster_sizes, width=weights, ax=ax, alpha=0.5)
    limits=ax.axis('on') # turns on axis
    ax.tick_params(left=True, bottom=True, labelleft=True, labelbottom=True)
    ax.set_xlabel('Principal Component 1')
    # The y axis shows 'pca1', i.e. the second component (was mislabelled as 4).
    ax.set_ylabel('Principal Component 2')
    ax.set_title(f'Random State = {label}')

In [None]:
# Redo the four-component PCA on the reference map with scaling switched on.
maps_111_base.reduce(4, scale=True)

In [None]:
# Cluster-number screening using the four principal components as features.
pca_features = ['pca' + str(i) for i in range(4)]
fig, ax1, ax2 = maps_111_base.cluster(
    features=pca_features,
    maxclusters=max_clusters_base,
    ntries=200,
)

In [None]:
# Keep a private copy of the screening table obtained after the scaled PCA.
cluster_screening_pca_scaled = maps_111_base.cluster_screening.copy()

In [None]:
# Spread of the 'db_index' column per number of clusters: min/max envelope
# (gray), median, mode, and every individual try as a scatter point.
grouped = cluster_screening_pca_scaled.groupby('nclusters')
lowest, highest = grouped.min(), grouped.max()
min_db = lowest['db_index'].values
max_db = highest['db_index'].values
ax = lowest.plot(y='db_index', color='gray', label='')
highest.plot(y='db_index', ax=ax, color='gray', label='')
ax.fill_between(lowest.index, min_db, max_db, alpha=0.2, color='gray')
grouped.median().plot(y='db_index', ax=ax, label='median')
grouped['db_index'].apply(lambda x: x.mode().iloc[0]).plot(y='db_index', ax=ax, label='mode')
cluster_screening_pca_scaled.plot.scatter('nclusters', 'db_index', ax=ax)
plt.legend(frameon=False)

In [None]:
# For a fixed number of clusters, pick the random states of the tries with
# the lowest, closest-to-median and highest 'db_index'.
nclusters = 9
cluster_screening_selection = cluster_screening_pca_scaled.loc[cluster_screening_pca_scaled['nclusters'] == nclusters]
deviation = (cluster_screening_selection - cluster_screening_selection.median()).abs()
db_values = cluster_screening_selection['db_index']
rs_min = cluster_screening_selection.loc[db_values.idxmin()]['random_state']
rs_median = cluster_screening_selection.loc[deviation.idxmin()['db_index']]['random_state']
rs_max = cluster_screening_selection.loc[db_values.idxmax()]['random_state']
random_states_labels = ['Minimum', 'Median', 'Maximum']
random_states = [rs_min, rs_median, rs_max]

In [None]:
# Re-run the PCA-feature clustering with each selected random state and show
# the result both in real space and in the plane of the first two components.
for random_state,label in zip(random_states,random_states_labels):
    maps_111_base.cluster(nclusters,features=pca_features,random_state=int(random_state))
    maps_111_base.sites()
    # Real-space view, with centroids marked.
    fig, ax = maps_111_base.scatter(feature='Cluster', categorical=True, s=20, alpha=0.8, set_aspect='scaled', centroids=True)
    ax.set_xlabel('x (Å)')
    ax.set_ylabel('y (Å)')
    ax.set_title(f'Random State = {label}')
    # PC-space view, with the cluster connectivity graph on top.
    axes = ['pca0','pca1']
    fig, ax = maps_111_base.scatter(feature='Cluster', categorical=True, axes=axes, alpha=0.05, s=70, edgecolors='black', set_aspect='on')
    G = nx.from_numpy_array(maps_111_base.cluster_edges,create_using=nx.DiGraph,parallel_edges=False)
    pos = maps_111_base.data.loc[maps_111_base.centroids,axes].values
    # Edge widths are the edge weights scaled down by 200.
    weights = [ d['weight']/200 for (u, v, d) in G.edges(data=True)]
    nx.draw(G, pos, node_size=maps_111_base.cluster_sizes, width=weights, ax=ax, alpha=0.5)
    limits=ax.axis('on') # turns on axis
    ax.tick_params(left=True, bottom=True, labelleft=True, labelbottom=True)
    ax.set_xlabel('Principal Component 1')
    # The y axis shows 'pca1', i.e. the second component (was mislabelled as 4).
    ax.set_ylabel('Principal Component 2')
    ax.set_title(f'Random State = {label}')