In [None]:
import sys
sys.path.append("../py_src")

from glob import glob
import os

import numpy as np
import matplotlib.pyplot as plt

import sort_neigh

from ase.io import read as ase_read
from ase.neighborlist import natural_cutoffs, NeighborList

## Analyze the all-Cu nanoparticle
Find all possible atomic positions in a nanoparticle that contains only Cu.

### Load special sorter

In [None]:
# Trajectory to analyse, and the text file that sort_save_cat / load_sort_cat
# write to and read from further down in this notebook.
nano_path = "../test_data/220518_cunano_mcswap/220518_cunano_mcswap.lammpstrj"
save_txt_path = "../test_data/220518_cunano_mcswap/220518_cunano_mcswap.txt"

# Settings handed unchanged to both classifier objects created below.
rcut, nmax, lmax = 9.0, 12, 12
sigma, gamma_kernel = 0.5, 0.05
descriptor_kwargs = dict(
    rcut=rcut, nmax=nmax, lmax=lmax,
    sigma=sigma, gamma_kernel=gamma_kernel,
)

# Classifier used for the Cu-only particle.
cu_classifier = sort_neigh.onlyCuClassifier()
cu_classifier.load_identifiers(**descriptor_kwargs)

# Sorter used for the trajectory; its `.classifier` attribute is used later as well.
rh_sorter = sort_neigh.NeighbourSort(**descriptor_kwargs)

### Create folder structure
The particle trajectory needs to be separated into individual timesteps, so we do that here.

In [None]:
# System sizes: atoms per particle, the `last_n` value used when building local
# structures (presumably the number of Rh atoms -- confirm), and number of timesteps.
n_particles, n_rhod, n_timesteps = 1515, 15, 2450

# The analysis output folder lives next to the input trajectory.
parent_dir = os.path.dirname(nano_path)
out_dir = "/".join([parent_dir, "220518_cunano_mcswap_analysis/"])

folder_kwargs = dict(
    n_atoms_in_part=n_particles,
    timesteps=n_timesteps,
    out_dir=out_dir,
)
rh_sorter.init_folder_structure(nano_path, **folder_kwargs)

In [None]:
# Write a copy of the first timestep in which every "Rh" label is replaced by "Cu".
ts_0_path = out_dir+"/ts_0/cunano_0.lammpstrj"
only_cu_path = parent_dir + "/cunano_only_cu.lammpstrj"

# The `with` statement closes the file on exit, so no explicit close() is needed.
with open(ts_0_path, 'r') as f:
    ts_0_cont = f.read()

# NOTE(review): this is a plain text substitution over the whole file content,
# so any other occurrence of the substring "Rh" would be rewritten too.
ts_0_onlyCu = ts_0_cont.replace("Rh", "Cu")

with open(only_cu_path, 'w') as f:
    f.write(ts_0_onlyCu)

### Run Sorter
Compute the classification and the number of neighbours for every single atom in the nanoparticle.

In [None]:
mode = "pre_group"

# Read the Cu-only particle and build a two-way neighbour list (self included)
# from the natural cutoffs scaled by 0.95.
full_particle = ase_read(only_cu_path)
n_atoms = len(full_particle)

cut_off = natural_cutoffs(full_particle, mult=0.95)
neighbour_list = NeighborList(cut_off, bothways=True, self_interaction=True)
neighbour_list.update(full_particle)

soaps = []
n_classes = rh_sorter.classifier.n_classes

# Results with the unmodified (Cu) environment: tally per class, then class id
# and neighbour count per atom.
cu_cat_counter = np.zeros(n_classes, dtype=np.int32)
categories = np.zeros(n_atoms, dtype=np.int32)
neighbours = np.zeros(n_atoms, dtype=np.int32)

# Same quantities after relabelling the first atom of each environment as Rh.
rh_cat_counter = np.zeros(n_classes, dtype=np.int32)
rh_categories = np.zeros(n_atoms, dtype=np.int32)
rh_neighbours = np.zeros(n_atoms, dtype=np.int32)

for index in range(n_atoms):
    neighbour_indices, _offsets = neighbour_list.get_neighbors(index)
    # NOTE(review): the last returned index is dropped; presumably it is the
    # duplicated self entry -- confirm against the ASE neighbour-list ordering.
    neighbour_particle = full_particle[neighbour_indices[:-1]]

    # Pass 1: classify the environment as it is.
    cu_n_neigh, cu_class_id = cu_classifier.classify(
        neighbour_particle, mode=mode, ensure_position=False
    )
    cu_cat_counter[cu_class_id] += 1
    neighbours[index] = int(cu_n_neigh)
    categories[index] = int(cu_class_id)

    # Pass 2: relabel atom 0 of the environment as Rh and classify again.
    # NOTE(review): assumes atom 0 is the centre atom -- TODO confirm.
    swapped_symbols = neighbour_particle.get_chemical_symbols()
    swapped_symbols[0] = "Rh"
    neighbour_particle.set_chemical_symbols(swapped_symbols)
    n_neigh, class_id = rh_sorter.classifier.classify(
        neighbour_particle, mode=mode, ensure_position=False
    )
    rh_cat_counter[class_id] += 1
    rh_neighbours[index] = int(n_neigh)
    rh_categories[index] = int(class_id)

# Number of atoms whose class id differs between the two passes.
diff_rh_replace = (rh_categories != categories).sum()
print("Replacing center particle with Rh led to differences in %u classifications"%diff_rh_replace)


In [None]:
%matplotlib inline
if len(cu_cat_counter.shape) < 2:
    cu_cat_counter = cu_cat_counter[np.newaxis, :]
cu_sorted_cats, cu_sorted_counter = rh_sorter.sort_cat_counter(cu_cat_counter)
rh_sorter.plot_dist(cu_sorted_cats, cu_sorted_counter)

bulk_lim = -5
print("First bulk category: %s"%(cu_sorted_cats[bulk_lim]))
site_tot = np.sum(cu_sorted_counter)
bulk = np.sum(cu_sorted_counter[:, bulk_lim:])
surface = np.sum(cu_sorted_counter[:, :bulk_lim])
print('''SITE CLASSIFICATION:
Bulk (n_neigh >= 10): %u out of %u (%.2f percent).
Surface (n_neigh < 10): %u out of %u (%.2f percent).
'''%(
    bulk, site_tot, (bulk/site_tot)*100,
    surface, site_tot, (surface/site_tot)*100
))

In [None]:
# Positions of every atom in the Cu-only particle; reused by the 3D scatter plots below.
at_pos = full_particle.get_positions()
# Print the array shape as a quick sanity check.
print(at_pos.shape)

In [None]:
%matplotlib auto
plt.ion()

actual_categories = True

cond = neighbours < 12

fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(projection='3d')

if actual_categories:
    sc = ax.scatter(
        at_pos[cond, 0], at_pos[cond, 1], at_pos[cond, 2], c=categories[cond], cmap="tab20", alpha=1,
        s=800, edgecolors="k"
    )
    cb = fig.colorbar(sc, ticks=np.arange(len(cu_sorted_cats)))
    cb.ax.set_yticklabels(cu_classifier.get_unsorted_cats())
else:
    sc = ax.scatter(
        at_pos[cond, 0], at_pos[cond, 1], at_pos[cond, 2], c=neighbours[cond], cmap="Set3", alpha=1,
        s=800, edgecolors="k"
    )
    cb = fig.colorbar(sc)
plt.show()

In [None]:
# Set to True to rebuild the local structures and rewrite the counts at save_txt_path;
# with False the file written by an earlier run is loaded as is.
newcats = False
if newcats:
    rh_cats = rh_sorter.create_local_structure(last_n=n_rhod, create_subfolders=False)
    rh_sorter.sort_save_cat(save_txt_path, rh_cats)

loaded_counts = rh_sorter.load_sort_cat(save_txt_path)
sorted_counts, timesteps, sorted_cats = loaded_counts

In [None]:
normalised_counts = sorted_counts / cu_cat_counter.squeeze() * 100.

%matplotlib inline
fig, ax = plt.subplots(2, 1, figsize=(10, 10))

for ii_col in range(sorted_counts.shape[1]):
    if np.sum(sorted_counts[:, ii_col]) != 0 and int(sorted_cats[ii_col].split("_")[0]) < 15:
        cur_line = sorted_counts[:, ii_col]
        ax[0].plot(timesteps[cur_line > 0], cur_line[cur_line > 0], 'o', label=sorted_cats[ii_col])
        
ax[0].legend()
ax[0].set_ylabel("Number of Rh")
ax[0].set_xlabel("timestep")

for ii_col in range(normalised_counts.shape[1]):
    if np.sum(sorted_counts[:, ii_col]) != 0: # and int(sorted_cats[ii_col].split("_")[0]) < 10:
        cur_line = normalised_counts[:, ii_col]
        ax[1].plot(timesteps[cur_line > 0], cur_line[cur_line > 0], 'o', label=sorted_cats[ii_col])
        
ax[1].legend()
ax[1].set_ylabel("Percentage of available sites")
ax[1].set_xlabel("timestep")

plt.show()


In [None]:
# Column (counted from the end of the sorted categories) inspected in this cell.
vac_col = -9
print(cu_sorted_cats[vac_col])
print(cu_sorted_counter.shape)

# Rows in which the selected category has a non-zero count.
# NOTE(review): cu_sorted_counter is built from the single-row Cu-only tally, so the
# only possible row index is 0 -- confirm whether `sorted_counts` was intended here.
where_eight_vac = np.argwhere(cu_sorted_counter[:, vac_col]>0)
if where_eight_vac.size == 0:
    raise IndexError(
        "No row of cu_sorted_counter has a non-zero count for category %s"
        % cu_sorted_cats[vac_col]
    )

# argwhere returns an (n_hits, 1) array; take a plain int so the path formatting
# does not rely on implicit conversion of a 1-element array to a scalar.
which_vac = int(where_eight_vac[0, 0])
ts_path = out_dir+("/ts_%u/cunano_%u.lammpstrj"%(which_vac, which_vac))

site_part = ase_read(ts_path)
site_positions = site_part.get_positions()

plt.ion()

fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(projection='3d')

# All atoms except the last n_rhod, drawn faintly in grey.
sc = ax.scatter(
    site_positions[:-n_rhod, 0], site_positions[:-n_rhod, 1], site_positions[:-n_rhod, 2],
    c="tab:grey", alpha=0.07,
    s=400, edgecolors="k"
)
# The last n_rhod atoms (the same `last_n` used for the local structures), highlighted.
sc = ax.scatter(
    site_positions[-n_rhod:, 0], site_positions[-n_rhod:, 1], site_positions[-n_rhod:, 2],
    c="tab:blue", alpha=1,
    s=400, edgecolors="k"
)
plt.show()

## Run for multiple files

In [None]:
%matplotlib inline
newcats = False

specific_files = [
    "../test_data/220523_cunano_mcswap/mc/1000.lammpstrj",
    "../test_data/220523_cunano_mcswap/mcmd/1000opt.lammpstrj",
]

mult_folders = "../test_data/220523_cunano_mcswap"
for t_file in glob(mult_folders+'/*.lammpstrj'):
    t_file = os.path.abspath(t_file)
    cur_dir = os.path.dirname(t_file)
    cur_fname = os.path.basename(t_file).split(".")[0]
    
    cur_out_dir = os.path.join(cur_dir, cur_fname+"_out/")
    cur_out_file = os.path.join(cur_dir, cur_fname+"_count.txt")
    
    cur_sorter = sort_neigh.NeighbourSort(
        rcut=rcut, nmax=nmax, lmax=lmax, 
        sigma=sigma, gamma_kernel=gamma_kernel
    ) 
    if newcats:
        cur_sorter.init_folder_structure(
            t_file,
            n_atoms_in_part=n_particles,
            timesteps=n_timesteps,
            out_dir=cur_out_dir
        )
        cur_cats = cur_sorter.create_local_structure(last_n=n_rhod, create_subfolders=False)
        cur_sorter.sort_save_cat(cur_out_file, cur_cats)
    else:
        print("Outdir: %s"%cur_out_dir)
        print("Outfile: %s"%cur_out_file)
    
    cur_sorted_counts, cur_timesteps, cur_sorted_cats = cur_sorter.load_sort_cat(cur_out_file)

    print("#############################################################")
    print("Plots for %s"%cur_fname)
    print("#############################################################")


In [None]:
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans, DBSCAN, Birch
from dscribe.descriptors import SOAP

#soaps = np.asarray(soaps)
soaper = SOAP(species=cu_classifier.soap_species, rcut=5.1, nmax=4, lmax=3, sigma=0.5, periodic=False)
soaps = soaper.create(full_particle)
print(soaps.shape)

red_mode = "pca"

tsne = TSNE(n_components=2)
pca = PCA(n_components=40)
if red_mode =="tsne":
    reduced = tsne.fit_transform(soaps)
    pca.fit(soaps)
elif red_mode == 'pca':
    reduced = pca.fit_transform(soaps)

k_means = KMeans(12)
db_scan = DBSCAN(eps=0.5)
n_clust = Birch(n_clusters=6).fit_predict(reduced)

%matplotlib inline
plt.plot(np.arange(pca.n_components), np.abs(pca.singular_values_))
plt.yscale('log')
plt.show()

In [None]:
%matplotlib inline
fig, ax = plt.subplots(1, 1, figsize=(8, 8))
ax.set_title("Unsupervised Regression")
sc = ax.scatter(reduced[:, 0], reduced[:, 1], c=n_clust, cmap="tab20")
cb = fig.colorbar(sc)

fig, ax = plt.subplots(1, 1, figsize=(8, 8))
ax.set_title("Number of Neighbours")
sc = ax.scatter(reduced[:, 0], reduced[:, 1], c=neighbours, cmap="tab20")
cb = fig.colorbar(sc)

fig, ax = plt.subplots(1, 1, figsize=(8, 8))
ax.set_title("Classes by Neighbours")
sc = ax.scatter(reduced[:, 0], reduced[:, 1], c=categories, cmap="tab20")
cb = fig.colorbar(sc)

In [None]:
%matplotlib auto
plt.ion()

actual_categories = True

cond = neighbours < 15

fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(projection='3d')

sc = ax.scatter(
    at_pos[cond, 0], at_pos[cond, 1], at_pos[cond, 2], c=n_clust[cond], cmap="tab20", alpha=1,
    s=800, edgecolors="k"
)
# cb = fig.colorbar(sc, ticks=np.arange(len(sorted_cats)))
# cb.ax.set_yticklabels(cu_classifier.get_unsorted_cats())