## Imports

In [None]:
import sys
sys.path.append("../py_src")

from glob import glob
import os

import numpy as np
import matplotlib.pyplot as plt

import sort_neigh

from ase.io import read as ase_read
from ase.neighborlist import natural_cutoffs, NeighborList
from dscribe.descriptors import LMBTR, SOAP

## Define Paths and Descriptor Parameters

In [None]:
# --- Input locations ------------------------------------------------------
# Root folder of the result set; the Cu-only reference particle is stored in
# a sub-folder of it.
target_dir = "../test_data/221230_finalres/rh"
only_cu_dir = f"{target_dir}/cunanoparticle"
only_cu_path = f"{only_cu_dir}/cusingle.lammpstrj"

# --- System size ----------------------------------------------------------
# NOTE(review): presumably the total atom count and the number of Rh atoms
# in the simulated particle -- confirm against the trajectory files.
n_particles = 1577
n_rhod = 15

# --- Descriptor / kernel hyper-parameters ---------------------------------
# rcut, nmax, lmax and sigma are passed to the SOAP descriptor further down.
rcut = 4.2  # the original author also noted 2.7 as an alternative
nmax = 4
lmax = 3
sigma = 0.6
gamma_kernel = 1.0

## ML Classifier

In [None]:
# Unsupervised classifier; it is fitted later in the "Training" cell.
ml_classifier = sort_neigh.USMLClassifier()

# Select the local-environment descriptor: SOAP (default) or an angular (k3)
# LMBTR spectrum. Either way the object is bound to ``descr``, because all
# code below reads ``descr`` regardless of which descriptor was chosen.
use_soap = True
if use_soap:
    descr = SOAP(species=["Rh", "Cu"], rcut=rcut, nmax=nmax, lmax=lmax, sigma=sigma, periodic=False)
else:
    n_spec = 180  # number of grid points on the 0-180 angle axis
    # Fix: this used to be assigned to ``dim_red``, leaving ``descr``
    # undefined and raising NameError as soon as ``use_soap`` was False.
    descr = LMBTR(
        species=["Rh", "Cu"],
        # Optional distance (k2) term, currently disabled:
        # k2={
        #     "geometry": {"function": "distance"},
        #     "grid": {"min": 0, "max": 5, "n": 100, "sigma": 0.1},
        #     "weighting": {"function": "exp", "scale": 0.5, "threshold": 1e-3},
        # },
        k3={
            "geometry": {"function": "angle"},
            "grid": {"min": 0, "max": 180, "n": n_spec, "sigma": 3},
            "weighting": {"function": "unity"},
        },
        periodic=False,
        sparse=False,
        normalization="none",
        flatten=True,
    )

# Reference classifier based on the stored local structures; its identifiers
# are computed with the same descriptor object chosen above.
standard_classifier = sort_neigh.NeighbourClassifier(
    local_structures_path=os.path.abspath("../src/localstructures_newopt_rh"),
    non_class_max=14,
)
standard_classifier.load_identifiers(descr_func=descr)

## Import Particle

In [None]:
# Read the Cu-only reference particle.
full_particle = ase_read(only_cu_path)
at_pos = full_particle.get_positions()

# Neighbour table built from ASE's natural cutoffs, scaled by 0.98.
cut_off = natural_cutoffs(full_particle, mult=0.98)
neighbour_list = NeighborList(cut_off, bothways=True, self_interaction=False)
neighbour_list.update(full_particle)

# One descriptor row per atom of the particle.
ind_soaps = np.zeros((len(full_particle), descr.get_number_of_features()))
for index in range(len(full_particle)):
    # Only the neighbour indices are needed; the offsets are ignored.
    neighbour_indices, offsets = neighbour_list.get_neighbors(index)

    # Put the central atom first so it sits at position 0 of the cluster.
    neighbour_indices = np.concatenate(
        (np.array([index]), neighbour_indices), axis=0
    )
    neighbour_particle = full_particle[neighbour_indices]

    # Relabel the central atom as Rh; all neighbours keep their symbols.
    symbs = neighbour_particle.get_chemical_symbols()
    neighbour_particle.set_chemical_symbols(["Rh", *symbs[1:]])

    # Descriptor evaluated only at the central atom (cluster index 0).
    ind_soaps[index] = descr.create(neighbour_particle, positions=[0])

## Training

In [None]:
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA, KernelPCA
from sklearn.cluster import KMeans, DBSCAN, Birch
from dscribe.descriptors import SOAP

# Two training modes:
#   * False (default): fit on the per-atom descriptors in ``ind_soaps``.
#   * True: let the classifier compute descriptors of the particle itself.
train_on_particle = False
if not train_on_particle:
    n_clust = ml_classifier._train_on_data(
        ind_soaps,
        dim_red=PCA(n_components=20),
        clusterer=Birch(n_clusters=8),
    )
    # ``ml_classifier.descr`` is read right below, so attach the descriptor
    # explicitly in this mode.
    ml_classifier.descr = descr
else:
    # NOTE(review): presumably train_on_particle() sets ``ml_classifier.descr``
    # itself -- verify in sort_neigh.
    n_clust = ml_classifier.train_on_particle(
        full_particle,
        soap_species=["Cu"],
        dim_red=PCA(n_components=4),
        clusterer=Birch(n_clusters=10),
        rcut=rcut,
        nmax=nmax,
        lmax=lmax,
        sigma=sigma,
    )

# Descriptors of the unmodified particle, and the per-atom descriptors
# projected with the fitted dimensionality reduction.
soaps = ml_classifier.descr.create(full_particle)
reduced_particle = ml_classifier.dim_red.transform(ind_soaps)

## Load SOAP Descriptors from Local Structures

In [None]:
# Gather the reference descriptors and labels of every local structure held
# by the standard classifier; entries that are None are skipped.
loaded_entries = [
    entry
    for entry in standard_classifier.identification_dict.values()
    if entry is not None
]
if not loaded_entries:
    # Fail with a clear message instead of an opaque IndexError further down.
    raise ValueError(
        "standard_classifier.identification_dict contains no loaded local structures"
    )

# Take index 0 along the second axis of each "soap_descr" array and stack the
# resulting 2-D blocks row-wise in a single concatenate call (no repeated
# np.append copies).
soaps_from_classifier = np.concatenate(
    [entry["soap_descr"][:, 0, :] for entry in loaded_entries],
    axis=0,
)

# Each entry's "id" is itself a sequence of labels; flatten them into one
# list in the same order as the stacked descriptor rows.
labels = [label_id for entry in loaded_entries for label_id in entry["id"]]

print("Loaded localstructures: ")
print(labels)
print(soaps_from_classifier.shape)

# Project the reference descriptors with the fitted dimensionality reduction.
soap_prediction = ml_classifier.dim_red.transform(soaps_from_classifier)

## Load Existing Atomic Descriptors

In [None]:
# Folders whose trajectories have precomputed descriptor files next to them.
target_folders = [
    target_dir + "/mc",
    target_dir + "/mcmd",
]

# Suffix of the descriptor file stored alongside each trajectory.
load_name = "_soap.npy" if use_soap else "_lmbtr.npy"

# Maps "<folder>_<trajectory name>" to
# {"descriptors": raw array, "dim_red": reduced array}.
results_dict = {}

for target_folder in target_folders:
    for target_file in glob(target_folder + "/*.lammpstrj"):
        target_file = os.path.abspath(target_file)
        folder_path = os.path.dirname(target_file)
        # File name up to its first dot, e.g. "600.lammpstrj" -> "600".
        only_file = os.path.basename(target_file).split(".")[0]
        save_txt_path = os.path.join(folder_path, only_file + load_name)

        # np.load opens and closes the file itself.
        load_descriptors = np.load(save_txt_path)

        # Name of the containing folder, taken with os.path so it does not
        # depend on "/" being the path separator.
        dir_name = os.path.basename(folder_path)
        cur_key = "_".join([dir_name, only_file])

        # The reducer expects 2-D input: merge the two leading axes, transform,
        # then restore them on the reduced result.
        import_shape = load_descriptors.shape
        flat_descriptors = load_descriptors.reshape(
            (import_shape[0] * import_shape[1], import_shape[2])
        )
        reduced = ml_classifier.dim_red.transform(flat_descriptors)

        results_dict[cur_key] = {
            # Independent copy, so the stored array never shares memory with
            # the view that was handed to the reducer.
            "descriptors": load_descriptors.copy(),
            "dim_red": reduced.reshape(
                (import_shape[0], import_shape[1], reduced.shape[-1])
            ),
        }


## Plot Reduction Maps

In [None]:
# Overlay three point sets in the plane of the first two reduced components:
# the reference particle, the labelled local structures and one trajectory.
fig, ax = plt.subplots(1, 1, figsize=(8, 8))
ax.set_title("PCA Map of Unsupervised Regression")

# 1) Per-atom descriptors of the reference particle.
particle_x = reduced_particle[:, 0]
particle_y = reduced_particle[:, 1]
sc = ax.scatter(particle_x, particle_y, label='particle')

# 2) Reference local structures in black, each annotated with its label.
ax.scatter(soap_prediction[:, 0], soap_prediction[:, 1], c="k", label="localstructures")
for ii_label, label in enumerate(labels):
    anchor = soap_prediction[ii_label, 0:2]
    ax.annotate(label, anchor)

# 3) All atoms of all frames of the "mcmd_600" trajectory, flattened to 2-D.
trajectory_2d = results_dict["mcmd_600"]["dim_red"][..., :2]
flat_rows = trajectory_2d.shape[0] * trajectory_2d.shape[1]
load_descriptors = trajectory_2d.reshape((flat_rows, trajectory_2d.shape[2]))
ax.scatter(load_descriptors[:, 0], load_descriptors[:, 1], label='trajectory')

ax.legend()
plt.show()