In [9]:
import h5py
from sklearn.decomposition import PCA
import SOAPify
from sys import argv

# lmax / nmax values handed to SOAPify.fillSOAPVectorFromdscribe, both when
# preparing the PCA fit set and when processing the data to be projected.
# NOTE(review): presumably these have to equal the SOAPlmax / SOAPnmax used when
# the SOAP vectors were computed (8 and 8 in the saponifyGroup call) - confirm.
lmax=8
nmax=8

def preparePCAFitSet(fitsetGroup: h5py.Group, PCAdim: int):
    """Fit a PCA on the SOAP fit set stored in an HDF5 group.

    Args:
        fitsetGroup: group containing a ``"SOAPFitSet"`` dataset; the whole
            dataset is read into memory.
        PCAdim: forwarded as the first argument of
            ``sklearn.decomposition.PCA``.

    Returns:
        The fitted ``PCA`` object.
    """
    print("preparing the PCA with from the fitset")
    rawFitset = fitsetGroup["SOAPFitSet"][:]
    # Same two SOAPify steps that applypca runs on the data it projects,
    # using the module-level lmax / nmax.
    preparedFitset = SOAPify.normalizeArray(
        SOAPify.fillSOAPVectorFromdscribe(rawFitset, lmax, nmax)
    )
    # PCA.fit returns the estimator itself, so fit and return in one step.
    return PCA(PCAdim).fit(preparedFitset[:])

# if __name__ == "__main__":  # (disabled) detects that the file is being launched as a program
def applypca(
    fname,
    referencepcaFilename="fitsets_red.hdf5",
    pcaname="test210_redux",
    chunklen=100,
    pcadim=3,
):
    """Project every dataset under ``SOAP`` in ``fname`` onto a reference PCA.

    The PCA is fitted by ``preparePCAFitSet`` on the group ``pcaname`` of the
    file ``referencepcaFilename``. For each dataset ``SOAP/<key>`` of ``fname``
    a dataset ``PCAs/<pcaname>/<key>`` with shape
    ``(data.shape[0], data.shape[1], pcadim)`` is created (or reused) in the
    same file and filled chunk by chunk. The explained variance ratio of the
    PCA is stored in the ``variance`` attribute of each output dataset, and
    the origin of the fit set in the ``PCAOrigin`` attribute of the group.

    Args:
        fname: HDF5 file containing the ``SOAP`` group; opened in append mode
            and modified in place.
        referencepcaFilename: HDF5 file holding the fit sets (opened read-only).
        pcaname: name of the fit-set group inside ``referencepcaFilename``;
            also used as the name of the output group under ``PCAs``.
        chunklen: chunk length along the first axis of the output datasets.
        pcadim: number of PCA components kept (last axis of the output).

    The defaults reproduce the values that were previously hard-coded.
    """
    # the data stored in the file named by referencepcaFilename is the basis
    # for the PCA fit
    with h5py.File(referencepcaFilename, "r") as fsFile, h5py.File(fname,"a") as SOAPFile:
        pcaEngine = preparePCAFitSet(fsFile[pcaname], pcadim)
        pcaGroup = SOAPFile.require_group(f"PCAs/{pcaname}")
        pcaGroup.attrs["PCAOrigin"] = f"{referencepcaFilename}/{pcaname}"

        soapGroup = SOAPFile["SOAP"]
        for key in soapGroup.keys():

            print(f"appling PCA to {key}")

            data = soapGroup[key]
            dim0, dim1 = data.shape[0], data.shape[1]
            pcaout = pcaGroup.require_dataset(
                key,
                shape=(dim0, dim1, pcadim),
                dtype=data.dtype,
                chunks=(chunklen, dim1, pcadim),
                maxshape=(None, dim1, pcadim),
                compression="gzip",
            )
            # iter_chunks yields one tuple of slices per stored chunk of the
            # SOAP dataset; only its first slice (axis 0) is used to place the
            # result in the output.
            # NOTE(review): this relies on the SOAP chunks spanning axes 1 and 2
            # entirely, as they do in the recorded run - confirm for other files.
            for chunkTraj in data.iter_chunks():
                print(f'{key}:working on SOAP chunk "{chunkTraj}"')
                normalizedData = SOAPify.normalizeArray(
                    SOAPify.fillSOAPVectorFromdscribe(data[chunkTraj], lmax, nmax))
                # PCA.transform works on 2D input: flatten the leading axes,
                # then restore them on the projected result.
                pcaRes = pcaEngine.transform(
                    normalizedData.reshape((-1, normalizedData.shape[-1]))
                )
                pcaout[chunkTraj[0]] = pcaRes.reshape((-1, dim1, pcadim))

            pcaout.attrs["variance"] = pcaEngine.explained_variance_ratio_

In [None]:
import matplotlib.pyplot as plp
import scipy.cluster.hierarchy as sch
from  h5py import File
from pandas import DataFrame
import h5py
from HDF5er import saveXYZfromTrajGroup,MDA2HDF5,saveXYZfromTrajGroup
import numpy
from MDAnalysis import Universe as mdaUniverse
from SOAPify import (saponifyGroup,
                    createReferencesFromTrajectory,
                    mergeReferences,
                    SOAPdistanceNormalized,
                    saveReferences,
                    getReferencesFromDataset,
                    classify
                    )

# Switches for the processing cell of this notebook:
# loadReferences gates the conversion of the LAMMPS dump to HDF5 (MDA2HDF5),
# soapReferences gates the box patching and the SOAP computation (saponifyGroup).
loadReferences=True
soapReferences=True



def patchBoxFromTopology(hdf5TrajFile:str,topologyFile:str):
    """Overwrite the ``Box`` dataset of every trajectory with the topology box.

    The topology file is loaded as an MDAnalysis universe (with
    ``atom_style="id type x y z"``) and its ``dimensions`` are written to
    every entry along the first axis of ``Trajectories/<name>/Box``, for each
    trajectory group found in ``hdf5TrajFile``.

    Args:
        hdf5TrajFile: HDF5 file containing a ``Trajectories`` group; opened in
            append mode and modified in place.
        topologyFile: file from which the box dimensions are read.
    """
    topologyUniverse = mdaUniverse(topologyFile, atom_style="id type x y z")
    with h5py.File(hdf5TrajFile, "a") as trajFile:
        for trajName in trajFile['Trajectories']:
            boxes = trajFile[f'Trajectories/{trajName}']['Box']
            # one copy of the topology box per entry along the first axis
            boxes[:] = [topologyUniverse.dimensions] * boxes.shape[0]

In [None]:
# Stage 1: convert each LAMMPS dump into a trajectory stored in "<surf>.hdf5".
# File names are derived from `surf`, so extending the list of surfaces does
# not silently re-read the files of surface 210.
if loadReferences:
    for surf in ["210"]:
        for fname in [f"{surf}_T_500.lammpsdump"]:
            u = mdaUniverse(fname)  # , atom_style="id type x y z")
            # label every atom as Cu
            u.atoms.types = ["Cu"] * len(u.atoms)
            print(u.coord[0])
            # the trajectory is named after the dump file without its extension
            MDA2HDF5(u, f"{surf}.hdf5", fname.split(".")[0], trajChunkSize=1000)
            print(surf)

# Stage 2: take the box from the topology file "<surf>.data", then compute the
# SOAP vectors for every trajectory and store them in the "SOAP" group of the
# same HDF5 file.
if soapReferences:
    for surf in ["210"]:
        patchBoxFromTopology(f"{surf}.hdf5", f"{surf}.data")
        with File(f"{surf}.hdf5", "a") as workFile:
            # NOTE(review): SOAPnmax / SOAPlmax presumably have to agree with
            # the nmax / lmax used by applypca (both 8) - confirm.
            saponifyGroup(
                trajContainers=workFile["Trajectories"],
                SOAPoutContainers=workFile.require_group("SOAP"),
                SOAPOutputChunkDim=1000,
                SOAPnJobs=32,
                SOAPrcut=6,
                SOAPnmax=8,
                SOAPlmax=8,
            )

In [10]:
# Fit the PCA on the reference fit set and write the projection of every SOAP
# dataset of 210.hdf5 back into that same file (under the "PCAs" group).
applypca("210.hdf5")

preparing the PCA with from the fitset
appling PCA to 210_T_700
210_T_700:working on SOAP chunk "(slice(0, 100, 1), slice(0, 2304, 1), slice(0, 324, 1))"
210_T_700:working on SOAP chunk "(slice(100, 200, 1), slice(0, 2304, 1), slice(0, 324, 1))"
210_T_700:working on SOAP chunk "(slice(200, 300, 1), slice(0, 2304, 1), slice(0, 324, 1))"
210_T_700:working on SOAP chunk "(slice(300, 400, 1), slice(0, 2304, 1), slice(0, 324, 1))"
210_T_700:working on SOAP chunk "(slice(400, 500, 1), slice(0, 2304, 1), slice(0, 324, 1))"
210_T_700:working on SOAP chunk "(slice(500, 502, 1), slice(0, 2304, 1), slice(0, 324, 1))"
