# Load References

This notebook creates the references used in the top-down analysis.

In [None]:
from h5py import File
from HDF5er import saveXYZfromTrajGroup,MDA2HDF5,saveXYZfromTrajGroup
import numpy
from MDAnalysis import Universe as mdaUniverse
from SOAPify import (saponifyGroup, 
                    createReferencesFromTrajectory,
                    mergeReferences,
                    SOAPdistanceNormalized,
                    saveReferences,
                    )


In [None]:
# Stage switches for the processing cell below.
# loadReferences: convert the *.data files into bases.hdf5 and export .xyz files.
loadReferences = True
# soapReferences: run saponifyGroup on the trajectories stored in bases.hdf5.
soapReferences = True

In [None]:

# Names of the reference surfaces. Each one is read from "<name>.data" and
# stored under the same name in the "Trajectories" group of bases.hdf5.
# Keeping a single list avoids the import loop and the export loop drifting apart.
surfaceNames = ["110", "111", "210", "211"]

if loadReferences:
    # Stage 1: import every data file into bases.hdf5.
    for surfaceName in surfaceNames:
        universe = mdaUniverse(f"{surfaceName}.data")#, atom_style="id type x y z")
        # Label every atom as copper.
        universe.atoms.types = ["Cu"] * len(universe.atoms)
        # Quick sanity check on what was loaded
        # (presumably the first atom's coordinates -- TODO confirm).
        print(universe.coord[0])
        MDA2HDF5(universe, "bases.hdf5", surfaceName, trajChunkSize=1000)

    # Stage 2: export each stored trajectory group to "<name>.xyz".
    with File("bases.hdf5", "r") as workFile:
        # `surfaceName` rather than `id`, so the builtin `id` is not shadowed
        # in the notebook's global namespace.
        for surfaceName in surfaceNames:
            saveXYZfromTrajGroup(
                f"{surfaceName}.xyz", workFile[f'Trajectories/{surfaceName}']
            )

if soapReferences:
    # Stage 3: compute the SOAP data for every trajectory in bases.hdf5 and
    # store the results in the "SOAP" group of the same file (opened in append
    # mode so the trajectories are kept).
    with File("bases.hdf5", "a") as workFile:
        saponifyGroup(
            trajContainers=workFile["Trajectories"],
            SOAPoutContainers=workFile.require_group("SOAP"),
            SOAPOutputChunkDim=1000,
            SOAPnJobs=32,
            SOAPrcut=6,
            # NOTE(review): the reference-creation step further down passes 8, 8
            # to createReferencesFromTrajectory; presumably these must match
            # SOAPnmax / SOAPlmax -- keep them in sync.
            SOAPnmax=8,
            SOAPlmax=8,
        )

# For each surface, map a label to the address tuple handed to
# createReferencesFromTrajectory.
# Presumably each tuple is (frame index, atom index) -- TODO confirm.
# NOTE(review): "211" and "210" share the same smc (0, 1297) and sshc (0, 1301)
# addresses -- confirm this is intentional and not a copy-paste leftover.
request = {
    "111": {"s": (0, 1312), "ss": (0, 1313), "b": (0, 1099)},
    "110": {
        "slc": (0, 1072),
        "shc": (0, 1089),
        "sslc": (0, 1074),
        "sshc": (0, 1091),
        "b": (0, 1080),
    },
    "211": {
        "slc": (0, 1176),
        "smc": (0, 1297),
        "shc": (0, 1202),
        "sslc": (0, 1275),
        "ssmc": (0, 1204),
        "sshc": (0, 1301),
        "b": (0, 1309),
    },
    "210": {
        "slc": (0, 1320),
        "smc": (0, 1297),
        "shc": (0, 1298),
        "sslc": (0, 1611),
        "ssmc": (0, 1324),
        "sshc": (0, 1301),
        "b": (0, 1308),
    },
}

# One reference set per surface, keyed by the surface name.
references = {}
with File("bases.hdf5", "r") as workFile:
    for surface, addresses in request.items():
        # NOTE(review): the two 8s are presumably lmax and nmax and should match
        # the SOAPlmax / SOAPnmax values used in the saponifyGroup call -- confirm.
        surfaceRefs = references[surface] = createReferencesFromTrajectory(
            workFile[f'SOAP/{surface}'], addresses, 8, 8
        )
        # Prefix every reference name with its surface so that names stay
        # distinguishable across surfaces (e.g. "111_s", "110_b").
        for idx, label in enumerate(surfaceRefs.names):
            surfaceRefs.names[idx] = f'{surface}_{label}'
    

# Merge the per-surface reference sets into a single collection.
wholeData=mergeReferences(references['111'],references['110'],references['211'],references['210'])
ndataset=len(wholeData)

# Number of unordered pairs (i, j) with i < j. Floor division keeps the count
# in exact integer arithmetic instead of going through a float.
wholeDistances=numpy.zeros(ndataset*(ndataset-1)//2)

# Fill the flat array with the distance between every pair of reference
# spectra, in upper-triangle row-major order:
# (0,1), (0,2), ..., (0,n-1), (1,2), ..., (n-2,n-1).
cpos=0
for i in range(ndataset):
    for j in range(i+1,ndataset):
        wholeDistances[cpos]=SOAPdistanceNormalized(wholeData.spectra[i],wholeData.spectra[j])
        cpos+=1


# Write every per-surface reference set into the 'testReferences' group of
# references.hdf5. Mode 'w' creates the file, truncating it if it already exists.
with File("references.hdf5", 'w') as refFile:
    testGroup = refFile.require_group('testReferences')
    for surfaceName, surfaceRefs in references.items():
        saveReferences(testGroup, surfaceName, surfaceRefs)