# Checking Volume

In [9]:
import ase 
import ase.io
import numpy as np
import pandas as pd

In [7]:
org_struct = ase.io.read('/home/lls34/GitHub/01_PhD/PhD_Code/submodules/testing-framework/testing-framework/tests/In2O3/bulk_In2O3_Ia3_relaxed/bulk.xyz')
qe_rel = ase.io.read('/home/lls34/GitHub/01_PhD/PhD_Code/submodules/testing-framework/testing-framework/example_models/In2O3/run_dir/In2O3-model-DFT_QE-test-bulk_In2O3_Ia3-relaxed.xyz')
gap_rel = ase.io.read('/home/lls34/GitHub/01_PhD/PhD_Code/submodules/testing-framework/testing-framework/example_models/In2O3/run_dir/In2O3-model-GAP_it1_50s-test-bulk_In2O3_Ia3-relaxed.xyz')

In [12]:
pd.DataFrame({
    'method':['Material Porject', 'QE relaxed', 'GAP relaxed'],  
    'Volume/atom': np.array([org_struct.get_volume(), qe_rel.get_volume(), gap_rel.get_volume()])/gap_rel.get_global_number_of_atoms()
})

Unnamed: 0,method,Volume/atom
0,Material Porject,13.022751
1,QE relaxed,13.569964
2,GAP relaxed,13.654191


# How to relax cell aswells!!

The unit cell isminimised in the ase-espresso framework using the `relax_config` funciton in `utilities.py`

```python
if relax_cell:
    atoms_cell = ExpCellFilter(
        atoms,
        mask=strain_mask,
        constant_volume=constant_volume,
        scalar_pressure=applied_P * GPa,
        hydrostatic_strain=hydrostatic_strain,
    )
```

In the ase documentaiton it is recommended to use: `ase.constraints.StrainFilter(atoms, mask=None, include_ideal_gas=False)`

The strain filter is for optimizing the unit cell while keeping scaled positions fixed. An example of which is here https://wiki.fysik.dtu.dk/ase/tutorials/lattice_constant.html



In [14]:
a= np.array([org_struct.get_volume(), qe_rel.get_volume(), gap_rel.get_volume()])/gap_rel.get_global_number_of_atoms()
a[2]-a[1]

0.08422650543047716

In [15]:
gap_rel.get_global_number_of_atoms()

40

# Creating Training set 

In [30]:
import uuid

In [60]:
import os
dirs = [
#     '/data/lls34/Data/In2O3/In2O3_LS_1/04_QE2/02_Ameliorate_GAP/02_Itterative/03_Bulk/Volume_Curves/It2/Training/Data_b_GAP',
#     '/data/lls34/Data/In2O3/In2O3_LS_1/04_QE2/02_Ameliorate_GAP/02_Itterative/03_Bulk/Volume_Curves/It1_2/Train/Data_b_GAP',
#     "/data/lls34/Data/In2O3/In2O3_LS_1/04_QE2/02_Ameliorate_GAP/02_Itterative/03_Bulk/Volume_Curves/It1_2/Train/Data_a_unrelaxed",
#     "/data/lls34/Data/In2O3/In2O3_LS_1/04_QE2/02_Ameliorate_GAP/02_Itterative/03_Bulk/Volume_Curves/It1_2/Train/Data_b_GAP",
#     "/data/lls34/Data/In2O3/In2O3_LS_1/04_QE2/02_Ameliorate_GAP/02_Itterative/03_Bulk/Volume_Curves/It1_2/Train/Data_c_DFT",
]
for direct in  dirs:
    atoms=[]
    for file in os.listdir(direct):
        if file.endswith(".xyz"):
            print(os.path.join(direct, file))
            ats = ase.io.read(os.path.join(direct, file))
            ats.info['type'] = 'bulk'
            ats.info['origin'] = 'itB_1'
            ats.info['uid'] = uuid.uuid4().hex
            atoms.append(ats)

    #print(direct+'.xyz')
    #ase.io.write(direct+'.xyz', atoms)
    
    print('To:', direct+'_train.xyz')
    atoms = [*ase.io.read('/data/lls34/Data/In2O3/In2O3_LS_1/04_QE2/02_Ameliorate_GAP/02_Itterative/01_HighT_MD/MD_itteration_1/Train/join1_highTmd_itt_1.xyz',':'), *atoms]
    ase.io.write(direct+'_train.xyz', atoms)

/data/lls34/Data/In2O3/In2O3_LS_1/04_QE2/02_Ameliorate_GAP/02_Itterative/03_Bulk/Volume_Curves/It2/Training/Data_b_GAP/QE_In2O3-model-GAP_itB_1_b-test-bulk_In2O3_Ia3-E_vs_V_-08-relaxed_0.xyz
/data/lls34/Data/In2O3/In2O3_LS_1/04_QE2/02_Ameliorate_GAP/02_Itterative/03_Bulk/Volume_Curves/It2/Training/Data_b_GAP/QE_In2O3-model-GAP_itB_1_b-test-bulk_In2O3_Ia3-E_vs_V_-02-relaxed_0.xyz
/data/lls34/Data/In2O3/In2O3_LS_1/04_QE2/02_Ameliorate_GAP/02_Itterative/03_Bulk/Volume_Curves/It2/Training/Data_b_GAP/QE_In2O3-model-GAP_itB_1_b-test-bulk_In2O3_Ia3-E_vs_V_-10-relaxed_0.xyz
/data/lls34/Data/In2O3/In2O3_LS_1/04_QE2/02_Ameliorate_GAP/02_Itterative/03_Bulk/Volume_Curves/It2/Training/Data_b_GAP/QE_In2O3-model-GAP_itB_1_b-test-bulk_In2O3_Ia3-E_vs_V_-04-relaxed_0.xyz
/data/lls34/Data/In2O3/In2O3_LS_1/04_QE2/02_Ameliorate_GAP/02_Itterative/03_Bulk/Volume_Curves/It2/Training/Data_b_GAP/QE_In2O3-model-GAP_itB_1_b-test-bulk_In2O3_Ia3-E_vs_V_-06-relaxed_0.xyz
To: /data/lls34/Data/In2O3/In2O3_LS_1/04_QE2/

import os
for file in os.listdir("/mydir"):
    if file.endswith(".txt"):
        print(os.path.join("/mydir", file))

In [25]:
os.path.split(direct)[0]

'/data/lls34/Data/In2O3/In2O3_LS_1/04_QE2/02_Ameliorate_GAP/02_Itterative/03_Bulk/Volume_Curves/It1'

# Creating Dataframe for storing

In [70]:
import hashlib

In [71]:
at = ase.io.read('/data/lls34/Data/In2O3/In2O3_LS_1/04_QE2/02_Ameliorate_GAP/02_Itterative/03_Bulk/Volume_Curves/It2/Training/Data_b_GAP/QE_In2O3-model-GAP_itB_1_b-test-bulk_In2O3_Ia3-E_vs_V_-08-relaxed_0.xyz')

In [99]:


def hash_array(v):
    
    # Firstly convert array to string
    v_str = np.array2string(v, precision=8, sign='+', floatmode='fixed').encode()
    
    # Convert string to hash
    # Note there are alternative has algorithms: SHA256, RSA’s MD5, ...
    v_hash =  hashlib.sha256(v_str).hexdigest()
    #print(v_str)
    
    return v_hash

def hash_atoms(at):
    '''
    Takes atom object and returns unique hash of legnth 64
    '''
    # Create array
    
    # Atomic numbers + positions
    v = np.concatenate((at.numbers.reshape(-1,1), at.positions),axis=1).flatten() 
    # Cell
    v = np.concatenate((v.flatten(), at.get_cell().flatten()))
    
    v_hash = hash_array(v)
    
    return v_hash

In [123]:
a = np.array(['tst', 1.000001, 3])

len(hash_array(a))

64

In [106]:
s = np.array2string(a, precision=10, sign='+', floatmode='fixed').encode()
h= hashlib.md5(s)
h. hexdigest()

'7bc2410f60c7966af598b9c1b8cefb08'

In [None]:
has_atom = 

In [118]:
a = np.concatenate((at.numbers.reshape(-1,1), at.positions),axis=1)
a = np.concatenate(a.flatten(), at.get_cell().flatten())
#np.array2string(a.flatten(), precision=10, sign='+', floatmode='fixed')

TypeError: only integer scalar arrays can be converted to a scalar index

In [113]:
np.concatenate(a.flatten(), at.cell().flatten())

TypeError: 'Cell' object is not callable

In [117]:
at.get_cell().flatten()

array([ 8.28142705e+00,  6.49078230e-10,  7.36214989e-16, -2.76047568e+00,
        7.80780430e+00, -1.12424517e-09, -2.76047568e+00, -3.90390215e+00,
        6.76175687e+00])

In [122]:
np.concatenate((at.get_cell().flatten(), a.flatten()))


array([ 8.28142705e+00,  6.49078230e-10,  7.36214989e-16, -2.76047568e+00,
        7.80780430e+00, -1.12424517e-09, -2.76047568e+00, -3.90390215e+00,
        6.76175687e+00,  4.90000000e+01, -4.08019009e+00,  2.88513009e+00,
        5.14544342e+00,  4.90000000e+01,  6.84066578e+00,  1.01877207e+00,
        1.61631345e+00,  4.90000000e+01,  1.31971441e+00, -9.33179010e-01,
        4.99719189e+00,  4.90000000e+01, -1.38023784e+00, -1.95195107e+00,
        3.38087843e+00,  4.90000000e+01, -1.31971441e+00,  3.86110562e+00,
        3.30675267e+00,  4.90000000e+01,  4.20123696e+00, -2.84233355e+00,
        5.07131765e+00,  4.90000000e+01, -1.31971441e+00,  1.06156860e+00,
        5.07131765e+00,  4.90000000e+01, -1.44076128e+00,  6.74623571e+00,
        1.69043922e+00,  4.90000000e+01,  4.08019009e+00,  4.27965300e-02,
        3.45500420e+00,  4.90000000e+01, -1.38023784e+00,  3.90390215e+00,
       -0.00000000e+00,  4.90000000e+01,  4.08019009e+00,  5.89864976e+00,
        7.41257600e-02,  

In [None]:
pd.DataFrame({
    'uid': ,
    'type': ,
    'origin': ,
    
})

In [None]:
pd.DataFrame(columns=['uid', 'hash', 'origin', 'type', 'symbols', 'volume'])

In [137]:
str(at.symbols)

'In16O24'

In [124]:
import json

In [126]:
d = {'test': 12, 'y':40, 'yum':'it'}
d

{'test': 12, 'y': 40, 'yum': 'it'}

In [127]:
json_file = open('test.yml', 'w')
json.dump(d, json_file)
json_file.close()

In [130]:
json_file = open('test.yml', 'r')
out = json.load(json_file)
json_file.close()
out

{'test': 12, 'y': 40, 'yum': 'it'}

In [None]:
json_file = open(json_file_name, 'w')
json.dump(test.properties, json_file)
json_file.close()

json_data = json.load(model_data_file)