In [1]:
# in this tutorial, we will build a PdZn binary alloys with random composition using CELL software, to do so differents steps are needed:
# I have added some questions for the training at the end

#step 1 is to build a parent cell which will be composed from the primitive cell here Pd and the substitute here Zn

%matplotlib inline 
from ase.build import fcc111, add_adsorbate # ASE's utilities to build the surface
from clusterx.parent_lattice import ParentLattice 
from clusterx.structures_set import StructuresSet
from clusterx.visualization import juview
from clusterx.super_cell import SuperCell
from ase import Atoms
from ase.build import surface
from random import randint
import numpy as np
np.random.seed(10)

from ase.build import molecule
a = 2.889
c = 3.329

Alloy= Atoms('CuPd', scaled_positions=[(0, 0, 0),
                                (0.5, 0.5, 0),
                                ],
              cell=[a, a, c],
              pbc=True)

pri = surface(Alloy, (1, 1, 1), 7)
pri.center(vacuum=10, axis=2)

juview(pri)



NGLWidget()

In [2]:
# before we start the subtitution it is recommended to know the index of the atoms
symbols = pri.get_chemical_symbols() # Get Chemical symbols of slab
z_coords = pri.get_positions()[:,2] # Get z-coordinate of atomic positions in slab

print("{0:<19s}|{1:<19s}|{2:<19s}".format("Atom index","Chemical symbol","z coordinate")) # Print headers
for i, (symbol, z_coord) in enumerate(zip(symbols,z_coords)):
    print("{0:<19d}|{1:<19s}|{2:<19.3f}".format(i,symbol,z_coord)) # Print atom indexes, symbols and z_coordinat

Atom index         |Chemical symbol    |z coordinate       
0                  |Cu                 |10.000             
1                  |Pd                 |10.000             
2                  |Cu                 |11.741             
3                  |Pd                 |11.741             
4                  |Cu                 |13.482             
5                  |Pd                 |13.482             
6                  |Cu                 |15.223             
7                  |Pd                 |15.223             
8                  |Cu                 |16.965             
9                  |Pd                 |16.965             
10                 |Cu                 |18.706             
11                 |Pd                 |18.706             
12                 |Cu                 |20.447             
13                 |Pd                 |20.447             


In [3]:
# now we are asking that Zn could substitute all the 7 atoms of Pd that we have created above
list_of_elements = [["Pd", "Cu"]] * 14
print(list_of_elements)
platt = ParentLattice(pri, symbols= list_of_elements)
juview(platt)

[['Pd', 'Cu'], ['Pd', 'Cu'], ['Pd', 'Cu'], ['Pd', 'Cu'], ['Pd', 'Cu'], ['Pd', 'Cu'], ['Pd', 'Cu'], ['Pd', 'Cu'], ['Pd', 'Cu'], ['Pd', 'Cu'], ['Pd', 'Cu'], ['Pd', 'Cu'], ['Pd', 'Cu'], ['Pd', 'Cu']]


VBox(children=(HBox(children=(NGLWidget(), NGLWidget())),))

In [4]:
platt.get_sublattice_types(pretty_print=True)


+--------------------------------------------------------------------+
|              The structure consists of 1 sublattices               |
+--------------------------------------------------------------------+
| Sublattice type |       Chemical symbols       |  Atomic numbers   |
+--------------------------------------------------------------------+
|        0        |         ['Pd' 'Cu']          |      [46 29]      |
+--------------------------------------------------------------------+



In [5]:
# from above we can see that we have only one sublattice

In [28]:
# step 2 is to build the supercell from the parent lattice: in this example we are asking it to be a 4x4
scell = SuperCell(platt,[2,3])
juview(scell)

scell.get_sublattice_types(pretty_print=True)
sset = StructuresSet(platt)
z_coords_2 = scell.get_positions()[:]

nstruc = 10 # we are generating 100 ramdom structures with Zn concentration changing from 1 to 100 %
for i in range(nstruc):
    # check concentrations in 5% increments
    increment =int(randint(1,20) *0.05* 2*3*14)
    #print(increment)
    concentration = {0:[increment]} # Pick a random concentration of "Zn" substitutions starting from 1 to 4*4*7
    sset.add_structure(scell.gen_random(concentration)) # Generate and add a random structure to the StructuresSet

#print("\nRandom structures (first 3) of nstru:")
sset.serialize("sset3.json") # Write JSON db file for visualization with ASE's GUI.
from ase.io import read
from ase.visualize import view
#view(read("sset3.json@:"))
#juview(sset,n=3) # Plot the first 3 created random structrues



+--------------------------------------------------------------------+
|              The structure consists of 1 sublattices               |
+--------------------------------------------------------------------+
| Sublattice type |       Chemical symbols       |  Atomic numbers   |
+--------------------------------------------------------------------+
|        0        |         ['Pd' 'Cu']          |      [46 29]      |
+--------------------------------------------------------------------+



In [30]:
# step 3: we need to calculate the energied of the 100 structures defined above
from clusterx.calculators.emt import EMT2 # Load the EMT calculator from ASE
from clusterx.visualization import plot_property_vs_concentration
import os
from ase.io import read
parent_directory = os.path.abspath(".")
parent_directory = str(parent_directory) # path to current folder saved for later
 
sset.write_files(prefix="random_CuPd") # generate the folder structure with geometries for ab initio
structure_locations = sset.get_folders()

for paths in structure_locations:
    os.chdir(paths)
    model = read("geometry.json")
    model.set_calculator(EMT2()) # FHI-aims calculator go here
    e_model = model.get_potential_energy()
    
    f = open("total_energy_dft.dat", "w")
    f.write(str(e_model))
    f.close()
    
    os.chdir(parent_directory) # go back to the parent_directory to finish the loop

sset.read_property_values("total_energy_dft", write_to_file=False) # test
#sset = sset.get_property_values("total_energy_dft")

#sset.calculate_property("total_energy_emt") # Calculate energies with Effective Medium Theory calculator of ASE, the output below is the enrgies of the 60 structures

Could not read propery  total_energy_dft from folder  ./random_CuPd0
Could not read propery  total_energy_dft from folder  ./random_CuPd1
Could not read propery  total_energy_dft from folder  ./random_CuPd2
Could not read propery  total_energy_dft from folder  ./random_CuPd3
Could not read propery  total_energy_dft from folder  ./random_CuPd4
Could not read propery  total_energy_dft from folder  ./random_CuPd5
Could not read propery  total_energy_dft from folder  ./random_CuPd6
Could not read propery  total_energy_dft from folder  ./random_CuPd7
Could not read propery  total_energy_dft from folder  ./random_CuPd8
Could not read propery  total_energy_dft from folder  ./random_CuPd9


In [11]:
#In the next cell, we will create a small structures set containing just two structures: one pristine, non-substituted (all Pd) and free of adsorbants, and the other a fully substituted one (Pd->Zn), still without O adsorbants. NO CO2 HERE. The energies 𝐸0 and 𝐸1 of these structures will serve as references for generating figures depicting the total energy versus concentration: 𝑒=𝐸−𝐸0∗(1−𝑥)+𝐸1∗𝑥, with 𝐸 the EMT energy of the structure and 𝑥 the Zn concentration.

In [None]:
# step 4 we need to compare and plot the energy of the 100 structures with the energy of structure with 0 %Zn and 100*Zn
refs = StructuresSet(platt)
refs.add_structure(scell.gen_random({0:[0]})) # Pristine
refs.add_structure(scell.gen_random({0:[0],0:[4*4*7]})) # Full Zn substitution 4:4:7
refs.set_calculator(EMT2()) # AIMS calculator - how to incorporate socketed calc? Where is the calc called?
refs.calculate_property("total_energy_emt_refs")
ref_en = refs.get_property_values("total_energy_emt_refs")
plot_property_vs_concentration(sset, site_type=0, property_name="total_energy_emt",refs=ref_en,scale=0.6)


In [None]:
# step 5 we need to create a pool of cluster that would be applied later our supercell

r = 3.939 #/(2**(1/2))*2 # distance in Angstrom
from clusterx.clusters.clusters_pool import ClustersPool
cpool = ClustersPool(platt, npoints=[0,1,2,3], radii=[r, r,r,r]) #here we are asking for pool of clusters with a substitution npoints with specific radii, more we add points and increase the sistances more it will be accurate however it will take more times to generate
print(len(cpool)," clusters were generated.")

In [None]:
# Ask for pool clusters with a substitution npoints wth specific radius. Suggestions for different cutoffs for  n-points? how far we can go for 4x4x7

In [None]:
cpool.serialize(db_name="cpool.json")
#juview(cpool.get_cpool_atoms())
#view(cpool.get_cpool_atoms())

In [None]:
#step 5: Building the CE model, here we will use the g linear regression method (other methods are available too) from our pool and supercell that we have build in step 2 and 4

from clusterx.model import ModelBuilder
mb = ModelBuilder(selector_type="linreg",selector_opts={'clusters_sets':'size'},estimator_type="skl_LinearRegression",estimator_opts={"fit_intercept":False})
cemodel1 = mb.build(sset, cpool, "total_energy_emt") #Build CE model using the training data set
cpool_opt1 = mb.get_opt_cpool()
cemodel1.report_errors(sset)
cpool_opt1.display_info(ecis=cemodel1.get_ecis())
cpool_opt1.write_clusters_db(db_name="cpool_opt.json") #CV?

In [None]:
# plotting the results
from clusterx.visualization import plot_optimization_vs_number_of_clusters
from clusterx.visualization import plot_predictions_vs_target
plot_optimization_vs_number_of_clusters(mb.get_selector(),scale=0.5)
plot_predictions_vs_target(sset,cemodel1,"total_energy_emt",scale=0.5)
plot_property_vs_concentration(sset, site_type=0, property_name="total_energy_emt",cemodel=cemodel1,refs=ref_en,scale=0.5)

In [None]:
# more accurate set
#mb = ModelBuilder(selector_type="linreg",selector_opts={'clusters_sets':'size+combinations','nclmax':2,'set0':[1,3.939]},estimator_type="skl_LinearRegression",estimator_opts={"fit_intercept":True})
#cemodel3 = mb.build(sset, cpool, "total_energy_emt") #Build CE model using the training data set
#cpool_opt3 = mb.get_opt_cpool()
#cemodel3.report_errors(sset)
#cpool_opt3.display_info(ecis=cemodel3.get_ecis())
#cpool_opt3.write_clusters_db(db_name="cpool_opt3.json")

In [None]:
#plot_optimization_vs_number_of_clusters(mb.get_selector(),scale=0.5)
#plot_predictions_vs_target(sset,cemodel3,"total_energy_emt",scale=0.5)
#plot_property_vs_concentration(sset, site_type=2, property_name="total_energy_emt",cemodel=cemodel3,refs=ref_en,scale=0.5)

In [None]:
#step 6: Monte Carlo simulations
scell.get_sublattice_types(pretty_print=True)
sites_dict = scell.get_nsites_per_type()
for key in sites_dict.keys():
    print("Number of atoms in sublattice "+str(key)+":", sites_dict[key])

In [None]:
nsites = len(scell.get_substitutional_atoms())
print(nsites)

In [None]:
from clusterx.model import Model
cemodelE = Model(json_db_filepath ="sset2.json") # unable to import the file, what are we missing here
nsubs = {0:[4]}
kb = float(8.6173303*10**(-5)) # Boltzmann constant in eV/K
temp = 300 # Temperature in K

# Initialization of a MonteCarlo object
from clusterx.thermodynamics.monte_carlo import MonteCarlo
mc = MonteCarlo(cemodelE,
                scell,
                ensemble = "canonical",
                nsubs = nsubs,
                predict_swap = False)

#print(mc)
# Execution of a Metropolis Monte-Carlo sampling
traj = mc.metropolis(no_of_sampling_steps = 1000,
                     temperature = 800,
                     boltzmann_constant = kb,
                     scale_factor = [1/(1.0*nsites)])
'''

nsubs = {0:[4]}
kb = float(8.6173303*10**(-5)) # Boltzmann constant in eV/K
temp = 300 # Temperature in K

# Initialization of a MonteCarlo object
from clusterx.thermodynamics.monte_carlo import MonteCarlo
mc = MonteCarlo(cemodelE, \
                scell, \
                ensemble = "canonical", \
                nsubs = nsubs, \
                predict_swap = True)

# Execution of a Metropolis Monte-Carlo sampling
traj = mc.metropolis(no_of_sampling_steps = 1000, \
                     temperature = 800, \
                     boltzmann_constant = kb, \
                     scale_factor = [1/(1.0*nsites)])

In [None]:
# questions to be asked
#1- is how is EMT2 working on Zn while normally EMT is not available for Zn atoms
#2-I belive we can install it on supercomputer, dos it work on more than 1 nodes, could socket be used,
#3-how far we can go with the npoints and distances for 4x4x7 
#3-what are CV-RMSE and training-RMSE, what is supposed to be more stabe
#4-where are located the predicted, calculated and most stable structures, where can I see them
#5-why in the more accurate CE model we have to redefine clusters_sets':'size+combinations','nclmax':2,'set0':[1,3.939], aren't these defined in the cpool, is that not enough,
#6-How to apply MC on the model we create,
#7-could I make it for all the compositions in a loop, 