# Running Canonical Monte Carlo Sampling

In [1]:
import numpy as np
import json
from pymatgen.core.structure import Structure
from smol.io import load_work

### 0) Load the previous LNO CE with electrostatics

In [2]:
# Load the previously saved work and pick out the cluster expansion,
# which is stored under the 'ClusterExpansion' key.
work = load_work('data/basic_ce_ewald.mson')
expansion = work['ClusterExpansion']

### 1) Create a cluster expansion processor
The `CEProcessor` and `EwaldCEProcessor` classes represent the Monte Carlo simulation domain. They provide a way to quickly compute cluster expansion values for a fixed supercell size, and they have optimized ways of computing the energy change from species flips at certain sites.

In [3]:
from smol.moca import EwaldCEProcessor

# The supercell matrix fixes the size of the MC simulation domain.
sc_matrix = np.array([
    [6, 1, 1],
    [1, 2, 1],
    [1, 1, 2],
])

# Build the processor. An EwaldCEProcessor is used here so that
# changes in the electrostatic interaction are handled as well.
processor = EwaldCEProcessor(expansion, sc_matrix)

# NOTE: a real simulation will usually call for a much larger
# processor.size. A single MC step is O(1) in processor.size, i.e. it
# runs at the same speed whatever the size, but a larger domain needs
# many more steps before the MC simulation reaches equilibrium.
print(f'The supercell size for the processor is {processor.size} prims.')

The supercell size for the processor is 16 prims.


### 2) Create an initial structure
You will need to create an initial test structure to obtain an initial occupancy for the ensemble class. There are many ways to do this; for example, you could simply rescale a training structure and use that. However, since the composition is fixed in a canonical ensemble, you need to make sure you input the right composition. It can also be helpful to run a simulated annealing step to get a good initial structure rather than starting with a low energy one.

In [4]:
from pymatgen.transformations.standard_transformations import OrderDisorderedStructureTransformation

# Start from a copy of the prim structure and expand it in place to the
# MC supercell, so the expansion's own structure is left untouched.
supercell = expansion.cluster_subspace.structure.copy()
supercell.make_supercell(sc_matrix)

# pymatgen's order-disordered transformation is used to turn the
# (disordered) prim supercell into an ordered structure. The result
# keeps the composition that is set in the prim.
transformation = OrderDisorderedStructureTransformation()

# Applying the transformation can take a little while.
test_struct = transformation.apply_transformation(supercell)
print(test_struct.composition)

Li+8 Ni3+8 Ni4+8 O2-32


In [5]:
# Obtain the initial occupancy string from the
# test structure created above. This is the occupancy
# that is later handed to the ensemble as its starting state.
init_occu = processor.occupancy_from_structure(test_struct)

# The occupancy strings created by the processor
# are by default "encoded" by the indices of the species
# for each given site. You can always see the actual
# species in the occupancy string by decoding it
# with processor.decode_occupancy.
print(f'The encoded occupancy is: {init_occu}')
print(f'The initial occupancy is: {processor.decode_occupancy(init_occu)}')

The encoded occupancy is: [0 1 1 0 1 0 0 0 0 1 0 0 1 1 1 1 1 0 0 1 1 1 0 1 1 0 1 0 0 0 0 1 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
The initial occupancy is: ['Li+', 'Vacancy', 'Vacancy', 'Li+', 'Vacancy', 'Li+', 'Li+', 'Li+', 'Li+', 'Vacancy', 'Li+', 'Li+', 'Vacancy', 'Vacancy', 'Vacancy', 'Vacancy', 'Ni4+', 'Ni3+', 'Ni3+', 'Ni4+', 'Ni4+', 'Ni4+', 'Ni3+', 'Ni4+', 'Ni4+', 'Ni3+', 'Ni4+', 'Ni3+', 'Ni3+', 'Ni3+', 'Ni3+', 'Ni4+', 'O2-', 'O2-', 'O2-', 'O2-', 'O2-', 'O2-', 'O2-', 'O2-', 'O2-', 'O2-', 'O2-', 'O2-', 'O2-', 'O2-', 'O2-', 'O2-', 'O2-', 'O2-', 'O2-', 'O2-', 'O2-', 'O2-', 'O2-', 'O2-', 'O2-', 'O2-', 'O2-', 'O2-', 'O2-', 'O2-', 'O2-', 'O2-']


### 3) Create a canonical ensemble
The `CanonicalEnsemble` class is used to run MC in a fixed composition ensemble.
The ensemble classes will determine the **active** sublattices by grouping all sites that have the same possible partial occupancies.

To run for fixed chemical potential see the notebook on semigrand ensemble MC.

In [6]:
from smol.moca import CanonicalEnsemble

# Set up the canonical ensemble on top of the processor, starting
# from the initial occupancy obtained above.
ensemble = CanonicalEnsemble(
    processor,
    temperature=800,
    sample_interval=200,  # number of steps taken between saved samples
    initial_occupancy=init_occu,
)

print(f'The active sublattices are: {ensemble.sublattices}')

The active sublattices are: ['Ni3+/Ni4+', 'Li+/Vacancy']


### 4) Run MC iterations
MC iterations are by default run by flipping sites from all active sublattices, but fine-grained simulations can also be run by only flipping sites on some of the active sublattices, or even by freezing specific sites in active sublattices.

In [7]:
# Run one million MC iterations.
ensemble.run(1_000_000)

# Summary of the run: acceptance ratio, current energy and lowest energy seen.
print(f'Fraction of succesfull steps {ensemble.acceptance_ratio}')
print(f'The current step energy is {ensemble.current_energy} eV')
print(f'The minimum energy in trajectory is {ensemble.minimum_energy} eV')

# The structures for the current state and for the minimum energy
# state are available directly from the ensemble.
curr_s = ensemble.current_structure
min_s = ensemble.minimum_energy_structure

Fraction of succesfull steps 0.166251
The current step energy is -552.1488971864993 eV
The minimum energy in trajectory is -552.6314360817438 eV


#### 4.1) Continuing or resetting the MC trajectory
You can always continue running more iterations from where the trajectory left off by calling `run` again.
You can also reset to the initial state. (We will skip this step for now so we can show results from the run above.)

In [8]:
# You can continue the MC trajectory simply by calling run again
#ensemble.run(10000)  # this will append new data

# If you want to start from scratch
#ensemble.reset()  # this will delete data, and reset the ensemble to its initial state
# Now you can start a fresh run
#ensemble.run(10000)

### 5) Look at trajectory samples and averages
We can look at the sampled energies, the average and the variance directly from the class properties.

For further analysis samples are stored as a list of dictionaries for each sampled step in the `CanonicalEnsemble.data` attribute.
In the `CanonicalEnsemble` class only the energy and occupancy string of each sample are saved.

In [9]:
# The start of production is given in iteration numbers;
# here the first 50000 iterations are treated as burn-in.
ensemble.production_start = 50_000

# Compare the number of saved samples with the number kept for
# production, then report the energy average and variance.
print(f'A total of {len(ensemble.data)} samples taken.')
print(f'A total of {len(ensemble.energy_samples)} samples used for production.')
print(f'The average energy is {ensemble.average_energy} eV')
print(f'The energy variance is {ensemble.energy_variance} eV^2')

A total of 5000 samples taken.
A total of 4750 samples used for production.
The average energy is -552.0970846695453 eV
The energy variance is 0.05801302462263268 eV^2


### Save your work
You can use the same `save_work` convenience function to save your work.

In [10]:
from smol.io import save_work

# Save both the processor and the ensemble to a single file.
file_path = 'data/canonical_mc.mson'
save_work(file_path, processor, ensemble)