In [2]:
import itertools as it
import pandas as pd
import numpy as np
import pickle
import os
import shutil

from pymatgen.core import Element

from ase.io import read,write
from ase.visualize import view

from ast import literal_eval
from mendeleev import element
from itertools import combinations, permutations


# Lookup tables shared by the cells below.

# Roman numeral -> integer, for I (1) through XV (15).
roman_dict = {
    numeral: value
    for value, numeral in enumerate(
        ['I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII',
         'IX', 'X', 'XI', 'XII', 'XIII', 'XIV', 'XV'],
        start=1,
    )
}

# Candidate cations grouped by family. The order of each list is significant:
# it determines the iteration order when the ionic-radius tables are built.

# NOTE: despite the name, this list holds both alkali and alkaline-earth metals.
alkali = [
    'Li', 'Na', 'K', 'Cs', 'Rb',
    'Be', 'Mg', 'Ca', 'Sr', 'Ba',
]

# Tc removed: radioactive
transition = [
    'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn',
    'Y', 'Zr', 'Nb', 'Mo', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd',
    'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg',
]

main = [
    'Al', 'Ga', 'In', 'Sn', 'Tl', 'Pb', 'Bi',
    'B', 'Si', 'Ge', 'As', 'Sb', 'Te',
]

rare = [
    'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd',
    'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu',
]

  from .autonotebook import tqdm as notebook_tqdm


## Collect Coordination, Charge information from Mendeleev

In [3]:
# Ionic radii keyed by '<symbol>_<charge>', one table per coordination label.
cn6, cn12 = {}, {}
radius_tables = {'VI': cn6, 'XII': cn12}
allowed_charges = range(1, 9)  # only cations with charge +1 .. +8 are kept

for symbol in alkali + transition + rare + main:
    for entry in element(symbol).ionic_radii:
        table = radius_tables.get(entry.coordination)
        # Skip other coordination labels and charges outside the allowed range.
        if table is None or entry.charge not in allowed_charges:
            continue
        # A later entry with the same symbol/charge/coordination overwrites
        # the earlier one.
        table[symbol + '_' + str(entry.charge)] = entry.ionic_radius


a_site = list(cn12)  # +1 ~ +4
b_site = list(cn6)   # +1 ~ +7

len(a_site), len(b_site)

(15, 130)

## Collect possible elemental combinations for AA'BB'O6-type double perovskites (DP)

In [4]:
def get_charge(elems):
    """Extract the charge encoded in '<symbol>_<charge>' keys.

    Parameters
    ----------
    elems : str or list/tuple of str
        A single key such as ``'Na_1'``, or a sequence of such keys.

    Returns
    -------
    str or int
        For a single key, the charge part as a string (e.g. ``'1'``).
        For a sequence, the integer sum of all charges.

    Raises
    ------
    TypeError
        If ``elems`` is neither a string nor a list/tuple. The previous
        ``assert type(elems) == str or list`` was always true (``list`` is
        truthy), so unsupported inputs silently returned ``None``.
    """
    if isinstance(elems, str):
        return elems.split('_')[1]
    if isinstance(elems, (list, tuple)):
        return sum(int(elem.split('_')[1]) for elem in elems)
    raise TypeError(
        f'elems must be a str or a list/tuple of str, got {type(elems).__name__}'
    )

def get_elem(elems):
    """Return the element symbol from a '<symbol>_<charge>' key (e.g. 'Na_1' -> 'Na')."""
    assert type(elems) is str
    symbol, _, _ = elems.partition('_')
    return symbol

# All unordered pairs of A-site and of B-site '<symbol>_<charge>' keys.
a_site_comb_lst = list(combinations(a_site, 2))
b_site_comb_lst = list(combinations(b_site, 2))

# Keep (A1, A2, B1, B2) tuples whose cation charges sum to +12.
total_comb = [
    a_pair + b_pair
    for a_pair in a_site_comb_lst
    for b_pair in b_site_comb_lst
    if get_charge([*a_pair, *b_pair]) == 12
]

# Element-only view of each combination, then de-duplicated.
total_comb_flatten = [
    sorted(key.split('_')[0] for key in combo) for combo in total_comb
]
unique_comb = [symbols for symbols, _ in it.groupby(sorted(total_comb_flatten))]

# Radii in the same (A1, A2, B1, B2) order: XII-coordinated for A, VI-coordinated for B.
total_radii = [
    [cn12[a1], cn12[a2], cn6[b1], cn6[b2]]
    for a1, a2, b1, b2 in total_comb
]

len(total_comb)

125566

## Build the candidate DataFrame and calculate the Goldschmidt tolerance factor (using the mean ionic radius of the two A-site cations and the mean ionic radius of the two B-site cations)

In [5]:
# One row per (A1, A2, B1, B2) combination. Column order is kept stable because
# it is written out to CSV later.
df = pd.DataFrame({
    'A_atom_1': [get_elem(comb[0]) for comb in total_comb],
    'A_atom_2': [get_elem(comb[1]) for comb in total_comb],
    'A_atom_1_oxi': [get_charge(comb[0]) for comb in total_comb],
    'A_atom_2_oxi': [get_charge(comb[1]) for comb in total_comb],
    'A_atom_1_radii': [radii[0] for radii in total_radii],
    'A_atom_2_radii': [radii[1] for radii in total_radii],
    'A_total': ['_'.join(sorted([get_elem(comb[0]), get_elem(comb[1])])) for comb in total_comb],
    'B_atom_1': [get_elem(comb[2]) for comb in total_comb],
    'B_atom_2': [get_elem(comb[3]) for comb in total_comb],
    'B_atom_1_oxi': [get_charge(comb[2]) for comb in total_comb],
    'B_atom_2_oxi': [get_charge(comb[3]) for comb in total_comb],
    'B_atom_1_radii': [radii[2] for radii in total_radii],
    'B_atom_2_radii': [radii[3] for radii in total_radii],
    'B_total': ['_'.join(sorted([get_elem(comb[2]), get_elem(comb[3])])) for comb in total_comb],
})

# Oxygen radius: first mendeleev entry with coordination 'VI'.
O_atom_radii = next(
    entry.ionic_radius
    for entry in element('O').ionic_radii
    if entry.coordination == 'VI'
)

# Calculate tolerance factor, vectorised over all rows:
#   t = (r_A + r_O) / (sqrt(2) * (r_B + r_O))
# with r_A / r_B the mean radius of the two A-site / B-site cations.
averaged_A = (df['A_atom_1_radii'] + df['A_atom_2_radii']) / 2
averaged_B = (df['B_atom_1_radii'] + df['B_atom_2_radii']) / 2
t_factor = (averaged_A + O_atom_radii) / np.sqrt(2) / (averaged_B + O_atom_radii)
df['tolerance_factor'] = t_factor

# By definition, size of A should be larger than size of B; drop the rest.
# Filtering with a boolean mask (instead of a 'wrong_radii' string sentinel)
# keeps the column numeric, and .copy() makes the result an independent frame
# so later cells can add columns without SettingWithCopyWarning.
mask0 = averaged_A > averaged_B
df = df[mask0].copy()

df.head(10)

Unnamed: 0,A_atom_1,A_atom_2,A_atom_1_oxi,A_atom_2_oxi,A_atom_1_radii,A_atom_2_radii,A_total,B_atom_1,B_atom_2,B_atom_1_oxi,B_atom_2_oxi,B_atom_1_radii,B_atom_2_radii,B_total,tolerance_factor
0,Na,K,1,1,139.0,164.0,K_Na,Sc,Mn,3,7,74.5,46.0,Mn_Sc,1.029321
1,Na,K,1,1,139.0,164.0,K_Na,Sc,Re,3,7,74.5,53.0,Re_Sc,1.01164
2,Na,K,1,1,139.0,164.0,K_Na,Sc,Os,3,7,74.5,52.5,Os_Sc,1.012883
3,Na,K,1,1,139.0,164.0,K_Na,Ti,Mn,3,7,67.0,46.0,Mn_Ti,1.048965
4,Na,K,1,1,139.0,164.0,K_Na,Ti,Re,3,7,67.0,53.0,Re_Ti,1.030608
5,Na,K,1,1,139.0,164.0,K_Na,Ti,Os,3,7,67.0,52.5,Os_Ti,1.031898
6,Na,K,1,1,139.0,164.0,K_Na,Ti,Cr,4,6,60.5,44.0,Cr_Ti,1.072154
7,Na,K,1,1,139.0,164.0,K_Na,Ti,Mo,4,6,60.5,59.0,Mo_Ti,1.031898
8,Na,K,1,1,139.0,164.0,K_Na,Ti,W,4,6,60.5,60.0,Ti_W,1.029321
9,Na,K,1,1,139.0,164.0,K_Na,Ti,Re,4,6,60.5,55.0,Re_Ti,1.042334


In [6]:
# Tolerance-factor screening.
# Keep only the first row for each (A element pair, B element pair).
df = df.drop_duplicates(subset=['A_total', 'B_total'], keep='first')

tolerance = df['tolerance_factor']
mask1 = tolerance.between(0.825, 1.059)  # loose window, bounds inclusive
mask2 = tolerance.between(0.99, 1.01)    # tight window, bounds inclusive -- let's consider this

n_total = len(df)
n_loose = len(df[mask1])
n_tight = len(df[mask2])
print(f'Total : {n_total}')
print(f'With loose threshold : {n_loose}')
print(f'With tight threshold : {n_tight}')

Total : 83652
With loose threshold : 71685
With tight threshold : 6510


## Calculate ionic radii of CaTiO3 to estimate the initial lattice parameters

In [7]:
# Row number (as a string) -> single-letter row code: '1' -> 'a' ... '9' -> 'i'.
row_dict = dict(zip('123456789', 'abcdefghi'))

# Element symbols in order of atomic number, H (Z=1) through Hs (Z=108).
elements = [
    # Z = 1-10
    'H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne',
    # Z = 11-18
    'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar',
    # Z = 19-36
    'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni',
    'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr',
    # Z = 37-54
    'Rb', 'Sr', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd',
    'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te', 'I', 'Xe',
    # Z = 55-86
    'Cs', 'Ba',
    'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd',
    'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu',
    'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg',
    'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn',
    # Z = 87-108
    'Fr', 'Ra',
    'Ac', 'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm',
    'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr',
    'Rf', 'Db', 'Sg', 'Bh', 'Hs',
]

def convert_to_id(element):
    """Encode an element symbol as a three-character '<row letter><2-digit group>' id.

    Elements with 57 <= Z <= 71 get row letter 'h' and elements with
    89 <= Z <= 103 get row letter 'i'; for both, the numeric part is
    ``(Z - 54) % 32``. Every other element uses ``row_dict`` for the letter
    and its pymatgen ``group`` for the number. Single-digit numbers are
    zero-padded.
    """
    atom = Element(element)
    in_first_f_block = 57 <= atom.Z <= 71
    in_second_f_block = 89 <= atom.Z <= 103

    if in_first_f_block or in_second_f_block:
        row = 'h' if in_first_f_block else 'i'
        group = str((atom.Z - 54) % 32)
    else:
        row = row_dict[str(atom.row)]
        group = str(atom.group)

    return row + group.zfill(2)

# One identifier per row: 'BK_' followed by the encoded A1, A2, B1, B2 elements.
site_columns = ['A_atom_1', 'A_atom_2', 'B_atom_1', 'B_atom_2']
id_lst = [
    'BK_' + ''.join(convert_to_id(symbol) for symbol in symbols)
    for symbols in zip(*(df[column] for column in site_columns))
]
df['id'] = id_lst

In [8]:
# Reference radii from CaTiO3: Ca2+ in 12-fold and Ti4+ in 6-fold coordination.
Ca_radii = cn12['Ca_2']
Ti_radii = cn6['Ti_4']

# Ratio of (mean A radius + mean B radius) to the CaTiO3 reference sum.
factor = [
    ((a1 + a2) / 2 + (b1 + b2) / 2) / (Ca_radii + Ti_radii)
    for a1, a2, b1, b2 in zip(
        df['A_atom_1_radii'], df['A_atom_2_radii'],
        df['B_atom_1_radii'], df['B_atom_2_radii'],
    )
]

# '<zero-padded row label>_<A elements><B elements>', underscores stripped
# from the element part.
name = [
    f'{label:05}_' + a_total.replace('_', '') + b_total.replace('_', '')
    for label, a_total, b_total in zip(df.index, df['A_total'], df['B_total'])
]

df['relative_lattice_factor'] = factor
df['name'] = name
df.to_csv('./data/total_bulk_data.csv')

## Generate trajectories

In [13]:
from ase.io import read, write
from pymatgen.io.ase import AseAtomsAdaptor
from pymatgen.io.cif import CifWriter

# Template structure; every candidate is derived from a fresh copy of it.
CaTiO3 = read('CaTiO3.traj')
cell = CaTiO3.get_cell()

# Site indices in the template: Ca marks the A-site, Ti marks the B-site.
A_index = [atom.index for atom in CaTiO3 if atom.symbol == 'Ca']
B_index = [atom.index for atom in CaTiO3 if atom.symbol == 'Ti']

# Fail early with a clear message rather than an IndexError inside the loop.
# NOTE(review): only the first two Ca and the first two Ti sites are replaced
# below; if the template holds more than two of either, the rest stay Ca/Ti --
# confirm the template cell contains exactly two of each.
if len(A_index) < 2 or len(B_index) < 2:
    raise ValueError(
        'CaTiO3 template must contain at least two Ca and two Ti atoms, '
        f'found {len(A_index)} Ca and {len(B_index)} Ti'
    )

# Candidates inside the tight tolerance-factor window.
masked_df = df[mask2].reset_index(drop=True)

structures_as_cif = []
for index, row in masked_df.iterrows():
    new_cell = cell*row['relative_lattice_factor']
    # Work on a copy: previously `new_atoms = CaTiO3` aliased the template, so
    # every iteration rescaled and relabelled the same object and the scaling
    # round-off accumulated from one structure to the next.
    new_atoms = CaTiO3.copy()
    new_atoms.set_cell(new_cell, scale_atoms=True)
    # Change elements
    new_atoms[A_index[0]].symbol = row['A_atom_1']
    new_atoms[A_index[1]].symbol = row['A_atom_2']
    new_atoms[B_index[0]].symbol = row['B_atom_1']
    new_atoms[B_index[1]].symbol = row['B_atom_2']

    cif = str(CifWriter(AseAtomsAdaptor.get_structure(new_atoms)))
    structures_as_cif.append(cif)

    # You can load structures from cif strings using
    # pymatgen.core.Structure.from_str(cif_string, fmt='cif')


masked_df['cif'] = structures_as_cif
# NOTE(review): this writes under '../data/' while the bulk table is written
# under './data/' -- confirm which directory is intended.
masked_df.to_csv('../data/cubic_DP_data_with_cif.csv')