In [None]:
import os
import time
import joblib
import numpy as np

import matplotlib as mpl
import matplotlib.pyplot as plt 

from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
from pymatgen.core.lattice import Lattice
from pymatgen.core.structure import Structure
from pymatgen.analysis.diffraction.xrd import XRDCalculator

import xrayutilities as xru
from xrayutilities.materials.material import Crystal

import warnings
warnings.filterwarnings("ignore")

def load_plt_setting():
    """Apply the notebook-wide matplotlib style.

    Activates the seaborn "white" style sheet and then overrides a handful of
    rcParams: Arial as the sans-serif font, font size 32, normal font/label
    weight, axes line width 2, tick-label padding 8, and SVG text exported as
    text (``svg.fonttype = 'none'``).

    The function mutates matplotlib's global rcParams and returns ``None``.

    Raises
    ------
    OSError
        If neither spelling of the seaborn "white" style is known to the
        installed matplotlib.
    """
    # Recent matplotlib releases ship the seaborn style sheets under the
    # 'seaborn-v0_8-*' names; older releases only know 'seaborn-white'.
    # Try the current name first and fall back to the legacy one.
    try:
        plt.style.use('seaborn-v0_8-white')
    except OSError:
        plt.style.use('seaborn-white')

    mpl.rcParams['font.sans-serif'] = "Arial"
    mpl.rcParams['font.family'] = "sans-serif"
    mpl.rc('font', size=32)
    mpl.rcParams['axes.linewidth'] = 2
    mpl.rcParams['xtick.major.pad'] = '8'
    mpl.rcParams['ytick.major.pad'] = '8'
    plt.rcParams["font.weight"] = "normal"
    plt.rcParams["axes.labelweight"] = "normal"
    # Keep text as <text> elements in SVG output instead of converting to paths.
    plt.rcParams['svg.fonttype'] = 'none'
    
def scherrer_fwhm(crystal_size, theta, wavelength=1.5406, shape_factor=0.9):
    """Evaluate ``shape_factor * wavelength / (crystal_size * cos(theta / 2))``.

    Parameters
    ----------
    crystal_size : float or array-like
        Crystallite size used in the denominator.
        NOTE(review): presumably in the same length unit as ``wavelength``
        -- confirm against the caller.
    theta : float or numpy array
        Angle in degrees. It is halved before the cosine is taken, so it is
        presumably the 2-theta diffraction angle -- confirm against the caller.
    wavelength : float, optional
        Radiation wavelength (default 1.5406).
    shape_factor : float, optional
        Dimensionless prefactor (default 0.9).

    Returns
    -------
    float or numpy array
        The peak broadening given by the expression above.
    """
    half_angle_rad = np.deg2rad(theta/2)
    numerator = shape_factor*wavelength
    denominator = crystal_size*np.cos(half_angle_rad)
    return numerator/denominator

def get_simulated_pattern(index, xrd_dataset):
    """Look up one entry of a simulated-XRD dataset.

    Parameters
    ----------
    index
        Position of the entry to fetch.
    xrd_dataset : mapping
        Must provide the keys ``'xrd_patterns'``, ``'texts'`` and ``'labels'``,
        each holding an indexable collection.

    Returns
    -------
    tuple
        ``(xrd_pattern, text, label)`` taken at ``index`` from the three
        collections, in that order.
    """
    fields = ('xrd_patterns', 'texts', 'labels')
    return tuple(xrd_dataset[field][index] for field in fields)

def generate_two_mixed_xrd(idx1, idx2, xrd_dataset=None):
    """Blend two noisy simulated XRD patterns into one two-phase pattern.

    Each pattern is fetched with ``get_simulated_pattern``, passed through
    ``generate_noisy_xrd`` and weighted with a random mixing ratio ``r`` and
    ``1 - r`` (``r`` drawn from ``np.random.uniform(0, 1)``). The weighted sum
    is rescaled so that its maximum equals 100.

    Parameters
    ----------
    idx1, idx2
        Dataset indices of the two phases to mix.
    xrd_dataset : mapping, optional
        Dataset handed to ``get_simulated_pattern``. When omitted, the
        notebook-level variable ``xrd_dataset`` is used so that two-argument
        calls keep working.

    Returns
    -------
    tuple
        ``(two_mixed_xrd, new_texts, new_labels)`` where the two lists hold the
        text and label of phase 1 followed by those of phase 2.

    Raises
    ------
    ValueError
        If no dataset is passed and no notebook-level ``xrd_dataset`` exists.

    Notes
    -----
    ``generate_noisy_xrd`` and the four ``*_range`` noise settings are read
    from the notebook namespace; they are not defined in this cell.
    """
    if xrd_dataset is None:
        # The original call omitted the dataset argument required by
        # get_simulated_pattern; resolve it from the notebook namespace instead.
        xrd_dataset = globals().get('xrd_dataset')
        if xrd_dataset is None:
            raise ValueError(
                "generate_two_mixed_xrd needs an xrd_dataset: pass it explicitly "
                "or define a notebook-level 'xrd_dataset' variable."
            )

    new_texts = []
    new_labels = []
    noisy_phases = []
    # Process phase 1 first, then phase 2, so random draws happen in that order.
    for idx in (idx1, idx2):
        phase, text, label = get_simulated_pattern(idx, xrd_dataset)
        noisy_phases.append(
            generate_noisy_xrd(phase, crystal_size_range, position_shift_std_range,
                               intensity_variation_std_range, peak_loss_probability_range)
        )
        new_texts.append(text)
        new_labels.append(label)

    ratio1 = np.random.uniform(0, 1)
    ratio2 = 1. - ratio1
    # NOTE(review): assumes both noisy patterns can be added element-wise
    # (same shape) -- confirm against generate_noisy_xrd.
    two_mixed_xrd = ratio1 * noisy_phases[0] + ratio2 * noisy_phases[1]

    peak = np.max(two_mixed_xrd)
    if peak > 0:
        two_mixed_xrd = two_mixed_xrd / peak * 100.  # Normalize to 100
    # An all-zero mixture is returned unscaled rather than divided by zero.
    return two_mixed_xrd, new_texts, new_labels

# 1. Generate new CIFs by alloying the A-, B-, or X-site of halide perovskites

In [2]:
folder = os.getcwd()
cif_folder = os.path.join(folder, 'cif')

# Each non-empty line of cif_for_alloying.txt holds four tab-separated fields:
# the CIF name followed by the A-, B- and X-site species.
with open(os.path.join(cif_folder, 'cif_for_alloying.txt')) as file:
    cif_for_mix = [line.rstrip() for line in file if line.strip()]

# mix_dic maps "<kept1>-<kept2>" (the two species a group of CIFs share) to
#   'sample_list'  : CIF names sharing those two species,
#   'element_list' : the species each of those CIFs has on the remaining site,
#   'mix_pos'      : index (0=A, 1=B, 2=X) of that remaining site.
mix_dic = {}

for data in cif_for_mix:
    cif_name, A, B, X = data.split('\t')
    print(cif_name, A, B, X)
    sites = [A, B, X]
    for x, y in [(A, B), (A, X), (B, X)]:
        # The species that is neither x nor y is the one that can be alloyed.
        mix_element = list(set(sites) - {x, y})[0]
        mix_pos = sites.index(mix_element)
        # setdefault creates the entry on first sight of a pair; mix_pos is
        # therefore fixed by the first CIF registered under that pair.
        entry = mix_dic.setdefault('{}-{}'.format(x, y),
                                   {'sample_list': [],
                                    'element_list': [],
                                    'mix_pos': mix_pos})
        entry['sample_list'].append(cif_name)
        entry['element_list'].append(mix_element)

print(mix_dic)

161481 Cs Pb I
201251 Cs Pb Cl
231017 Cs Pb Br
235794 MA Pb Br
235795 FA Pb Br
250735 MA Pb I
250736 FA Pb I
{'Cs-Pb': {'sample_list': ['161481', '201251', '231017'], 'element_list': ['I', 'Cl', 'Br'], 'mix_pos': 2}, 'Cs-I': {'sample_list': ['161481'], 'element_list': ['Pb'], 'mix_pos': 1}, 'Pb-I': {'sample_list': ['161481', '250735', '250736'], 'element_list': ['Cs', 'MA', 'FA'], 'mix_pos': 0}, 'Cs-Cl': {'sample_list': ['201251'], 'element_list': ['Pb'], 'mix_pos': 1}, 'Pb-Cl': {'sample_list': ['201251'], 'element_list': ['Cs'], 'mix_pos': 0}, 'Cs-Br': {'sample_list': ['231017'], 'element_list': ['Pb'], 'mix_pos': 1}, 'Pb-Br': {'sample_list': ['231017', '235794', '235795'], 'element_list': ['Cs', 'MA', 'FA'], 'mix_pos': 0}, 'MA-Pb': {'sample_list': ['235794', '250735'], 'element_list': ['Br', 'I'], 'mix_pos': 2}, 'MA-Br': {'sample_list': ['235794'], 'element_list': ['Pb'], 'mix_pos': 1}, 'FA-Pb': {'sample_list': ['235795', '250736'], 'element_list': ['Br', 'I'], 'mix_pos': 2}, 'FA-Br'

In [3]:
# For every pair of parent CIFs that share two of the three sites, write four
# intermediate CIFs into cif/mix. Only the lattice is changed: parent 1 is
# strained part of the way towards the lattice lengths of parent 2, and the
# atoms written are still those of parent 1. The fractions in the file name
# (0.8/0.2 ... 0.2/0.8) label the interpolation step.
mix_folder = os.path.join(cif_folder, 'mix')
os.makedirs(mix_folder, exist_ok=True)  # make sure the output folder exists before writing

for shared_element, data in mix_dic.items():
    sample_list = data['sample_list']
    element_list = data['element_list']
    mix_pos = data['mix_pos']
    shared_element_list = shared_element.split('-')

    # Entries with a single sample yield no pairs, so nothing is written for them.
    for pos1, sample1 in enumerate(sample_list):
        for pos2 in range(pos1 + 1, len(sample_list)):
            sample2 = sample_list[pos2]
            # Pair each sample with its own alloying species by position.
            element1 = element_list[pos1]
            element2 = element_list[pos2]

            structure1 = Structure.from_file(os.path.join(cif_folder, sample1+'.cif'))
            abc1 = np.array(structure1.lattice.abc)
            structure2 = Structure.from_file(os.path.join(cif_folder, sample2+'.cif'))
            abc2 = np.array(structure2.lattice.abc)
            # Six evenly spaced strains from zero to (abc2/abc1 - 1); the two
            # end points (pure parents) are dropped, leaving four steps.
            strain3D_list = np.linspace([0,0,0],abc2/abc1-1,6)[1:-1]

            for i, strain in enumerate(strain3D_list):
                formula_parts = shared_element_list.copy()
                # Insert species 2 first, then species 1 at the same index, so
                # species 1 ends up in front of species 2 in the name.
                formula_parts.insert(mix_pos, element2+'0.{}'.format(i*2+2))
                formula_parts.insert(mix_pos, element1+'0.{}'.format(8-i*2))
                formula = ''.join(formula_parts)
                print(formula)
                # Strain a copy so the parent structure is never modified and
                # no inverse strain is needed to restore it.
                strained_structure = structure1.copy()
                strained_structure.apply_strain(strain)
                strained_structure.to(filename=os.path.join(mix_folder, '{}.cif'.format(formula)))

CsPbI0.8Cl0.2
CsPbI0.6Cl0.4
CsPbI0.4Cl0.6
CsPbI0.2Cl0.8
CsPbI0.8Br0.2
CsPbI0.6Br0.4
CsPbI0.4Br0.6
CsPbI0.2Br0.8
CsPbCl0.8Br0.2
CsPbCl0.6Br0.4
CsPbCl0.4Br0.6
CsPbCl0.2Br0.8
Cs0.8MA0.2PbI
Cs0.6MA0.4PbI
Cs0.4MA0.6PbI
Cs0.2MA0.8PbI
Cs0.8FA0.2PbI
Cs0.6FA0.4PbI
Cs0.4FA0.6PbI
Cs0.2FA0.8PbI
MA0.8FA0.2PbI
MA0.6FA0.4PbI
MA0.4FA0.6PbI
MA0.2FA0.8PbI
Cs0.8MA0.2PbBr
Cs0.6MA0.4PbBr
Cs0.4MA0.6PbBr
Cs0.2MA0.8PbBr
Cs0.8FA0.2PbBr
Cs0.6FA0.4PbBr
Cs0.4FA0.6PbBr
Cs0.2FA0.8PbBr
MA0.8FA0.2PbBr
MA0.6FA0.4PbBr
MA0.4FA0.6PbBr
MA0.2FA0.8PbBr
MAPbBr0.8I0.2
MAPbBr0.6I0.4
MAPbBr0.4I0.6
MAPbBr0.2I0.8
FAPbBr0.8I0.2
FAPbBr0.6I0.4
FAPbBr0.4I0.6
FAPbBr0.2I0.8


# 2. Apply lattice strain and create the dataset dictionary

In [4]:
folder = os.getcwd()
cif_folder = os.path.join(folder, 'cif')

xrd_calculator = XRDCalculator(wavelength="CuKa1")
# Five strain values per lattice direction between -0.002 and 0.002, combined
# into every (s1, s2, s3) triple: 5**3 = 125 strain states per structure.
strain1D_list = list(np.linspace(-0.002,0.002,5))
strain3D_list = np.array(np.meshgrid(strain1D_list,strain1D_list,strain1D_list)).T.reshape(-1,3)
possible_element = ['Cs','FA','MA','Pb','Sn','I','Br','Cl']

# dataset maps a CIF base name to its display formula, detected species,
# crystal system and the list of simulated (two_theta, intensity) patterns.
dataset = {}

# Sorted so that the processing order (and dict order) is reproducible.
for cif_file in sorted(os.listdir(os.path.join(cif_folder, 'mix'))):
    if not cif_file.endswith('.cif'):
        continue

    structure = Structure.from_file(os.path.join(cif_folder, 'mix', cif_file))
    # NOTE: stored under the key 'space_group', but this is the crystal system
    # string returned by get_crystal_system().
    space_group = SpacegroupAnalyzer(structure).get_crystal_system()
    formula = cif_file[:-4]

    # Species are detected by substring search in the file name.
    element_list = []
    halide_list = []
    for element in possible_element:
        if element in formula:
            element_list.append(element)
            if element in ['I','Br','Cl']:
                halide_list.append(element)

    if len(halide_list) > 1:
        # Mixed halides: wrap everything from the first halide onwards in
        # parentheses, e.g. "CsPbI0.8Cl0.2" -> "CsPb(I0.8Cl0.2)3".
        first_halide_index = min(formula.index(halide_list[0]),formula.index(halide_list[-1]))
        formula = formula[:first_halide_index] + '(' + formula[first_halide_index:] + ')3'
    else:
        formula = formula + '3'

    print('Processing {}'.format(formula))
    xrd_list = []
    time_start = time.time()
    for strain in strain3D_list:
        # Strain a copy: the reference structure stays untouched, so no inverse
        # strain is needed and round-off cannot build up over the 125 states.
        strained_structure = structure.copy()
        strained_structure.apply_strain(strain)
        xrd_pattern = xrd_calculator.get_pattern(strained_structure)
        # Keep only peaks with 5 <= two-theta <= 60.
        mask = (xrd_pattern.x >= 5) & (xrd_pattern.x <= 60)
        xrd_list.append((xrd_pattern.x[mask], xrd_pattern.y[mask]))
    print('Finished in {} s.'.format(time.time() - time_start))

    dataset[cif_file[:-4]] = {'formula':formula, 'element_list':element_list, 'space_group':space_group, 'xrd_list':xrd_list}
    print(formula,space_group,element_list)

Processing Cs0.2FA0.8PbBr3
Finished in 5.449138879776001 s.
Cs0.2FA0.8PbBr3 cubic ['Cs', 'FA', 'Pb', 'Br']
Processing Cs0.2FA0.8PbI3
Finished in 31.178075075149536 s.
Cs0.2FA0.8PbI3 tetragonal ['Cs', 'FA', 'Pb', 'I']
Processing Cs0.2MA0.8PbBr3
Finished in 9.861707925796509 s.
Cs0.2MA0.8PbBr3 cubic ['Cs', 'MA', 'Pb', 'Br']
Processing Cs0.2MA0.8PbI3
Finished in 10.385281324386597 s.
Cs0.2MA0.8PbI3 cubic ['Cs', 'MA', 'Pb', 'I']
Processing Cs0.4FA0.6PbBr3
Finished in 8.587735652923584 s.
Cs0.4FA0.6PbBr3 cubic ['Cs', 'FA', 'Pb', 'Br']
Processing Cs0.4FA0.6PbI3
Finished in 22.333346843719482 s.
Cs0.4FA0.6PbI3 tetragonal ['Cs', 'FA', 'Pb', 'I']
Processing Cs0.4MA0.6PbBr3
Finished in 7.956103086471558 s.
Cs0.4MA0.6PbBr3 cubic ['Cs', 'MA', 'Pb', 'Br']
Processing Cs0.4MA0.6PbI3
Finished in 9.578854084014893 s.
Cs0.4MA0.6PbI3 cubic ['Cs', 'MA', 'Pb', 'I']
Processing Cs0.6FA0.4PbBr3
Finished in 8.003965377807617 s.
Cs0.6FA0.4PbBr3 cubic ['Cs', 'FA', 'Pb', 'Br']
Processing Cs0.6FA0.4PbI3
Finished i

In [5]:
# Read cif_label.txt: every line is split on tabs and becomes one row of the
# resulting string array (the rows are indexed as columns 0, 1 and 2 later on).
with open(os.path.join(cif_folder, 'cif_label.txt')) as file:
    ICSD_label_list = np.array([line.rstrip().split('\t') for line in file])
print(ICSD_label_list)

[['21955' 'CsPbI3' 'gamma-CsPbI3']
 ['32306' 'CsPbI3' 'delta-CsPbI3']
 ['42013' 'PbI2' 'PbI2']
 ['44540' 'Cs4PbBr6' 'Cs4PbBr6']
 ['48997' 'CsPb2Br5' 'CsPb2Br5']
 ['56522' 'CsI' 'CsI']
 ['84525' 'CsPbBr3' 'orthorhombic-CsPbBr3']
 ['161481' 'CsPbI3' 'alpha-CsPbI3']
 ['230491' 'FAPbI3' 'delta-FAPbI3']
 ['230492' 'FAPbI3' 'beta-FAPbI3']
 ['235794' 'MAPbBr3' 'cubic-MAPbBr3']
 ['235795' 'FAPbBr3' 'cubic-FAPbBr3']
 ['236387' 'CsBr' 'CsBr']
 ['238610' 'MAPbI3' 'tetragonal-MAPbI3']
 ['239760' 'PbBr2' 'PbBr2']
 ['250736' 'FAPbI3' 'alpha-FAPbI3']
 ['33037' 'MAPbBr3' 'tetragonal-MAPbBr3']
 ['231017' 'CsPbBr3' 'cubic-CsPbBr3']
 ['250735' 'MAPbI3' 'cubic-MAPbI3']
 ['201251' 'CsPbCl3' 'cubic-CsPbCl3']
 ['241415' 'MAPbCl3' 'orthorhombic-MAPbCl3']
 ['243734' 'CsPbCl3' 'orthorhombic-CsPbCl3']]


In [6]:
# Simulate strained XRD patterns for every labelled ICSD structure and add them
# to `dataset`, keyed by the CIF name from column 0 of ICSD_label_list.
for label_row in ICSD_label_list:
    # Columns: CIF file name, composition string, display label.
    cif_file, composition, formula = label_row[:3]

    structure = Structure.from_file(os.path.join(cif_folder, cif_file+'.cif'))
    # NOTE: stored under the key 'space_group', but this is the crystal system
    # string returned by get_crystal_system().
    space_group = SpacegroupAnalyzer(structure).get_crystal_system()
    # Species are detected by substring search in the composition string.
    element_list = [element for element in possible_element if element in composition]
    print(cif_file, element_list, formula)

    print('Processing {}'.format(formula))
    xrd_list = []
    time_start = time.time()
    for strain in strain3D_list:
        # Strain a copy: the reference structure stays untouched, so no inverse
        # strain is needed and round-off cannot build up across strain states.
        strained_structure = structure.copy()
        strained_structure.apply_strain(strain)
        xrd_pattern = xrd_calculator.get_pattern(strained_structure)
        # Keep only peaks with 5 <= two-theta <= 60.
        mask = (xrd_pattern.x >= 5) & (xrd_pattern.x <= 60)
        xrd_list.append((xrd_pattern.x[mask], xrd_pattern.y[mask]))
    print('Finished in {} s.'.format(time.time() - time_start))

    dataset[cif_file] = {'formula':formula, 'element_list':element_list, 'space_group':space_group, 'xrd_list':xrd_list}

# The file is written with joblib.dump despite its .npy extension, so it has to
# be read back with joblib.load.
with open(os.path.join(cif_folder, 'dataset.npy'), 'wb') as handle:
    joblib.dump(dataset, handle)

21955 ['Cs', 'Pb', 'I'] gamma-CsPbI3
Processing gamma-CsPbI3
Finished in 40.204381465911865 s.
32306 ['Cs', 'Pb', 'I'] delta-CsPbI3
Processing delta-CsPbI3
Finished in 40.014403104782104 s.
42013 ['Pb', 'I'] PbI2
Processing PbI2
Finished in 4.693908929824829 s.
44540 ['Cs', 'Pb', 'Br'] Cs4PbBr6
Processing Cs4PbBr6
Finished in 169.41958451271057 s.
48997 ['Cs', 'Pb', 'Br'] CsPb2Br5
Processing CsPb2Br5
Finished in 47.40173864364624 s.
56522 ['Cs', 'I'] CsI
Processing CsI
Finished in 3.5373287200927734 s.
84525 ['Cs', 'Pb', 'Br'] orthorhombic-CsPbBr3
Processing orthorhombic-CsPbBr3
Finished in 32.4366717338562 s.
161481 ['Cs', 'Pb', 'I'] alpha-CsPbI3
Processing alpha-CsPbI3
Finished in 9.297422170639038 s.
230491 ['FA', 'Pb', 'I'] delta-FAPbI3
Processing delta-FAPbI3
Finished in 26.412991046905518 s.
230492 ['FA', 'Pb', 'I'] beta-FAPbI3
Processing beta-FAPbI3
Finished in 19.059160232543945 s.
235794 ['MA', 'Pb', 'Br'] cubic-MAPbBr3
Processing cubic-MAPbBr3
Finished in 8.483395099639893 s.