In [1]:
import pandas as pd
import numpy as np
import threading
import pandas as pd
import math

In [9]:
A_cations = ['Sm','Sc','Gd','Y','La','Pr','Nd','Tm','Rb','Dy','Pm','Yb',
             'Mg','Hf','Cs','Ce','Ca','Pb','Cd','Bi','Ba','Tl','Ag','Tb',
             'Sb','Eu','K','Sn','In','Na','Sr']
B_cations = ['Cr','Co','Fe','Nb','In','Er','Sn','Zr','Be','Ge','Hf','Ti','Ga','Si','Ta','Al','Bi','B','Sb','Ce']

In [8]:
X_anions = ['O']
A_ion_radii = {
    'Eu':1.437,'La':1.36,'Pr':1.304,'Nd':1.27,'Tm':1.168,'Rb':1.72,'Dy':1.192,'Pm':1.339,'Yb':1.343,'Mg':1.21,'Hf':1.07,
    'Cs':1.88,'Gd':1.212,'Ce':1.14,'Ca':1.34,'Pb':1.49,'Ba':1.61,'Sr':1.44,'Cd':1.31,'Y':1.223,'Sm':1.24,'Tl':1.70,'Bi':1.380,
    'In':1.596,'Ag':1.521,'Tb':1.201,'Sc':1.122,'Sb':1.234,'K':1.64,'Sn':1.408,'Na':1.39
}
B_ion_radii = {
    'Cr':0.615,'Nb':0.64,'In':0.80,'Er':0.89,'Sn':0.69,'Zr':0.72,'Be':0.45,'Ge':0.53,'Ti':0.605,'Si':0.40,'Sn':0.69,'Fe':0.645,
    'Co':0.61,'Sb':0.60,'Ta':0.64,'Al':0.535,'Bi':1.03,'B':0.27,'Ce':0.87,'Hf':0.71,'Ga':0.62
}
X_ion_radii = {'O':1.40}
nA_jiatai = {
    'Eu':2,'La':3,'Pr':3,'Nd':3,'Tm':3,'Rb':1,'Dy':3,'Pm':3,'Yb':2,'Mg':2,'Hf':4,'Cs':1,'Gd':3,'Ce':4,'Ca':2,'Pb':2,
    'Ba':2,'Cd':2,'Ag':1,'Tb':3,'Sc':3,'Sb':3,'K':1,'Sn':2,'Na':1,'Sr':2,'Cd':2,'Y':3,'Sm':3,'Tl':1,'Bi':3,'In':1
}

# no new_tolerance

In [9]:
# All possible chemical formulas after ion substitution
import concurrent.futures
from pymatgen.core import Composition
def generate_candidate_all(i,site):
    candidates = []
    for X in X_anions:
        if site == 'A':
            for A1 in A_cations:
                for A2 in A_cations:
                    for B in B_cations:
                        B1, B2 = B,B
                        b = 1
                        a = i
                        if (A1 != A2) and (A1 != B) and (A2 != B):
                            candidate = ''.join([X, '3', A1, str(a), A2, str(1 - a), B1, str(b), B2, str(1 - b)])
                            tmp_dict = {'formula': Composition(candidate).reduced_formula,
                                        'A1': A1, 'A1_ratio': a,'nA1':nA_jiatai[A1],
                                        'A2': A2, 'A2_ratio': 1 - a,'nA2':nA_jiatai[A2],
                                        'B1': B1, 'B1_ratio': b, 'B2': B2, 'B2_ratio': 1 - b,
                                        'X': X, 'X_ratio': 3,
                                        'Ra': A_ion_radii[A1] * a + A_ion_radii[A2] * (1 - a),
                                        'Rb': B_ion_radii[B1] * b + B_ion_radii[B2] * (1 - b),
                                        'Rx': X_ion_radii[X],
                                        'nA': round(nA_jiatai[A1] * a + nA_jiatai[A2] * (1 - a))}
                            candidates.append(tmp_dict)
        else:
            for A in A_cations:
                for B1 in B_cations:
                    for B2 in B_cations:
                        A1, A2 = A,A
                        a = 1
                        b = i 
                        if (B1 != B2) and (B1 != A) and (B2 != A):
                            candidate = ''.join([X, '3', A1, str(a), A2, str(1 - a), B1, str(b), B2, str(1 - b)])
                            tmp_dict = {'formula': Composition(candidate).reduced_formula,
                                        'A1': A1, 'A1_ratio': a,'nA1':nA_jiatai[A1],
                                        'A2': A2, 'A2_ratio': 1 - a,'nA2':nA_jiatai[A2],
                                        'B1': B1, 'B1_ratio': b, 'B2': B2, 'B2_ratio': 1 - b,
                                        'X': X, 'X_ratio': 3,
                                        'Ra': A_ion_radii[A1] * a + A_ion_radii[A2] * (1 - a),
                                        'Rb': B_ion_radii[B1] * b + B_ion_radii[B2] * (1 - b),
                                        'Rx': X_ion_radii[X],
                                        'nA': nA_jiatai[A1]}
                            candidates.append(tmp_dict)
    return candidates

In [12]:
def generate_candidates_concurrently_all(dope_range,site):
    all_candidates = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=128) as executor:
        # Create all possible combinations of a and b values
        futures = [executor.submit(generate_candidate_all, a,site) 
                   for a in dope_range]
        for future in concurrent.futures.as_completed(futures):
            all_candidates.extend(future.result())
        result = pd.DataFrame(all_candidates)
    return result.drop_duplicates(subset=['formula'], keep='first').reset_index(drop=True)

# new_tolerance

In [36]:
# All possible chemical formulas after ion substitution (add tolerance factor screening)
import concurrent.futures
from pymatgen.core import Composition
def generate_candidate(i,site):
    candidates = []
    for X in X_anions:
        if site == 'A':
            for A1 in A_cations:
                for A2 in A_cations:
                    for B in B_cations:
                        B1, B2 = B,B
                        b = 1
                        a = i
                        if (A1 != A2) and (A1 != B) and (A2 != B):
                            candidate = ''.join([X, '3', A1, str(a), A2, str(1 - a), B1, str(b), B2, str(1 - b)])
                            tmp_dict = {'formula': Composition(candidate).reduced_formula,
                                        'A1': A1, 'A1_ratio': a,'nA1':nA_jiatai[A1],
                                        'A2': A2, 'A2_ratio': 1 - a,'nA2':nA_jiatai[A2],
                                        'B1': B1, 'B1_ratio': b, 'B2': B2, 'B2_ratio': 1 - b,
                                        'X': X, 'X_ratio': 3,
                                        'Ra': A_ion_radii[A1] * a + A_ion_radii[A2] * (1 - a),
                                        'Rb': B_ion_radii[B1] * b + B_ion_radii[B2] * (1 - b),
                                        'Rx': X_ion_radii[X],
                                        'nA': round(nA_jiatai[A1] * a + nA_jiatai[A2] * (1 - a))}
                            new_tor = tmp_dict['Rx']/tmp_dict['Rb'] - tmp_dict['nA']*(tmp_dict['nA'] - (tmp_dict['Ra']/tmp_dict['Rb'])/np.log1p(tmp_dict['Ra']/tmp_dict['Rb'] - 1))
                            if (tmp_dict['Ra'] > tmp_dict['Rb']) and (new_tor <= 4.18):
                                candidates.append(tmp_dict)
                            else:
                                continue
        else:
            for A in A_cations:
                for B1 in B_cations:
                    for B2 in B_cations:
                        A1, A2 = A,A
                        a = 1
                        b = i 
                        if (B1 != B2) and (B1 != A) and (B2 != A):
                            candidate = ''.join([X, '3', A1, str(a), A2, str(1 - a), B1, str(b), B2, str(1 - b)])
                            tmp_dict = {'formula': Composition(candidate).reduced_formula,
                                        'A1': A1, 'A1_ratio': a,'nA1':nA_jiatai[A1],
                                        'A2': A2, 'A2_ratio': 1 - a,'nA2':nA_jiatai[A2],
                                        'B1': B1, 'B1_ratio': b, 'B2': B2, 'B2_ratio': 1 - b,
                                        'X': X, 'X_ratio': 3,
                                        'Ra': A_ion_radii[A1] * a + A_ion_radii[A2] * (1 - a),
                                        'Rb': B_ion_radii[B1] * b + B_ion_radii[B2] * (1 - b),
                                        'Rx': X_ion_radii[X],
                                        'nA': nA_jiatai[A1]}
                            new_tor = tmp_dict['Rx']/tmp_dict['Rb'] - tmp_dict['nA']*(tmp_dict['nA'] - (tmp_dict['Ra']/tmp_dict['Rb'])/np.log1p(tmp_dict['Ra']/tmp_dict['Rb'] - 1))
                            if (tmp_dict['Ra'] > tmp_dict['Rb']) & (new_tor <= 4.18):
                                candidates.append(tmp_dict)
                            else:
                                continue
    return candidates

In [10]:
def generate_candidates_concurrently(dope_range,site):
    all_candidates = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=128) as executor:
        # Create all possible combinations of a and b values
        futures = [executor.submit(generate_candidate, a,site) 
                   for a in dope_range]
        for future in concurrent.futures.as_completed(futures):
            all_candidates.extend(future.result())
        result = pd.DataFrame(all_candidates)
    return result.drop_duplicates(subset=['formula'], keep='first').reset_index(drop=True)

In [13]:
dope_range = np.arange(0, 1.1, 0.1)
site = 'A'
result_a_all = generate_candidates_concurrently_all(dope_range,site)
result_a_all

Unnamed: 0,formula,A1,A1_ratio,nA1,A2,A2_ratio,nA2,B1,B1_ratio,B2,B2_ratio,X,X_ratio,Ra,Rb,Rx,nA
0,Sm0.8Sc0.2Cr1O3,Sm,0.8,3,Sc,0.2,3,Cr,1,Cr,0,O,3,1.2164,0.615,1.4,3
1,Sm0.8Sc0.2Co1O3,Sm,0.8,3,Sc,0.2,3,Co,1,Co,0,O,3,1.2164,0.610,1.4,3
2,Sm0.8Sc0.2Fe1O3,Sm,0.8,3,Sc,0.2,3,Fe,1,Fe,0,O,3,1.2164,0.645,1.4,3
3,Sm0.8Sc0.2Nb1O3,Sm,0.8,3,Sc,0.2,3,Nb,1,Nb,0,O,3,1.2164,0.640,1.4,3
4,Sm0.8Sc0.2In1O3,Sm,0.8,3,Sc,0.2,3,In,1,In,0,O,3,1.2164,0.800,1.4,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83934,K0.7Rb0.3Al1O3,K,0.7,1,Rb,0.3,1,Al,1,Al,0,O,3,1.6640,0.535,1.4,1
83935,K0.7Rb0.3Bi1O3,K,0.7,1,Rb,0.3,1,Bi,1,Bi,0,O,3,1.6640,1.030,1.4,1
83936,K0.7Rb0.3B1O3,K,0.7,1,Rb,0.3,1,B,1,B,0,O,3,1.6640,0.270,1.4,1
83937,K0.7Rb0.3Sb1O3,K,0.7,1,Rb,0.3,1,Sb,1,Sb,0,O,3,1.6640,0.600,1.4,1


In [14]:
result_b_all = generate_candidates_concurrently_all(dope_range,'B')
result_b_all

Unnamed: 0,formula,A1,A1_ratio,nA1,A2,A2_ratio,nA2,B1,B1_ratio,B2,B2_ratio,X,X_ratio,Ra,Rb,Rx,nA
0,Sm1Cr0.5Co0.5O3,Sm,1,3,Sm,0,3,Cr,0.5,Co,0.5,O,3,1.24,0.6125,1.4,3
1,Sm1Cr0.5Fe0.5O3,Sm,1,3,Sm,0,3,Cr,0.5,Fe,0.5,O,3,1.24,0.6300,1.4,3
2,Sm1Nb0.5Cr0.5O3,Sm,1,3,Sm,0,3,Cr,0.5,Nb,0.5,O,3,1.24,0.6275,1.4,3
3,Sm1Cr0.5In0.5O3,Sm,1,3,Sm,0,3,Cr,0.5,In,0.5,O,3,1.24,0.7075,1.4,3
4,Sm1Er0.5Cr0.5O3,Sm,1,3,Sm,0,3,Cr,0.5,Er,0.5,O,3,1.24,0.7525,1.4,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52578,SrAlO3,Sr,1,2,Sr,0,2,Al,1.0,Cr,0.0,O,3,1.44,0.5350,1.4,2
52579,SrBiO3,Sr,1,2,Sr,0,2,Bi,1.0,Cr,0.0,O,3,1.44,1.0300,1.4,2
52580,SrBO3,Sr,1,2,Sr,0,2,B,1.0,Cr,0.0,O,3,1.44,0.2700,1.4,2
52581,SrSbO3,Sr,1,2,Sr,0,2,Sb,1.0,Cr,0.0,O,3,1.44,0.6000,1.4,2


In [38]:
dope_range = np.arange(0, 1.1, 0.1)
site = 'A'
result_a_2 = generate_candidates_concurrently(dope_range,site)
result_a_2

Unnamed: 0,formula,A1,A1_ratio,nA1,A2,A2_ratio,nA2,B1,B1_ratio,B2,B2_ratio,X,X_ratio,Ra,Rb,Rx,nA
0,Sm0.2Sc0.8Cr1O3,Sm,0.2,3,Sc,0.8,3,Cr,1,Cr,0,O,3,1.1456,0.615,1.4,3
1,Sm0.2Sc0.8Co1O3,Sm,0.2,3,Sc,0.8,3,Co,1,Co,0,O,3,1.1456,0.610,1.4,3
2,Sm0.2Sc0.8Fe1O3,Sm,0.2,3,Sc,0.8,3,Fe,1,Fe,0,O,3,1.1456,0.645,1.4,3
3,Sm0.2Sc0.8Nb1O3,Sm,0.2,3,Sc,0.8,3,Nb,1,Nb,0,O,3,1.1456,0.640,1.4,3
4,Sm0.2Sc0.8Sn1O3,Sm,0.2,3,Sc,0.8,3,Sn,1,Sn,0,O,3,1.1456,0.690,1.4,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64447,K0.8Rb0.2Ga1O3,K,0.8,1,Rb,0.2,1,Ga,1,Ga,0,O,3,1.6560,0.620,1.4,1
64448,K0.8Rb0.2Ta1O3,K,0.8,1,Rb,0.2,1,Ta,1,Ta,0,O,3,1.6560,0.640,1.4,1
64449,K0.8Rb0.2Bi1O3,K,0.8,1,Rb,0.2,1,Bi,1,Bi,0,O,3,1.6560,1.030,1.4,1
64450,K0.8Rb0.2Sb1O3,K,0.8,1,Rb,0.2,1,Sb,1,Sb,0,O,3,1.6560,0.600,1.4,1


In [42]:
# result_a_2.to_csv('/home/zhouhairui/paper_code/a_replace_data.csv')

In [39]:
result_b_2 = generate_candidates_concurrently(dope_range,'B')
result_b_2

Unnamed: 0,formula,A1,A1_ratio,nA1,A2,A2_ratio,nA2,B1,B1_ratio,B2,B2_ratio,X,X_ratio,Ra,Rb,Rx,nA
0,Sm1Cr0.8Co0.2O3,Sm,1,3,Sm,0,3,Cr,0.8,Co,0.2,O,3,1.24,0.614,1.4,3
1,Sm1Cr0.8Fe0.2O3,Sm,1,3,Sm,0,3,Cr,0.8,Fe,0.2,O,3,1.24,0.621,1.4,3
2,Sm1Nb0.2Cr0.8O3,Sm,1,3,Sm,0,3,Cr,0.8,Nb,0.2,O,3,1.24,0.620,1.4,3
3,Sm1Cr0.8In0.2O3,Sm,1,3,Sm,0,3,Cr,0.8,In,0.2,O,3,1.24,0.652,1.4,3
4,Sm1Er0.2Cr0.8O3,Sm,1,3,Sm,0,3,Cr,0.8,Er,0.2,O,3,1.24,0.670,1.4,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43177,SrGaO3,Sr,1,2,Sr,0,2,Ga,1.0,Cr,0.0,O,3,1.44,0.620,1.4,2
43178,SrTaO3,Sr,1,2,Sr,0,2,Ta,1.0,Cr,0.0,O,3,1.44,0.640,1.4,2
43179,SrAlO3,Sr,1,2,Sr,0,2,Al,1.0,Cr,0.0,O,3,1.44,0.535,1.4,2
43180,SrSbO3,Sr,1,2,Sr,0,2,Sb,1.0,Cr,0.0,O,3,1.44,0.600,1.4,2


In [43]:
# result_b_2.to_csv('/home/zhouhairui/paper_code/b_replace_data.csv')

In [18]:
# result_b.to_csv('b_dop_data_2.csv')

In [40]:
df_2 = pd.concat([result_a_2, result_b_2],axis = 0).reset_index(drop=True)
df_2

Unnamed: 0,formula,A1,A1_ratio,nA1,A2,A2_ratio,nA2,B1,B1_ratio,B2,B2_ratio,X,X_ratio,Ra,Rb,Rx,nA
0,Sm0.2Sc0.8Cr1O3,Sm,0.2,3,Sc,0.8,3,Cr,1.0,Cr,0.0,O,3,1.1456,0.615,1.4,3
1,Sm0.2Sc0.8Co1O3,Sm,0.2,3,Sc,0.8,3,Co,1.0,Co,0.0,O,3,1.1456,0.610,1.4,3
2,Sm0.2Sc0.8Fe1O3,Sm,0.2,3,Sc,0.8,3,Fe,1.0,Fe,0.0,O,3,1.1456,0.645,1.4,3
3,Sm0.2Sc0.8Nb1O3,Sm,0.2,3,Sc,0.8,3,Nb,1.0,Nb,0.0,O,3,1.1456,0.640,1.4,3
4,Sm0.2Sc0.8Sn1O3,Sm,0.2,3,Sc,0.8,3,Sn,1.0,Sn,0.0,O,3,1.1456,0.690,1.4,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107629,SrGaO3,Sr,1.0,2,Sr,0.0,2,Ga,1.0,Cr,0.0,O,3,1.4400,0.620,1.4,2
107630,SrTaO3,Sr,1.0,2,Sr,0.0,2,Ta,1.0,Cr,0.0,O,3,1.4400,0.640,1.4,2
107631,SrAlO3,Sr,1.0,2,Sr,0.0,2,Al,1.0,Cr,0.0,O,3,1.4400,0.535,1.4,2
107632,SrSbO3,Sr,1.0,2,Sr,0.0,2,Sb,1.0,Cr,0.0,O,3,1.4400,0.600,1.4,2


In [44]:
# df_2.to_csv('/home/zhouhairui/paper_code/replacement_data.csv')