In [2]:
from pymatgen.core import Composition
import pandas as pd
import math
import numpy as np
# No warnings about setting value on copy of slice
pd.options.mode.chained_assignment = None

# Display up to 60 columns of a dataframe
pd.set_option('display.max_columns', 60)

# Matplotlib visualization
import matplotlib.pyplot as plt
from matplotlib import rcParams

# Internal ipython tool for setting figure size
from IPython.core.pylabtools import figsize

# Seaborn for visualization
import seaborn as sns

from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

# Global Matplotlib styling: serif text (Times New Roman) with STIX math glyphs.
config = {
    "mathtext.fontset": 'stix',
    "font.family": 'serif',
    "font.serif": ['Times New Roman'],
    "font.size": 24,
    # Render minus signs with the plain hyphen rather than the Unicode minus.
    'axes.unicode_minus': False,
}
rcParams.update(config)

# Font-size presets; `small` is not used by the settings below.
large = 22; med = 26; small = 12
params = {
    'legend.fontsize': med,
    'figure.figsize': (8, 6),
    'axes.labelsize': med,
    # Listed exactly once: a repeated key in a dict literal is silently
    # overridden by its last occurrence, so `med` is the effective value.
    'axes.titlesize': med,
    'xtick.labelsize': med,
    'ytick.labelsize': med,
    'figure.titlesize': large,
}
plt.rcParams.update(params)
plt.rcParams['figure.dpi'] = 400

In [3]:
def generate_candidate(i):
    """Enumerate A2BCX6 candidates and compute the descriptor ``Tf`` for each.

    Every (A, B, C, X) combination drawn from the module-level lists
    ``A_cations``, ``B_cations``, ``C_cations`` and ``X_cations`` is visited;
    combinations in which two sites hold the same element are skipped.
    Radii are looked up in the module-level ``ion_radii_a/b/c/x`` tables, so
    the result depends on whichever tables are defined when this is called.

    With ``RB = (Rb + Rc) / 2`` and ``nA = i``::

        Tf = Rx / RB - nA * (nA - (Ra / RB) / ln(Ra / RB))

    Combinations for which ``Tf`` is undefined (``RB == 0`` or
    ``ln(Ra / RB) == 0``) are skipped.

    Parameters
    ----------
    i : number
        Value used as ``nA`` in the ``Tf`` expression.

    Returns
    -------
    list of dict
        One record per candidate with keys ``formula``, ``A``, ``B``, ``C``,
        ``X``, ``Ra``, ``Rb``, ``Rc``, ``RB``, ``Rx`` and ``Tf``. No filtering
        on ``Tf`` is applied here.

    Raises
    ------
    KeyError
        If a symbol has no entry in the corresponding radius table.
    ValueError
        If ``Ra / RB`` is not positive (``math.log`` domain error).
    """
    a, x = 2, 6  # fixed A-site and X-site subscripts of the formula string
    candidates = []
    for A in A_cations:
        for B in B_cations:
            for C in C_cations:
                for X in X_cations:
                    # All four sites must hold pairwise different elements.
                    if len({A, B, C, X}) < 4:
                        continue

                    Ra = ion_radii_a[A]
                    Rb = ion_radii_b[B]
                    Rc = ion_radii_c[C]
                    Rx = ion_radii_x[X]
                    nA = i
                    RB = (Rb + Rc) / 2  # mean radius of the two B-type sites
                    if RB == 0:
                        continue
                    ratio = Ra / RB
                    log_ratio = math.log(ratio)
                    if log_ratio == 0:
                        # Ra == RB: the Ra/RB / ln(Ra/RB) term would divide by zero.
                        continue

                    # Computed once and reused for the record below.
                    Tf = (Rx / RB) - nA * (nA - ratio / log_ratio)

                    candidates.append({
                        'formula': ''.join([A, str(a), B, C, X, str(x)]),
                        'A': A,
                        'B': B,
                        'C': C,
                        'X': X,
                        'Ra': Ra,
                        'Rb': Rb,
                        'Rc': Rc,
                        'RB': RB,
                        'Rx': Rx,
                        'Tf': Tf,
                    })

    return candidates

In [4]:
import concurrent.futures
def generate_candidates_concurrently(i):
    """Return the candidate table produced by ``generate_candidate(i)``.

    Only one enumeration call is made per invocation, so it is executed
    directly: routing a single task through a thread pool adds no
    parallelism. The function name is kept so existing callers keep working.

    Parameters
    ----------
    i : number
        Forwarded unchanged to ``generate_candidate``.

    Returns
    -------
    pandas.DataFrame
        One row per distinct ``formula`` (first occurrence kept), with a
        fresh 0..n-1 index. When no candidate is generated, an empty frame
        with the expected column names is returned instead of failing on the
        missing ``formula`` column.
    """
    columns = ['formula', 'A', 'B', 'C', 'X', 'Ra', 'Rb', 'Rc', 'RB', 'Rx', 'Tf']
    records = generate_candidate(i)
    if not records:
        # drop_duplicates(subset=['formula']) raises KeyError on a frame
        # that has no 'formula' column, which is what an empty list yields.
        return pd.DataFrame(columns=columns)
    result = pd.DataFrame(records)
    return result.drop_duplicates(subset=['formula'], keep='first').reset_index(drop=True)

In [5]:
# --- Group 1 inputs ---------------------------------------------------------
# These module-level lists/dicts are read as globals by the candidate
# generators, so they must be (re)defined before each generator call.
# A-site symbols (A⁺)
A_cations = ['Li', 'Na', 'K', 'Rb','Ag','Cs','Tl']
# B-site symbols (B⁺)
B_cations = ['Li','Na','K','Cu','Rb','Ag','Cs', 'Au','Hg','Tl']
# B'-site symbols (B'³⁺)
C_cations = ['B','N','Al','P','Sc','Ti','V','Cr','Mn','Fe','Co', 'Ni','Cu','Ga',
             'As','Y','Nb','Mo','Ru','Rh','Pd','Ag','In','Sb','Ta','Ir','Au','Tl','Bi',
              'La','Ce','Pr','Nd','Pm','Sm','Eu','Gd','Tb','Dy','Ho', 'Er','Tm', 'Yb','Lu', 'U']
# X-site symbols. NOTE(review): the name says "cations" but the entries are
# halogens; the name is kept because the generator functions reference it.
X_cations = ['F', 'Cl', 'Br','I']
# Radius lookup tables keyed by element symbol, one per site.
# NOTE(review): units/source of the radii are not stated here — presumably Å; confirm.
ion_radii_a = {
    'Li':1.159, 'Na':1.392, 'K':1.641, 'Rb':1.722, 'Ag':1.521, 'Cs':1.882, 'Tl':1.698 }
# Contains 'Fr', which is not listed in B_cations (unused by the enumeration).
ion_radii_b = {
    'Li':0.756, 'Na':1.028, 'K':1.388, 'Cu':0.774, 'Rb':1.517, 'Ag':1.152 , 'Cs':1.673, 'Au':1.362 ,'Hg':1.37 ,'Tl':1.493, 'Fr':1.8}
# Contains several actinides ('Ac', 'Pa', 'Np', 'Pu', 'Am', 'Cm') that are not in C_cations.
ion_radii_c = { 
    'B':0.266, 'N':0.166, 'Al':0.537, 'P':0.444, 'Sc':0.741, 'Ti':0.673, 'V':0.636, 'Cr':0.618,'Mn':0.645, 'Fe':0.646, 'Co':0.612, 'Ni':0.601,'Cu':0.62, 'Ga':0.62,
    'As':0.583,'Y':0.899,'Nb':0.724,'Mo':0.691,'Ru':0.676,'Rh':0.672,'Pd':0.851,'Ag':0.757,'In':0.795,'Sb':0.861,'Ta':0.726,'Ir':0.687,'Au':0.842,'Tl':0.884,'Bi':1.03,
    'La':1.032,'Ce':1.01,'Pr':0.993,'Nd':0.98,'Pm':0.969,'Sm':0.958,'Eu':0.947,'Tb':0.924,'Gd':0.936,'Dy':0.912,'Ho':0.9, 'Er':0.889,'Tm':0.879, 'Yb':0.868,'Lu':0.857, 
    'Ac':1.11,'Pa':1.04,'U':1.024, 'Np':1.01,'Pu':1.0, 'Am':0.98,'Cm':0.97 }
ion_radii_x = {'F':1.331, 'Cl':1.806, 'Br': 1.955, 'I':2.194}

In [7]:
# Enumerate group-1 candidates. `i` is forwarded to the generator, where it is
# used as nA in the Tf expression. It stays defined at module level afterwards.
i = 1
group1 = generate_candidates_concurrently(i)
group1

Unnamed: 0,formula,A,B,C,X,Ra,Rb,Rc,RB,Rx,Tf
0,Li2NaBF6,Li,Na,B,F,1.159,1.028,0.266,0.6470,1.331,4.129996
1,Li2NaBCl6,Li,Na,B,Cl,1.159,1.028,0.266,0.6470,1.806,4.864153
2,Li2NaBBr6,Li,Na,B,Br,1.159,1.028,0.266,0.6470,1.955,5.094447
3,Li2NaBI6,Li,Na,B,I,1.159,1.028,0.266,0.6470,2.194,5.463844
4,Li2NaNF6,Li,Na,N,F,1.159,1.028,0.166,0.5970,1.331,4.155899
...,...,...,...,...,...,...,...,...,...,...,...
11155,Tl2HgLuI6,Tl,Hg,Lu,I,1.698,1.370,0.857,1.1135,2.194,4.584411
11156,Tl2HgUF6,Tl,Hg,U,F,1.698,1.370,1.024,1.1970,1.331,4.169194
11157,Tl2HgUCl6,Tl,Hg,U,Cl,1.698,1.370,1.024,1.1970,1.806,4.566020
11158,Tl2HgUBr6,Tl,Hg,U,Br,1.698,1.370,1.024,1.1970,1.955,4.690498


In [8]:
# Tag every row with its group id (1) so the origin survives the later concat.
group1['number'] = 1
group1

Unnamed: 0,formula,A,B,C,X,Ra,Rb,Rc,RB,Rx,Tf,number
0,Li2NaBF6,Li,Na,B,F,1.159,1.028,0.266,0.6470,1.331,4.129996,1
1,Li2NaBCl6,Li,Na,B,Cl,1.159,1.028,0.266,0.6470,1.806,4.864153,1
2,Li2NaBBr6,Li,Na,B,Br,1.159,1.028,0.266,0.6470,1.955,5.094447,1
3,Li2NaBI6,Li,Na,B,I,1.159,1.028,0.266,0.6470,2.194,5.463844,1
4,Li2NaNF6,Li,Na,N,F,1.159,1.028,0.166,0.5970,1.331,4.155899,1
...,...,...,...,...,...,...,...,...,...,...,...,...
11155,Tl2HgLuI6,Tl,Hg,Lu,I,1.698,1.370,0.857,1.1135,2.194,4.584411,1
11156,Tl2HgUF6,Tl,Hg,U,F,1.698,1.370,1.024,1.1970,1.331,4.169194,1
11157,Tl2HgUCl6,Tl,Hg,U,Cl,1.698,1.370,1.024,1.1970,1.806,4.566020,1
11158,Tl2HgUBr6,Tl,Hg,U,Br,1.698,1.370,1.024,1.1970,1.955,4.690498,1


In [9]:
# Keep only rows with Ra > RB and Tf < 4.18 (the same cut that is commented
# out inside generate_candidate). This rebinds `group1`, so the unfiltered
# table is no longer available after the cell runs.
group1 = group1[(group1['Ra'] > group1['RB']) & (group1['Tf'] < 4.18)]
group1

Unnamed: 0,formula,A,B,C,X,Ra,Rb,Rc,RB,Rx,Tf,number
0,Li2NaBF6,Li,Na,B,F,1.159,1.028,0.266,0.6470,1.331,4.129996,1
4,Li2NaNF6,Li,Na,N,F,1.159,1.028,0.166,0.5970,1.331,4.155899,1
368,Li2CuAlF6,Li,Cu,Al,F,1.159,0.774,0.537,0.6555,1.331,4.132934,1
372,Li2CuPF6,Li,Cu,P,F,1.159,0.774,0.444,0.6090,1.331,4.143026,1
388,Li2CuCrF6,Li,Cu,Cr,F,1.159,0.774,0.618,0.6960,1.331,4.177749,1
...,...,...,...,...,...,...,...,...,...,...,...,...
11140,Tl2HgErF6,Tl,Hg,Er,F,1.698,1.370,0.889,1.1295,1.331,3.865934,1
11144,Tl2HgTmF6,Tl,Hg,Tm,F,1.698,1.370,0.879,1.1245,1.331,3.847695,1
11148,Tl2HgYbF6,Tl,Hg,Yb,F,1.698,1.370,0.868,1.1190,1.331,3.828230,1
11152,Tl2HgLuF6,Tl,Hg,Lu,F,1.698,1.370,0.857,1.1135,1.331,3.809377,1


In [10]:
import math

def generate_unique_candidates(i):
    """Enumerate A2BCX6 candidates, treating the B and C sites as interchangeable.

    Works like a plain enumeration over the module-level ``A_cations``,
    ``B_cations``, ``C_cations`` and ``X_cations`` lists (radii taken from
    the module-level ``ion_radii_a/b/c/x`` tables), except that for a given
    A and X only the first-encountered ordering of an unordered {B, C} pair
    is kept: once ``(A, {B, C}, X)`` has produced a record, the swapped
    ordering is skipped.

    With ``RB = (Rb + Rc) / 2`` and ``nA = i``::

        Tf = Rx / RB - nA * (nA - (Ra / RB) / ln(Ra / RB))

    Combinations for which ``Tf`` is undefined (``RB == 0`` or
    ``ln(Ra / RB)`` equal to zero) are skipped and are *not* recorded as seen.

    Parameters
    ----------
    i : number
        Value used as ``nA`` in the ``Tf`` expression.

    Returns
    -------
    list of dict
        One record per kept candidate with keys ``formula``, ``A``, ``B``,
        ``C``, ``X``, ``Ra``, ``Rb``, ``Rc``, ``RB``, ``Rx`` and ``Tf``.

    Raises
    ------
    KeyError
        If a symbol has no entry in the corresponding radius table.
    ValueError
        If ``Ra / RB`` is not positive (``math.log`` domain error).
    """
    a, x = 2, 6  # fixed A-site and X-site subscripts of the formula string
    candidates = []
    seen = set()  # (A, sorted (B, C) pair, X) keys that already produced a record

    for A in A_cations:
        for B in B_cations:
            for C in C_cations:
                for X in X_cations:
                    # All four sites must hold pairwise different elements.
                    if len({A, B, C, X}) < 4:
                        continue

                    # Order-insensitive identity of the combination.
                    key = (A, tuple(sorted((B, C))), X)
                    if key in seen:
                        continue

                    Ra = ion_radii_a[A]
                    Rb = ion_radii_b[B]
                    Rc = ion_radii_c[C]
                    Rx = ion_radii_x[X]
                    nA = i
                    RB = (Rb + Rc) / 2  # mean radius of the two B-type sites
                    if RB == 0:
                        continue
                    ratio = Ra / RB
                    log_ratio = math.log(ratio)
                    if math.isclose(log_ratio, 0):
                        # Ra == RB: the Ra/RB / ln(Ra/RB) term would divide by zero.
                        continue
                    Tf = (Rx / RB) - nA * (nA - ratio / log_ratio)

                    candidates.append({
                        'formula': f"{A}{a}{B}{C}{X}{x}",
                        'A': A,
                        'B': B,
                        'C': C,
                        'X': X,
                        'Ra': Ra,
                        'Rb': Rb,
                        'Rc': Rc,
                        'RB': RB,
                        'Rx': Rx,
                        'Tf': Tf,
                    })
                    seen.add(key)

    return candidates

In [11]:
import concurrent.futures
def generate_candidates_concurrently_re(i):
    """Return the candidate table produced by ``generate_unique_candidates(i)``.

    Only one enumeration call is made per invocation, so it is executed
    directly: routing a single task through a thread pool adds no
    parallelism. The function name is kept so existing callers keep working.

    Parameters
    ----------
    i : number
        Forwarded unchanged to ``generate_unique_candidates``.

    Returns
    -------
    pandas.DataFrame
        One row per distinct ``formula`` (first occurrence kept), with a
        fresh 0..n-1 index. When no candidate is generated, an empty frame
        with the expected column names is returned instead of failing on the
        missing ``formula`` column.
    """
    columns = ['formula', 'A', 'B', 'C', 'X', 'Ra', 'Rb', 'Rc', 'RB', 'Rx', 'Tf']
    records = generate_unique_candidates(i)
    if not records:
        # drop_duplicates(subset=['formula']) raises KeyError on a frame
        # that has no 'formula' column, which is what an empty list yields.
        return pd.DataFrame(columns=columns)
    result = pd.DataFrame(records)
    return result.drop_duplicates(subset=['formula'], keep='first').reset_index(drop=True)

In [12]:
# --- Group 2 inputs ---------------------------------------------------------
# These rebind the module-level tables read by the candidate generators.
# X_cations is not redefined in this cell; the list defined for group 1 is reused.
# A-site symbols (A⁺)
A_cations = ['Li', 'Na', 'K', 'Rb','Ag','Cs','Tl']
# B-site symbols (B²⁺)
B_cations = ['Be','Mg','Ca','Sc','Ti','V','Cr','Mn','Fe','Co', 'Ni','Cu','Zn','Ge',
             'Sr','Y','Nb','Pd','Ag','Cd','In','Sn','Ba','Pt','Hg','Pb',
             'La','Ce','Pr','Sm','Eu','Dy','Tm','Yb']
# B'-site symbols (B'²⁺): the same pool as the B site. Derived from B_cations
# (as an independent copy) so the two lists cannot drift apart.
C_cations = list(B_cations)

ion_radii_a = { 'Li':1.159, 'Na':1.392, 'K':1.641, 'Rb':1.722, 'Ag':1.521, 'Cs':1.882, 'Tl':1.698 }

# Contains 'Np', which is not listed in B_cations (unused by the enumeration).
ion_radii_b = {
                'Be':0.449,'Mg':0.724,'Ca':0.999,'Sc':0.936,'Ti':0.856,'V':0.793,'Cr':0.801,'Mn':0.828,'Fe':0.784,'Co':0.742, 'Ni':0.695,'Cu':0.726,'Zn':0.743,'Ge':0.73,
             'Sr':1.179,'Y':1.113,'Nb':0.927,'Pd':0.866,'Ag':0.936,'Cd':0.951,'In':0.989,'Sn':1.02,'Ba':1.34,'Pt':0.984,'Hg':1.03,'Pb':1.18,
            'La':1.31,'Ce':1.287,'Pr':1.26,'Sm':1.19,'Eu':1.16,'Dy':1.07,
               'Tm':1.03,'Yb':1.02,'Np':1.10  }
# B' radii are the same table as the B radii; an independent copy keeps a
# single source of truth while leaving the two dicts separately mutable.
ion_radii_c = dict(ion_radii_b)

ion_radii_x = {'F':1.331, 'Cl':1.806, 'Br': 1.955, 'I':2.194}

In [14]:
# Enumerate group-2 candidates with the tables defined in the previous cell.
# NOTE: `i` is the module-level variable set in the group-1 cell (i = 1);
# this cell fails with NameError if that cell has not been run.
group2 = generate_candidates_concurrently_re(i)
group2

Unnamed: 0,formula,A,B,C,X,Ra,Rb,Rc,RB,Rx,Tf
0,Li2BeMgF6,Li,Be,Mg,F,1.159,0.449,0.724,0.5865,1.331,4.170603
1,Li2BeMgCl6,Li,Be,Mg,Cl,1.159,0.449,0.724,0.5865,1.806,4.980492
2,Li2BeMgBr6,Li,Be,Mg,Br,1.159,0.449,0.724,0.5865,1.955,5.234542
3,Li2BeMgI6,Li,Be,Mg,I,1.159,0.449,0.724,0.5865,2.194,5.642044
4,Li2BeCaF6,Li,Be,Ca,F,1.159,0.449,0.999,0.7240,1.331,4.240642
...,...,...,...,...,...,...,...,...,...,...,...
15571,Tl2DyYbI6,Tl,Dy,Yb,I,1.698,1.070,1.020,1.0450,2.194,4.446794
15572,Tl2TmYbF6,Tl,Tm,Yb,F,1.698,1.030,1.020,1.0250,1.331,3.580473
15573,Tl2TmYbCl6,Tl,Tm,Yb,Cl,1.698,1.030,1.020,1.0250,1.806,4.043888
15574,Tl2TmYbBr6,Tl,Tm,Yb,Br,1.698,1.030,1.020,1.0250,1.955,4.189254


In [15]:
# Tag every row with its group id (2) so the origin survives the later concat.
group2['number'] = 2
group2

Unnamed: 0,formula,A,B,C,X,Ra,Rb,Rc,RB,Rx,Tf,number
0,Li2BeMgF6,Li,Be,Mg,F,1.159,0.449,0.724,0.5865,1.331,4.170603,2
1,Li2BeMgCl6,Li,Be,Mg,Cl,1.159,0.449,0.724,0.5865,1.806,4.980492,2
2,Li2BeMgBr6,Li,Be,Mg,Br,1.159,0.449,0.724,0.5865,1.955,5.234542,2
3,Li2BeMgI6,Li,Be,Mg,I,1.159,0.449,0.724,0.5865,2.194,5.642044,2
4,Li2BeCaF6,Li,Be,Ca,F,1.159,0.449,0.999,0.7240,1.331,4.240642,2
...,...,...,...,...,...,...,...,...,...,...,...,...
15571,Tl2DyYbI6,Tl,Dy,Yb,I,1.698,1.070,1.020,1.0450,2.194,4.446794,2
15572,Tl2TmYbF6,Tl,Tm,Yb,F,1.698,1.030,1.020,1.0250,1.331,3.580473,2
15573,Tl2TmYbCl6,Tl,Tm,Yb,Cl,1.698,1.030,1.020,1.0250,1.806,4.043888,2
15574,Tl2TmYbBr6,Tl,Tm,Yb,Br,1.698,1.030,1.020,1.0250,1.955,4.189254,2


In [16]:
# Keep only rows with Ra > RB and Tf < 4.18. This rebinds `group2`, so the
# unfiltered table is no longer available after the cell runs.
group2 = group2[(group2['Ra'] > group2['RB']) & (group2['Tf'] < 4.18)]
group2

Unnamed: 0,formula,A,B,C,X,Ra,Rb,Rc,RB,Rx,Tf,number
0,Li2BeMgF6,Li,Be,Mg,F,1.159,0.449,0.724,0.5865,1.331,4.170603,2
8,Li2BeScF6,Li,Be,Sc,F,1.159,0.449,0.936,0.6925,1.331,4.171791,2
12,Li2BeTiF6,Li,Be,Ti,F,1.159,0.449,0.856,0.6525,1.331,4.131648,2
16,Li2BeVF6,Li,Be,V,F,1.159,0.449,0.793,0.6210,1.331,4.134342,2
20,Li2BeCrF6,Li,Be,Cr,F,1.159,0.449,0.801,0.6250,1.331,4.132379,2
...,...,...,...,...,...,...,...,...,...,...,...,...
15565,Tl2DyTmCl6,Tl,Dy,Tm,Cl,1.698,1.070,1.030,1.0500,1.806,4.084415,2
15568,Tl2DyYbF6,Tl,Dy,Yb,F,1.698,1.070,1.020,1.0450,1.331,3.620956,2
15569,Tl2DyYbCl6,Tl,Dy,Yb,Cl,1.698,1.070,1.020,1.0450,1.806,4.075502,2
15572,Tl2TmYbF6,Tl,Tm,Yb,F,1.698,1.030,1.020,1.0250,1.331,3.580473,2


In [17]:
# --- Group 3 inputs ---------------------------------------------------------
# These rebind the module-level tables read by the candidate generators.
# X_cations is not redefined in this cell; the list defined for group 1 is reused.
# A-site symbols (A²⁺)
A_cations = ['Ca','Mn','Fe' ,'Cu','Sr','Cd','Ba','Hg','Pb', 'La','Pr','Nd','Eu','Tm','Yb']
# B-site symbols (B⁺)
B_cations = ['Li','Na','K','Cu','Rb','Ag','Cs', 'Au','Hg','Tl']
# B'-site symbols (B'⁺): the same pool as the B site. Derived from B_cations
# (as an independent copy) so the two lists cannot drift apart.
C_cations = list(B_cations)

# Contains 'Ra', which is not listed in A_cations (unused by the enumeration).
ion_radii_a = { 'Ca':1.336,'Mn': 1.205, 'Fe':1.186,'Cu':1.151,'Sr':1.444,'Cd':1.314,'Ba':1.61,'Hg':1.349,'Pb':1.492,
               'Ra':1.699,'La':1.581,'Pr':1.527,'Nd':1.501,'Eu':1.437,'Tm':1.352,'Yb':1.342}
# NOTE(review): 'Ag' is 1.552 here but 1.152 in the B⁺ table used for group 1,
# while every other shared entry matches ('Fr' aside) — confirm which value
# is intended. The value is left unchanged.
ion_radii_b = {
     'Li':0.756, 'Na':1.028, 'K':1.388, 'Cu':0.774, 'Rb':1.517, 'Ag':1.552 , 'Cs':1.673, 'Au':1.362 ,'Hg':1.370 ,'Tl':1.493, 'Fr':1.798}
# B' radii are the same table as the B radii; an independent copy keeps a
# single source of truth while leaving the two dicts separately mutable.
ion_radii_c = dict(ion_radii_b)
ion_radii_x = {'F':1.331, 'Cl':1.806, 'Br': 1.955, 'I':2.194}

In [19]:
# Enumerate group-3 candidates with the tables defined in the previous cell;
# the literal 2 is used as nA in the Tf expression for this group.
group3 = generate_candidates_concurrently_re(2)
group3

Unnamed: 0,formula,A,B,C,X,Ra,Rb,Rc,RB,Rx,Tf
0,Ca2LiNaF6,Ca,Li,Na,F,1.336,0.756,1.028,0.8920,1.331,4.907360
1,Ca2LiNaCl6,Ca,Li,Na,Cl,1.336,0.756,1.028,0.8920,1.806,5.439871
2,Ca2LiNaBr6,Ca,Li,Na,Br,1.336,0.756,1.028,0.8920,1.955,5.606912
3,Ca2LiNaI6,Ca,Li,Na,I,1.336,0.756,1.028,0.8920,2.194,5.874849
4,Ca2LiKF6,Ca,Li,K,F,1.336,0.756,1.388,1.0720,1.331,8.563394
...,...,...,...,...,...,...,...,...,...,...,...
2623,Yb2AuTlI6,Yb,Au,Tl,I,1.342,1.362,1.493,1.4275,2.194,-32.905078
2624,Yb2HgTlF6,Yb,Hg,Tl,F,1.342,1.370,1.493,1.4315,1.331,-32.111468
2625,Yb2HgTlCl6,Yb,Hg,Tl,Cl,1.342,1.370,1.493,1.4315,1.806,-31.779648
2626,Yb2HgTlBr6,Yb,Hg,Tl,Br,1.342,1.370,1.493,1.4315,1.955,-31.675561


In [20]:
# Tag every row with its group id (3) so the origin survives the later concat.
group3['number'] = 3
group3

Unnamed: 0,formula,A,B,C,X,Ra,Rb,Rc,RB,Rx,Tf,number
0,Ca2LiNaF6,Ca,Li,Na,F,1.336,0.756,1.028,0.8920,1.331,4.907360,3
1,Ca2LiNaCl6,Ca,Li,Na,Cl,1.336,0.756,1.028,0.8920,1.806,5.439871,3
2,Ca2LiNaBr6,Ca,Li,Na,Br,1.336,0.756,1.028,0.8920,1.955,5.606912,3
3,Ca2LiNaI6,Ca,Li,Na,I,1.336,0.756,1.028,0.8920,2.194,5.874849,3
4,Ca2LiKF6,Ca,Li,K,F,1.336,0.756,1.388,1.0720,1.331,8.563394,3
...,...,...,...,...,...,...,...,...,...,...,...,...
2623,Yb2AuTlI6,Yb,Au,Tl,I,1.342,1.362,1.493,1.4275,2.194,-32.905078,3
2624,Yb2HgTlF6,Yb,Hg,Tl,F,1.342,1.370,1.493,1.4315,1.331,-32.111468,3
2625,Yb2HgTlCl6,Yb,Hg,Tl,Cl,1.342,1.370,1.493,1.4315,1.806,-31.779648,3
2626,Yb2HgTlBr6,Yb,Hg,Tl,Br,1.342,1.370,1.493,1.4315,1.955,-31.675561,3


In [21]:
# Keep only rows with Ra > RB and Tf < 4.18. This rebinds `group3`, so the
# unfiltered table is no longer available after the cell runs.
group3 = group3[(group3['Ra'] > group3['RB']) & (group3['Tf'] < 4.18)]
group3

Unnamed: 0,formula,A,B,C,X,Ra,Rb,Rc,RB,Rx,Tf,number
8,Ca2LiCuF6,Ca,Li,Cu,F,1.336,0.756,0.774,0.765,1.331,4.004331,3
692,Sr2LiCuF6,Sr,Li,Cu,F,1.444,0.756,0.774,0.765,1.331,3.682234,3
872,Cd2LiCuF6,Cd,Li,Cu,F,1.314,0.756,0.774,0.765,1.331,4.09029,3
1044,Ba2LiNaF6,Ba,Li,Na,F,1.61,0.756,1.028,0.892,1.331,3.605146,3
1045,Ba2LiNaCl6,Ba,Li,Na,Cl,1.61,0.756,1.028,0.892,1.806,4.137658,3
1052,Ba2LiCuF6,Ba,Li,Cu,F,1.61,0.756,0.774,0.765,1.331,3.396466,3
1053,Ba2LiCuCl6,Ba,Li,Cu,Cl,1.61,0.756,0.774,0.765,1.806,4.017381,3
1084,Ba2NaCuF6,Ba,Na,Cu,F,1.61,1.028,0.774,0.901,1.331,3.633844,3
1085,Ba2NaCuCl6,Ba,Na,Cu,Cl,1.61,1.028,0.774,0.901,1.806,4.161036,3
1232,Hg2LiCuF6,Hg,Li,Cu,F,1.349,0.756,0.774,0.765,1.331,3.957306,3


In [22]:
# Stack the three filtered groups row-wise (group 1 first, then 2, then 3)
# and renumber the index 0..n-1.
group = pd.concat([group1, group2, group3],axis = 0).reset_index(drop=True)
group

Unnamed: 0,formula,A,B,C,X,Ra,Rb,Rc,RB,Rx,Tf,number
0,Li2NaBF6,Li,Na,B,F,1.159,1.028,0.266,0.6470,1.331,4.129996,1
1,Li2NaNF6,Li,Na,N,F,1.159,1.028,0.166,0.5970,1.331,4.155899,1
2,Li2CuAlF6,Li,Cu,Al,F,1.159,0.774,0.537,0.6555,1.331,4.132934,1
3,Li2CuPF6,Li,Cu,P,F,1.159,0.774,0.444,0.6090,1.331,4.143026,1
4,Li2CuCrF6,Li,Cu,Cr,F,1.159,0.774,0.618,0.6960,1.331,4.177749,1
...,...,...,...,...,...,...,...,...,...,...,...,...
9081,Nd2LiCuF6,Nd,Li,Cu,F,1.501,0.756,0.774,0.7650,1.331,3.562004,3
9082,Nd2NaCuF6,Nd,Na,Cu,F,1.501,1.028,0.774,0.9010,1.331,4.005409,3
9083,Eu2LiCuF6,Eu,Li,Cu,F,1.437,0.756,0.774,0.7650,1.331,3.699009,3
9084,Tm2LiCuF6,Tm,Li,Cu,F,1.352,0.756,0.774,0.7650,1.331,3.946825,3


In [24]:
# Drop rows whose formula string already appeared earlier in `group`, keeping
# the first occurrence (i.e. the one from the lowest-numbered group).
# NOTE(review): the feature-generation cell below passes `group`, not
# `group_new` — confirm whether the de-duplicated table was meant to be used.
group_new = group.drop_duplicates(subset='formula', keep='first').reset_index(drop=True)
group_new

Unnamed: 0,formula,A,B,C,X,Ra,Rb,Rc,RB,Rx,Tf,number
0,Li2NaBF6,Li,Na,B,F,1.159,1.028,0.266,0.6470,1.331,4.129996,1
1,Li2NaNF6,Li,Na,N,F,1.159,1.028,0.166,0.5970,1.331,4.155899,1
2,Li2CuAlF6,Li,Cu,Al,F,1.159,0.774,0.537,0.6555,1.331,4.132934,1
3,Li2CuPF6,Li,Cu,P,F,1.159,0.774,0.444,0.6090,1.331,4.143026,1
4,Li2CuCrF6,Li,Cu,Cr,F,1.159,0.774,0.618,0.6960,1.331,4.177749,1
...,...,...,...,...,...,...,...,...,...,...,...,...
8813,Nd2LiCuF6,Nd,Li,Cu,F,1.501,0.756,0.774,0.7650,1.331,3.562004,3
8814,Nd2NaCuF6,Nd,Na,Cu,F,1.501,1.028,0.774,0.9010,1.331,4.005409,3
8815,Eu2LiCuF6,Eu,Li,Cu,F,1.437,0.756,0.774,0.7650,1.331,3.699009,3
8816,Tm2LiCuF6,Tm,Li,Cu,F,1.352,0.756,0.774,0.7650,1.331,3.946825,3


# Generate basic physical characteristics

In [181]:
# Output feature label -> (which element table, source column(s)), in the
# order the output columns are emitted. Table 1 is `ele_df1`, table 2 is
# `ele_df2`. When several source columns are given, their values are summed.
_A2BCX6_FEATURES = (
    ("Density", 1, ("Density",)),
    ("dipole Polarizability", 1, ("dipole_polarizability",)),
    ("covalent Radius", 1, ("covalent_radius",)),
    ("atomic Radius", 2, ("Atomic radius (Å)",)),
    ("FirstIonization", 1, ("FirstIonization",)),
    ("number of Valence Electrons", 1, ("number_of_valence_electrons",)),
    ("number", 1, ("number",)),
    ("Period", 1, ("Period",)),
    ("Electronegativity", 1, ("Electronegativity",)),
    ("number of s+p Electrons", 2, ("Number of s electrons", "Number of p electrons")),
    ("number of d Electrons", 2, ("Number of d electrons",)),
    ("Mulliken EN", 2, ("Mulliken EN",)),
)

# Site -> stoichiometric count in A2BCX6; each property is scaled by it.
_A2BCX6_SITES = (("A", 2), ("B", 1), ("C", 1), ("X", 6))


def _index_by_symbol(table: pd.DataFrame) -> pd.DataFrame:
    """Return ``table`` indexed by its ``symbol`` column, first row per symbol."""
    return table.drop_duplicates(subset='symbol', keep='first').set_index('symbol')


def _scaled_property(table: pd.DataFrame, symbol, columns, count):
    """Return ``count`` times the sum of ``columns`` for ``symbol`` in ``table``."""
    total = count * table.at[symbol, columns[0]]
    for column in columns[1:]:
        total = total + count * table.at[symbol, column]
    return total


def get_base_features_A2BCX6(data: pd.DataFrame):
    """Build stoichiometry-weighted elemental features for A2BCX6 formulas.

    For every row of ``data`` the element symbols in columns ``A``, ``B``,
    ``C`` and ``X`` are looked up in the module-level element tables
    ``ele_df1`` and ``ele_df2`` (both must have a ``symbol`` column). Each
    property is multiplied by the site count (A: 2, B: 1, C: 1, X: 6) and
    stored as ``"<site>_<feature label>"``.

    Every property is looked up in the table it is read from, by symbol, so
    the result does not depend on ``ele_df1`` and ``ele_df2`` sharing the
    same row order. If a symbol occurs more than once in a table, its first
    row is used.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns ``formula``, ``number``, ``A``, ``B``,
        ``C`` and ``X``.

    Returns
    -------
    pandas.DataFrame
        One row per input row whose four symbols are present in *both*
        element tables (other rows are silently skipped). Columns are
        ``formula``, ``number`` and then, feature by feature, the A, B, C
        and X values. An empty frame is returned when no row qualifies.
    """
    table1 = _index_by_symbol(ele_df1)
    table2 = _index_by_symbol(ele_df2)
    tables = {1: table1, 2: table2}

    result = []
    for _, row in data.iterrows():
        symbols = {site: row[site] for site, _count in _A2BCX6_SITES}
        # Skip formulas containing an element missing from either table.
        if not all(s in table1.index and s in table2.index for s in symbols.values()):
            continue

        tmp = {'formula': row['formula'], 'number': row['number']}
        for label, table_id, columns in _A2BCX6_FEATURES:
            table = tables[table_id]
            for site, count in _A2BCX6_SITES:
                tmp[f"{site}_{label}"] = _scaled_property(table, symbols[site], columns, count)
        result.append(tmp)
    return pd.DataFrame(result)

In [182]:
# Element property tables, read from paths relative to the working directory.
# get_base_features_A2BCX6 reads both as module-level globals, so this cell
# must run before that function is called.
ele_df1 = pd.read_csv('./elements.csv')
ele_df2 = pd.read_csv('./elements2.csv')

In [183]:
# Replace every missing value in both element tables with 0.
# NOTE(review): downstream features cannot tell a filled-in 0 from a genuine
# zero property value — confirm this is acceptable for the model inputs.
ele_df1= ele_df1.fillna(0)
ele_df2= ele_df2.fillna(0)

In [184]:
# Build per-site elemental features for the combined candidate table.
# NOTE(review): this passes `group`, not the de-duplicated `group_new` built
# earlier, so repeated formula strings are kept — confirm this is intended.
base_features_A2BCX6 = get_base_features_A2BCX6(group)
base_features_A2BCX6

Unnamed: 0,formula,number,A_Density,B_Density,C_Density,X_Density,A_dipole Polarizability,B_dipole Polarizability,C_dipole Polarizability,X_dipole Polarizability,A_covalent Radius,B_covalent Radius,C_covalent Radius,X_covalent Radius,A_atomic Radius,B_atomic Radius,C_atomic Radius,X_atomic Radius,A_FirstIonization,B_FirstIonization,C_FirstIonization,X_FirstIonization,A_number of Valence Electrons,B_number of Valence Electrons,C_number of Valence Electrons,X_number of Valence Electrons,A_number,B_number,C_number,X_number,A_Period,B_Period,C_Period,X_Period,A_Electronegativity,B_Electronegativity,C_Electronegativity,X_Electronegativity,A_number of s+p Electrons,B_number of s+p Electrons,C_number of s+p Electrons,X_number of s+p Electrons,A_number of d Electrons,B_number of d Electrons,C_number of d Electrons,X_number of d Electrons,A_Mulliken EN,B_Mulliken EN,C_Mulliken EN,X_Mulliken EN
0,Li2NaBF6,1,1.068,0.971,2.34000,0.0102,328.225,162.7000,20.5,22.44,266,155,85,384,2.9,1.80,0.85,3.0,1040.4,495.8,800.6,10086.0,2,1,3,42,6,11,5,54,4,3,2,12,1.96,0.93,2.04,23.88,2,1,3,42,0,0,0,0,6.02,2.85,4.29,62.46
1,Li2NaNF6,1,1.068,0.971,0.00125,0.0102,328.225,162.7000,7.4,22.44,266,155,71,384,2.9,1.80,0.65,3.0,1040.4,495.8,1402.3,10086.0,2,1,5,42,6,11,7,54,4,3,2,12,1.96,0.93,3.04,23.88,2,1,5,42,0,0,0,0,6.02,2.85,7.30,62.46
2,Li2CuAlF6,1,1.068,8.960,2.70000,0.0102,328.225,46.5000,57.8,22.44,266,112,126,384,2.9,1.35,1.25,3.0,1040.4,745.5,577.5,10086.0,2,11,3,42,6,29,13,54,4,4,3,12,1.96,1.90,1.61,23.88,2,1,3,42,0,10,0,0,6.02,4.48,3.23,62.46
3,Li2CuPF6,1,1.068,8.960,1.82000,0.0102,328.225,46.5000,25.0,22.44,266,112,111,384,2.9,1.35,1.00,3.0,1040.4,745.5,1011.8,10086.0,2,11,5,42,6,29,15,54,4,4,3,12,1.96,1.90,2.19,23.88,2,1,5,42,0,10,0,0,6.02,4.48,5.62,62.46
4,Li2CuCrF6,1,1.068,8.960,7.15000,0.0102,328.225,46.5000,83.0,22.44,266,112,122,384,2.9,1.35,1.40,3.0,1040.4,745.5,652.9,10086.0,2,11,6,42,6,29,24,54,4,4,4,12,1.96,1.90,1.66,23.88,2,1,1,42,0,10,5,0,6.02,4.48,3.72,62.46
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9081,Nd2LiCuF6,3,14.020,0.534,8.96000,0.0102,416.000,164.1125,46.5,22.44,348,133,112,384,3.7,1.45,1.35,3.0,1066.2,520.2,745.5,10086.0,6,1,11,42,120,3,29,54,12,2,4,12,2.28,0.98,1.90,23.88,4,1,1,42,0,0,10,0,6.20,3.01,4.48,62.46
9082,Nd2NaCuF6,3,14.020,0.971,8.96000,0.0102,416.000,162.7000,46.5,22.44,348,155,112,384,3.7,1.80,1.35,3.0,1066.2,495.8,745.5,10086.0,6,1,11,42,120,11,29,54,12,3,4,12,2.28,0.93,1.90,23.88,4,1,1,42,0,0,10,0,6.20,2.85,4.48,62.46
9083,Eu2LiCuF6,3,10.480,0.534,8.96000,0.0102,368.000,164.1125,46.5,22.44,336,133,112,384,3.7,1.45,1.35,3.0,1094.2,520.2,745.5,10086.0,6,1,11,42,126,3,29,54,12,2,4,12,2.40,0.98,1.90,23.88,4,1,1,42,0,0,10,0,6.20,3.01,4.48,62.46
9084,Tm2LiCuF6,3,18.640,0.534,8.96000,0.0102,288.000,164.1125,46.5,22.44,328,133,112,384,3.5,1.45,1.35,3.0,1193.4,520.2,745.5,10086.0,6,1,11,42,138,3,29,54,12,2,4,12,2.50,0.98,1.90,23.88,4,1,1,42,0,0,10,0,6.20,3.01,4.48,62.46


In [185]:
def get_combined_features(data: pd.DataFrame):
    """Add absolute-difference features between the A, B, C and X sites.

    For every feature ``F`` that is present for all four sites (columns
    ``A_F``, ``B_F``, ``C_F`` and ``X_F``) three columns are added:

    * ``A-B_F``        = ``|A_F - B_F|``
    * ``A-C_F``        = ``|A_F - C_F|``
    * ``(A+B+C)-X_F``  = ``|(A_F + B_F + C_F) - X_F|``

    Site columns are recognised by their ``"<site>_"`` prefix and matched by
    feature name, so the result does not depend on column order or on which
    letters happen to occur inside a feature name. A feature that lacks one
    of the four site columns is skipped.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the per-site feature columns. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, with the new columns appended in the order
        of the ``A_`` columns. Calling the function again recomputes the same
        columns rather than adding new ones.
    """
    # Snapshot the labels first: columns are added while iterating.
    for a_feature in list(data.columns):
        label = str(a_feature)
        if not label.startswith('A_'):
            continue
        # Everything after the site prefix, including any further underscores.
        feature_name = label[len('A_'):]
        b_feature = 'B_' + feature_name
        c_feature = 'C_' + feature_name
        x_feature = 'X_' + feature_name
        if not all(col in data.columns for col in (b_feature, c_feature, x_feature)):
            continue

        data['A-B_'+feature_name] = np.abs(data[a_feature] - data[b_feature])
        data['A-C_'+feature_name] = np.abs(data[a_feature] - data[c_feature])
        data['(A+B+C)-X_'+feature_name] = np.abs(
            (data[a_feature] + data[b_feature] + data[c_feature]) - data[x_feature]
        )

    return data

# get_combined_features adds its columns to the frame it receives and returns
# that same frame, so `features` and `base_features_A2BCX6` are one object.
features = get_combined_features(base_features_A2BCX6)
features

Unnamed: 0,formula,number,A_Density,B_Density,C_Density,X_Density,A_dipole Polarizability,B_dipole Polarizability,C_dipole Polarizability,X_dipole Polarizability,A_covalent Radius,B_covalent Radius,C_covalent Radius,X_covalent Radius,A_atomic Radius,B_atomic Radius,C_atomic Radius,X_atomic Radius,A_FirstIonization,B_FirstIonization,C_FirstIonization,X_FirstIonization,A_number of Valence Electrons,B_number of Valence Electrons,C_number of Valence Electrons,X_number of Valence Electrons,A_number,B_number,C_number,X_number,...,A-B_covalent Radius,A-C_covalent Radius,(A+B+C)-X_covalent Radius,A-B_atomic Radius,A-C_atomic Radius,(A+B+C)-X_atomic Radius,A-B_FirstIonization,A-C_FirstIonization,(A+B+C)-X_FirstIonization,A-B_number of Valence Electrons,A-C_number of Valence Electrons,(A+B+C)-X_number of Valence Electrons,A-B_number,A-C_number,(A+B+C)-X_number,A-B_Period,A-C_Period,(A+B+C)-X_Period,A-B_Electronegativity,A-C_Electronegativity,(A+B+C)-X_Electronegativity,A-B_number of s+p Electrons,A-C_number of s+p Electrons,(A+B+C)-X_number of s+p Electrons,A-B_number of d Electrons,A-C_number of d Electrons,(A+B+C)-X_number of d Electrons,A-B_Mulliken EN,A-C_Mulliken EN,(A+B+C)-X_Mulliken EN
0,Li2NaBF6,1,1.068,0.971,2.34000,0.0102,328.225,162.7000,20.5,22.44,266,155,85,384,2.9,1.80,0.85,3.0,1040.4,495.8,800.6,10086.0,2,1,3,42,6,11,5,54,...,111,181,122,1.10,2.05,2.55,544.6,239.8,7749.2,1,1,36,5,1,32,1,2,3,1.03,0.08,18.95,1,1,36,0,0,0,3.17,1.73,49.30
1,Li2NaNF6,1,1.068,0.971,0.00125,0.0102,328.225,162.7000,7.4,22.44,266,155,71,384,2.9,1.80,0.65,3.0,1040.4,495.8,1402.3,10086.0,2,1,5,42,6,11,7,54,...,111,195,108,1.10,2.25,2.35,544.6,361.9,7147.5,1,3,34,5,1,30,1,2,3,1.03,1.08,17.95,1,3,34,0,0,0,3.17,1.28,46.29
2,Li2CuAlF6,1,1.068,8.960,2.70000,0.0102,328.225,46.5000,57.8,22.44,266,112,126,384,2.9,1.35,1.25,3.0,1040.4,745.5,577.5,10086.0,2,11,3,42,6,29,13,54,...,154,140,120,1.55,1.65,2.50,294.9,462.9,7722.6,9,1,26,23,7,6,0,1,1,0.06,0.35,18.41,1,1,36,10,0,10,1.54,2.79,48.73
3,Li2CuPF6,1,1.068,8.960,1.82000,0.0102,328.225,46.5000,25.0,22.44,266,112,111,384,2.9,1.35,1.00,3.0,1040.4,745.5,1011.8,10086.0,2,11,5,42,6,29,15,54,...,154,155,105,1.55,1.90,2.25,294.9,28.6,7288.3,9,3,24,23,9,4,0,1,1,0.06,0.23,17.83,1,3,34,10,0,10,1.54,0.40,46.34
4,Li2CuCrF6,1,1.068,8.960,7.15000,0.0102,328.225,46.5000,83.0,22.44,266,112,122,384,2.9,1.35,1.40,3.0,1040.4,745.5,652.9,10086.0,2,11,6,42,6,29,24,54,...,154,144,116,1.55,1.50,2.65,294.9,387.5,7647.2,9,4,23,23,18,5,0,0,0,0.06,0.30,18.36,1,1,38,10,5,15,1.54,2.30,48.24
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9081,Nd2LiCuF6,3,14.020,0.534,8.96000,0.0102,416.000,164.1125,46.5,22.44,348,133,112,384,3.7,1.45,1.35,3.0,1066.2,520.2,745.5,10086.0,6,1,11,42,120,3,29,54,...,215,236,209,2.25,2.35,3.50,546.0,320.7,7754.1,5,5,24,117,91,98,10,8,6,1.30,0.38,18.72,3,3,36,0,10,10,3.19,1.72,48.77
9082,Nd2NaCuF6,3,14.020,0.971,8.96000,0.0102,416.000,162.7000,46.5,22.44,348,155,112,384,3.7,1.80,1.35,3.0,1066.2,495.8,745.5,10086.0,6,1,11,42,120,11,29,54,...,193,236,231,1.90,2.35,3.85,570.4,320.7,7778.5,5,5,24,109,91,106,9,8,7,1.35,0.38,18.77,3,3,36,0,10,10,3.35,1.72,48.93
9083,Eu2LiCuF6,3,10.480,0.534,8.96000,0.0102,368.000,164.1125,46.5,22.44,336,133,112,384,3.7,1.45,1.35,3.0,1094.2,520.2,745.5,10086.0,6,1,11,42,126,3,29,54,...,203,224,197,2.25,2.35,3.50,574.0,348.7,7726.1,5,5,24,123,97,104,10,8,6,1.42,0.50,18.60,3,3,36,0,10,10,3.19,1.72,48.77
9084,Tm2LiCuF6,3,18.640,0.534,8.96000,0.0102,288.000,164.1125,46.5,22.44,328,133,112,384,3.5,1.45,1.35,3.0,1193.4,520.2,745.5,10086.0,6,1,11,42,138,3,29,54,...,195,216,189,2.05,2.15,3.30,673.2,447.9,7626.9,5,5,24,135,109,116,10,8,6,1.52,0.60,18.50,3,3,36,0,10,10,3.19,1.72,48.77


In [186]:
# Persist the feature table without the index column. The path is relative to
# the working directory; the ./data directory is assumed to exist already.
features.to_csv('./data/data_tf.csv',index = False)