In [61]:
# Imports
import phonopy
from monty.serialization import loadfn
import pandas as pd
import numpy as np
import re
import gc
import scipy.constants
import math
import os

Read the database lookup file

In [62]:
# Create a dataframe
df = loadfn(r"phonon_db_lookup_df.json")
df.head()

Unnamed: 0,material_id,elements,formula_pretty_reduced,formula,num_elements,chemsys,spacegroup_symbol,spacegroup_number,FORCE_SETS_filepath,phonon_filepath
0,mp-1000,"[Te, Ba]",BaTe,Ba4 Te4,2,Ba-Te,Fm-3m,225,phonon_db/mp-1000-20180417/FORCE_SETS,phonon_db/mp-1000-20180417/phonon.yaml
1,mp-10009,"[Ga, Te]",GaTe,Ga4 Te4,2,Ga-Te,P6_3/mmc,194,phonon_db/mp-10009-20180417/FORCE_SETS,phonon_db/mp-10009-20180417/phonon.yaml
2,mp-10070,"[O, P, Ag, Ba]",BaAg(PO3)3,Ba4 Ag4 P12 O36,4,Ag-Ba-O-P,P2_12_12_1,19,phonon_db/mp-10070-20180417/FORCE_SETS,phonon_db/mp-10070-20180417/phonon.yaml
3,mp-10074,"[Ge, Se]",GeSe2,Ge4 Se8,2,Ge-Se,I-42d,122,phonon_db/mp-10074-20180417/FORCE_SETS,phonon_db/mp-10074-20180417/phonon.yaml
4,mp-10080,"[B, O, Ge, Pr]",PrGeBO5,Pr3 Ge3 B3 O15,4,B-Ge-O-Pr,P3_1,144,phonon_db/mp-10080-20180417/FORCE_SETS,phonon_db/mp-10080-20180417/phonon.yaml


Get the species with Li element

In [63]:
def element_filtered_dataframe(df, element):
    """A function which filters a dataframe for a particular element and returns a new dataframe with only entries containing that element.
    
    Input:
        df (pd.DataFrame): The original dataframe which describes the database
        element (str): The element which you want to query the dataframe for.
    Output:
        filtered_df (pd.DataFrame): A new dataframe with only entries which contain the element of interest"""
    
    return df.loc[df['chemsys'].str.contains(element)].reset_index(drop=True)

Li_df = element_filtered_dataframe(df,'Li')
print(f'Of the {df.shape[0]} materials in this dataset, {Li_df.shape[0]} contain lithium \n')

Li_df.head()

Of the 10034 materials in this dataset, 873 contain lithium 



Unnamed: 0,material_id,elements,formula_pretty_reduced,formula,num_elements,chemsys,spacegroup_symbol,spacegroup_number,FORCE_SETS_filepath,phonon_filepath
0,mp-10103,"[Li, F, Al, Yb]",LiYbAlF6,Li2 Yb2 Al2 F12,4,Al-F-Li-Yb,P-31c,163,phonon_db/mp-10103-20180417/FORCE_SETS,phonon_db/mp-10103-20180417/phonon.yaml
1,mp-10182,"[Li, P, Zn]",LiZnP,Li4 Zn4 P4,3,Li-P-Zn,F-43m,216,phonon_db/mp-10182-20180417/FORCE_SETS,phonon_db/mp-10182-20180417/phonon.yaml
2,mp-10250,"[Li, F, Ba]",BaLiF3,Ba1 Li1 F3,3,Ba-F-Li,Pm-3m,221,phonon_db/mp-10250-20180417/FORCE_SETS,phonon_db/mp-10250-20180417/phonon.yaml
3,mp-10251,"[Li, O, Dy]",LiDyO2,Li4 Dy4 O8,3,Dy-Li-O,P2_1/c,14,phonon_db/mp-10251-20180417/FORCE_SETS,phonon_db/mp-10251-20180417/phonon.yaml
4,mp-10485,"[Li, Sb, Ba]",BaLiSb,Ba2 Li2 Sb2,3,Ba-Li-Sb,P6_3/mmc,194,phonon_db/mp-10485-20180417/FORCE_SETS,phonon_db/mp-10485-20180417/phonon.yaml


An extra variable 'formula' for reading Li pdos later in this notebook

In [64]:
formula = Li_df['formula'].str.split(" ", -1, expand=True)

The folder path is required for accessing the dat file
Example here: the dat folder contains all dos and pdos files for 873 species contain lithium in dat file format

In [83]:
folder_path = "C:/Users/Amelia/Desktop/UROP/dat/"

In [66]:
def loading (index):
    '''loading file from the database'''
    phonon_filepath = Li_df['phonon_filepath'][index]
    print(phonon_filepath)
    force_sets_filepath = Li_df['FORCE_SETS_filepath'][index]
    print(force_sets_filepath)

    ph = phonopy.load(phonon_filepath, force_sets_filename=force_sets_filepath, log_level=1)
    
    return ph

In [67]:
def separate_text (string):
    '''Separate string e.g. Li2 into Li and 2'''
    match = re.match(r"([a-z]+)([0-9]+)", string , re.I)
    if match:
        res = match.groups()
        
    return res

In [68]:
def find_Li_entry (index):
    '''Return a list of entries for Li atom pdos in dat file'''
    temp = {}
    shape = np.shape(formula)
    length = shape[1]
    for i in [*range (0, length)]:
        if formula.loc[index,i] != None:
            t = formula.loc[index,i]
            res = separate_text(t)
            temp[res[0]] = int(res[1])

    keys = [*temp.keys()]

    if keys[0] == 'Li':
        no_of_entry = temp['Li']
        entry = [*range(1,no_of_entry+1)]

    elif keys[0] != 'Li':
        Li_index = keys.index('Li')
        index_list = [*range(0,Li_index)]
        start = 0
        for i in index_list:
            start += temp[keys[i]]
        start += 1
        no_of_entry = temp['Li']
        entry = [*range(start, start + no_of_entry)]

    return entry

In [69]:
def find_unmatch ():
    '''not necessary, only if pdos file has less atom data than actual formula'''
    
    all_index = [*range (0, Li_df.index[-1]+1)] 
    unmatch = []
    
    for index in all_index:
        temp = {}
        shape = np.shape(formula)
        length = shape[1]
        for i in [*range (0, length)]:
            if formula.loc[index,i] != None:
                t = formula.loc[index,i]
                res = separate_text(t)
                temp[res[0]] = int(res[1])

        keys = [*temp.keys()]

        total_entry = 0
        for i in keys:
            total_entry += temp[i]

        total_entry += 1 #inclue the frequency row
        
        file = np. loadtxt(os.path.join(folder_path, "projected_dos_{0}.dat".format(index)),unpack = True)
        shape = np.shape(file)
        file_entry = shape[0]
        
        if total_entry != file_entry:
            unmatch.append(index)
            
        return unmatch

In [70]:
def find_total_no_atoms ():
    '''find total no of atoms in a unit cell'''

    all_index = [*range (0, Li_df.index[-1]+1)]
    total_no_atoms = {}
    for index in all_index:
        temp = {}
        shape = np.shape(formula)
        length = shape[1]
        for i in [*range (0, length)]:
            if formula.loc[index,i] != None:
                t = formula.loc[index,i]
                res = separate_text(t)
                temp[res[0]] = int(res[1])

        keys = [*temp.keys()]

        total_entry = 0
        for i in keys:
            total_entry += temp[i]

        total_no_atoms[index] = total_entry

    return total_no_atoms

In [71]:
def find_most_atom_species ():
    '''find the index of the species that has the largest number of atoms in a unit cell'''

    result = find_total_no_atoms()
    species = list(result.keys())[list(result.values()).index(max(result.values()))]

    return species

In [72]:
def get_total_dos(index):
    '''Get total DOS values in a list'''
    file = np. loadtxt(os.path.join(folder_path, "total_dos_{0}.dat".format(index)),unpack = True)
    total_dos = file[1]
    
    return total_dos

In [73]:
def get_total_f(index):
    '''Get total frequency values in a list'''
    file = np. loadtxt(os.path.join(folder_path, "total_dos_{0}.dat".format(index)),unpack = True)
    total_f = file[0]
    
    return total_f

In [74]:
def get_partial_dos (index):
    '''Get partial DOS values in a numpy array'''
    #entry should be a list of numbers corresponding to the columns of the species of interest
    file = np. loadtxt(os.path.join(folder_path, "projected_dos_{0}.dat".format(index)),unpack = True)
    entry = find_Li_entry (index)
    partial_dos = 0
    for i in entry:
        partial_dos += file [i]
    
    return partial_dos

In [75]:
def get_partial_f (index):
    '''Get partial DOS frequency in a numpy array'''
    #entry should be a list of numbers corresponding to the columns of the species of interest
    file = np. loadtxt(os.path.join(folder_path, "projected_dos_{0}.dat".format(index)),unpack = True)
    partial_f = file[0]
    
    return partial_f

In [76]:
def run_all_dos( mesh = 50):
    '''takes around 2 hrs for 800+ data, the larger the mesh, the better the quality of dos data'''

    global fail_total
    global fail_projected

    fail_total = []
    fail_projected = []

    index = [*range (0, Li_df.index[-1]+1)]
    for i in index:
        
        ph = loading(i)
        ph.run_mesh(mesh, with_eigenvectors=True, is_mesh_symmetry=False)

        filename_1 = os.path.join(folder_path, "total_dos_{0}.dat".format(index))
        filename_2 = os.path.join(folder_path, "projected_dos_{0}.dat".format(index))

        try:
            ph.run_total_dos()
            ph.write_total_dos(filename = filename_1)
        except MemoryError as e:
            fail_total.append(i)
            pass

        try:
            ph.run_projected_dos()
            ph.write_projected_dos(filename = filename_2)
        except MemoryError as e:
            fail_projected.append(i)
            pass

        print(i)

        del ph
        gc.collect()
        
    return

In [17]:
#run_all_dos(mesh = 50)
#default is mesh = 50 which is low enough to run by regular computer, mesh can take either integer or list of three values
#phonopy default is 100 or [20, 20, 20]

Prepare a dataframe for output

In [77]:
data = Li_df.copy()
data = data.drop('FORCE_SETS_filepath', axis=1)
data = data.drop('phonon_filepath', axis=1)
data

Unnamed: 0,material_id,elements,formula_pretty_reduced,formula,num_elements,chemsys,spacegroup_symbol,spacegroup_number
0,mp-10103,"[Li, F, Al, Yb]",LiYbAlF6,Li2 Yb2 Al2 F12,4,Al-F-Li-Yb,P-31c,163
1,mp-10182,"[Li, P, Zn]",LiZnP,Li4 Zn4 P4,3,Li-P-Zn,F-43m,216
2,mp-10250,"[Li, F, Ba]",BaLiF3,Ba1 Li1 F3,3,Ba-F-Li,Pm-3m,221
3,mp-10251,"[Li, O, Dy]",LiDyO2,Li4 Dy4 O8,3,Dy-Li-O,P2_1/c,14
4,mp-10485,"[Li, Sb, Ba]",BaLiSb,Ba2 Li2 Sb2,3,Ba-Li-Sb,P6_3/mmc,194
...,...,...,...,...,...,...,...,...
868,mp-989579,"[Rb, Li, Tl, Cl]",Rb2LiTlCl6,Rb8 Li4 Tl4 Cl24,4,Cl-Li-Rb-Tl,Fm-3m,225
869,mp-989583,"[Rb, Li, In, Cl]",Rb2LiInCl6,Rb8 Li4 In4 Cl24,4,Cl-In-Li-Rb,Fm-3m,225
870,mp-9912,"[Li, P, Ce]",Li2CeP2,Li2 Ce1 P2,3,Ce-Li-P,P-3m1,164
871,mp-9915,"[Li, Be, P]",LiBeP,Li2 Be2 P2,3,Be-Li-P,P4/nmm,129


In [78]:
def get_all_data (index):
    '''output four lists of data based on index input'''
    
    total_dos = get_total_dos(index)
    total_f = get_total_f(index)
    projected_dos = get_partial_dos (index)
    projected_f = get_partial_f (index)
    
    return total_dos,total_f,projected_dos,projected_f

In [79]:
def get_fav (index):
    '''Get a temperature independent phonon band centre value'''
    
    Li_f = get_partial_f(index)
    Li_d = get_partial_dos(index)
    
    fav_elements = Li_f * Li_d
    fav = np.sum(fav_elements)
    norm = np.sum(Li_d)
    fav = fav / norm
    return fav

In [80]:
k = scipy.constants.k
hbar = scipy.constants.hbar

def get_favT (index, Tmin, Tmax, Tstep):
    '''Get a list of temperature dependent phonon band centre values or a single value at given T values'''
    favT = []

    if ((Tmin != None) and (Tmax == None) and (Tstep == None)):
        Temperature = [Tmin]
    elif ((Tmin != None) and (Tmax != None) and (Tstep != None)):
        Temperature = [*range(Tmin, Tmax, Tstep)]
    else:
        print("Tmin is expected as a value, Tmax and Tstep could be None")
        Temperature = None

    Li_f = get_partial_f(index)
    Li_d = get_partial_dos(index)

    for T in Temperature:
        ph_numbers = []
        beta = 1/(k*T)
        for frequency in Li_f:
            if frequency < 0:
                ph_numbers.append(1)

            else:
                energy = frequency * hbar * 10 ** 12
                denominator = math.exp(energy*beta) - 1
                ph_number = 1/denominator
                ph_numbers.append(ph_number)


        fav_elements = Li_f * Li_d * ph_numbers
        norm_elements = Li_d * ph_numbers

        fav = np.sum(fav_elements)
        norm = np.sum(norm_elements)
        fav = fav / norm
        favT.append(fav)

    return favT

Add an empty column for temperature independent phonon band centre

In [81]:
data['fav'] = np.nan
data

Unnamed: 0,material_id,elements,formula_pretty_reduced,formula,num_elements,chemsys,spacegroup_symbol,spacegroup_number,fav
0,mp-10103,"[Li, F, Al, Yb]",LiYbAlF6,Li2 Yb2 Al2 F12,4,Al-F-Li-Yb,P-31c,163,
1,mp-10182,"[Li, P, Zn]",LiZnP,Li4 Zn4 P4,3,Li-P-Zn,F-43m,216,
2,mp-10250,"[Li, F, Ba]",BaLiF3,Ba1 Li1 F3,3,Ba-F-Li,Pm-3m,221,
3,mp-10251,"[Li, O, Dy]",LiDyO2,Li4 Dy4 O8,3,Dy-Li-O,P2_1/c,14,
4,mp-10485,"[Li, Sb, Ba]",BaLiSb,Ba2 Li2 Sb2,3,Ba-Li-Sb,P6_3/mmc,194,
...,...,...,...,...,...,...,...,...,...
868,mp-989579,"[Rb, Li, Tl, Cl]",Rb2LiTlCl6,Rb8 Li4 Tl4 Cl24,4,Cl-Li-Rb-Tl,Fm-3m,225,
869,mp-989583,"[Rb, Li, In, Cl]",Rb2LiInCl6,Rb8 Li4 In4 Cl24,4,Cl-In-Li-Rb,Fm-3m,225,
870,mp-9912,"[Li, P, Ce]",Li2CeP2,Li2 Ce1 P2,3,Ce-Li-P,P-3m1,164,
871,mp-9915,"[Li, Be, P]",LiBeP,Li2 Be2 P2,3,Be-Li-P,P4/nmm,129,


Get temperature independent phonon band centers for all the species

In [84]:
index = [*range (0, data.index[-1]+1)]
for i in index:
    fav = get_fav(i)
    data.at[i,'fav'] = fav

Get temperature dependent phonon band center (room temperature) for all the species

In [86]:
Tmin = 298
Tmax = None
Tstep = None

if ((Tmin != None) and (Tmax == None) and (Tstep == None)):
    Temperature = [Tmin]
elif ((Tmin != None) and (Tmax != None) and (Tstep != None)):
    Temperature = [*range(Tmin, Tmax, Tstep)]
else:
    print("Tmin is expected as a value, Tmax and Tstep could be None")
    Temperature = None


index = [*range (0, data.index[-1]+1)]
T_val = pd.DataFrame(columns = Temperature)
for i in index:
    favT = get_favT(i, Tmin, Tmax, Tstep)
    T_val.loc[i] = favT

data = pd.concat([data, T_val], axis = 1)

Get temperature dependent phonon band center in a temperature range for all the species, example here using 10 to 1000k with a step of 10k

In [87]:
Tmin = 10
Tmax = 1000
Tstep = 10

if ((Tmin != None) and (Tmax == None) and (Tstep == None)):
    Temperature = [Tmin]
elif ((Tmin != None) and (Tmax != None) and (Tstep != None)):
    Temperature = [*range(Tmin, Tmax, Tstep)]
else:
    print("Tmin is expected as a value, Tmax and Tstep could be None")
    Temperature = None


index = [*range (0, data.index[-1]+1)]
T_val = pd.DataFrame(columns = Temperature)
for i in index:
    favT = get_favT(i, Tmin, Tmax, Tstep)
    T_val.loc[i] = favT

data = pd.concat([data, T_val], axis = 1)


In [35]:
data

Unnamed: 0,material_id,elements,formula_pretty_reduced,formula,num_elements,chemsys,spacegroup_symbol,spacegroup_number,fav,298,...,900,910,920,930,940,950,960,970,980,990
0,mp-10103,"[Li, F, Al, Yb]",LiYbAlF6,Li2 Yb2 Al2 F12,4,Al-F-Li-Yb,P-31c,163,11.477275,10.187487,...,10.294750,10.295315,10.295867,10.296407,10.296935,10.297452,10.297958,10.298454,10.298939,10.299414
1,mp-10182,"[Li, P, Zn]",LiZnP,Li4 Zn4 P4,3,Li-P-Zn,F-43m,216,10.767147,9.834405,...,9.909935,9.910330,9.910717,9.911096,9.911466,9.911828,9.912183,9.912530,9.912870,9.913203
2,mp-10250,"[Li, F, Ba]",BaLiF3,Ba1 Li1 F3,3,Ba-F-Li,Pm-3m,221,7.149976,6.155540,...,6.206074,6.206345,6.206610,6.206870,6.207124,6.207372,6.207615,6.207853,6.208087,6.208315
3,mp-10251,"[Li, O, Dy]",LiDyO2,Li4 Dy4 O8,3,Dy-Li-O,P2_1/c,14,11.573993,8.612673,...,8.822861,8.823985,8.825086,8.826162,8.827215,8.828246,8.829255,8.830243,8.831211,8.832159
4,mp-10485,"[Li, Sb, Ba]",BaLiSb,Ba2 Li2 Sb2,3,Ba-Li-Sb,P6_3/mmc,194,2.543602,2.224327,...,2.230293,2.230325,2.230357,2.230388,2.230418,2.230448,2.230477,2.230505,2.230533,2.230561
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
868,mp-989579,"[Rb, Li, Tl, Cl]",Rb2LiTlCl6,Rb8 Li4 Tl4 Cl24,4,Cl-Li-Rb-Tl,Fm-3m,225,6.337484,5.903504,...,5.925654,5.925772,5.925886,5.925998,5.926108,5.926215,5.926320,5.926423,5.926523,5.926622
869,mp-989583,"[Rb, Li, In, Cl]",Rb2LiInCl6,Rb8 Li4 In4 Cl24,4,Cl-In-Li-Rb,Fm-3m,225,6.227338,5.675781,...,5.702292,5.702432,5.702569,5.702704,5.702835,5.702964,5.703089,5.703213,5.703333,5.703451
870,mp-9912,"[Li, P, Ce]",Li2CeP2,Li2 Ce1 P2,3,Ce-Li-P,P-3m1,164,9.743440,9.099472,...,9.147794,9.148047,9.148295,9.148538,9.148775,9.149008,9.149235,9.149458,9.149676,9.149890
871,mp-9915,"[Li, Be, P]",LiBeP,Li2 Be2 P2,3,Be-Li-P,P4/nmm,129,8.642599,8.046354,...,8.085366,8.085574,8.085777,8.085976,8.086170,8.086361,8.086547,8.086730,8.086909,8.087084


**First frequency that has non-zero Li DOS**

(i) ignore imaginary

In [89]:
def first_f_i (index):
    '''get first frequency that has non-zero Li DOS and non-imaginary '''
    Li_f = get_partial_f(index)
    Li_d = get_partial_dos(index)
    df = pd.DataFrame({'Li_f': Li_f, 'Li_d': Li_d})
    df = df[(df['Li_d'] > 0)&(df['Li_f']> 0)]
    df = df.reset_index()
    f = df.loc[0,'Li_f']
    return f

Add an empty column for output

In [90]:
data['first_f_i'] = np.nan

Write first frequency data (ignore imaginary)

In [91]:
index = [*range (0, data.index[-1]+1)]
for i in index:
    f = first_f_i(i)
    data.at[i,'first_f_i'] = f

(ii) take the absolute value

In [92]:
def first_f_a (index):
    '''get first frequency (absolute) that has non-zero Li DOS'''
    Li_f = get_partial_f(index)
    Li_d = get_partial_dos(index)
    Li_f = abs(Li_f)
    key = np.nonzero(Li_d)[0][0]
    f = Li_f[key]
    return f

Add an empty column for output

In [93]:
data['first_f_a'] = np.nan

Write first frequency data (Take absolute values for imaginary frequencies)

In [94]:
index = [*range (0, data.index[-1]+1)]
for i in index:
    f = first_f_a(i)
    data.at[i,'first_f_a'] = f

Li DOS width relative to the total width

In [95]:
def relative_w (index):
    '''get Li DOS width relative to the total width'''

    total_f = get_total_f(index)
    total_d = get_total_dos(index)
    Li_f = get_partial_f(index)
    Li_d = get_partial_dos(index)

    df_1 = pd.DataFrame({'total_f': total_f, 'total_d': total_d})
    f_1 = df_1.loc[0,'total_f']
    f_2 = df_1.iloc[-1].loc['total_f']
    total_f_d = f_2 - f_1

    df_2 = pd.DataFrame({'Li_f': Li_f, 'Li_d': Li_d})
    f_1 = df_2.loc[0,'Li_f']
    f_2 = df_2.iloc[-1].loc['Li_f']
    Li_f_d = f_2 - f_1

    relative_w = Li_f_d / total_f_d

    return relative_w

Add an empty column for output

In [96]:
data['relative_w'] = np.nan

Write relative width

In [97]:
index = [*range (0, data.index[-1]+1)]
for i in index:
    w = relative_w(i)
    data['relative_w'][i] = w

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['relative_w'][i] = w


In [98]:
data

Unnamed: 0,material_id,elements,formula_pretty_reduced,formula,num_elements,chemsys,spacegroup_symbol,spacegroup_number,fav,298,...,930,940,950,960,970,980,990,first_f_i,first_f_a,relative_w
0,mp-10103,"[Li, F, Al, Yb]",LiYbAlF6,Li2 Yb2 Al2 F12,4,Al-F-Li-Yb,P-31c,163,11.477275,10.187487,...,10.296407,10.296935,10.297452,10.297958,10.298454,10.298939,10.299414,0.042794,0.042794,1.0
1,mp-10182,"[Li, P, Zn]",LiZnP,Li4 Zn4 P4,3,Li-P-Zn,F-43m,216,10.767147,9.834405,...,9.911096,9.911466,9.911828,9.912183,9.912530,9.912870,9.913203,0.025242,0.025242,1.0
2,mp-10250,"[Li, F, Ba]",BaLiF3,Ba1 Li1 F3,3,Ba-F-Li,Pm-3m,221,7.149976,6.155540,...,6.206870,6.207124,6.207372,6.207615,6.207853,6.208087,6.208315,0.030784,0.030784,1.0
3,mp-10251,"[Li, O, Dy]",LiDyO2,Li4 Dy4 O8,3,Dy-Li-O,P2_1/c,14,11.573993,8.612673,...,8.826162,8.827215,8.828246,8.829255,8.830243,8.831211,8.832159,0.084645,0.678670,1.0
4,mp-10485,"[Li, Sb, Ba]",BaLiSb,Ba2 Li2 Sb2,3,Ba-Li-Sb,P6_3/mmc,194,2.543602,2.224327,...,2.230388,2.230418,2.230448,2.230477,2.230505,2.230533,2.230561,0.020693,0.020693,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
868,mp-989579,"[Rb, Li, Tl, Cl]",Rb2LiTlCl6,Rb8 Li4 Tl4 Cl24,4,Cl-Li-Rb-Tl,Fm-3m,225,6.337484,5.903504,...,5.925998,5.926108,5.926215,5.926320,5.926423,5.926523,5.926622,0.027749,1.153376,1.0
869,mp-989583,"[Rb, Li, In, Cl]",Rb2LiInCl6,Rb8 Li4 In4 Cl24,4,Cl-In-Li-Rb,Fm-3m,225,6.227338,5.675781,...,5.702704,5.702835,5.702964,5.703089,5.703213,5.703333,5.703451,0.000556,0.735833,1.0
870,mp-9912,"[Li, P, Ce]",Li2CeP2,Li2 Ce1 P2,3,Ce-Li-P,P-3m1,164,9.743440,9.099472,...,9.148538,9.148775,9.149008,9.149235,9.149458,9.149676,9.149890,0.024268,0.024268,1.0
871,mp-9915,"[Li, Be, P]",LiBeP,Li2 Be2 P2,3,Be-Li-P,P4/nmm,129,8.642599,8.046354,...,8.085976,8.086170,8.086361,8.086547,8.086730,8.086909,8.087084,0.038148,0.038148,1.0


Write the result dataframe into a csv file

In [99]:
data.to_csv('data_big.csv')