In [90]:
# import dependencies
import ast

import pandas as pd

In [91]:
# Load the base Fe compound dataset; the first CSV column is used as the row index
fe_compounds_df = pd.read_csv(
    'Resources/Fe_binary_dataframe.csv',
    index_col=0,
)
fe_compounds_df.head()

Unnamed: 0,mp_id,Formula,Composition,IPF,Density,Elastic_Tensor,E_above_Hull
0,mp-1005,FeP,"{'Fe': 1.0, 'P': 1.0}",0.145502,6.371739,"{'G_Reuss': 130.0, 'G_VRH': 133.0, 'G_Voigt': ...",0.0
1,mp-1009077,FeH,"{'Fe': 1.0, 'H': 1.0}",0.195137,7.098567,"{'G_Reuss': 91.0, 'G_VRH': 91.0, 'G_Voigt': 91...",0.0
2,mp-1079437,FeB4,"{'Fe': 1.0, 'B': 4.0}",0.104389,4.580343,,0.0
3,mp-1080525,FeB,"{'Fe': 1.0, 'B': 1.0}",0.179459,6.887697,,0.0
4,mp-1095443,ScFe2,"{'Sc': 1.0, 'Fe': 2.0}",0.189488,6.089663,,0.0


In [92]:
# Pull out the "Composition" column (one stringified dictionary per compound)
# so it can be cleaned separately from the rest of the table
composition_list = fe_compounds_df.loc[:, "Composition"]
composition_list

0                                   {'Fe': 1.0, 'P': 1.0}
1                                   {'Fe': 1.0, 'H': 1.0}
2                                   {'Fe': 1.0, 'B': 4.0}
3                                   {'Fe': 1.0, 'B': 1.0}
4                                  {'Sc': 1.0, 'Fe': 2.0}
                              ...                        
1132    {'Na': 5.0, 'Li': 1.0, 'Fe': 2.0, 'P': 2.0, 'C...
1133    {'Na': 2.0, 'Fe': 1.0, 'As': 1.0, 'C': 1.0, 'O...
1134    {'Li': 6.0, 'Mn': 5.0, 'Fe': 1.0, 'B': 6.0, 'O...
1135    {'Li': 12.0, 'Mn': 11.0, 'Fe': 1.0, 'P': 12.0,...
1136    {'Li': 8.0, 'Mn': 1.0, 'Fe': 7.0, 'P': 8.0, 'O...
Name: Composition, Length: 1137, dtype: object

In [93]:
# Sanity check: show the raw composition entry stored under row label 0
# (it is still a string at this point, not a dictionary)
composition_list.loc[0]

"{'Fe': 1.0, 'P': 1.0}"

In [94]:
# Create an empty dataframe with one column per element symbol (H through Lr).
# The symbols are written one periodic-table row (or f-block series) per line
# and split on whitespace into the list of column names.
element_columns = (
    "H He "
    "Li Be B C N O F Ne "
    "Na Mg Al Si P S Cl Ar "
    "K Ca Sc Ti V Cr Mn Fe Co Ni Cu Zn Ga Ge As Se Br Kr "
    "Rb Sr Y Zr Nb Mo Tc Ru Rh Pd Ag Cd In Sn Sb Te I Xe "
    "Cs Ba "
    "La Ce Pr Nd Pm Sm Eu Gd Tb Dy Ho Er Tm Yb Lu "
    "Hf Ta W Re Os Ir Pt Au Hg Tl Pb Bi Po At Rn "
    "Fr Ra "
    "Ac Th Pa U Np Pu Am Cm Bk Cf Es Fm Md No Lr"
).split()
composition_df = pd.DataFrame(columns=element_columns)
composition_df

Unnamed: 0,H,He,Li,Be,B,C,N,O,F,Ne,...,Pu,Am,Cm,Bk,Cf,Es,Fm,Md,No,Lr


In [95]:
# Check that a composition string can be converted to a dictionary.
# ast.literal_eval is used instead of eval: it only accepts Python literals
# (dicts, strings, numbers, ...), so text read from the CSV can never be
# executed as code.
composition_list_dict = ast.literal_eval(composition_list[0])
# Check successful conversion by looking up one element of the parsed row
composition_list_dict['P']

1.0

In [96]:
# Parse every composition string into a dictionary, keyed by row position
# (0 .. len(composition_list) - 1).
# - ast.literal_eval only accepts Python literals, so unlike eval() it cannot
#   execute code that might be embedded in the CSV text.
# - A fresh dict is built here instead of writing into the dict left over from
#   the single-row check, so the result holds only row keys and this cell gives
#   the same mapping every time it is run.
composition_list_dict = {
    position: ast.literal_eval(raw_composition)
    for position, raw_composition in enumerate(composition_list)
}

In [97]:
# Check successful conversion by inspecting the last parsed row.
# The index is derived from the data length rather than hard-coded, so the
# check keeps working if the dataset grows or shrinks.
composition_list_dict[len(composition_list) - 1]

{'Li': 8.0, 'Mn': 1.0, 'Fe': 7.0, 'P': 8.0, 'O': 32.0}

In [99]:
# Build the element-count table from the parsed composition dictionaries.
# All rows are collected first and the dataframe is constructed once:
# - DataFrame.append was removed in pandas 2.0, and growing a frame row by row
#   copies the whole table on every iteration;
# - the row count comes from the data instead of a hard-coded 1137;
# - the frame is rebuilt from scratch, so re-running this cell does not
#   duplicate rows.
# Elements absent from a compound are left as NaN; columns are restricted to
# (and ordered as) element_columns.
composition_records = [
    composition_list_dict[row] for row in range(len(composition_list))
]
composition_df = pd.DataFrame(composition_records, columns=element_columns)

composition_df


Unnamed: 0,H,He,Li,Be,B,C,N,O,F,Ne,...,Pu,Am,Cm,Bk,Cf,Es,Fm,Md,No,Lr
0,,,,,,,,,,,...,,,,,,,,,,
1,1.0,,,,,,,,,,...,,,,,,,,,,
2,,,,,4.0,,,,,,...,,,,,,,,,,
3,,,,,1.0,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1132,,,1.0,,,2.0,,14.0,,,...,,,,,,,,,,
1133,,,,,,1.0,,7.0,,,...,,,,,,,,,,
1134,,,6.0,,6.0,,,18.0,,,...,,,,,,,,,,
1135,,,12.0,,,,,48.0,,,...,,,,,,,,,,


In [100]:
# Replace every missing entry (element not present in the compound) with 0
composition_df = composition_df.fillna(value=0)
composition_df


Unnamed: 0,H,He,Li,Be,B,C,N,O,F,Ne,...,Pu,Am,Cm,Bk,Cf,Es,Fm,Md,No,Lr
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1132,0.0,0.0,1.0,0.0,0.0,2.0,0.0,14.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1133,0.0,0.0,0.0,0.0,0.0,1.0,0.0,7.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1134,0.0,0.0,6.0,0.0,6.0,0.0,0.0,18.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1135,0.0,0.0,12.0,0.0,0.0,0.0,0.0,48.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [101]:
# Grab the mp_id column of the source table so it can be attached
# to the final composition dataframe
mp_id_df = fe_compounds_df.loc[:, 'mp_id']
mp_id_df

0          mp-1005
1       mp-1009077
2       mp-1079437
3       mp-1080525
4       mp-1095443
           ...    
1132     mp-768938
1133     mp-772415
1134     mp-774370
1135     mp-775195
1136     mp-849430
Name: mp_id, Length: 1137, dtype: object

In [102]:
# Add mp_id to the composition dataframe (values are matched on the row index)
composition_df = composition_df.assign(mp_id=mp_id_df)

composition_df

Unnamed: 0,H,He,Li,Be,B,C,N,O,F,Ne,...,Am,Cm,Bk,Cf,Es,Fm,Md,No,Lr,mp_id
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,mp-1005
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,mp-1009077
2,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,mp-1079437
3,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,mp-1080525
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,mp-1095443
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1132,0.0,0.0,1.0,0.0,0.0,2.0,0.0,14.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,mp-768938
1133,0.0,0.0,0.0,0.0,0.0,1.0,0.0,7.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,mp-772415
1134,0.0,0.0,6.0,0.0,6.0,0.0,0.0,18.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,mp-774370
1135,0.0,0.0,12.0,0.0,0.0,0.0,0.0,48.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,mp-775195


In [103]:
# New column order: mp_id first, followed by every element column.
# Derived from element_columns instead of repeating the 103-symbol list, so the
# two lists cannot drift apart.
reorder_columns = ['mp_id', *element_columns]

In [104]:
# Reorder columns so that mp_id comes first, followed by the element columns
composition_df = composition_df.loc[:, reorder_columns]
composition_df

Unnamed: 0,mp_id,H,He,Li,Be,B,C,N,O,F,...,Pu,Am,Cm,Bk,Cf,Es,Fm,Md,No,Lr
0,mp-1005,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,mp-1009077,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,mp-1079437,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,mp-1080525,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,mp-1095443,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1132,mp-768938,0.0,0.0,1.0,0.0,0.0,2.0,0.0,14.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1133,mp-772415,0.0,0.0,0.0,0.0,0.0,1.0,0.0,7.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1134,mp-774370,0.0,0.0,6.0,0.0,6.0,0.0,0.0,18.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1135,mp-775195,0.0,0.0,12.0,0.0,0.0,0.0,0.0,48.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [105]:
# Save the composition table as CSV for database integration;
# the row index is left out of the file
composition_df.to_csv(
    r'Resources/Fe_compounds_composition.csv',
    index=False,
)