In [1]:
from pymatgen.core.composition import *
import numpy as np
import pandas as pd
import ase.db # https://wiki.fysik.dtu.dk/ase/ase/db/db.html
import json
import re

# Reading Features - Atomic properties for elements in the periodic table.
### These properties were calculated using Density Functional Theory. For more information, see:

    Exploring Two-Dimensional Materials Thermodynamic Stability via Machine Learning
    Gabriel R. Schleder*, Carlos Mera Acosta, and Adalberto Fazzio
    ACS Appl. Mater. Interfaces 2020, 12, 18, 20149–20157.

In [2]:
# Atomic-property table; later cells join it on its 'Element' column.
df_atoms = pd.read_csv(filepath_or_buffer='Schleder2019_AtomicTable.csv')
# df_atoms.head()  # uncomment to preview the table


# Reading Database - This is a database of two-dimensional materials (C2DB)
### The majority of materials in this database are theoretical predictions. For more information, see:



    1) The Computational 2D Materials Database: High-Throughput Modeling and Discovery of Atomically Thin Crystals. 2D Materials 5, 042002 (2018).
    

    2) Recent Progress of the Computational 2D Materials Database (C2DB). 2D Materials 8, 044002 (2021).

In [11]:
# Every property key that can be present in the database.
# The order of the original listing is preserved; the line breaks below only
# group keys that share a name prefix.
keys = [
    'folder', 'uid', 'plasmafrequency_x', 'plasmafrequency_y', 'asr_id', 'cell_area',
    'has_inversion_symmetry', 'stoichiometry', 'spacegroup', 'spgnum', 'pointgroup',
    'crystal_type', 'dos_at_ef_nosoc', 'dos_at_ef_soc',
    'alphax_el', 'alphay_el', 'alphaz_el',
    'ehull', 'hform', 'thermodynamic_stability_level',
    'spin_axis', 'E_x', 'E_y', 'E_z', 'dE_zx', 'dE_zy',
    # dim_* entries
    'dim_primary', 'dim_primary_score',
    'dim_nclusters_0D', 'dim_nclusters_1D', 'dim_nclusters_2D', 'dim_nclusters_3D',
    'dim_threshold_0D', 'dim_threshold_1D', 'dim_threshold_2D',
    'dim_score_0D', 'dim_score_1D', 'dim_score_2D', 'dim_score_3D',
    'dim_score_01D', 'dim_score_02D', 'dim_score_03D',
    'dim_score_12D', 'dim_score_13D', 'dim_score_23D',
    'dim_score_012D', 'dim_score_013D', 'dim_score_023D', 'dim_score_123D',
    'dim_score_0123D',
    'first_class_material', 'minhessianeig', 'dynamic_stability_phonons',
    'speed_of_sound_x', 'speed_of_sound_y', 'dynamic_stability_stiffness',
    # c_ij entries
    'c_11', 'c_12', 'c_13',
    'c_21', 'c_22', 'c_23',
    'c_31', 'c_32', 'c_33',
    'magstate', 'is_magnetic', 'nspins',
    'evac', 'evacdiff', 'dipz', 'efermi',
    'gap', 'vbm', 'cbm', 'gap_dir', 'gap_dir_nosoc', 'gap_nosoc',
    'workfunction',
    'emass_vb_dir1', 'emass_vb_dir2', 'emass_cb_dir1', 'emass_cb_dir2',
    # has_asr_* entries
    'has_asr_hse_calculate', 'has_asr_plasmafrequency', 'has_asr_phonons_calculate',
    'has_asr_database_material_fingerprint', 'has_asr_structureinfo', 'has_asr_pdos',
    'has_asr_setup_strains', 'has_asr_polarizability', 'has_asr_convex_hull',
    'has_asr_magnetic_anisotropy',
    'has_asr_dimensionality', 'has_asr_setinfo', 'has_asr_phonons', 'has_asr_bader',
    'has_asr_stiffness', 'has_asr_magstate', 'has_asr_gs_calculate', 'has_asr_gs',
    'has_asr_bandstructure_calculate', 'has_asr_bandstructure',
    'has_asr_projected_bandstructure',
    'has_asr_pdos_calculate', 'has_asr_emasses_refine', 'has_asr_emasses',
    'has_asr_emasses_validate',
]

In [12]:
# We are interested in:

# formula            
# stoichiometry
# spacegroup
# gap                           (target)

In [13]:
# You must create a data frame which includes
# Formula, stoichiometry, spacegroup, gap

# Below is an example that builds the AB2-stoichiometry eHull dataset

In [19]:
# Build the AB2 dataset: one record per non-magnetic material whose formula
# has exactly two elements with counts 1 and 2 (in that order).
data = ase.db.connect('./c2db-2021-06-24.db')
data_nonMag = data.select(is_magnetic=False)  # Selection of database rows with all the needed
                                              # information, excluding magnetic materials.

target = 'ehull'
material_columns = ['Material', 'Space Group', target, 'Atom1', 'Atom2']

# Records are accumulated in a plain list and converted to a DataFrame once,
# after the loop; growing a DataFrame one row at a time re-allocates on every
# insert and leaves the column dtypes to chance.
records = []
# (formula or None, repr(error)) for every row that could not be processed,
# so that skipped entries can be inspected instead of vanishing silently.
skipped_rows = []

for row in data_nonMag:
    formula = None
    try:
        formula = row.formula
        formula_information = Composition(formula).as_dict()
        list_ele = list(formula_information.items())

        # Only AB2 stoichiometry. The test is order-sensitive: the first element
        # of the parsed formula must have count 1 and the second count 2.
        # NOTE(review): a formula written with the doubled element first would
        # not match — confirm this is the intended selection.
        if len(list_ele) == 2 and list_ele[0][1] == 1 and list_ele[1][1] == 2:
            records.append([formula, row.spacegroup, row[target],
                            list_ele[0][0], list_ele[1][0]])

    except Exception as exc:
        # Best-effort: a row that cannot be parsed or lacks a needed field is
        # skipped, as before, but the reason is recorded. Catching Exception
        # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
        skipped_rows.append((formula, repr(exc)))

df_materials = pd.DataFrame(records, columns=material_columns)

if skipped_rows:
    print('Skipped', len(skipped_rows), 'row(s) that could not be processed; inspect `skipped_rows`.')

df_materials

Unnamed: 0,Material,Space Group,ehull,Atom1,Atom2
0,AgBr2,P-4m2,0.102879,Ag,Br
1,AgLi2,P-6m2,0.230152,Ag,Li
2,AlI2,P-6m2,0.399248,Al,I
3,AsCl2,P-6m2,0.554280,As,Cl
4,AsS2,P-3m1,0.154443,As,S
...,...,...,...,...,...
620,ZrI2,P-6m2,0.027005,Zr,I
621,ZrO2,P-4m2,0.464152,Zr,O
622,ZrS2,P-3m1,0.000000,Zr,S
623,ZrSe2,P-3m1,0.000000,Zr,Se


In [15]:
# Attach the atomic-table columns for the first element (Atom1) of each material.
# Inner join: materials whose Atom1 has no match in df_atoms['Element'] are dropped.
merge1 = df_materials.merge(
    df_atoms,
    how='inner',
    left_on='Atom1',
    right_on='Element',
)
merge1

Unnamed: 0,Material,Space Group,ehull,Atom1,Atom2,Element,Z,Electronegativity,IonizationPotential,ElectronAffinity,...,r_p_orbital,r_d_orbital,r_atomic_nonbonded,r_valence_lastorbital,r_covalent,Valence,PeriodicColumn,PeriodicColumn_upto18,NumberUnfilledOrbitals,Polarizability
0,AgBr2,P-4m2,0.102879,Ag,Br,Ag,47,1.93,-7.8182,-1.2721,...,1.4180,0.5486,1.53,1.3241,1.45,11.0,1.0,11.0,1.0,52.50
1,AgLi2,P-6m2,0.230152,Ag,Li,Ag,47,1.93,-7.8182,-1.2721,...,1.4180,0.5486,1.53,1.3241,1.45,11.0,1.0,11.0,1.0,52.50
2,AgCl2,P-4m2,0.129179,Ag,Cl,Ag,47,1.93,-7.8182,-1.2721,...,1.4180,0.5486,1.53,1.3241,1.45,11.0,1.0,11.0,1.0,52.50
3,AgI2,P-4m2,0.117230,Ag,I,Ag,47,1.93,-7.8182,-1.2721,...,1.4180,0.5486,1.53,1.3241,1.45,11.0,1.0,11.0,1.0,52.50
4,AgSe2,P-4m2,0.145625,Ag,Se,Ag,47,1.93,-7.8182,-1.2721,...,1.4180,0.5486,1.53,1.3241,1.45,11.0,1.0,11.0,1.0,52.50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
620,CoO2,P-4m2,0.237013,Co,O,Co,27,1.88,-7.4741,-1.3745,...,1.6081,0.3590,1.26,0.3590,1.26,9.0,9.0,9.0,3.0,53.00
621,CoS2,P-3m1,0.110073,Co,S,Co,27,1.88,-7.4741,-1.3745,...,1.6081,0.3590,1.26,0.3590,1.26,9.0,9.0,9.0,3.0,53.00
622,CoSe2,P-3m1,0.053422,Co,Se,Co,27,1.88,-7.4741,-1.3745,...,1.6081,0.3590,1.26,0.3590,1.26,9.0,9.0,9.0,3.0,53.00
623,CoTe2,P-6m2,0.159807,Co,Te,Co,27,1.88,-7.4741,-1.3745,...,1.6081,0.3590,1.26,0.3590,1.26,9.0,9.0,9.0,3.0,53.00


In [16]:
# Attach the atomic-table columns for the second element (Atom2).
# Column names shared by both sides receive pandas' default suffixes:
# '_x' for the columns already in merge1 (Atom1) and '_y' for the new ones (Atom2).
merge2 = merge1.merge(
    df_atoms,
    how='inner',
    left_on='Atom2',
    right_on='Element',
    suffixes=('_x', '_y'),
)
merge2

Unnamed: 0,Material,Space Group,ehull,Atom1,Atom2,Element_x,Z_x,Electronegativity_x,IonizationPotential_x,ElectronAffinity_x,...,r_p_orbital_y,r_d_orbital_y,r_atomic_nonbonded_y,r_valence_lastorbital_y,r_covalent_y,Valence_y,PeriodicColumn_y,PeriodicColumn_upto18_y,NumberUnfilledOrbitals_y,Polarizability_y
0,AgBr2,P-4m2,0.102879,Ag,Br,Ag,47,1.93,-7.8182,-1.2721,...,0.8834,0.2281,1.14,0.8834,1.20,17.0,7.0,17.0,1.0,21.80
1,AlBr2,P-3m1,0.417214,Al,Br,Al,13,1.61,-5.7176,0.0364,...,0.8834,0.2281,1.14,0.8834,1.20,17.0,7.0,17.0,1.0,21.80
2,AsBr2,P-6m2,0.494250,As,Br,As,33,2.18,-9.0974,-1.6025,...,0.8834,0.2281,1.14,0.8834,1.20,17.0,7.0,17.0,1.0,21.80
3,AsBr2,P-3m1,0.604745,As,Br,As,33,2.18,-9.0974,-1.6025,...,0.8834,0.2281,1.14,0.8834,1.20,17.0,7.0,17.0,1.0,21.80
4,AuBr2,P-4m2,0.131970,Au,Br,Au,79,2.54,-9.3842,-2.3142,...,0.8834,0.2281,1.14,0.8834,1.20,17.0,7.0,17.0,1.0,21.80
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
620,VSb2,P-6m2,0.253202,V,Sb,V,23,1.63,-7.2534,-0.2221,...,1.2400,0.4483,1.38,1.2400,1.39,15.0,5.0,15.0,3.0,42.55
621,WSb2,P-6m2,0.380175,W,Sb,W,74,2.36,-8.8329,-0.4374,...,1.2400,0.4483,1.38,1.2400,1.39,15.0,5.0,15.0,3.0,42.55
622,WSb2,P-3m1,0.351708,W,Sb,W,74,2.36,-8.8329,-0.4374,...,1.2400,0.4483,1.38,1.2400,1.39,15.0,5.0,15.0,3.0,42.55
623,ZrSb2,P-6m2,0.333642,Zr,Sb,Zr,40,1.33,-6.6339,-0.0117,...,1.2400,0.4483,1.38,1.2400,1.39,15.0,5.0,15.0,3.0,42.55


In [17]:
# Finalizing: remove the element-symbol columns that were only needed as join keys.
# drop() returns a new frame, so merge2 is left untouched and this cell can be
# re-run safely (an in-place drop on an alias of merge2 would raise KeyError on
# the second run because the columns would already be gone).
df = merge2.drop(columns=['Atom1', 'Atom2', 'Element_x', 'Element_y'])
df.head(10)

Unnamed: 0,Material,Space Group,ehull,Z_x,Electronegativity_x,IonizationPotential_x,ElectronAffinity_x,HOMO_x,LUMO_x,r_s_orbital_x,...,r_p_orbital_y,r_d_orbital_y,r_atomic_nonbonded_y,r_valence_lastorbital_y,r_covalent_y,Valence_y,PeriodicColumn_y,PeriodicColumn_upto18_y,NumberUnfilledOrbitals_y,Polarizability_y
0,AgBr2,P-4m2,0.102879,47,1.93,-7.8182,-1.2721,-4.4022,0.9657,1.3241,...,0.8834,0.2281,1.14,0.8834,1.2,17.0,7.0,17.0,1.0,21.8
1,AlBr2,P-3m1,0.417214,13,1.61,-5.7176,0.0364,-2.712,3.1169,1.1043,...,0.8834,0.2281,1.14,0.8834,1.2,17.0,7.0,17.0,1.0,21.8
2,AsBr2,P-6m2,0.49425,33,2.18,-9.0974,-1.6025,-5.1964,4.1976,0.8558,...,0.8834,0.2281,1.14,0.8834,1.2,17.0,7.0,17.0,1.0,21.8
3,AsBr2,P-3m1,0.604745,33,2.18,-9.0974,-1.6025,-5.1964,4.1976,0.8558,...,0.8834,0.2281,1.14,0.8834,1.2,17.0,7.0,17.0,1.0,21.8
4,AuBr2,P-4m2,0.13197,79,2.54,-9.3842,-2.3142,-5.7531,0.7024,1.2274,...,0.8834,0.2281,1.14,0.8834,1.2,17.0,7.0,17.0,1.0,21.8
5,BaBr2,P-3m1,0.056445,56,0.89,-5.3475,1.0568,-3.2293,-1.9206,2.1967,...,0.8834,0.2281,1.14,0.8834,1.2,17.0,7.0,17.0,1.0,21.8
6,BaBr2,P-4m2,0.244396,56,0.89,-5.3475,1.0568,-3.2293,-1.9206,2.1967,...,0.8834,0.2281,1.14,0.8834,1.2,17.0,7.0,17.0,1.0,21.8
7,BaBr2,P-6m2,0.167063,56,0.89,-5.3475,1.0568,-3.2293,-1.9206,2.1967,...,0.8834,0.2281,1.14,0.8834,1.2,17.0,7.0,17.0,1.0,21.8
8,BiBr2,P-3m1,0.38734,83,2.02,-7.8723,-1.5162,-4.5686,2.1526,0.9899,...,0.8834,0.2281,1.14,0.8834,1.2,17.0,7.0,17.0,1.0,21.8
9,BiBr2,P-6m2,0.297076,83,2.02,-7.8723,-1.5162,-4.5686,2.1526,0.9899,...,0.8834,0.2281,1.14,0.8834,1.2,17.0,7.0,17.0,1.0,21.8


In [18]:
# Write the final dataset to disk as CSV; the row index is written as the first column.
df.to_csv(path_or_buf='AB2_ehull_materials.csv', index=True)