# Get Data
Retrieve data from the Materials Project through its API.

In [2]:
import sys
import os
# Current working directory of the kernel; later cells resolve the module
# search path and the output CSV location relative to it.
current_dir = os.getcwd()
current_dir

'c:\\Users\\Fortyfour\\Desktop\\graduation_design\\Scripts\\Data'

In [3]:
# Make the parent of the working directory importable so that project-local
# modules can be found (presumably the `Utools` package imported below).
# The path is normalized and only inserted when it is not already present,
# so re-running this cell does not pile up duplicate sys.path entries.
project_root = os.path.abspath(os.path.join(current_dir, os.pardir))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

In [4]:
from mp_api.client import MPRester
import pandas as pd
from matplotlib import pyplot as plt

from ydata_profiling import ProfileReport
from Utools.ML_figures.figures import element_prevalence
%matplotlib inline
%config InlineBackend.figure_format='retina'

In [5]:
# Materials Project API key, read from the MP_API_KEY environment variable so
# that the secret is never written into the notebook (None when it is unset).
API_KEY = os.environ.get("MP_API_KEY")

In [6]:
# Ask the Materials Project summary endpoint which fields can be requested,
# so that the ones needed for the dataset can be picked from the list.
with MPRester(API_KEY) as mpr:
    # The result stays bound as `available_fields` after the client context exits
    available_fields = mpr.materials.summary.available_fields

In [7]:
available_fields

['builder_meta',
 'nsites',
 'elements',
 'nelements',
 'composition',
 'composition_reduced',
 'formula_pretty',
 'formula_anonymous',
 'chemsys',
 'volume',
 'density',
 'density_atomic',
 'symmetry',
 'property_name',
 'material_id',
 'deprecated',
 'deprecation_reasons',
 'last_updated',
 'origins',
 'structure',
 'task_ids',
 'uncorrected_energy_per_atom',
 'energy_per_atom',
 'formation_energy_per_atom',
 'energy_above_hull',
 'is_stable',
 'equilibrium_reaction_energy_per_atom',
 'decomposes_to',
 'xas',
 'grain_boundaries',
 'band_gap',
 'cbm',
 'vbm',
 'efermi',
 'is_gap_direct',
 'is_metal',
 'es_source_calc_id',
 'bandstructure',
 'dos',
 'dos_energy_up',
 'dos_energy_down',
 'is_magnetic',
 'ordering',
 'total_magnetization',
 'total_magnetization_normalized_vol',
 'total_magnetization_normalized_formula_units',
 'num_magnetic_sites',
 'num_unique_magnetic_sites',
 'types_of_magnetic_species',
 'bulk_modulus',
 'shear_modulus',
 'universal_anisotropy',
 'homogeneous_poisson

In [8]:
# Fields requested for every material in the summary search below
need_fields = [
    # identification
    'material_id',
    'formula_pretty',
    'composition',
    # electronic properties
    'band_gap',
    'is_gap_direct',
    # energetics
    'formation_energy_per_atom',
    'energy_above_hull',
    # cell / geometry
    'volume',
    'density',
    'density_atomic',
    'symmetry',
    'nsites',
    'structure',
]

In [9]:
# Selection criteria for the query: non-metallic materials whose band gap lies
# in the 0-6 eV range and whose energy above hull lies between 0 and 0.1
# (i.e. stable or close to the hull).
search_criteria = {
    'is_metal': False,
    'band_gap': (0, 6),
    'energy_above_hull': (0, 0.1),
}

with MPRester(API_KEY) as mpr:
    # Only the fields listed in `need_fields` are requested for each match
    docs = mpr.materials.summary.search(fields=need_fields, **search_criteria)

Retrieving SummaryDoc documents:   0%|          | 0/58919 [00:00<?, ?it/s]

In [10]:
docs[0]

[4m[1mMPDataDoc<SummaryDoc>[0;0m[0;0m(
[1mnsites[0;0m=5,
[1mcomposition[0;0m=Composition('Ac2 O3'),
[1mformula_pretty[0;0m='Ac2O3',
[1mvolume[0;0m=91.51122386046316,
[1mdensity[0;0m=9.109129867325775,
[1mdensity_atomic[0;0m=18.30224477209263,
[1msymmetry[0;0m=SymmetryData(crystal_system=<CrystalSystem.trig: 'Trigonal'>, symbol='P-3m1', number=164, point_group='-3m', symprec=0.1, angle_tolerance=5.0, version='2.5.0'),
[1mmaterial_id[0;0m=MPID(mp-11107),
[1mstructure[0;0m=Structure Summary
Lattice
    abc : 4.09548748 4.09548852432923 6.29988201
 angles : 90.0 90.0 119.99999994338978
 volume : 91.51122386046316
      A : np.float64(4.09548748) np.float64(-1e-08) np.float64(-0.0)
      B : np.float64(-2.04774425) np.float64(3.54679711) np.float64(0.0)
      C : np.float64(-0.0) np.float64(0.0) np.float64(6.29988201)
    pbc : True True True
PeriodicSite: Ac (2.048, 1.182, 4.755) [0.6667, 0.3333, 0.7548]
PeriodicSite: Ac (-2.388e-06, 2.365, 1.545) [0.3333, 0.6667, 0.2

In [11]:
# Convert the data to a pandas DataFrame
doc_dict = {
    'material_id': [doc.material_id for doc in docs],
    'formula_pretty': [doc.formula_pretty for doc in docs],
    'composition': [doc.composition for doc in docs],
    'nsites': [doc.nsites for doc in docs],
    'crystal_system': [doc.symmetry.crystal_system for doc in docs],
    'space_group': [doc.symmetry.number for doc in docs],
    'point_group': [doc.symmetry.point_group for doc in docs],
    'volume': [doc.volume for doc in docs],
    'density': [doc.density for doc in docs],
    'density_atomic': [doc.density_atomic for doc in docs],
    'formation_energy_per_atom': [doc.formation_energy_per_atom for doc in docs],
    'energy_above_hull': [doc.energy_above_hull for doc in docs],
    'is_gap_direct': [doc.is_gap_direct for doc in docs],
    # 'structure': [doc.structure for doc in docs], # when we need the structure, we can use the MPRester to get it separately
    'band_gap': [doc.band_gap for doc in docs]
}

raw_df = pd.DataFrame(doc_dict)

In [12]:
raw_df.head()

Unnamed: 0,material_id,formula_pretty,composition,nsites,crystal_system,space_group,point_group,volume,density,density_atomic,formation_energy_per_atom,energy_above_hull,is_gap_direct,band_gap
0,mp-11107,Ac2O3,"(Ac, O)",5,Trigonal,164,-3m,91.511224,9.10913,18.302245,-3.737668,0.0,False,3.5226
1,mp-32800,Ac2S3,"(Ac, S)",40,Tetragonal,122,-42m,1118.407852,6.535149,27.960196,-2.492486,0.0,False,2.2962
2,mp-977351,Ac2S3,"(Ac, S)",10,Trigonal,167,-3m,328.464893,5.562971,32.846489,-2.439787,0.0527,False,3.0275
3,mp-867311,AcAgTe2,"(Ac, Ag, Te)",4,Cubic,225,m-3m,122.518406,7.997421,30.629602,-0.996232,0.0,False,0.0794
4,mp-1183115,AcAlO3,"(Ac, Al, O)",5,Cubic,221,m-3m,57.451413,8.72823,11.490283,-3.690019,0.0,True,4.1024


In [14]:
raw_df.shape

(58919, 14)

In [13]:
# Save the raw data to a csv file (without the DataFrame index).
file_path = os.path.join(current_dir, '../Data/mp_raw_data.csv')
# Create the target directory if it is missing, so the save does not fail when
# the notebook is run from a different working directory. The path is
# normalized first because it contains a '..' component.
os.makedirs(os.path.dirname(os.path.abspath(file_path)), exist_ok=True)
raw_df.to_csv(file_path, index=False)

- A total of 58919 entries were retrieved from the Materials Project (MP).