In [1]:
import pandas as pd
import numpy as np
from pymatgen.core import Composition
from utils.Featurizor import Featurizor
from sklearn.ensemble import VotingClassifier
# No warnings about setting value on copy of slice
pd.options.mode.chained_assignment = None

# Display up to 60 columns of a dataframe
pd.set_option('display.max_columns', 60)

# Matplotlib visualization
import matplotlib.pyplot as plt
from matplotlib import rcParams
%matplotlib inline

# Internal ipython tool for setting figure size
from IPython.core.pylabtools import figsize

# Seaborn for visualization
import seaborn as sns

# Splitting data into training and testing
from sklearn.model_selection import train_test_split
import warnings

warnings.filterwarnings('ignore')

# Global matplotlib text settings applied to every figure in the notebook:
# STIX math glyphs, Times New Roman as the text face, 24 pt base font size,
# and an ASCII hyphen instead of the Unicode minus sign on tick labels.
config = {
    "mathtext.fontset": 'stix',
    "font.family": 'sans-serif',
    "font.sans-serif": ['Times New Roman'],
    "font.size": 24,
    'axes.unicode_minus': False
}
rcParams.update(config)
# NOTE: `rcParams` (imported from matplotlib) and `plt.rcParams` are the same
# object, so 'axes.unicode_minus' is already set by the update above and does
# not need to be assigned a second time.

# Font-size presets shared by the size-related settings below.
large = 22
med = 16
small = 12

# Per-element sizes. 'axes.titlesize' used to be listed twice in this dict
# (first `large`, then `med`); Python keeps only the last occurrence of a
# duplicated key, so `med` is the value that was actually in effect and is the
# one kept here.
params = {
    'legend.fontsize': med,
    'figure.figsize': (8, 6),
    'axes.labelsize': med,
    'axes.titlesize': med,
    'xtick.labelsize': med,
    'ytick.labelsize': med,
    'figure.titlesize': large
}
plt.rcParams.update(params)
plt.rcParams['figure.dpi'] = 300

# Fixed seed value; presumably passed as `random_state` by later cells -- confirm.
seed = 42

In [2]:
import os

from mp_api.client import MPRester

# Read the Materials Project API key from the environment instead of embedding
# it in the notebook. A key that has been committed in a notebook should be
# treated as leaked and regenerated on the Materials Project dashboard.
mp_api_key = os.environ.get('MP_API_KEY')
if not mp_api_key:
    raise RuntimeError(
        'Set the MP_API_KEY environment variable to your Materials Project API key.'
    )
m = MPRester(mp_api_key)

# Query the summary endpoint once per (space group, anion) pair. The formula
# pattern uses `*` wildcards for the two cation species, with stoichiometry
# 1:2:4 against the anion `c`.
spacegroup_numbers = [227]
C = ['O', 'S', 'Se', 'Te', 'Cl', 'F']

# One plain dict per returned document, ready to be turned into a DataFrame.
result = []
for spacegroup in spacegroup_numbers:
    for c in C:
        docs = m.summary.search(formula='*1*2{}4'.format(c), spacegroup_number=spacegroup)
        # Iterating an empty result is a no-op, so no length check is needed.
        for doc in docs:
            doc_dict = dict(doc)
            # Flatten the space-group number out of the nested symmetry object
            # so it becomes its own column.
            doc_dict['spacegroup_number'] = doc.symmetry.number
            result.append(doc_dict)

Retrieving SummaryDoc documents:   0%|          | 0/170 [00:00<?, ?it/s]

Retrieving SummaryDoc documents:   0%|          | 0/106 [00:00<?, ?it/s]

Retrieving SummaryDoc documents:   0%|          | 0/42 [00:00<?, ?it/s]

Retrieving SummaryDoc documents:   0%|          | 0/4 [00:00<?, ?it/s]

Retrieving SummaryDoc documents:   0%|          | 0/1 [00:00<?, ?it/s]

Retrieving SummaryDoc documents:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
# Tabulate the query results, keep only the four fields used from here on, and
# give them short display names (`$y$` holds the direct-gap flag).
spinels = (
    pd.DataFrame(result)
    .loc[:, ['formula_pretty', 'is_gap_direct', 'band_gap', 'structure']]
    .rename(columns={
        'formula_pretty': 'formula',
        'is_gap_direct': r'$y$',
        'band_gap': 'band gap',
    })
)
spinels

Unnamed: 0,formula,$y$,band gap,structure
0,NaMn2O4,False,0.0000,"[[4.229859 4.229859 4.229859] Na, [6.3447885 6..."
1,Mg2FeO4,False,0.0000,"[[5.31263625 3.18758175 3.18758175] Mg, [5.312..."
2,Gd2CdO4,True,0.0000,"[[3.65343075 3.65343075 6.08905125] Gd, [6.089..."
3,CaIn2O4,True,2.0208,"[[0.98066019 3.25582617 1.83688805] Ca, [ 2.94..."
4,Si(NiO2)2,True,3.5770,"[[0.81930158 0.57942515 1.41926313] Si, [5.734..."
...,...,...,...,...
320,Dy2CdTe4,False,0.0000,"[[7.90082125 4.74049275 4.74049275] Dy, [4.740..."
321,Cr2CuTe4,False,0.0000,"[[4.57184554 3.23277792 7.91867596] Cr, [4.571..."
322,Li2ZnCl4,False,4.1422,"[[3.85676751 3.85677152 1.28550518] Li, [3.856..."
323,LiAg2F4,False,0.0000,"[[0. 0. 0.] Li, [2.3405165 2.3405165 2.3405165..."


In [4]:
# Show the Materials Project database release served by this client, so the
# provenance of the downloaded data is recorded in the notebook output.
m.get_database_version()

'2022.10.28'

In [5]:
import os
import pickle

from mp_api.client import MPRester

# Read the Materials Project API key from the environment instead of embedding
# it in the notebook. A key that has been committed in a notebook should be
# treated as leaked and regenerated on the Materials Project dashboard.
mp_api_key = os.environ.get('MP_API_KEY')
if not mp_api_key:
    raise RuntimeError(
        'Set the MP_API_KEY environment variable to your Materials Project API key.'
    )
m = MPRester(mp_api_key)

# Download every summary document, restricted to the fields used below.
# `symmetry` is requested only so the space-group number can be extracted.
# `all_fields=True` is not passed: it is the documented default and it
# contradicted the explicit `fields` list.
docs = m.summary.search(
    fields=['formula_pretty', 'symmetry', 'band_gap', 'is_gap_direct', 'structure']
)

# One plain dict per document; iterating an empty result is a no-op, so no
# length check is needed.
result = []
for doc in docs:
    doc_dict = dict(doc)
    # Flatten the space-group number out of the nested symmetry object so it
    # becomes its own column.
    doc_dict['spacegroup_number'] = doc.symmetry.number
    result.append(doc_dict)
mp_data = pd.DataFrame(result)

# Persist the table so the download does not have to be repeated; the file can
# be read back with `pickle.load` (only load pickles you created yourself).
with open("mp_data.pickle", 'wb') as file:
    pickle.dump(mp_data, file)

Retrieving SummaryDoc documents:   0%|          | 0/154718 [00:00<?, ?it/s]

In [6]:
# Display the downloaded table (pandas truncates the rendered rows).
mp_data

Unnamed: 0,formula_pretty,symmetry,structure,band_gap,is_gap_direct,fields_not_requested,spacegroup_number
0,LiCaPb,crystal_system=<CrystalSystem.hex_: 'Hexagonal...,"[[ 2.47450345 -1.42865846 8.81820568] Li, [2....",0.0000,False,"[builder_meta, nsites, elements, nelements, co...",187
1,Li7Mn4CoO12,crystal_system=<CrystalSystem.tri: 'Triclinic'...,"[[0.28750304 2.76071346 1.40491689] Li, [1.766...",0.7477,False,"[builder_meta, nsites, elements, nelements, co...",1
2,K2CuF4,crystal_system=<CrystalSystem.tet: 'Tetragonal...,[[2.68765510e-05 2.81728926e-05 1.08914839e+01...,0.0000,False,"[builder_meta, nsites, elements, nelements, co...",142
3,Li2Mn3Cr3O12,crystal_system=<CrystalSystem.mono: 'Monoclini...,"[[4.4015725 0. 0. ] Li, [0. ...",0.0927,False,"[builder_meta, nsites, elements, nelements, co...",10
4,FeB4,crystal_system=<CrystalSystem.ortho: 'Orthorho...,"[[2.25478502 0. 0. ] Fe, [0. ...",0.0000,False,"[builder_meta, nsites, elements, nelements, co...",58
...,...,...,...,...,...,...,...
154713,Al(CuO2)2,crystal_system=<CrystalSystem.tet: 'Tetragonal...,"[[0. 0. 0.] Al, [-2.22044605e-16 2.60403300e+...",0.0000,False,"[builder_meta, nsites, elements, nelements, co...",88
154714,Ba(ZnP)2,crystal_system=<CrystalSystem.tet: 'Tetragonal...,"[[0. 0. 0.] Ba, [0.72520071 2.88736829 2.49463...",0.0000,False,"[builder_meta, nsites, elements, nelements, co...",139
154715,Ho2CuPt,crystal_system=<CrystalSystem.cubic: 'Cubic'> ...,"[[1.71899957 1.71899957 1.71899957] Ho, [5.156...",0.0000,False,"[builder_meta, nsites, elements, nelements, co...",225
154716,YGeBiO5,crystal_system=<CrystalSystem.ortho: 'Orthorho...,"[[ 0.04538752 7.18036088 14.60815859] Y, [5.3...",3.4480,False,"[builder_meta, nsites, elements, nelements, co...",61


In [7]:
# Reduce the table to the four columns used from here on and give them short
# display names (`$y$` holds the direct-gap flag).
#
# Renaming first and selecting by the *new* names keeps this cell idempotent:
# `rename` ignores labels that are not present, so re-running the cell after
# `mp_data` has already been converted is a no-op instead of a KeyError on
# 'formula_pretty'.
mp_data = mp_data.rename(columns={
    'formula_pretty': 'formula',
    'is_gap_direct': r'$y$',
    'band_gap': 'band gap',
})[['formula', r'$y$', 'band gap', 'structure']]

In [8]:
# Display the table again to check the reduced, renamed column set.
mp_data

Unnamed: 0,formula,$y$,band gap,structure
0,LiCaPb,False,0.0000,"[[ 2.47450345 -1.42865846 8.81820568] Li, [2...."
1,Li7Mn4CoO12,False,0.7477,"[[0.28750304 2.76071346 1.40491689] Li, [1.766..."
2,K2CuF4,False,0.0000,[[2.68765510e-05 2.81728926e-05 1.08914839e+01...
3,Li2Mn3Cr3O12,False,0.0927,"[[4.4015725 0. 0. ] Li, [0. ..."
4,FeB4,False,0.0000,"[[2.25478502 0. 0. ] Fe, [0. ..."
...,...,...,...,...
154713,Al(CuO2)2,False,0.0000,"[[0. 0. 0.] Al, [-2.22044605e-16 2.60403300e+..."
154714,Ba(ZnP)2,False,0.0000,"[[0. 0. 0.] Ba, [0.72520071 2.88736829 2.49463..."
154715,Ho2CuPt,False,0.0000,"[[1.71899957 1.71899957 1.71899957] Ho, [5.156..."
154716,YGeBiO5,False,3.4480,"[[ 0.04538752 7.18036088 14.60815859] Y, [5.3..."
