In [2]:
import numpy as np
import matplotlib.pyplot as plt
import sys
sys.path.append('..')
import os
from copy import deepcopy
from scipy.constants import h, c, e
from monty.json import MontyDecoder, MontyEncoder
import json
import pandas as pd

from phasemapy.parser import ICDDEntry, ICDDEntryPreprocessor
from phasemapy.solver import Phase, Sample
from pymatgen.core import Element

chemsys = ['Li', 'Sr', 'Al']
oxide_system = True

In [32]:
def get_dataframe(icdd_entries, keys):
    data = {}
    for key in keys:
        data[key] = [e.as_dict()[key] for e in icdd_entries]
    df = pd.DataFrame(data)
    return df

def plot_merge_xrd(entries_sum,entries_index):
    from scipy.ndimage import gaussian_filter1d
    def smooth_hist(q, amp, bins):
        hist, bin_edges = np.histogram(q, bins=bins, weights=amp)
        smoothed = gaussian_filter1d(hist, 4)
        return smoothed
    bins = np.linspace(min([_.data['xrd'][0][0] for _ in entries_sum]) - 0.01,
                           max([_.data['xrd'][0][-1] for _ in entries_sum]) + 0.01, 1000)
    smooth_xrd_data = []
    for i in entries_index:        
        q, amp = entries_sum[i].data['xrd'][0],entries_sum[i].data['xrd'][1]
        smooth_xrds_i = smooth_hist(q, amp, bins)
        smooth_xrds_i = smooth_xrds_i / np.max(smooth_xrds_i) * 100
        smooth_xrd_data.append(smooth_xrds_i)    
    
    for j in range(len(smooth_xrd_data)):
        plt.plot(bins[0:-1],smooth_xrd_data[j],label = f"{entries_index[j]}+{entries_sum[entries_index[j]].name}+{entries_sum[entries_index[j]].entry_id}")
        plt.legend()

        

In [3]:
#load entry pool: 100 ICDD entries
with open('./data/ICDD_entries_raw.json') as f:
    entries_Li_Sr_Al = json.load(f, cls=MontyDecoder)

entries = [ICDDEntry.from_icdd_json (en) for en in entries_Li_Sr_Al]

In [None]:
precess = ICDDEntryPreprocessor(deepcopy(entries), chemsys, oxide_system)
groups = precess.merge_by_xrd(bin_number=1000, gaussian_filter=4, R_cutoff=0.15)

In [None]:
df = get_dataframe([_ for _ in precess.entries ],
                       ['entry_id', 'name', 'leader'])
# print(df)
df.to_excel("./data/output_candidate_pool.xlsx")

with open('./data/icdd_entries.json', 'w') as f:
    json.dump(precess.entries, f, cls=MontyEncoder)

In [None]:
plot_merge_xrd(entries, groups[0])

In [None]:
plot_merge_xrd(entries, groups[1])

In [4]:
from glob import glob
from pymatgen.io.cif import CifParser
from pymatgen.analysis.diffraction.xrd import XRDCalculator
pdfs = glob('../ICSD/Li_Sr_Al_O/*.cif')
icsd_entries = [ICDDEntry.from_icsd_cif(pdf) for pdf in pdfs]

In [7]:
def check_oxi(comp,chemsys):    
    c1 = comp[Element(chemsys[0])] * np.min(Element(chemsys[0]).common_oxidation_states)\
        + comp[Element(chemsys[1])] * np.min(Element(chemsys[1]).common_oxidation_states)\
        + comp[Element(chemsys[2])] * np.min(Element(chemsys[2]).common_oxidation_states)\
        - comp[Element('O')] * 2
    c2 = comp[Element(chemsys[0])] * np.max(Element(chemsys[0]).common_oxidation_states)\
        + comp[Element(chemsys[1])] * np.max(Element(chemsys[1]).common_oxidation_states)\
        + comp[Element(chemsys[2])] * np.max(Element(chemsys[2]).common_oxidation_states)\
        - comp[Element('O')] * 2

    return c1 * c2 <= 0

In [5]:
icsd_entries[0].composition

Comp: Li5 Al1 O4

In [6]:
precess = ICDDEntryPreprocessor(deepcopy(icsd_entries), chemsys, oxide_system)
precess.get_xrd()
icsd_entries = precess.entries

In [11]:
# def check_oxi(comp):
#         # comp = {el.symbol: comp[el] for el in comp}
#         c1 = comp[Element('Li')] * 2 + comp[Element('Sr')] * 2 + comp[Element('Al')] * 2 - comp[Element('O')] * 2
#         c2 = comp[Element('Li')] * 5 + comp[Element('Sr')] * 4 + comp[Element('Al')] * 5 - comp[Element('O')] * 2

#         return c1 * c2 <= 0

icsd_entries = [_ for _ in icsd_entries if check_oxi(_.composition,chemsys)]
print('[ICSD] after remove weird-valence entries', len(icsd_entries))

[ICSD] after remove weird-valence entries 152


In [30]:
    def get_dataframe(icdd_entries, keys):
        data = {}
        for key in keys:
            data[key] = [e.as_dict()[key] for e in icdd_entries]
        df = pd.DataFrame(data)
        return df

    pdfs = glob('./data/icdd/*.xml')
    icdd_entries = [ICDDEntry.from_icdd_xml(pdf) for pdf in pdfs]
    icdd_entries = [_ for _ in icdd_entries if _.name != 'O2']    

    precess = ICDDEntryPreprocessor(deepcopy(icdd_entries), chemsys, oxide_system)
    df = get_dataframe(precess.entries,
                       ['entry_id', 'name', 'pressure_temperature', 'cross_refs', 'status', 'quality_mark', 'name',
                        'spgr', 'common_name'])

    print('[ICDD] Total (Li-Sr-Al) - O: ', len(icdd_entries))  # Total

    icdd_entries = [_ for _ in icdd_entries if _.status != 'Deleted']
    print('[ICDD] after remove Deleted:', len(icdd_entries))

    icdd_entries = [_ for _ in icdd_entries if _.quality_mark != 'Hypothetical']
    print('[ICDD] after remove Hypothetical:', len(icdd_entries))

    icdd_entries = [_ for _ in icdd_entries if _.quality_mark not in ['Blank', 'Low-Precision']]
    print('[ICDD] after remove Blank/Low-Precision:', len(icdd_entries))

    icdd_entries = [_ for _ in icdd_entries if _.pressure_temperature == 'Ambient']
    print('[ICDD] after remove non-Ambient:', len(icdd_entries))

#     icdd_entries = [_ for _ in icdd_entries if _.structure]
#     print('[ICDD] after remove no-struct entries', len(icdd_entries))

    def check_oxi(comp):        
        c1 = comp[Element('Li')] * 1 + comp[Element('Sr')] * 1 + comp[Element('Al')] * 3 - comp[Element('O')] * 2
        c2 = comp[Element('Li')] * 1 + comp[Element('Sr')] * 2 + comp[Element('Al')] * 3 - comp[Element('O')] * 2

        return c1 * c2 <= 0

    icdd_entries = [_ for _ in icdd_entries if check_oxi(_.composition)]
    print('[ICDD] after remove weird-valence entries', len(icdd_entries))

    precess = ICDDEntryPreprocessor(deepcopy(icdd_entries), chemsys, oxide_system)
    precess.process_frac_name()
    precess.process_disorder()  
    precess.merge_by_cross_ref()
    print('[ICDD] after merging cross-ref entries', len(precess.entries))
    precess.get_xrd()
    precess.merge_by_polymorph(bin_number=1000, gaussian_filter=4, R_cutoff=0.2)
    print('[ICDD] after merging XRD-polymorph entries', len(precess.entries))
    precess.merge_by_xrd(bin_number=1000, gaussian_filter=4, R_cutoff=0.22)
    print('[ICDD] after merging XRD-group entries', len(precess.entries))

    print(len([_ for _ in precess.entries if _.structure.is_ordered]), 'ordered structures')
    print(len([_ for _ in precess.entries if not _.structure.is_ordered]), 'disordered structures')
#     print(len([_ for _ in precess.entries if _.structure.composition.as_dict().keys() == {'V', 'O'}]))



    all_entries = precess.entries
    df = get_dataframe([_ for _ in all_entries ],
                       ['entry_id', 'name', 'pressure_temperature', 'cross_refs', 'status', 'quality_mark', 'name',
                        'spgr', 'common_name', 'leader'])
    print(df)
    df.to_excel('./data/output_candidate_pool.xlsx')

    with open('./data/icdd_entries.json', 'w') as f:
        json.dump(all_entries, f, cls=MontyEncoder)



Structure Summary
Lattice
    abc : 6.302 6.42 4.62
 angles : 90.0 90.0 90.0
 volume : 186.91984079999997
      A : 6.302 0.0 3.8588620641133097e-16
      B : 1.0324147410924007e-15 6.42 3.9311162252630037e-16
      C : 0.0 0.0 4.62
PeriodicSite: Li+ (3.4724, 3.5631, 1.1735) [0.5510, 0.5550, 0.2540]
PeriodicSite: Li+ (5.9806, 3.5631, 1.1735) [0.9490, 0.5550, 0.2540]
PeriodicSite: Li+ (3.4724, 6.0669, 1.1735) [0.5510, 0.9450, 0.2540]
PeriodicSite: Li+ (5.9806, 6.0669, 1.1735) [0.9490, 0.9450, 0.2540]
PeriodicSite: Li+ (2.8296, 2.8569, 3.4465) [0.4490, 0.4450, 0.7460]
PeriodicSite: Li+ (0.3214, 2.8569, 3.4465) [0.0510, 0.4450, 0.7460]
PeriodicSite: Li+ (2.8296, 0.3531, 3.4465) [0.4490, 0.0550, 0.7460]
PeriodicSite: Li+ (0.3214, 0.3531, 3.4465) [0.0510, 0.0550, 0.7460]
PeriodicSite: Li+ (4.7265, 1.6050, 0.9794) [0.7500, 0.2500, 0.2120]
PeriodicSite: Li+ (1.5755, 4.8150, 3.6406) [0.2500, 0.7500, 0.7880]
PeriodicSite: Al3+ (4.7265, 4.8150, 3.5759) [0.7500, 0.7500, 0.7740]
PeriodicSite: Al3+

2

7

In [61]:
def check_oxi(comp,chemsys):    
    c1 = comp[Element(chemsys[0])] * np.min(Element(chemsys[0]).common_oxidation_states)\
        + comp[Element(chemsys[1])] * np.min(Element(chemsys[1]).common_oxidation_states)\
        + comp[Element(chemsys[2])] * np.min(Element(chemsys[2]).common_oxidation_states)\
        - comp[Element('O')] * 2
    c2 = comp[Element(chemsys[0])] * np.max(Element(chemsys[0]).common_oxidation_states)\
        + comp[Element(chemsys[1])] * np.max(Element(chemsys[1]).common_oxidation_states)\
        + comp[Element(chemsys[2])] * np.max(Element(chemsys[2]).common_oxidation_states)\
        - comp[Element('O')] * 2

    return c1 * c2 <= 0

In [6]:

#sys.path.append('/Users/yizhou/PycharmProjects/phasemapy')
import json
import pandas as pd
from copy import deepcopy
from glob import glob

from monty.json import MontyEncoder
from pymatgen.core import Element

from phasemapy.parser import ICDDEntry, ICDDEntryPreprocessor


chemsys = ['Li', 'Sr', 'Al']
oxide_system = True

def main():
    def get_dataframe(icdd_entries, keys):
        data = {}
        for key in keys:
            data[key] = [e.as_dict()[key] for e in icdd_entries]
        df = pd.DataFrame(data)
        return df

    pdfs = glob('./data/icdd/*.xml')
    icdd_entries = [ICDDEntry.from_icdd_xml(pdf) for pdf in pdfs]
    icdd_entries = [_ for _ in icdd_entries if _.name != 'O2']    

    precess = ICDDEntryPreprocessor(deepcopy(icdd_entries), chemsys, oxide_system)
    df = get_dataframe(precess.entries,
                       ['entry_id', 'name', 'pressure_temperature', 'cross_refs', 'status', 'quality_mark', 'name',
                        'spgr', 'common_name'])

    print('[ICDD] Total (Li-Sr-Al) - O: ', len(icdd_entries))  # Total

    icdd_entries = [_ for _ in icdd_entries if _.status != 'Deleted']
    print('[ICDD] after remove Deleted:', len(icdd_entries))

    icdd_entries = [_ for _ in icdd_entries if _.quality_mark != 'Hypothetical']
    print('[ICDD] after remove Hypothetical:', len(icdd_entries))

    icdd_entries = [_ for _ in icdd_entries if _.quality_mark not in ['Blank', 'Low-Precision']]
    print('[ICDD] after remove Blank/Low-Precision:', len(icdd_entries))

    icdd_entries = [_ for _ in icdd_entries if _.pressure_temperature == 'Ambient']
    print('[ICDD] after remove non-Ambient:', len(icdd_entries))

#     icdd_entries = [_ for _ in icdd_entries if _.structure]
#     print('[ICDD] after remove no-struct entries', len(icdd_entries))

    def check_oxi(comp):
        # comp = {el.symbol: comp[el] for el in comp}
        c1 = comp[Element('Li')] * 2 + comp[Element('Sr')] * 2 + comp[Element('Al')] * 2 - comp[Element('O')] * 2
        c2 = comp[Element('Li')] * 5 + comp[Element('Sr')] * 4 + comp[Element('Al')] * 5 - comp[Element('O')] * 2

        return c1 * c2 <= 0

    icdd_entries = [_ for _ in icdd_entries if check_oxi(_.composition)]
    print('[ICDD] after remove weird-valence entries', len(icdd_entries))

    precess = ICDDEntryPreprocessor(deepcopy(icdd_entries), chemsys, oxide_system)
    precess.process_frac_name()
    precess.process_disorder()  
    precess.merge_by_cross_ref()
    print('[ICDD] after merging cross-ref entries', len(precess.entries))
    precess.get_xrd()
    precess.merge_by_polymorph(bin_number=1000, gaussian_filter=4, R_cutoff=0.2)
    print('[ICDD] after merging XRD-polymorph entries', len(precess.entries))
    s=precess.merge_by_xrd(bin_number=1000, gaussian_filter=4, R_cutoff=0.22)
    print('[ICDD] after merging XRD-group entries', len(precess.entries),s)

    print(len([_ for _ in precess.entries if _.structure.is_ordered]), 'ordered structures')
    print(len([_ for _ in precess.entries if not _.structure.is_ordered]), 'disordered structures')
#     print(len([_ for _ in precess.entries if _.structure.composition.as_dict().keys() == {'V', 'O'}]))



    all_entries = precess.entries
    df = get_dataframe([_ for _ in all_entries ],
                       ['entry_id', 'name', 'pressure_temperature', 'cross_refs', 'status', 'quality_mark', 'name',
                        'spgr', 'common_name', 'leader'])
    print(df)
    df.to_excel('./data/output_candidate_pool.xlsx')

    with open('./data/icdd_entries.json', 'w') as f:
        json.dump(all_entries, f, cls=MontyEncoder)

if __name__ == "__main__":
    main()





[ICDD] Total (Li-Sr-Al) - O:  37
[ICDD] after remove Deleted: 35
[ICDD] after remove Hypothetical: 35
[ICDD] after remove Blank/Low-Precision: 35
[ICDD] after remove non-Ambient: 29
[ICDD] after remove weird-valence entries 26
[ICDD] after merging cross-ref entries 22
[ICDD] after merging XRD-polymorph entries 20
[ICDD] after merging XRD-group entries 19 [[6, 19]]
13 ordered structures
6 disordered structures
       entry_id          name pressure_temperature                                         cross_refs     status quality_mark           spgr                          common_name       leader
0   00-026-0031        Al2 O3              Ambient                                      [04-007-2253]    Primary      Indexed    P63mc (186)                            κ`-Al2 O3  00-026-0031
1   00-050-0741        Al2 O3              Ambient                         [00-010-0425, 04-007-2478]    Primary      Indexed    Fd-3m (227)                             γ-Al2 O3  00-050-0741
2   01-074-223

In [14]:
#load entry pool: 19 ICDD entries
with open('./data/icdd_entries.json') as f:
    entries_icdd = json.load(f, cls=MontyDecoder)



In [15]:
entries_nostruct = [_ for _ in entries_icdd if _.structure==None]

In [21]:
if len(entries_nostruct) == 0:
    df = get_dataframe([_ for _ in all_entries ],
                   ['entry_id', 'name', 'pressure_temperature', 'cross_refs', 'status', 'quality_mark', 'name',
                    'spgr', 'common_name', 'leader'])
    print(df)
    df.to_excel('./data/output_candidate_pool.xlsx')

    with open('./data/icdd_entries.json', 'w') as f:
        json.dump(all_entries, f, cls=MontyEncoder)

else:
    pass

1


In [23]:
import qmpy_rester as qr

In [28]:
#return list of data
with qr.QMPYRester() as q:
    kwargs={
        "element_set":"(Fe-Mn),O",
        "stability":"0",
        "natom":"<10",
    
    }
        
    list_of_data = q.get_oqmd_phases(**kwargs)
    
list_of_data

Your filters are:
    filter=element_set=(Fe-Mn),O AND stability=0
Proceed? [Y/n]:Y


{'links': {'next': 'http://oqmd.org/oqmdapi/formationenergy?filter=element_set%3D%28Fe-Mn%29%2CO+AND+stability%3D0&limit=50&offset=50',
  'previous': None,
  'base_url': {'href': 'https://oqmd.org/oqmdapi',
   'meta': {'_oqmd_version': '1.0'}}},
 'resource': {},
 'data': [{'name': 'MnCuO2',
   'entry_id': 5833,
   'calculation_id': 10650,
   'icsd_id': 30379,
   'formationenergy_id': 4066863,
   'duplicate_entry_id': 5833,
   'composition': 'Cu1 Mn1 O2',
   'composition_generic': 'ABC2',
   'prototype': 'NaNiO2(mS8)',
   'spacegroup': 'C2/m',
   'volume': 46.2351,
   'ntypes': 3,
   'natoms': 4,
   'unit_cell': [[0.0, -2.921685, 0.0],
    [2.771423, 1.460842, -0.017567],
    [-1.526772, 0.0, 5.719667]],
   'sites': ['Cu @ 0.5 1 0.5',
    'Mn @ 0 0 0',
    'O @ 0.41 0.82 0.182',
    'O @ 0.59 0.18 0.818'],
   'band_gap': 0.376,
   'delta_e': -1.54633147891409,
   'stability': 0.0,
   'fit': 'standard',
   'calculation_label': 'static'},
  {'name': 'MnH2O2',
   'entry_id': 4364,
   'calc

In [26]:
import qmpy_rester as qr

## Return list of data
with qr.QMPYRester() as q:
    kwargs = {
        "element_set": "(Fe-Mn),O",      # composition include (Fe OR Mn) AND O
        "stability": "0",            # hull distance smaller than -0.1 eV
        "natom": "<10",                  # number of atoms less than 10
        }
    list_of_data = q.get_oqmd_phases(**kwargs)

## Return data of a single oqmd phase
with qr.QMPYRester() as q:
    phase = q.get_oqmd_phase_by_id(fe_id=4061139, fields='!sites') # Default: fields=None

Your filters are:
    filter=element_set=(Fe-Mn),O AND stability=0
Proceed? [Y/n]:Y


In [29]:
phase

{'name': 'CsHoSiS4',
 'entry_id': 25672,
 'calculation_id': 1352933,
 'icsd_id': 412748,
 'formationenergy_id': 4061139,
 'duplicate_entry_id': 25672,
 'composition': 'Cs1 Ho1 S4 Si1',
 'composition_generic': 'ABCD4',
 'prototype': 'CsSmGeSe4',
 'spacegroup': 'P212121',
 'volume': 760.627,
 'ntypes': 4,
 'natoms': 28,
 'unit_cell': [[6.36095, 0.0, 0.0],
  [0.0, 6.704647, 0.0],
  [0.0, 0.0, 17.835028]],
 'band_gap': 3.024,
 'delta_e': -1.60893999919811,
 'stability': 0.0,
 'fit': 'standard',
 'calculation_label': 'static'}