# Searching through Core-level data

Welcome! Here you can search the database for core-level shifts and binding energies from core-hole calculations, which you can then visualize and export. The GUI supports one fairly simple, specific query. Enjoy! Created @ PGI-1 by Jens Broeder

In [None]:
%load_ext autoreload
%autoreload 2
#%matplotlib notebook
%matplotlib inline

In [None]:
from aiida import load_profile
load_profile()

In [None]:
from aiida.orm.querybuilder import QueryBuilder
from aiida.orm import WorkChainNode
from aiida.orm import CalcJobNode
from aiida.orm import Dict

In [None]:
from base64 import b64encode
#import StringIO
from tempfile import NamedTemporaryFile
import numpy as np
import ipywidgets as ipw
import matplotlib.pyplot as plt
from IPython.display import display, clear_output
import ase
from ase.io import *
from ase.data import covalent_radii, atomic_numbers
from ase.data.colors import cpk_colors
from ase.neighborlist import NeighborList

# Conversion factor from Hartree to electron volt; energies read from the
# workchain output nodes are multiplied by it before being stored/displayed.
htr2ev = 27.2113862459

In [None]:
############################   START OF PREPROCESSING   ###############################

In [None]:
# Version stamp of the preprocessing logic. Workchains whose
# 'preprocess_version' extra is missing or smaller than this value are
# (re)processed by preprocess_newbies(), which then stores this value.
PREPROCESS_VERSION = 1.07

def preprocess_newbies():
    """Preprocess all ``fleur_initial_cls_wc`` workchains that still need it.

    A workchain needs preprocessing when it has no ``preprocess_version``
    extra or when that extra is smaller than ``PREPROCESS_VERSION``.
    Workchains that are not sealed yet are skipped. For every processed
    workchain the extras ``preprocess_successful`` and ``preprocess_version``
    are set; on failure the error text is stored in ``preprocess_error``
    and the loop continues with the next workchain.
    """
    qb = QueryBuilder()
    qb.append(WorkChainNode, filters={
        'attributes.process_label': 'fleur_initial_cls_wc',
        'or': [
            {'extras': {'!has_key': 'preprocess_version'}},
            {'extras.preprocess_version': {'<': PREPROCESS_VERSION}},
        ],
    })

    # all() on purpose: iterall() would interfere with set_extra()
    result = qb.all()
    print('Found %d entries for query in db.' % len(result))
    for row in result:
        node = row[0]
        if not node.is_sealed:
            print("Skipping underway workchain PK %d" % node.pk)
            continue
        if 'obsolete' not in node.extras_keys():
            node.set_extra('obsolete', False)
        try:
            preprocess_one(node)
            node.set_extra('preprocess_successful', True)
            node.set_extra('preprocess_version', PREPROCESS_VERSION)
            print("Preprocessed PK %d" % node.pk)
        except Exception as exc:
            # Best effort: record the failure on the node and keep going, so
            # one broken workchain does not block the whole search.
            node.set_extra('preprocess_successful', False)
            node.set_extra('preprocess_error', str(exc))
            node.set_extra('preprocess_version', PREPROCESS_VERSION)
            print("Failed to preprocess PK %d: %s" % (node.pk, exc))

In [None]:
def preprocess_one(workcalc):
    """Cache the data needed by the search table in the extras of *workcalc*.

    Sets the extras ``formula``, ``structure_description``,
    ``thumbnail_struc``, ``fermi_energy``, ``space_group``,
    ``structure_uuid`` and ``elements`` on the workchain node, ``bandgap``
    only if a value was extracted, and ``thumbnail`` on the input structure.

    :param workcalc: workchain node with a ``structure`` input
    """
    from aiida_fleur.tools.common_fleur_wf_util import get_natoms_element
    # TODO: do we really want to set all the extra keys?
    # Pro: it is way faster, con: it spams the extras
    # TODO: some query features here are duplicated and could be replaced
    # by QueryBuilder functionality ('contains', ...)

    def get_calc_by_label(workcalc, label):
        """Return the single CalcJobNode with *label* created by *workcalc*.

        Currently only referenced by the commented-out checks further down.
        NOTE(review): ``output_of`` is an old QueryBuilder keyword; confirm
        it is still accepted by the installed AiiDA version before use.
        """
        qb = QueryBuilder()
        qb.append(WorkChainNode, filters={'uuid': workcalc.uuid})
        qb.append(CalcJobNode, output_of=WorkChainNode, filters={'label': label})
        # process label?
        if qb.count() != 1:
            raise Exception("Could not find %s calculation." % label)
        return qb.first()[0]

    # Formula
    structure = workcalc.inputs.structure
    ase_struct = structure.get_ase()
    formula = ase_struct.get_chemical_formula()
    workcalc.set_extra('formula', formula)
    workcalc.set_extra('structure_description', structure.description)

    # thumbnail
    thumbnail = render_thumbnail(ase_struct)
    if isinstance(thumbnail, bytes):
        # extras hold text; base64 output is pure ASCII
        thumbnail = thumbnail.decode('ascii')
    structure.set_extra('thumbnail', thumbnail)
    workcalc.set_extra('thumbnail_struc', thumbnail)

    infos = extract_information(workcalc)
    gap = infos.get('bandgap', None)
    if gap is not None:
        workcalc.set_extra('bandgap', gap)
    workcalc.set_extra('fermi_energy', infos.get('fermi_energy'))
    workcalc.set_extra('space_group', infos.get('space_group'))
    workcalc.set_extra('structure_uuid', infos.get('structure_uuid'))

    # store a real list: a dict keys view is not a list on Python 3
    elements = get_natoms_element(formula)
    workcalc.set_extra('elements', list(elements.keys()))
    
    # ensure all steps succeed
    #all_steps = ['fleur_scf_wc']

    #for label in all_steps:
    #    calc = get_calc_by_label(workcalc, label)
    #    if calc.get_state() != 'FINISHED':
    #        raise(Exception("Calculation %s in state %s."%(label, calc.get_state())))
    #    if "aiida.out" not in calc.out.retrieved.get_folder_list():
    #        raise(Exception("Calculation %s did not retrive aiida.out"%label))
    #    #fn = calc.out.retrieved.get_abs_path("aiida.out")
    #    #content = open(fn).read()
    #    #if "JOB DONE." not in content:
    #    #    raise(Exception("Calculation %s did not print JOB DONE."%label))
    
    # energies
    #scf_calc = get_calc_by_label(workcalc, "scf")
    #assert scf_calc.res['fermi_energy_units'] == 'eV'
    #fermi_energy = scf_calc.res['fermi_energy']
    #assert scf_calc.res['energy_units'] == 'eV'
    #workcalc.set_extra('total_energy', scf_calc.res['energy'])
    #workcalc.set_extra('opt_structure_uuid', scf_calc.inp.structure.uuid)
    
    # outputnode
    #wc_outputpara_dict = workcalc.out.output_parameters
    
    # gap
    #bandgap
    #workcalc.set_extra('gap', gap)
        
    # vacuum level
    #export_hartree_calc = get_calc_by_label(workcalc, "export_hartree")
    #fn = export_hartree_calc.out.retrieved.get_abs_path("vacuum_hartree.dat")
    #data = np.loadtxt(fn)
    #vacuum_level = np.mean(data[:,2]) * 27.211385 * 0.5
    #workcalc.set_extra('vacuum_level', vacuum_level)
    
    # store shifted energies
    #workcalc.set_extra('fermi_energy', fermi_energy - vacuum_level)
    #if is_insulator:
    #    workcalc.set_extra('homo', homo - vacuum_level)
    #    workcalc.set_extra('lumo', lumo - vacuum_level)
    #else:
    #    workcalc.set_extra('homo', fermi_energy - vacuum_level)
    #    workcalc.set_extra('lumo', fermi_energy - vacuum_level)

In [None]:
def set_extract_information(workcalc):
    """Placeholder without any effect; *workcalc* is ignored."""
    return None

def extract_information(workcalc):
    """Collect structure and energy information of a workchain node.

    :param workcalc: workchain node with a ``structure`` input
    :return: dict that always contains ``space_group``, ``structure_uuid``
        and ``process_label``. ``bandgap`` and ``fermi_energy`` are added
        only when the workchain's output parameter node exists and holds a
        numeric value for them; they are multiplied by ``htr2ev``
        (presumably the workchain reports them in Hartree -- confirm).
    """
    from aiida_fleur.tools.StructureData_util import get_spacegroup

    structure = workcalc.inputs.structure
    plabel = workcalc.get_attribute('process_label')
    return_dict = {
        'space_group': get_spacegroup(structure),
        'structure_uuid': structure.uuid,
        'process_label': plabel,
    }

    # name of the output parameter node per supported workchain type
    output_links = {
        'fleur_initial_cls_wc': 'output_initial_cls_wc_para',
        'fleur_corehole_wc': 'output_corehole_wc_para',
    }
    link_name = output_links.get(plabel)
    if link_name is None:
        # unknown workchain type: nothing more can be extracted
        return return_dict
    try:
        wc_outputpara_dict = getattr(workcalc.outputs, link_name).get_dict()
    except Exception:  # output node missing, i.e. the workchain failed
        return return_dict

    for key in ('bandgap', 'fermi_energy'):
        value = wc_outputpara_dict.get(key)
        # skip missing or non-numeric entries instead of failing on '*'
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return_dict[key] = value * htr2ev

    return return_dict

In [None]:
def render_thumbnail(atoms):
    """Render *atoms* as a PNG image and return it base64-encoded.

    :param atoms: structure object accepted by ``ase.io.write``
    :return: base64 text (``str``) of the PNG, usable in a ``data:`` URI
    """
    # ase.io.write does not accept StringIO, so go through a temporary file
    with NamedTemporaryFile() as tmp:
        ase.io.write(tmp.name, atoms, format='png')
        with open(tmp.name, "rb") as png_file:
            raw = png_file.read()
    # decode: on Python 3 b64encode returns bytes, which would show up as
    # "b'...'" when formatted into HTML
    return b64encode(raw).decode('ascii')

def render_thumbnail2(ase_struct):
    """Draw a simple 2D ball-and-stick picture of *ase_struct* with matplotlib.

    The structure is repeated twice along the first cell vector, atoms are
    drawn as circles in the x-y plane and neighbours (by covalent radii) are
    connected with half-bonds in the colour of the respective atom.

    :param ase_struct: ASE atoms object
    :return: base64 text (``str``) of the rendered PNG
    """
    from io import BytesIO  # PNG data is binary

    s = ase_struct.repeat((2, 1, 1))
    cov_radii = [covalent_radii[a.number] for a in s]
    nl = NeighborList(cov_radii, bothways=True, self_interaction=False)
    nl.update(s)

    fig, ax = plt.subplots()
    try:
        ax.set_aspect(1)
        ax.set_xlim([0, s.cell[0][0]])
        ax.set_ylim([5, s.cell[1][1] - 5])
        ax.set_facecolor((0.85, 0.85, 0.85))
        ax.get_yaxis().set_visible(False)
        # booleans: the strings 'off' are truthy and would switch ticks on
        ax.tick_params(axis='x', which='both', bottom=False, top=False,
                       labelbottom=False)

        for at in s:
            # circles
            x0, y0, _ = at.position
            n = atomic_numbers[at.symbol]
            ax.add_artist(plt.Circle((x0, y0), covalent_radii[n] * 0.5,
                                     color=cpk_colors[n], fill=True, clip_on=True))
            # bonds: draw from the atom to the midpoint towards each neighbour
            for neighbour in nl.get_neighbors(at.index)[0]:
                xm, ym, _ = (s[neighbour].position + at.position) / 2
                if (xm - x0)**2 + (ym - y0)**2 < 2:
                    ax.plot([x0, xm], [y0, ym], color=cpk_colors[n],
                            linewidth=2, linestyle='-')

        img = BytesIO()
        fig.savefig(img, format="png", dpi=72, bbox_inches='tight')
    finally:
        # release the figure; otherwise every call leaves one open
        plt.close(fig)
    return b64encode(img.getvalue()).decode('ascii')

In [None]:
from aiida.orm import load_node
# Debug cell: loads one node by a hard-coded PK for the manual checks in the
# following cells. NOTE(review): the PK is presumably only valid in the
# author's database -- adjust or skip elsewhere.
n = load_node(458275)

In [None]:
# Debug cell: show the dict extract_information() returns for node ``n``.
extract_information(n)

In [None]:
# Debug cell: run the preprocessing on node ``n``; this sets extras on the
# node and on its input structure.
preprocess_one(n)

In [None]:
############################   END OF PREPROCESSING   ###############################

In [None]:
_SEARCH_DEFAULT_LIMIT = 10    # used when the 'Maximum results' field is empty
_SEARCH_MISSING_VALUE = -100  # displayed when gap / Fermi energy is not set


def _build_search_filters():
    """Translate the state of the input widgets into QueryBuilder filters.

    :raises ValueError: if the PK field contains a token that is no integer
    """
    filters = {}

    wc_types = inp_wc_types.value.strip().split()
    if not wc_types:
        wc_types = ['fleur_initial_cls_wc']  # , 'fleur_corehole_wc']
    filters['attributes.process_label'] = {'in': wc_types}
    # Not filtered on (yet): extras.preprocess_version,
    # extras.preprocess_successful, extras.obsolete

    pk_tokens = inp_pks.value.strip().split()
    if pk_tokens:
        # TODO make a pk and uuid list
        try:
            # the id column holds integers, so do not pass the raw strings
            filters['id'] = {'in': [int(token) for token in pk_tokens]}
        except ValueError:
            raise ValueError('PKs have to be integers, got: %s' % ' '.join(pk_tokens))

    # Formula and element criteria both act on 'extras.formula'; collect them
    # so that one does not silently replace the other.
    formula_conditions = []
    formula_list = inp_formula.value.strip().split()
    if formula_list:
        # TODO get formula rather from structure or workchain node itself
        print(formula_list)
        formula_conditions.append({'in': formula_list})
    # NOTE: case-insensitive substring match, so 'B' also matches 'Be'.
    # TODO: or create element list in extras and query if str in list
    for element in inp_elements.value.strip().split():
        formula_conditions.append({'ilike': '%' + str(element) + '%'})
    if formula_conditions:
        filters['extras.formula'] = {'and': formula_conditions}

    # half-open ranges [lower, upper) taken from the two sliders
    for label, widget in (('bandgap', inp_gap), ('fermi_energy', inp_efermi)):
        bounds = widget.value
        filters['extras.' + label] = {'and': [{'>=': bounds[0]}, {'<': bounds[1]}]}

    return filters


def _parse_search_limit():
    """Return the maximum number of results requested in ``inp_max``.

    :raises ValueError: if the field holds something else than a positive integer
    """
    text = inp_max.value.strip()
    if not text:
        return _SEARCH_DEFAULT_LIMIT
    try:
        limit = int(text)
    except ValueError:
        limit = 0
    if limit < 1:
        raise ValueError('Maximum results has to be a positive integer, got: %s' % text)
    return limit


def _render_result_row(node):
    """Return the HTML table row (``<tr>...</tr>``) for one workchain node."""
    from html import escape

    extras = node.extras
    thumbnail = extras.get('thumbnail_struc', None)
    description = extras.get('structure_description', '')
    structure_uuid = extras.get('structure_uuid')
    # the extras may exist with value None, which '%f' cannot format
    bandgap = extras.get('bandgap')
    if bandgap is None:
        bandgap = _SEARCH_MISSING_VALUE
    efermi = extras.get('fermi_energy')
    if efermi is None:
        efermi = _SEARCH_MISSING_VALUE
    formula = extras.get('formula', '')
    plabel = node.get_attribute('process_label')

    if thumbnail:
        preview = '<img src="data:image/png;base64,%s" title="%s">' % (
            escape(str(thumbnail)), escape(str(description)))
    else:
        preview = 'no preview'

    cells = [
        '<td><input type="checkbox" name="pk" value="%s"></td>' % node.pk,
        '<td><a target="_blank" href="../../aiida/aiida_graph_browser.ipynb?pk=%d">%d</a></td>' % (node.pk, node.pk),
        '<td>%s</td>' % escape(str(plabel)),
        '<td>%s</td>' % node.ctime.strftime("%Y-%m-%d %H:%M"),
        '<td>%s</td>' % escape(str(formula)),
        '<td>%s</td>' % escape(str(extras.get('space_group', '..'))),
        '<td>%f</td>' % bandgap,
        '<td>%f</td>' % efermi,
        '<td><a target="_blank" href="../util/export_structure.ipynb?uuid=%s">%s</a></td>' % (
            escape(str(structure_uuid)), preview),
        # TODO link to plot spectra notebook
        '<td><a target="_blank" href="./display.ipynb?pk=%s">Show</a><br></td>' % node.pk,
    ]
    return '<tr>' + ''.join(cells) + '</tr>'


def search():
    """Preprocess new workchains, query the database and show the results.

    The search criteria are read from the module-level input widgets; the
    progress messages and finally the HTML result table (a form that submits
    the ticked PKs to ``display.ipynb``) are written to the ``results``
    widget. Invalid user input is reported there instead of raising.
    """
    from html import escape

    results.value = "preprocessing..."
    preprocess_newbies()

    results.value = "searching..."

    try:
        filters = _build_search_filters()
        limit = _parse_search_limit()
    except ValueError as exc:
        results.value = 'Invalid search input: %s' % escape(str(exc))
        return

    # query AiiDA database
    qb = QueryBuilder()
    print(filters)
    qb.append(WorkChainNode, filters=filters)
    qb.limit(limit)

    result = qb.all()
    print('found: ' + str(len(result)))

    # html table header
    parts = [
        '<style>#aiida_results td,th {padding: 2px}</style>',
        '<form action="display.ipynb" method="get" target="_blank">',
        '<table border=1 id="aiida_results" style="margin:10px;"><tr>',
        '<th></th>',
        '<th>PK</th>',
        '<th>Workchain type</th>',
        '<th>Creation Time</th>',
        '<th>Formula</th>',
        '<th>Symmetry</th>',
        '<th>GAP [eV]</th>',
        '<th>Fermi Energy [eV]</th>',
        '<th>Structure</th>',
        '<th>Details</th>',
        '</tr>',
    ]
    parts.extend(_render_result_row(node_tuple[0]) for node_tuple in result)
    parts.append('</table>')
    parts.append('Found %d matching entries.<br>' % len(result))

    # TODO: second table with mixed spectra of same stoichiometry
    # (reactions / formation energies) and a 'Calculate Mixtures' button

    parts.append('\n')
    parts.append('<input type="submit" value="Plot">')
    parts.append('</form>')

    results.value = ''.join(parts)

In [None]:
# Scratch cell: element-wise multiplication of a NumPy array; the result is
# not used by any other code shown in this notebook.
np.array([1,2])*3

In [None]:
# search UI: text inputs for the search criteria. All fields take
# space-separated values and are read by search() when the button is clicked.
style = {"description_width":"100px"}
layout = ipw.Layout(width="692px")#ipw.Layout(width="592px")
#inp_plugin = ipw.Text(description='Plugins:', placeholder='e.g. fleur.fleur (space separated)', layout=layout, style=style)
#inp_codes = ipw.Text(description='Codes:', placeholder='e.g. fleur_0.27@localhost (space separated)', layout=layout, style=style)
inp_wc_types = ipw.Text(description='Workchain types:', placeholder='e.g. fleur_initial_cls_wc (space separated, default)', layout=layout, style=style)
inp_pks = ipw.Text(description='PKs:', placeholder='e.g. 4062 4753 (space separated)', layout=layout, style=style)
inp_formula = ipw.Text(description='Formulas:', placeholder='e.g. Be2W Be12W', layout=layout, style=style)
#inp_stoi_form = ipw.Text(description='Stoichiometry:', placeholder='e.g. Be12W', layout=layout, style=style)

inp_elements = ipw.Text(description='Contains Elements:', placeholder='e.g. Be W (space separated)', layout=layout, style=style)
# NOTE: inp_cls is displayed but search() does not evaluate it.
inp_cls = ipw.Text(description='Core-levels:', placeholder='e.g. Be1s W4f (space separated)', layout=layout, style=style)
# search() limits the query to 10 results when this field is left empty
inp_max = ipw.Text(description='Maximum results:', placeholder='e.g. 50 (default 10)', layout=layout, style=style)
#inp_stoi = ipw.Checkbox(value=True, description='Auto show mixtures with same Stoichiometry', disabled=False)


def slider(desc, min, max):
    """Create a float range slider over ``[min, max]`` with the full range selected.

    Uses a step of 0.05 and the module-level ``layout`` and ``style``.
    """
    options = dict(
        description=desc,
        min=min,
        max=max,
        value=[min, max],
        step=0.05,
        layout=layout,
        style=style,
    )
    return ipw.FloatRangeSlider(**options)

# Range sliders for the two energy criteria
inp_gap = slider("Gap [eV]:", 0.0, 10.0)
inp_efermi = slider("Fermi Energy [eV]:", -10.0, 10.0)

# All criteria widgets in the order they are stacked in the app
# (disabled: inp_plugin, inp_codes, inp_stoi, inp_stoi_form)
search_crit = [
    inp_wc_types,
    inp_pks,
    inp_formula,
    inp_elements,
    inp_cls,
    inp_max,
    inp_gap,
    inp_efermi,
]

In [None]:
def on_click(b):
    """Button callback: run a search with printed output shown in ``info_out``.

    :param b: the clicked button (unused)
    """
    with info_out:
        clear_output()
        try:
            search()
        except Exception as exc:
            from html import escape
            # search() leaves a progress message in ``results``; replace it
            # so the pane does not claim to be searching forever
            results.value = 'Search failed: %s' % escape(str(exc))
            raise

# Output areas: HTML result table and captured print/traceback output
results = ipw.HTML()
info_out = ipw.Output()

# Search button wired to the callback
button = ipw.Button(description="Search")
button.on_click(on_click)

# Stack criteria, button and output areas vertically and show the app
app = ipw.VBox(children=search_crit + [button, results, info_out])
display(app)