# Structure Property visualizer

This is the second of two deliverables for the SiSc-Lab2020 project.

Authors: Sijie Luo and Anna Garoufali

Supervisors: Jens Bröder, Dr. Daniel Wortmann, Johannes Wasmer, Prof. Dr. Stefan Blügel.

In [1]:
# Imports
%load_ext autoreload
%autoreload 2
%matplotlib notebook

# python imports:
from collections import Counter
import time
import numpy as np
import pandas as pd
#from pprint import pprint
# aiida imports:
from aiida import load_profile
profile = load_profile()

from aiida_jutools.sisc_lab import helpers
from bokeh.io import output_notebook

# Collects one elapsed-time entry (in seconds) per timed stage of this notebook.
all_times = []

# Show bokeh output inside the notebook.
output_notebook()

# Tell the reader which AiiDA profile (and therefore which database) is in use.
helpers.print_bold(f"This notebook/dashboard will visualize the contents from the database of profile {profile.name}")

[1mThis notebook/dashboard will visualize the contents from the database of profile generic[1m


### Check workflows and versions

In [2]:
t1 = time.time()
# Preprocessing: Set formula attributes for all the structure nodes
helpers.set_structure_formula()

# Workflow name handed to the helpers.generate_*_pandas_source calls in the
# cells below.
workflow_name = 'FleurScfWorkChain'

# No restriction here: the helper queries with its default filters.
# To restrict the query to a single workflow, pass for example
#   workflow_filters={'attributes.process_label': {'==': workflow_name}}
workflowdictlst = helpers.get_structure_workflow_dict(timing=True, check_version=True)

print("Number of the workflows: ", len(workflowdictlst), '\n')
print("Workflows: ")

# Record the timing of this stage before the final expression, because Jupyter
# only displays the value of the LAST expression in a cell.
all_times.append(time.time()-t1)

# Preview of the first two entries (displayed as the cell output).
workflowdictlst[:2]

Elapsed time:  0.14190411567687988 s

Versions and frequency:
 [('0.4.2', 157), ('0.2.2', 131), ('AiiDA Fleur Parser v0.3.0', 103), ('0.4.3', 49), ('AiiDA Fleur Parser v0.3.1', 41), ('AiiDA Fleur Parser v0.3.2', 34), ('aiida-quantumespresso parser pw.x v3.2.1', 10), ('0.4.0', 6), ('0.3.0', 6)] 

Nuumber of the workflows:  537 

Workflows: 


In [3]:
# TODO: drop down widget for selecting the workflow/dict results
# TODO: drop down widget to select property

### Check attributes

In [4]:
# dict_project=['uuid','attributes'] # Attributes of dict nodes
# workflowdictlst = helpers.get_structure_workflow_dict(dict_project=dict_project)
# workflowdictlst[:20]
#or
# structure_project=['uuid', 'extras','attributes.kinds'] # Attributes of structure nodes
# workflowdictlst = helpers.get_structure_workflow_dict(structure_project=structure_project)
# workflowdictlst[:20]

Available dict node attributes for the different workflow and parser versions:


1. workflow 0.4.2
- 'workflow_version': '0.4.2',
- 'total_energy': -971.2916432694,
- 'force_largest': 0.0,
- 'distance_charge': None,
- 'total_wall_time': 176,
- 'total_energy_units': 'Htr',
- 'distance_charge_units': 'me/bohr^3',
- 'total_wall_time_units': 's'

2. Parser
- 'parser_info': 'AiiDA Fleur Parser v0.3.0',
- 'energy': -26430.191843004,
- 'bandgap': 0.0177798418,
- 'walltime': 176,
- 'energy_units': 'eV',
- 'fermi_energy': 0.2778502713,
- 'bandgap_units': 'eV',
- 'energy_hartree': -971.2916432694,
- 'walltime_units': 'seconds',
- 'fermi_energy_units': 'Htr',
- 'energy_hartree_units': 'Htr'

3. workflow 0.2.2
- 'workflow_version': '0.2.2',
- 'force': 1.241e-06,
- 'energy': -15784.56376617,
- 'energy_units': 'eV'

### Structure nodes

In [5]:
t1 = time.time()

# Columns to project from each structure node: its uuid and the formula extra.
structure_project = ['uuid', 'extras.formula']

# NOTE(review): presumably the helper also writes the result to `filename`;
# confirm in aiida_jutools.sisc_lab.helpers.
structurenodes = helpers.generate_structure_property_pandas_source(
    workflow_name,
    filename='structure_property.json',
    structure_project=structure_project,
)
# Uncomment to inspect the result:
# structurenodes.head()

all_times.append(time.time() - t1)

### Dict nodes

In [6]:
# Timer for the whole "Dict nodes" stage; it is read in the next cell, after
# the parser dict nodes have been generated as well.
t1 = time.time()

# Projection for dict nodes with workflow_version=0.4.2
dict_project_wf042 = [
    'uuid',
    'attributes.workflow_version',
    'attributes.total_energy',
    'attributes.total_energy_units',
    'attributes.distance_charge',
    'attributes.distance_charge_units',
    'attributes.total_wall_time',
    'attributes.total_wall_time_units',
]

dictnodes_wf042 = helpers.generate_dict_property_pandas_source(
    workflow_name,
    filename='dict_property_workflow042.json',
    dict_project=dict_project_wf042,
)
# Uncomment to inspect the result:
# dictnodes_wf042.head()

In [7]:
# Projection for dict nodes written by the parser (any version)
dict_project_parser = [
    'uuid',
    'attributes.parser_info',
    'attributes.energy',
    'attributes.energy_units',
    'attributes.fermi_energy',
    'attributes.fermi_energy_units',
    'attributes.energy_hartree',
    'attributes.energy_hartree_units',
    'attributes.bandgap',
    'attributes.bandgap_units',
    'attributes.walltime',
    'attributes.walltime_units',
]

dictnodes_parser = helpers.generate_dict_property_pandas_source(
    workflow_name,
    filename='dict_property_parser.json',
    dict_project=dict_project_parser,
)
# Uncomment to inspect the result:
# dictnodes_parser.head()

# t1 was started in the previous cell, so this entry covers both dict-node queries.
all_times.append(time.time() - t1)

### Combine the two kinds of nodes

In [8]:
# Timer for the whole "combine" stage; it is read in the next cell.
t1 = time.time()

# Combined structure + dict data for workflow_version=0.4.2
combinednodes_wf042 = helpers.generate_combination_property_pandas_source(
    workflow_name,
    structure_project=structure_project,
    dict_project=dict_project_wf042,
    filename='combined_property_wf042.json',
)
# Uncomment to inspect the result:
# combinednodes_wf042.head()

In [9]:
# Combined structure + dict data for the parser output (any version)
combinednodes_parser = helpers.generate_combination_property_pandas_source(
    workflow_name,
    structure_project=structure_project,
    dict_project=dict_project_parser,
    filename='combined_property_parser.json',
)
# Uncomment to inspect the result:
# combinednodes_parser.head()

# t1 was started in the previous cell, so this entry covers both combinations.
all_times.append(time.time() - t1)

# Interactive plot

### Check data source before plotting

In [10]:
# Timer for the "loading file" stage; it is read two cells further down.
t1 = time.time()

# Load the combined parser data source from its JSON file.
df = helpers.read_json_file(
    'combined_property_parser.json'
)
# Uncomment to inspect the result:
# df

In [11]:
# NOTE(review): presumably keeps only the rows where both 'energy' and
# 'fermi_energy' are present; confirm in helpers.filter_missing_value.
filtered_df, xdata, ydata = helpers.filter_missing_value(
    df,
    'energy',
    'fermi_energy',
)

# Last expression of the cell: displayed as the cell output.
filtered_df

Unnamed: 0,dict_uuid,parser_info,energy,energy_units,fermi_energy,fermi_energy_units,energy_hartree,energy_hartree_units,bandgap,bandgap_units,walltime,walltime_units,structure_uuid,formula
0,8c09f5fd,AiiDA Fleur Parser v0.3.0,-15784.561730,eV,0.205269,Htr,-580.071949,Htr,0.625036,eV,100.0,seconds,e7ab9f49,Si2
1,984358bd,AiiDA Fleur Parser v0.3.0,-15784.731259,eV,0.193149,Htr,-580.078179,Htr,0.929981,eV,23.0,seconds,fb3d7bd9,Si2
2,e77ebc42,AiiDA Fleur Parser v0.3.0,-15784.723533,eV,0.177246,Htr,-580.077895,Htr,1.094787,eV,36.0,seconds,fcb8cd9d,Si2
3,e8cb389e,AiiDA Fleur Parser v0.3.0,-26430.182989,eV,0.262493,Htr,-971.291318,Htr,0.120050,eV,9.0,seconds,7fa1a765,Al4
4,3694c93f,AiiDA Fleur Parser v0.3.0,-15784.547822,eV,0.188577,Htr,-580.071438,Htr,0.833228,eV,358.0,seconds,5b794dc3,Si2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,852d5ec1,AiiDA Fleur Parser v0.3.2,-15784.561138,eV,0.207919,Htr,-580.071923,Htr,0.611768,eV,107.0,seconds,b82d65c9,Si2
174,066d9f70,AiiDA Fleur Parser v0.3.2,-15784.561929,eV,0.205002,Htr,-580.071952,Htr,0.626396,eV,107.0,seconds,3a05e5d6,Si2
175,e497f940,AiiDA Fleur Parser v0.3.2,-15784.561814,eV,0.203299,Htr,-580.071948,Htr,0.633605,eV,105.0,seconds,e3b5ca84,Si2
176,b2a3f4ba,AiiDA Fleur Parser v0.3.2,-15784.561823,eV,0.203831,Htr,-580.071948,Htr,0.632232,eV,113.0,seconds,ec2cf06f,Si2


In [12]:

# Record the time elapsed since t1, which was set in the cell that reads
# 'combined_property_parser.json'.
all_times.append(time.time()-t1)

### Interactive plot by Bokeh

In [13]:
t1 = time.time()

# Workflow_version=0.4.2
helpers.bokeh_struc_prop_vis(
    'combined_property_wf042.json',
    'distance_charge',
    'total_energy',
    "vis_wf042.html",
    axis_type=['log', 'linear'],
    nbins=40,
)

all_times.append(time.time() - t1)

In [14]:
t1 = time.time()

# Parser
helpers.bokeh_struc_prop_vis(
    'combined_property_parser.json',
    'energy',
    'bandgap',
    "vis_parser.html",
)

all_times.append(time.time() - t1)

### Interactive plot using Bokeh server application

In [15]:
# In vscode terminal:
# bokeh serve --show --port 5001 bokehplotting.py

In [16]:
# Database statistics for the file header. The counts are not collected in
# this notebook, so they are written out as "None"; the size is hard-coded.
totalnodes = None
npro = None   # sum(node_count.values())
ndata = None  # sum(process_count.values())
size = 34     # inserted into the "{} MB size" slot of the header

header = (
    '# Timings of D2 in seconds\n'
    '# Database info: {} nodes; {} processes, {} data, {} MB size \n'
    '# Preprocess structures, Prepare Structure, Prepare Dict, Join Data, Loading file, Plot 1, Plot 2\n'
).format(totalnodes, npro, ndata, size)

# One line of timings; every value (including the last) is followed by two spaces.
timestring = ''.join('{}  '.format(elapsed) for elapsed in all_times)

with open('all_times_D2_iffaiida.txt', 'w') as timings_file:
    timings_file.write(header + timestring)