# Structure Property visualizer

This is the second of two deliverables for the SiSc-Lab2020 project.

Authors: Sijie Luo and Anna Garoufali

Supervisors: Jens Bröder, Dr. Daniel Wortmann, Johannes Wasmer, Prof. Dr. Stefan Blügel.

In [1]:
# Name of the AiiDA profile whose database is visualized (passed to load_profile in the imports cell).
aiida_profile_name = "wasmer"
# Gates the %autoreload magics in the imports cell.
enable_autoreload = True # disable for timings
# Name of the file the collected timings are written to (inside "timings/") by the last cell.
timings_filename = "all_times_D2_wasmer.txt"

________________

In [2]:
# Imports
# The autoreload extension is only loaded when enabled in the config cell
# (the config cell recommends disabling it for timing runs).
if enable_autoreload:
    %load_ext autoreload
    %autoreload 2
# NOTE(review): no matplotlib plotting is visible in this notebook (the plots
# use Bokeh) — confirm whether this magic is still needed.
%matplotlib notebook

# python imports:
from collections import Counter
import time
import numpy as np
import pandas as pd
#from pprint import pprint
# aiida imports:
from aiida import load_profile
# Load the AiiDA profile selected in the config cell; `profile.name` is
# printed further below.
profile = load_profile(aiida_profile_name)

In [3]:
# project imports

# now check if outsourced version produces same result
# add project module to sys.path
import sys
from pathlib import Path

def add_to_sys_path(path:Path):
    """Append ``path`` to ``sys.path`` unless it is already listed there.

    :param path: location to make importable; compared and stored as ``str(path)``.
    """
    entry = str(path)
    if entry in sys.path:
        # Already importable from this location: nothing to do.
        return
    sys.path.append(entry)

# load developer's code: general package
# The checkout location defaults to the original developer's path but can be
# overridden with the AIIDA_JUTOOLS_DIR environment variable, so the notebook
# can run on other machines without editing this cell.
import os

project_dir = Path(os.environ.get("AIIDA_JUTOOLS_DIR", "/Users/wasmer/src/aiida-jutools/"))
add_to_sys_path(project_dir)

from aiida_jutools.sisc_lab import helpers
from bokeh.io import output_notebook

# Render Bokeh output inline in the notebook.
output_notebook()
helpers.print_bold(f"This notebook/dashboard will visualize the contents from the database of profile {profile.name}")
# Wall-clock durations of the timed steps; each timed cell appends one entry
# and the last cell writes the list to the timings file.
all_times = []

[1mThis notebook/dashboard will visualize the contents from the database of profile wasmer[1m


### Check workflows and versions

In [4]:
# Time the "Preprocess structures" step.
t1 = time.time()
# Preprocessing: Set formula attributes for all the structure nodes
helpers.set_structure_formula()

# No restriction on the workflow type: query with the defaults.
# To restrict the query to one workflow, use for example:
# workflow_name = 'FleurScfWorkChain'
# workflow_filters = {'attributes.process_label' : {'==' : workflow_name}}
# and pass workflow_filters=workflow_filters to get_structure_workflow_dict.
workflow_name = None
workflowdictlst = helpers.get_structure_workflow_dict(timing=True, check_version=True)

# Record the timing before producing the cell output.
all_times.append(time.time()-t1)

print("Number of the workflows: ", len(workflowdictlst), '\n')
print("Workflows: ")
# Must be the last expression of the cell: Jupyter only displays the value of
# a trailing expression, so with a statement after it nothing was shown.
workflowdictlst[:2]

Elapsed time:  0.044512271881103516 s

Versions and frequency:
 [('0.12.0', 94), ('0.10.4', 38), ('0.12.1', 21)] 

Number of the workflows:  383 

Workflows: 


In [5]:
# TODO: drop down widget for selecting the workflow/dict results
# TODO: drop down widget to select property

### Check attributes

In [6]:
# dict_project=['uuid','attributes'] # Attributes of dict nodes
# workflowdictlst = helpers.get_structure_workflow_dict(dict_project=dict_project)
# workflowdictlst[:20]
#or
# structure_project=['uuid', 'extras','attributes.kinds'] # Attributes of structure nodes
# workflowdictlst = helpers.get_structure_workflow_dict(structure_project=structure_project)
# workflowdictlst[:20]

Available *scalar* dict node attributes for the different workflow versions:

(Note on query notation: `'attributes.some_attr.0'` means that `some_attr` is a `list` and the query accesses `some_attr[0]`. The same goes for dicts: `'some_attr.key'`.)


1. fleur ??? workflow 0.4.2
- 'workflow_version': '0.4.2',
- 'total_energy': -971.2916432694,
- 'force_largest': 0.0,
- 'distance_charge': None,
- 'total_wall_time': 176,
- 'total_energy_units': 'Htr',
- 'distance_charge_units': 'me/bohr^3',
- 'total_wall_time_units': 's'

2. fleur Parser
- 'parser_info': 'AiiDA Fleur Parser v0.3.0',
- 'energy': -26430.191843004,
- 'bandgap': 0.0177798418,
- 'walltime': 176,
- 'energy_units': 'eV',
- 'fermi_energy': 0.2778502713,
- 'bandgap_units': 'eV',
- 'energy_hartree': -971.2916432694,
- 'walltime_units': 'seconds',
- 'fermi_energy_units': 'Htr',
- 'energy_hartree_units': 'Htr'

3. fleur ??? workflow 0.2.2
- 'workflow_version': '0.2.2',
- 'force': 1.241e-06,
- 'energy': -15784.56376617,
- 'energy_units': 'eV'

4. `kkr_scf_wc` workflow 0.10.4 / `kkr_dos_wc` workflow 0.12.0 (sub-workflow of `kkr_scf_wc`)
- 'workflow_version': '0.10.4'
- 'energy': -16343.364818188,
- 'energy_unit': 'eV',
- 'total_energy_Ry': -1201.21516834
- 'total_energy_Ry_unit': 'Rydberg'
- 'fermi_energy': 0.1582351535,
- 'fermi_energy_units': 'Ry',
- 'dos_at_fermi_energy': 10.584429,
- 'charge_neutrality': 0.0
- 'charge_neutrality_unit': 'electrons'
- 'total_charge_per_atom.0': 19.0,
- 'convergence_reached': True
- 'number_of_iterations': 9
- 'timings_group.Time in Iteration': 0.5915
- 'timings_unit': 'seconds',
- 'parser_version': '0.6.6'

5. `kkr_imp_wc` workflow 0.8.0 / `kkr_imp_sub_wc` workflow 0.9.4 (sub-workflow of `kkr_imp_wc`)
- 'workflow_version'
- 'energy': -856105.64822435,
- 'energy_unit': 'eV',
- 'total_energy_Ry': -62922.60509318,
- 'total_energy_Ry_unit': 'Rydberg'
- 'fermi_energy': 0.6216417824,
- 'fermi_energy_units': 'Ry',
- 'charge_neutrality'
- 'total_charge_per_atom.0': 29.0
- 'total_charge_per_atom_unit': 'electron charge'
- 'charge_valence_states_per_atom.0': 11.0
- 'charge_core_states_per_atom_unit': 'electron charge'
- 'number_of_atoms_in_unit_cell': 19
- 'converged': True
- 'number_of_rms_steps': 124,
- 'convergence_group.calculation_converged': True
- 'convergence_group.number_of_iterations': 29
- 'timings_group.Total running time': 38.651699066162
- 'timings_unit': 'seconds',
- 'parser_version': '0.4.2'

### Structure nodes

In [7]:
# Time the "Prepare Structure" step.
t1 = time.time()

# Project the node UUID and the `formula` entry of the node extras.
structure_project = ['uuid', 'extras.formula']

# NOTE(review): the helper presumably also writes the table to `filename` — confirm in helpers.
structurenodes = helpers.generate_structure_property_pandas_source(
    workflow_name,
    filename='structure_property.json',
    structure_project=structure_project,
)

all_times.append(time.time() - t1)

In [8]:
# Display the structure-node table built in the previous cell.
structurenodes

Unnamed: 0,structure_uuid,formula
0,dedea5e4,Cu
1,dedea5e4,Cu
2,dedea5e4,Cu
3,dedea5e4,Cu
4,dedea5e4,Cu
...,...,...
378,9f79da9d,Ca
379,9f79da9d,Ca
380,209e80d5,Cu
381,209e80d5,Cu


### Dict nodes

In [9]:
# t1 = time.time()
# # Dict nodes with workflow_version=0.4.2
# dict_project_wf042=['uuid', 'attributes.workflow_version', 'attributes.total_energy',
#                     'attributes.total_energy_units', 'attributes.distance_charge',
#                     'attributes.distance_charge_units', 'attributes.total_wall_time',
#                     'attributes.total_wall_time_units']

# dictnodes_wf042 = helpers.generate_dict_property_pandas_source(
#         workflow_name, 
#         dict_project=dict_project_wf042, 
#         filename='dict_property_workflow042.json')
# #dictnodes_wf042.head()

In [10]:
# # Dict nodes with parser of any versions
# dict_project_parser=['uuid', 'attributes.parser_info', 'attributes.energy', 'attributes.energy_units', 
#                      'attributes.fermi_energy', 'attributes.fermi_energy_units', 'attributes.energy_hartree', 
#                      'attributes.energy_hartree_units', 'attributes.bandgap', 'attributes.bandgap_units', 
#                      'attributes.walltime', 'attributes.walltime_units']
# dictnodes_parser = helpers.generate_dict_property_pandas_source(
#         workflow_name, 
#         dict_project=dict_project_parser, 
#         filename='dict_property_parser.json')
# #dictnodes_parser.head()

# all_times.append(time.time()-t1)

In [11]:
# Time the "Prepare Dict" step (covers both projections below).
t1 = time.time()

# Attributes projected for the kkr_scf_wc dict nodes.
# Order matters: it is kept exactly, grouped by topic for readability.
project_kkr_scf_wc = [
    'uuid', 'attributes.workflow_version',
    # energies
    'attributes.energy', 'attributes.energy_unit',
    'attributes.total_energy_Ry', 'attributes.total_energy_Ry_unit',
    'attributes.fermi_energy', 'attributes.fermi_energy_units',
    'attributes.dos_at_fermi_energy',
    # charges
    'attributes.charge_neutrality', 'attributes.charge_neutrality_unit',
    'attributes.total_charge_per_atom.0',
    # convergence
    'attributes.convergence_reached', 'attributes.number_of_iterations',
    # timings and parser
    'attributes.timings_group.Time in Iteration', 'attributes.timings_unit',
    'attributes.parser_version',
]

dictnodes_kkr_scf_wc = helpers.generate_dict_property_pandas_source(
    workflow_name,
    filename='dict_property_kkr_scf_wc.json',
    dict_project=project_kkr_scf_wc,
)

# Attributes projected for the kkr_imp_wc dict nodes (same ordering rule).
project_kkr_imp_wc = [
    'uuid', 'attributes.workflow_version',
    # energies
    'attributes.energy', 'attributes.energy_unit',
    'attributes.total_energy_Ry', 'attributes.total_energy_Ry_unit',
    'attributes.fermi_energy', 'attributes.fermi_energy_units',
    # charges
    'attributes.charge_neutrality',
    'attributes.total_charge_per_atom.0', 'attributes.total_charge_per_atom_unit',
    # Disabled: 'attributes.charge_valence_states_per_atom.0'
    # -> ValueError: DataFrame columns must be unique for orient='records'.
    # NOTE(review): presumably its column name collides with
    # 'total_charge_per_atom.0' (both would be named '0') — confirm in helpers.
    'attributes.charge_core_states_per_atom_unit',
    'attributes.number_of_atoms_in_unit_cell',
    # convergence
    'attributes.converged', 'attributes.number_of_rms_steps',
    'attributes.convergence_group.calculation_converged',
    'attributes.convergence_group.number_of_iterations',
    # timings and parser
    'attributes.timings_group.Total running time', 'attributes.timings_unit',
    'attributes.parser_version',
]

dictnodes_kkr_imp_wc = helpers.generate_dict_property_pandas_source(
    workflow_name,
    filename='dict_property_kkr_imp_wc.json',
    dict_project=project_kkr_imp_wc,
)

all_times.append(time.time() - t1)

### Combine the two kinds of nodes

In [12]:
# t1 = time.time()
# # Combined nodes with workflow_version=0.4.2
# combinednodes_wf042 = helpers.generate_combination_property_pandas_source(
#         workflow_name, 
#         dict_project=dict_project_wf042, 
#         structure_project=structure_project,
#         filename='combined_property_wf042.json')
# #combinednodes_wf042.head()

In [13]:
# # Combined nodes with parser of any versions
# combinednodes_parser = helpers.generate_combination_property_pandas_source(
#         workflow_name, 
#         dict_project=dict_project_parser, 
#         structure_project=structure_project,
#         filename='combined_property_parser.json')
# #combinednodes_parser.head()

# all_times.append(time.time()-t1)

In [14]:
# Time the "Join Data" step (covers both combinations below).
t1 = time.time()

# Combine structure and dict properties using the kkr_scf_wc projection.
# The JSON file named here is read back by the plotting cells.
combinednodes_kkr_scf_wc = helpers.generate_combination_property_pandas_source(
    workflow_name,
    structure_project=structure_project,
    dict_project=project_kkr_scf_wc,
    filename='combined_property_kkr_scf_wc.json',
)

# Same combination using the kkr_imp_wc projection.
combinednodes_kkr_imp_wc = helpers.generate_combination_property_pandas_source(
    workflow_name,
    structure_project=structure_project,
    dict_project=project_kkr_imp_wc,
    filename='combined_property_kkr_imp_wc.json',
)

all_times.append(time.time() - t1)

# Interactive plot

### Check data source before plotting

In [15]:
# Start the timer for the "Loading file" step; the matching
# all_times.append(...) is in a separate cell after both data-source checks.
t1 = time.time()
# Read back the combined kkr_scf_wc table written by the "combine" cell.
df = helpers.read_json_file('combined_property_kkr_scf_wc.json')
#df
# NOTE(review): presumably keeps only rows where both 'energy' and
# 'fermi_energy' are present and returns those columns as xdata/ydata —
# confirm in helpers.filter_missing_value.
filtered_df, xdata, ydata = helpers.filter_missing_value(df,'energy', 'fermi_energy')
filtered_df

Unnamed: 0,dict_uuid,workflow_version,energy,energy_unit,total_energy_Ry,total_energy_Ry_unit,fermi_energy,fermi_energy_units,dos_at_fermi_energy,charge_neutrality,charge_neutrality_unit,0,convergence_reached,number_of_iterations,Time in Iteration,timings_unit,parser_version,structure_uuid,formula
0,c2cdd6d3,,-44965.16,eV,-3304.87858,Rydberg,0.64271,Ry,5.054623,,,29.0,,,0.4688,seconds,0.6.6,dedea5e4,Cu
1,2ae4e058,,-500783.1,eV,-36806.878405,Rydberg,0.660029,Ry,20.683127,,,78.0,,,0.5547,seconds,0.6.6,ca592bce,Pt
2,38da0956,,-25780.36,eV,-1894.821466,Rydberg,0.746349,Ry,19.371753,,,23.0,,,0.5606,seconds,0.6.6,4fdc9b26,V
3,b5835330,,-80903.65,eV,-5946.308512,Rydberg,-0.010436,Ry,120.901701,,,37.0,,,0.6238,seconds,0.6.6,7771e0b5,Rb
4,9e2d0f0e,,-439015.2,eV,-32267.02579,Rydberg,0.89404,Ry,8.16589,,,74.0,,,0.6072,seconds,0.6.6,086a2b25,W
5,334566d2,,-907936.2,eV,-66732.078259,Rydberg,0.883461,Ry,9.529759,,,75.0,,,2.1557,seconds,0.6.6,d8cb94af,Re2
6,d1b8f88e,,-184035.6,eV,-13526.366496,Rydberg,0.437006,Ry,20.709903,,,39.0,,,2.1386,seconds,0.6.6,8310e631,Y2
7,f94f96da,,-97601.58,eV,-7173.583674,Rydberg,0.573647,Ry,3.27868,,,30.0,,,2.9964,seconds,0.6.6,b81a29e5,Zn2
8,3540285f,,-246353.2,eV,-18106.629458,Rydberg,0.841174,Ry,11.505636,,,44.0,,,2.1532,seconds,0.6.6,262c2d37,Ru2
9,ee6317f8,,-195644.7,eV,-14379.619926,Rydberg,0.607291,Ry,16.325915,,,40.0,,,2.1473,seconds,0.6.6,353338ae,Zr2


In [16]:
# Same check for the combined kkr_imp_wc table.
# Note: this rebinds df, filtered_df, xdata and ydata from the previous cell.
df = helpers.read_json_file('combined_property_kkr_imp_wc.json')
#df
# NOTE(review): presumably keeps only rows where both 'energy' and
# 'fermi_energy' are present — confirm in helpers.filter_missing_value.
filtered_df, xdata, ydata = helpers.filter_missing_value(df,'energy', 'fermi_energy')
filtered_df

Unnamed: 0,dict_uuid,workflow_version,energy,energy_unit,total_energy_Ry,total_energy_Ry_unit,fermi_energy,fermi_energy_units,charge_neutrality,0,...,number_of_atoms_in_unit_cell,converged,number_of_rms_steps,calculation_converged,number_of_iterations,Total running time,timings_unit,parser_version,structure_uuid,formula
0,c2cdd6d3,,-44965.16,eV,-3304.87858,Rydberg,0.64271,Ry,,29.0,...,1.0,,,1.0,26.0,,seconds,0.6.6,dedea5e4,Cu
1,2ae4e058,,-500783.1,eV,-36806.878405,Rydberg,0.660029,Ry,,78.0,...,1.0,,,1.0,11.0,,seconds,0.6.6,ca592bce,Pt
2,38da0956,,-25780.36,eV,-1894.821466,Rydberg,0.746349,Ry,,23.0,...,1.0,,,1.0,14.0,,seconds,0.6.6,4fdc9b26,V
3,b5835330,,-80903.65,eV,-5946.308512,Rydberg,-0.010436,Ry,,37.0,...,1.0,,,0.0,50.0,,seconds,0.6.6,7771e0b5,Rb
4,9e2d0f0e,,-439015.2,eV,-32267.02579,Rydberg,0.89404,Ry,,74.0,...,1.0,,,1.0,13.0,,seconds,0.6.6,086a2b25,W
5,334566d2,,-907936.2,eV,-66732.078259,Rydberg,0.883461,Ry,,75.0,...,2.0,,,1.0,14.0,,seconds,0.6.6,d8cb94af,Re2
6,d1b8f88e,,-184035.6,eV,-13526.366496,Rydberg,0.437006,Ry,,39.0,...,2.0,,,1.0,16.0,,seconds,0.6.6,8310e631,Y2
7,f94f96da,,-97601.58,eV,-7173.583674,Rydberg,0.573647,Ry,,30.0,...,2.0,,,1.0,13.0,,seconds,0.6.6,b81a29e5,Zn2
8,3540285f,,-246353.2,eV,-18106.629458,Rydberg,0.841174,Ry,,44.0,...,2.0,,,1.0,14.0,,seconds,0.6.6,262c2d37,Ru2
9,ee6317f8,,-195644.7,eV,-14379.619926,Rydberg,0.607291,Ry,,40.0,...,2.0,,,1.0,13.0,,seconds,0.6.6,353338ae,Zr2


In [17]:

# Record the "Loading file" timing; t1 was set at the top of the first
# data-source check cell, so this spans both checks.
all_times.append(time.time()-t1)

### Interactive plot by Bokeh

In [18]:
# Time the "Plot 1" step.
t1 = time.time()
# kkr_scf_wc data: plot the 'fermi_energy' and 'dos_at_fermi_energy' columns.
# NOTE(review): "vis_kkr_scf_wc.html" is presumably the output file — confirm in helpers.
helpers.bokeh_struc_prop_vis('combined_property_kkr_scf_wc.json',
                             'fermi_energy', 'dos_at_fermi_energy', 
                             "vis_kkr_scf_wc.html", 
                             axis_type=['linear', 'linear'],nbins=40)

all_times.append(time.time()-t1)

In [19]:
# kkr_imp_wc data: plot the 'total_energy_Ry' and 'number_of_atoms_in_unit_cell' columns.
# Time the "Plot 2" step.
t1 = time.time()
# NOTE(review): "vis_kkr_imp_wc.html" is presumably the output file — confirm in helpers.
helpers.bokeh_struc_prop_vis('combined_property_kkr_imp_wc.json',
                             'total_energy_Ry', 'number_of_atoms_in_unit_cell',
                             "vis_kkr_imp_wc.html",
                            axis_type=['linear', 'linear'],nbins=40)

all_times.append(time.time()-t1)

### Interactive plot using Bokeh server application

In [20]:
# In vscode terminal:
# bokeh serve --show --port 5001 bokehplotting.py

In [21]:
# Database statistics for the file header. They are not collected in this
# notebook, so they are written as None.
# NOTE(review): the commented hints look swapped (npro <-> node_count,
# ndata <-> process_count) — confirm before enabling them.
npro  = None #sum(node_count.values())
ndata = None #sum(process_count.values())
totalnodes  = None
size = 34  # hard-coded; written as the "MB size" field of the header
header = ('# Timings of D2 in seconds\n# Database info: {} nodes; {} processes, {} data, {} MB size \n'
          '# Preprocess structures, Prepare Structure, Prepare Dict, Join Data, Loading file, Plot 1, Plot 2\n'
          ''.format(totalnodes, npro, ndata, size))

# One entry per timed step, each followed by two spaces (same format as before).
timestring = ''.join('{}  '.format(times) for times in all_times)

# Create the output directory on demand: open(..., 'w') raises
# FileNotFoundError when "timings/" does not exist yet.
timings_path = Path("timings/" + timings_filename)
timings_path.parent.mkdir(parents=True, exist_ok=True)
with open(timings_path, 'w') as file1:
    file1.write(header)
    file1.write(timestring)