# Structure Property visualizer

This is the second of two deliverables for the SiSc-Lab2020 project.

Authors: Sijie Luo and Anna Garoufali

Supervisors: Jens Bröder, Dr. Daniel Wortmann, Johannes Wasmer, Prof. Dr. Stefan Blügel.

In [1]:
# Imports
%load_ext autoreload
%autoreload 2
%matplotlib notebook

# python imports:
from collections import Counter
import time
import numpy as np
import pandas as pd
#from pprint import pprint

import helpers

# Elapsed wall-clock time (in seconds) of each timed cell, appended in execution order.
all_times = []

In [2]:
# AiiDA setup: load a profile (no name is passed) so that its database can be queried.
from aiida import load_profile
profile = load_profile()

#from aiida_jutools.sisc_lab import helpers
from bokeh.io import output_notebook

# Configure Bokeh for notebook output.
output_notebook()

banner = f"This notebook/dashboard will visualize the contents from the database of profile {profile.name}"
helpers.print_bold(banner)

[1mThis notebook/dashboard will visualize the contents from the database of profile seconddb[1m


### Check workflows and versions

In [3]:
t1 = time.time()

# Preprocessing: set the formula attribute on all structure nodes.
helpers.set_structure_formula()

# Alternative: restrict the query to one workflow type by filtering on the process label.
# workflow_name = 'fleur_scf_wc' # Filter workflow
# workflow_filters = {'attributes.process_label' : {'==' : workflow_name}}
# workflowdictlst = helpers.get_structure_workflow_dict(workflow_filters=workflow_filters)
workflow_name = None  # no restriction: query with the helper's defaults
workflowdictlst, versionslst = helpers.get_structure_workflow_dict(
    timing=True,
    check_version=True,
)

all_times.append(time.time() - t1)

print("Number of the workflows: ", len(workflowdictlst), '\n')
print("Workflows: ")
# Show only the first two entries as a sample.
workflowdictlst[:2]


Elapsed time:  0.2640421390533447 s

Versions and frequency:
 [('workflow_0.4.2', 149), ('workflow_0.2.2', 130), ('parser_AiiDA Fleur Parser v0.3.0', 100), ('parser_AiiDA Fleur Parser v0.3.1', 41), ('parser_AiiDA Fleur Parser v0.3.2', 3), ('workflow_0.3.0', 2)] 

Number of the workflows:  425 

Workflows: 


[{'structure': ['0ccacebb-861b-4909-8bc1-83d3187bf56b', 'Al4'],
  'workflow': ['82d8046e-9bea-4d92-8f90-02832e5bc565', 'FleurScfWorkChain'],
  'dict': ['f453f49c-da1c-42f0-a9ba-e4806ef5fd2b', '0.4.2', None, None]},
 {'structure': ['0ccacebb-861b-4909-8bc1-83d3187bf56b', 'Al4'],
  'workflow': ['82d8046e-9bea-4d92-8f90-02832e5bc565', 'FleurScfWorkChain'],
  'dict': ['939a487f-e06a-4a0b-af71-37e8ee333d34',
   None,
   'AiiDA Fleur Parser v0.3.0',
   None]}]

In [5]:
from helpers import MAP, predifined_workflow

# versionslst holds (version label, frequency) pairs; keep only the labels.
versions = [label for label, _frequency in versionslst]
print(versions, predifined_workflow.workflow_list)

['workflow_0.4.2',
 'workflow_0.2.2',
 'parser_AiiDA Fleur Parser v0.3.0',
 'parser_AiiDA Fleur Parser v0.3.1',
 'parser_AiiDA Fleur Parser v0.3.2',
 'workflow_0.3.0']

In [7]:
#Check attributes
# dict_project=['uuid','attributes'] # Attributes of dict nodes
# dict_filters = {'attributes.workflow_version' : {'==' : '0.3.0'}}
# #dict_filters = {'attributes.parser_info' : {'==' : 'AiiDA Fleur Parser v0.3.0'}}
# workflowdictlst = helpers.get_structure_workflow_dict(dict_project=dict_project, dict_filters=dict_filters)
# workflowdictlst[:10]
#or
# structure_project=['uuid', 'extras','attributes.kinds'] # Attributes of structure nodes
# workflowdictlst = helpers.get_structure_workflow_dict(structure_project=structure_project)
# workflowdictlst[:20]

### Structure nodes

In [8]:
#!pip install openpyxl

#### Single workflow version

In [9]:
t1 = time.time()

# Structure properties for the first listed version only; the helper is also
# given a JSON filename derived from the mapped version name.
structure_project = ['uuid', 'extras.formula']
structure_nodes = helpers.generate_structure_property_pandas_source(
    version=versions[0],
    workflow_name=workflow_name,
    structure_project=structure_project,
    filename=f"structure_properties_{MAP[versions[0]]}.json",
)
#structure_nodes.head()

all_times.append(time.time() - t1)

#### Multiple workflow versions

In [10]:
t1 = time.time()

filename = 'structure_properties_all.xlsx'

# Use the writer as a context manager: the workbook is written and closed when
# the block exits, even if a helper call raises. This replaces the explicit
# ExcelWriter.save() call, which is deprecated and absent from pandas >= 2.0.
with pd.ExcelWriter(filename) as excel_writer:
    for version in versions:
        structure_project = ['uuid', 'extras.formula']
        structure_nodes = helpers.generate_structure_property_pandas_source(
            version=version,
            workflow_name=workflow_name,
            structure_project=structure_project)
        print(structure_nodes)
        # One sheet per version; MAP supplies the sheet name.
        structure_nodes.to_excel(excel_writer, sheet_name=MAP[version], index=False)

all_times.append(time.time() - t1)

    structure_uuid formula
0         0ccacebb     Al4
1         2c639ddf     Fe2
2         2e6d2ce2     Fe2
3         3a6a57f6     Fe2
4         48475c16     Al4
..             ...     ...
144       f540a37f     Fe2
145       f67d62d7     Al4
146       f999a276     Fe2
147       fb3d7bd9     Si2
148       fcb8cd9d     Si2

[149 rows x 2 columns]
    structure_uuid formula
0         0ccacebb     Al4
1         0ccacebb     Al4
2         2e6d2ce2     Fe2
3         2e6d2ce2     Fe2
4         48475c16     Al4
..             ...     ...
125       f999a276     Fe2
126       fb3d7bd9     Si2
127       fb3d7bd9     Si2
128       fcb8cd9d     Si2
129       fcb8cd9d     Si2

[130 rows x 2 columns]
   structure_uuid formula
0        0ccacebb     Al4
1        2c639ddf     Fe2
2        2e6d2ce2     Fe2
3        48475c16     Al4
4        48c817fb     Fe2
..            ...     ...
95       ef6cd391     Al4
96       f67d62d7     Al4
97       f999a276     Fe2
98       fb3d7bd9     Si2
99       fcb8cd9d 

### Dict nodes

#### Single workflow version

In [11]:
t1 = time.time()

# Single workflow version: use the projections predefined for the first listed version.
dict_project = predifined_workflow.get_workflow(MAP[versions[0]]).projections
dict_nodes = helpers.generate_dict_property_pandas_source(
    workflow_name=workflow_name,
    version=versions[0],
    dict_project=dict_project,
    filename=f"dict_properties_{MAP[versions[0]]}.json",
)
#dict_nodes.head()

all_times.append(time.time() - t1)

#### Multiple workflow versions

In [12]:
t1 = time.time()

filename = 'dict_properties_all.xlsx'

# Use the writer as a context manager: the workbook is written and closed when
# the block exits, even if a helper call raises. This replaces the explicit
# ExcelWriter.save() call, which is deprecated and absent from pandas >= 2.0.
with pd.ExcelWriter(filename) as excel_writer:
    for version in versions:
        # Each version has its own predefined set of dict-node projections.
        dict_project = predifined_workflow.get_workflow(MAP[version]).projections
        dict_nodes = helpers.generate_dict_property_pandas_source(
            workflow_name=workflow_name,
            version=version,
            dict_project=dict_project)
        print(dict_nodes)
        # One sheet per version; MAP supplies the sheet name.
        dict_nodes.to_excel(excel_writer, sheet_name=MAP[version], index=False)

all_times.append(time.time() - t1)

    dict_uuid workflow_version  total_energy total_energy_units  \
0    f453f49c            0.4.2   -971.291643                Htr   
1    1e54bac0            0.4.2  -2545.609813                Htr   
2    eba17028            0.4.2  -2545.607620                Htr   
3    454472d8            0.4.2  -2545.579023                Htr   
4    19c2ba60            0.4.2   -971.289065                Htr   
..        ...              ...           ...                ...   
144  38211f57            0.4.2  -2545.575523                Htr   
145  961affaa            0.4.2   -971.287960                Htr   
146  2c3aa629            0.4.2  -2545.606176                Htr   
147  35d563e9            0.4.2   -580.078179                Htr   
148  89a75332            0.4.2   -580.077895                Htr   

     distance_charge distance_charge_units  total_wall_time  \
0                NaN             me/bohr^3              176   
1           0.000003             me/bohr^3               96   
2     

  dict_uuid                parser_info        energy energy_units  \
0  b57975cc  AiiDA Fleur Parser v0.3.2 -15784.497137           eV   
1  85f80a71  AiiDA Fleur Parser v0.3.2 -15784.497137           eV   
2  fd157b0d  AiiDA Fleur Parser v0.3.2 -15784.497137           eV   

   fermi_energy fermi_energy_units  energy_hartree energy_hartree_units  \
0       0.20591                Htr     -580.069571                  Htr   
1       0.20591                Htr     -580.069571                  Htr   
2       0.20591                Htr     -580.069571                  Htr   

    bandgap bandgap_units  walltime walltime_units  
0  0.613305            eV        13        seconds  
1  0.613305            eV        14        seconds  
2  0.613305            eV        13        seconds  
  dict_uuid workflow_version        energy energy_units  \
0  0358e465            0.3.0 -15784.497137           eV   
1  0358e465            0.3.0 -15784.497137           eV   

  total_magnetic_moment_cell tot

### Combine the two kinds of nodes

#### Single workflow version

In [13]:
t1 = time.time()

# Structure and dict projections for the first listed version, passed together
# to the combined helper along with a JSON filename.
structure_project = ['uuid', 'extras.formula']
dict_project = predifined_workflow.get_workflow(MAP[versions[0]]).projections
combinednodes = helpers.generate_combined_property_pandas_source(
    workflow_name=workflow_name,
    version=versions[0],
    structure_project=structure_project,
    dict_project=dict_project,
    filename=f"combined_properties_{MAP[versions[0]]}.json",
)

all_times.append(time.time() - t1)

#### Multiple workflow versions

In [14]:
t1 = time.time()

filename = 'combined_properties_all.xlsx'

# Use the writer as a context manager: the workbook is written and closed when
# the block exits, even if a helper call raises. This replaces the explicit
# ExcelWriter.save() call, which is deprecated and absent from pandas >= 2.0.
with pd.ExcelWriter(filename) as excel_writer:
    for version in versions:
        structure_project = ['uuid', 'extras.formula']
        # Each version has its own predefined set of dict-node projections.
        dict_project = predifined_workflow.get_workflow(MAP[version]).projections
        combined_nodes = helpers.generate_combined_property_pandas_source(
            workflow_name=workflow_name,
            version=version,
            structure_project=structure_project,
            dict_project=dict_project)
        print(combined_nodes)
        # One sheet per version; MAP supplies the sheet name.
        combined_nodes.to_excel(excel_writer, sheet_name=MAP[version], index=False)

all_times.append(time.time() - t1)


    dict_uuid workflow_version  total_energy total_energy_units  \
0    f453f49c            0.4.2   -971.291643                Htr   
1    1e54bac0            0.4.2  -2545.609813                Htr   
2    eba17028            0.4.2  -2545.607620                Htr   
3    454472d8            0.4.2  -2545.579023                Htr   
4    19c2ba60            0.4.2   -971.289065                Htr   
..        ...              ...           ...                ...   
144  38211f57            0.4.2  -2545.575523                Htr   
145  961affaa            0.4.2   -971.287960                Htr   
146  2c3aa629            0.4.2  -2545.606176                Htr   
147  35d563e9            0.4.2   -580.078179                Htr   
148  89a75332            0.4.2   -580.077895                Htr   

     distance_charge distance_charge_units  total_wall_time  \
0                NaN             me/bohr^3              176   
1           0.000003             me/bohr^3               96   
2     

  dict_uuid workflow_version        energy energy_units  \
0  0358e465            0.3.0 -15784.497137           eV   
1  0358e465            0.3.0 -15784.497137           eV   

  total_magnetic_moment_cell total_magnetic_moment_cell_units structure_uuid  \
0                       None                           muBohr       59d8479e   
1                       None                           muBohr       59d8479e   

  formula  
0     Si2  
1     Si2  



# Interactive plot

### Check data source before plotting

#### Single workflow version

In [2]:
t1 = time.time()  # this timer is stopped in the following cell, after filtering

combined_json = 'combined_properties_wf_0_4_2.json'
df = helpers.read_json_file(combined_json)
df

Unnamed: 0,dict_uuid,workflow_version,total_energy,total_energy_units,distance_charge,distance_charge_units,total_wall_time,total_wall_time_units,structure_uuid,formula
0,f453f49c,0.4.2,-971.291643,Htr,,me/bohr^3,176,s,0ccacebb,Al4
1,1e54bac0,0.4.2,-2545.609813,Htr,0.000003,me/bohr^3,96,s,2c639ddf,Fe2
2,eba17028,0.4.2,-2545.607620,Htr,,me/bohr^3,8,s,2e6d2ce2,Fe2
3,454472d8,0.4.2,-2545.579023,Htr,0.000049,me/bohr^3,16,s,3a6a57f6,Fe2
4,19c2ba60,0.4.2,-971.289065,Htr,,me/bohr^3,205,s,48475c16,Al4
...,...,...,...,...,...,...,...,...,...,...
144,38211f57,0.4.2,-2545.575523,Htr,,me/bohr^3,2,s,f540a37f,Fe2
145,961affaa,0.4.2,-971.287960,Htr,0.000001,me/bohr^3,1895,s,f67d62d7,Al4
146,2c3aa629,0.4.2,-2545.606176,Htr,,me/bohr^3,74,s,f999a276,Fe2
147,35d563e9,0.4.2,-580.078179,Htr,,me/bohr^3,23,s,fb3d7bd9,Si2


In [3]:
# Attributes to plot against each other; presumably rows lacking either value
# are dropped by the helper (the displayed result has fewer rows than df).
x_attr, y_attr = 'total_energy', 'distance_charge'
filtered_df, xdata, ydata = helpers.filter_missing_value(df, x_attr, y_attr)
all_times.append(time.time() - t1)

filtered_df

Unnamed: 0,dict_uuid,workflow_version,total_energy,total_energy_units,distance_charge,distance_charge_units,total_wall_time,total_wall_time_units,structure_uuid,formula
0,1e54bac0,0.4.2,-2545.609813,Htr,3.209900e-06,me/bohr^3,96,s,2c639ddf,Fe2
1,454472d8,0.4.2,-2545.579023,Htr,4.898610e-05,me/bohr^3,16,s,3a6a57f6,Fe2
2,a26834b6,0.4.2,-2545.607449,Htr,2.787600e-06,me/bohr^3,51,s,50f820fc,Fe2
3,e31327d1,0.4.2,-2545.601842,Htr,2.385700e-06,me/bohr^3,30,s,6405731f,Fe2
4,ea0c2f2a,0.4.2,-2545.601842,Htr,2.302000e-06,me/bohr^3,28,s,6405731f,Fe2
...,...,...,...,...,...,...,...,...,...,...
70,71c0daa5,0.4.2,-580.071949,Htr,1.696322e-04,me/bohr^3,100,s,e7ab9f49,Si2
71,44e907ad,0.4.2,-2545.579023,Htr,4.899230e-05,me/bohr^3,16,s,eb40a6db,Fe2
72,6840fbac,0.4.2,-971.288176,Htr,9.440000e-08,me/bohr^3,2000,s,ef6cd391,Al4
73,7d27ac27,0.4.2,-2545.579023,Htr,4.899850e-05,me/bohr^3,16,s,f4cfcdb6,Fe2


#### Multiple workflow versions

In [4]:
# NOTE(review): this timer is restarted before the next all_times.append(), so
# the Excel loading time is never recorded - confirm whether that is intended.
t1 = time.time()

combined_xlsx = 'combined_properties_all.xlsx'
# The result is iterated with .items() below, i.e. a mapping of name -> DataFrame.
dfs = helpers.read_excel_file(combined_xlsx)
print(dfs)

{'wf_0_4_2':     dict_uuid workflow_version  total_energy total_energy_units  \
0    f453f49c            0.4.2   -971.291643                Htr   
1    1e54bac0            0.4.2  -2545.609813                Htr   
2    eba17028            0.4.2  -2545.607620                Htr   
3    454472d8            0.4.2  -2545.579023                Htr   
4    19c2ba60            0.4.2   -971.289065                Htr   
..        ...              ...           ...                ...   
144  38211f57            0.4.2  -2545.575523                Htr   
145  961affaa            0.4.2   -971.287960                Htr   
146  2c3aa629            0.4.2  -2545.606176                Htr   
147  35d563e9            0.4.2   -580.078179                Htr   
148  89a75332            0.4.2   -580.077895                Htr   

     distance_charge distance_charge_units  total_wall_time  \
0                NaN             me/bohr^3              176   
1           0.000003             me/bohr^3              

In [5]:
from helpers import INVMAP

# Per-version tables keyed by the mapped version name (the keys of `dfs`):
#   df_all      -> filtered DataFrame
#   OPTIONS_all -> first value returned by helpers.get_attrs_and_units
#   UNITS_all   -> second value returned by helpers.get_attrs_and_units
df_all, OPTIONS_all, UNITS_all = {}, {}, {}
# `versions` is rebuilt here so that it only lists versions with usable data;
# `mversions` holds the corresponding mapped names.
versions, mversions = [], []

# Loop-local names are kept distinct from the notebook-level `df` (the
# single-version frame loaded earlier), so that re-running the single-version
# cells after this one still operates on the intended data.
for sheet_name, sheet_df in dfs.items():
    available_df = helpers.filter_unavailable_df(sheet_df)
    if available_df.empty:
        # Nothing usable for this version: leave it out of every table.
        continue
    df_all[sheet_name] = available_df
    OPTIONS_all[sheet_name], UNITS_all[sheet_name] = helpers.get_attrs_and_units(available_df)
    mversions.append(sheet_name)
    versions.append(INVMAP[sheet_name])


### Interactive plot by Bokeh

In [6]:
t1 = time.time()

# Plot total_energy against distance_charge from the single-version JSON file.
helpers.bokeh_struc_prop_vis(
    'combined_properties_wf_0_4_2.json',
    'total_energy',
    'distance_charge',
    output_filename="vis_wf042.html",
    axis_type=['linear', 'linear'],
    nbins=40,
)

all_times.append(time.time() - t1)

### Interactive plot using Bokeh server application

In [None]:
# In vscode terminal:
# bokeh serve --show --port 5001 bokehplotting.py

In [None]:
# Database statistics for the file header; the counts are placeholders (None).
npro = None  #sum(node_count.values())
ndata = None  #sum(process_count.values())
totalnodes = None
size = 34

# NOTE(review): the header names seven timing columns, while the number of
# values written equals len(all_times), which depends on the cells that ran.
header_template = (
    '# Timings of D2 in seconds\n# Database info: {} nodes; {} processes, {} data, {} MB size \n'
    '# Preprocess structures, Prepare Structure, Prepare Dict, Join Data, Loading file, Plot 1, Plot 2\n'
)
header = header_template.format(totalnodes, npro, ndata, size)

# All timings on one line, each followed by two spaces.
timestring = ''.join('{}  '.format(elapsed) for elapsed in all_times)

with open('all_times_D2_iffaiida.txt', 'w') as timings_file:
    timings_file.writelines([header, timestring])