# Structure Property visualizer

This is the second of two deliverables for the SiSc-Lab2020 project.

Authors: Sijie Luo and Anna Garoufali

Supervisors: Jens Bröder, Dr. Daniel Wortmann, Johannes Wasmer, Prof. Dr. Stefan Blügel.

In [None]:
# Imports
%load_ext autoreload
%autoreload 2
%matplotlib notebook

# python imports:
from collections import Counter
import time
import numpy as np
import pandas as pd
#from pprint import pprint

import helpers

# Wall-clock durations (seconds) of the timed cells below, appended in
# execution order; the last cell writes them to a text file.
all_times = []

In [None]:
# aiida imports:
from aiida import load_profile
# Called without arguments; the returned profile's name is printed below.
# NOTE(review): presumably this selects the default AiiDA profile — confirm.
profile = load_profile()

#from aiida_jutools.sisc_lab import helpers
from bokeh.io import output_notebook

# Configure Bokeh to render its output inline in the notebook.
output_notebook()
helpers.print_bold(f"This notebook/dashboard will visualize the contents from the database of profile {profile.name}")

### Check workflows and versions

In [None]:
# Timed step: set structure formulas, then collect the workflows and versions.
t1 = time.time()
# Preprocessing: Set formula attributes for all the structure nodes
helpers.set_structure_formula()

# Optional alternative: restrict the query to a single workflow type.
# NOTE(review): this variant assigns only workflowdictlst, so versionslst
# (needed by later cells) would stay undefined.
# workflow_name = 'fleur_scf_wc' # Filter workflow
# workflow_filters = {'attributes.process_label' : {'==' : workflow_name}}
# workflowdictlst = helpers.get_structure_workflow_dict(workflow_filters=workflow_filters)
#or
workflow_name = None # No restriction. Querying by default
# In this call the result is unpacked into two values; versionslst is later
# iterated as (key, value) pairs.
workflowdictlst, versionslst = helpers.get_structure_workflow_dict(timing=True, check_version=True)

# Only the two helper calls above are timed; the printing below is not.
all_times.append(time.time()-t1)

print("Number of the workflows: ", len(workflowdictlst), '\n')
print("Workflows: ")
# Last expression of the cell: displays the first two entries as a preview.
workflowdictlst[:2]


In [None]:
from helpers import MAP
from helpers import predifined_workflow

# versionslst holds (key, value) pairs; only the keys are kept as the
# list of versions. The second element of each pair is not needed here.
versions = [version_key for version_key, _ in versionslst]
print(versions)

### Structure nodes

In [None]:
#!pip install openpyxl

#### Single workflow version

In [None]:
# Timed step: build the structure-property table for the first version only.
t1 = time.time()

# Structure-node fields requested from the helper.
structure_project = ['uuid', 'extras.formula']
# The output file name embeds the mapped name of the first version.
# NOTE(review): presumably the helper writes the table to `filename` — confirm.
structure_nodes = helpers.generate_structure_property_pandas_source(
    version=versions[0],
    workflow_name=workflow_name,
    structure_project=structure_project,
    filename=f"structure_properties_{MAP[versions[0]]}.json",
)
# structure_nodes.head()

all_times.append(time.time() - t1)

#### Multiple workflow versions

In [None]:
# Timed step: export the structure-property table of every version into one
# Excel workbook, one sheet per version (sheet name taken from MAP).
t1 = time.time()

filename = 'structure_properties_all.xlsx'

# Use the writer as a context manager: the workbook is saved and the file
# handle closed on exit, also when an iteration raises. The previously used
# ExcelWriter.save() no longer exists in pandas >= 2.0.
with pd.ExcelWriter(filename) as excel_writer:
    for version in versions:
        # Fresh list per iteration, as before, in case the helper modifies it.
        structure_project = ['uuid', 'extras.formula']
        structure_nodes = helpers.generate_structure_property_pandas_source(
            version=version,
            workflow_name=workflow_name,
            structure_project=structure_project)
        print(structure_nodes)
        structure_nodes.to_excel(excel_writer, sheet_name=MAP[version], index=False)

all_times.append(time.time() - t1)

### Dict nodes

#### Single workflow version

In [None]:
# Timed step: build the Dict-node property table for the first version only.
t1 = time.time()

# Single workflow version
# The projections come from the predefined workflow registered under the
# mapped name of that version.
dict_project = predifined_workflow.get_workflow(MAP[versions[0]]).projections
# NOTE(review): presumably the helper writes the table to `filename` — confirm.
dict_nodes = helpers.generate_dict_property_pandas_source(
    workflow_name=workflow_name,
    version=versions[0],
    dict_project=dict_project,
    filename=f"dict_properties_{MAP[versions[0]]}.json",
)
# dict_nodes.head()

all_times.append(time.time() - t1)

#### Multiple workflow versions

In [None]:
# Timed step: export the Dict-node property table of every version into one
# Excel workbook, one sheet per version (sheet name taken from MAP).
t1 = time.time()

filename = 'dict_properties_all.xlsx'

# Use the writer as a context manager: the workbook is saved and the file
# handle closed on exit, also when an iteration raises. The previously used
# ExcelWriter.save() no longer exists in pandas >= 2.0.
with pd.ExcelWriter(filename) as excel_writer:
    for version in versions:
        # Projections are looked up per version from the predefined workflows.
        dict_project = predifined_workflow.get_workflow(MAP[version]).projections
        dict_nodes = helpers.generate_dict_property_pandas_source(
            workflow_name=workflow_name,
            version=version,
            dict_project=dict_project)
        print(dict_nodes)
        dict_nodes.to_excel(excel_writer, sheet_name=MAP[version], index=False)

all_times.append(time.time() - t1)

### Combine two kinds of nodes

#### Single workflow version

In [None]:
# Timed step: build the combined (structure + Dict) property table for the
# first version only.
t1 = time.time()

structure_project = ['uuid', 'extras.formula']
dict_project = predifined_workflow.get_workflow(MAP[versions[0]]).projections
# NOTE(review): presumably the helper writes the table to `filename`; a later
# cell reads a 'combined_properties_*.json' file — confirm.
combinednodes = helpers.generate_combined_property_pandas_source(
    workflow_name=workflow_name,
    version=versions[0],
    structure_project=structure_project,
    dict_project=dict_project,
    filename=f"combined_properties_{MAP[versions[0]]}.json",
)

all_times.append(time.time() - t1)

#### Multiple workflow versions

In [None]:
# Timed step: export the combined (structure + Dict) property table of every
# version into one Excel workbook, one sheet per version (sheet name from MAP).
t1 = time.time()

filename = 'combined_properties_all.xlsx'

# Use the writer as a context manager: the workbook is saved and the file
# handle closed on exit, also when an iteration raises. The previously used
# ExcelWriter.save() no longer exists in pandas >= 2.0.
with pd.ExcelWriter(filename) as excel_writer:
    for version in versions:
        # Fresh list per iteration, as before, in case the helper modifies it.
        structure_project = ['uuid', 'extras.formula']
        dict_project = predifined_workflow.get_workflow(MAP[version]).projections
        combined_nodes = helpers.generate_combined_property_pandas_source(
            workflow_name=workflow_name,
            version=version,
            structure_project=structure_project,
            dict_project=dict_project)
        print(combined_nodes)
        combined_nodes.to_excel(excel_writer, sheet_name=MAP[version], index=False)

all_times.append(time.time() - t1)



# Interactive plot

### Check data source before plotting

#### Single workflow version

In [None]:
# Start the timer here; the elapsed time is appended in the next cell,
# after the missing-value filtering.
t1 = time.time()
# NOTE(review): the file name is hard-coded; presumably it is the JSON produced
# by the single-version "combine" cell — confirm the version suffix matches.
df = helpers.read_json_file('combined_properties_wf_0_4_2.json')


In [None]:
# The helper returns three values for the two named columns.
# NOTE(review): presumably rows lacking either column are dropped and
# xdata/ydata are the remaining column values — verify in helpers.
filtered_df, xdata, ydata = helpers.filter_missing_value(df,'total_energy', 'distance_charge')
# Closes the timer started in the previous cell (before the JSON file is read).
all_times.append(time.time()-t1)


#### Multiple workflow versions

In [None]:
# Start the timer; the elapsed time is appended at the end of the next cell.
t1 = time.time()
# dfs is iterated with .items() in the next cell, i.e. it is a mapping.
# NOTE(review): presumably sheet name -> DataFrame — confirm in helpers.
dfs = helpers.read_excel_file('combined_properties_all.xlsx')


In [None]:
from helpers import INVMAP

# Per-key containers for the filtered tables and their attribute/unit info.
df_all = {}
OPTIONS_all = {}
UNITS_all = {}
# NOTE: `versions` is rebuilt here and replaces the list from earlier cells;
# it ends up holding only the keys whose filtered table is non-empty.
versions = []
mversions = []

for key, df in dfs.items():
    df = helpers.filter_unavailable_df(df)
    # Skip keys for which nothing is left after filtering.
    if df.empty:
        continue
    df_all[key] = df
    OPTIONS_all[key], UNITS_all[key] = helpers.get_attrs_and_units(df)
    mversions.append(key)
    # INVMAP translates the key back to its version identifier.
    versions.append(INVMAP[key])

# Closes the timer started in the cell that read the Excel file.
all_times.append(time.time() - t1)

### Interactive plot by Bokeh

In [None]:
# Timed step: build the interactive Bokeh plot of total_energy against
# distance_charge from the single-version JSON file.
t1 = time.time()

helpers.bokeh_struc_prop_vis(
    'combined_properties_wf_0_4_2.json',
    'total_energy',
    'distance_charge',
    output_filename="vis_wf042.html",
    axis_type=['linear', 'linear'],
    nbins=40,
)

all_times.append(time.time() - t1)

### Interactive plot using Bokeh server application

In [None]:
# In vscode terminal:
# bokeh serve --show --port 5001 bokehplotting.py

In [None]:
# Write the collected cell timings to a text file, preceded by a header.
# The database statistics are placeholders (None) except for the size.
# NOTE(review): the commented hints look swapped (npro <- node_count,
# ndata <- process_count) — confirm before enabling them.
npro = None  # sum(node_count.values())
ndata = None  # sum(process_count.values())
totalnodes = None
size = 34  # hard-coded; reported as "MB size" in the header
# NOTE(review): the header names 7 steps, whereas the cells visible above
# append 10 timings when all of them are run — confirm the column labels.
header = (
    '# Timings of D2 in seconds\n# Database info: {} nodes; {} processes, {} data, {} MB size \n'
    '# Preprocess structures, Prepare Structure, Prepare Dict, Join Data, Loading file, Plot 1, Plot 2\n'
).format(totalnodes, npro, ndata, size)
# One entry per timing, each followed by two spaces (same layout as before),
# built with join instead of repeated string concatenation.
timestring = ''.join('{}  '.format(elapsed) for elapsed in all_times)
with open('all_times_D2_iffaiida.txt', 'w') as file1:
    file1.write(header)
    file1.write(timestring)