In [None]:
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd


In [None]:
# Load profile
# The profile is loaded first so that the ORM imports and QueryBuilder cells
# below have a database to work against.
from aiida import load_profile
# NOTE(review): called without arguments — presumably this selects the default profile; confirm.
load_profile() # Loading profile for query builder

# Import aiida-core Node classes from aiida.orm
from aiida.orm import load_node, Node, Group, Computer, User, Code
from aiida.orm import CalcFunctionNode, CalcJobNode, WorkFunctionNode, WorkChainNode

# Import Core data types
from aiida.orm import Int, Float, Str, Bool, List, Dict, ArrayData, XyData, SinglefileData, FolderData, RemoteData
# Import Material science data classes via the DataFactory
from aiida.plugins import DataFactory
# Resolve each data class from its entry-point name; the names on the left
# line up one-to-one, in order, with the entry points listed below.
StructureData, KpointsData, TrajectoryData, BandsData, UpfData = (
    DataFactory(entry_point)
    for entry_point in (
        'structure',
        "array.kpoints",
        "array.trajectory",
        "array.bands",
        'upf',
    )
)



In [None]:
from aiida.orm import QueryBuilder

How large is the database?

In [None]:
# Count every node in the database; NN is reused by the summary cell further down.
qb = QueryBuilder()
qb.append(Node)
NN = qb.count() # Number of Nodes
print(NN)

# Show only a small preview: calling `.all()` on the unrestricted query would
# load every node in the database and echo the whole list as cell output.
preview_qb = QueryBuilder()
preview_qb.append(Node)
preview_qb.limit(5)
preview_qb.all()

What kinds of nodes are in there?

In [None]:
# Data Nodes
core_data_list = [Int, Float, Str, Bool, List, Dict, ArrayData, XyData, SinglefileData, FolderData, RemoteData]
MS_data_list = [StructureData, KpointsData, TrajectoryData, UpfData, BandsData, Code]
# Plain list concatenation: these are Python classes, not numeric data.
data_list = core_data_list + MS_data_list
data_num = {}
for data_cls in data_list:
    qb = QueryBuilder()
    # subclassing=False counts only nodes of exactly this class. By default
    # QueryBuilder.append also matches subclasses, so a subclass listed here
    # (e.g. XyData under ArrayData) would be counted twice and inflate ND.
    qb.append(data_cls, subclassing=False)
    n_nodes = qb.count()  # run the count query once per class
    if n_nodes != 0: # Record only data types that are actually present
        data_num[data_cls.__name__] = n_nodes
print(data_num)
ND = sum(data_num.values()) # Number of Data Nodes
print("Number of Data Nodes: ", ND)

In [None]:
# Process Nodes
process_list = [CalcFunctionNode, CalcJobNode, WorkFunctionNode, WorkChainNode, Computer, Group]
process_num = {}
NP = 0 # Number of Process Nodes
for entity_cls in process_list:
    qb = QueryBuilder()
    qb.append(entity_cls)
    n_entities = qb.count()  # run the count query once per class
    if n_entities == 0: # Record only types that are actually present
        continue
    process_num[entity_cls.__name__] = n_entities
    # Computer and Group are ORM entities but not nodes. They are still
    # reported in process_num, but must not be added to NP: NP is later
    # subtracted from the total number of *nodes*.
    if issubclass(entity_cls, Node):
        NP += n_entities
print(process_num)
print("Number of Process Nodes: ", NP)


In [None]:
# Summary and plot
# Merge the per-type counts and put whatever is left of the node total
# into a single 'Others' category.
nodes_num = {}
nodes_num.update(data_num)
nodes_num.update(process_num)
nodes_num['Others'] = NN - ND - NP

# Materialise the dict views as lists: the wedge sizes have to be converted
# to a numeric array, which does not work on a `dict_values` view.
pie_labels = list(nodes_num.keys())
pie_sizes = list(nodes_num.values())

# Left axes: the pie itself. Right axes: used only as a canvas for the legend.
fig, (ax1, ax2) = plt.subplots(figsize=(18,10), ncols=2)
patches, _ = ax1.pie(pie_sizes, labels=pie_labels)
ax1.set_title("Node counts by type")
ax1.axis('equal')
ax2.axis('off')
ax2.legend(patches, pie_labels, loc='center left')
plt.show()

In [None]:
# Interactive visualize by Bokeh
from bokeh.io import output_notebook
from bokeh.plotting import figure,show
from bokeh.models import ColumnDataSource
from bokeh.palettes import Category20


In [None]:
nodes = list(nodes_num.keys())
counts = list(nodes_num.values())
# Categories ordered by ascending count; keys are unique, so looking the
# count up in the dict replaces the quadratic `counts[nodes.index(x)]`.
sorted_nodes = sorted(nodes, key=nodes_num.get)

# One colour per bar. A fixed-size palette such as Category20[14] only works
# when there are exactly 14 categories, because every ColumnDataSource column
# must have the same length. Cycle through the 20-colour palette instead.
palette = Category20[20]
colors = [palette[i % len(palette)] for i in range(len(nodes))]
source = ColumnDataSource(data=dict(y=nodes, right=counts, color=colors))

# Enable inline notebook output before the figure is built and shown.
output_notebook()

# Derive the x range from the data (with 5% headroom) rather than hard-coding it.
x_max = max(max(counts, default=0), 1) * 1.05
# NOTE(review): newer Bokeh releases rename `plot_height` to `height` — confirm against the installed version.
p = figure(y_range=sorted_nodes, plot_height=450, x_range=(0, x_max), title="Nodes Counts")
p.hbar(y="y", right="right", height=0.5, left=0, color='color', legend_field="y", source=source)

p.xgrid.grid_line_color = None
p.legend.location = "bottom_right"

show(p)
# If the plot does not appear, re-run this cell.

Who created the data and when?

In [None]:
def find_data_info(PK):
    """Print which user created a node and when it was created.

    The information is fetched with a single projected query on the node's
    ``user_id`` and ``ctime`` columns; the node itself is not loaded.

    :param PK: primary key (``id``) of the node to look up.
    :raises NotExistent: if no node with this primary key is found.
    """
    from aiida.common.exceptions import NotExistent

    qb = QueryBuilder()
    qb.append(Node, project=['user_id','ctime'], filters={'id':{'==':PK}})
    row = qb.first()  # at most one node can match a primary key
    if row is None:
        raise NotExistent("No node with PK {} found.".format(PK))
    uid, ctime = row
    print("Node {} is created by user_id {} at time {}.".format(PK, uid, ctime))


# NOTE(review): 500 is a hard-coded PK — presumably specific to the author's database; adjust as needed.
find_data_info(500)


For process nodes: which calculation types and workflow types are in there?

In [None]:
# For every kind of process node, print one row per stored node with its
# id, node type, process type and description.
process_list = [CalcFunctionNode, CalcJobNode, WorkFunctionNode, WorkChainNode]
for process_cls in process_list:
    qb = QueryBuilder()
    qb.append(
        process_cls,
        project=['id','node_type','process_type','description'],
    )
    section_header = "\nFor {}:".format(process_cls.__name__)
    print(section_header)
    rows = qb.all()
    for row in rows:
        print(row)
    

In [None]:
# List processes in all states.
# The leading `!` hands the line to the shell, so this runs the `verdi`
# command-line tool rather than Python code.
!verdi process list -a

Dict type nodes

In [None]:
# Preview a handful of Dict nodes rather than fetching all of them.
preview_size = 5
qb = QueryBuilder()
qb.append(cls=Dict)
qb.limit(preview_size)
qb.all()

In [None]:
# Choose one Dict node to show (shell call to the `verdi` command-line tool).
# NOTE(review): 5696 is a hard-coded PK — presumably specific to the author's
# database; replace it with a PK returned by the Dict query above.
!verdi data dict show 5696

In [None]:
# Load the chosen Dict node by primary key and display its content as a
# plain Python dictionary.
dict_pk = 5696
dictnode = load_node(dict_pk)
dictdata = dictnode.get_dict()
dictdata

Metadata of the StructureData nodes: collect the distribution of the number of atoms these structures have and which elements they contain.

In [None]:
# StructureData
# Count all StructureData nodes stored in the database.
qb = QueryBuilder()
qb.append(StructureData)
print(qb.count())

# `qb.all()` returns the complete result list; the slice then keeps only the
# results at positions 10 and 11. Each result row is a one-element sequence,
# which the trailing comma in `structure,` unpacks.
for structure, in qb.all()[10:12]:
    print(type(structure.sites[0]))
    print(structure.sites)