# Generate webpage plots for calculation_cohesive_energy_relation records

This Notebook is designed for reading finished calculation_cohesive_energy_relation records and generating the associated webpage content.

#### Library imports

In [1]:
# Standard Python libraries
from __future__ import print_function
import glob
import os
from copy import deepcopy
from collections import OrderedDict
from datetime import date
from math import floor

from IPython.core.display import display, HTML

import analysis
# pandas.pydata.org
import pandas as pd

# http://www.numpy.org/
import numpy as np

# https://github.com/usnistgov/DataModelDict
from DataModelDict import DataModelDict as DM

# https://github.com/usnistgov/atomman
import atomman as am
import atomman.lammps as lmp
import atomman.unitconvert as uc

# https://github.com/usnistgov/iprPy
import iprPy

#### Plotting library imports

In [2]:
# https://bokeh.pydata.org/
import bokeh
import bokeh.plotting
import bokeh.resources
import bokeh.io
import bokeh.models
# Record the bokeh version in the cell output for provenance
print('bokeh version =', bokeh.__version__)
# Configure bokeh to render plots inline in this notebook
bokeh.io.output_notebook()

bokeh version = 0.12.7


## 1. Read Calculation Data

This section reads in raw data from a database. 

### 1.1 Initialize database

- __dbasename__ is used here to predefine different dbase settings
- __dbase__ is the iprPy.Database object to use for accessing a database

In [3]:
# Name of the predefined database settings to use; must match one branch below
dbasename = 'iprhub'

# NOTE(review): user names, passwords and absolute local paths are hard-coded
# below. Consider reading them from environment variables or a config file.

# 'local' is a local directory
if   dbasename == 'local':
    # Raw string keeps the Windows backslashes literal on Python 2 and 3
    dbase = iprPy.Database('local',   host=r'C:\Users\lmh1\Documents\calculations\ipr\library')

# 'test' is a local directory for testing
# (must be elif: a separate "if" here sent 'local' into the final else branch)
elif dbasename == 'test':
    dbase = iprPy.Database('local',   host=r'C:\Users\lmh1\Documents\calculations\ipr\library_test')

# 'curator' is a local MDCS curator
elif dbasename == 'curator':
    dbase = iprPy.Database('curator', host='http://127.0.0.1:8000/', 
                                      user='admin', 
                                      pswd='admin')

# 'iprhub' is the remote MDCS curator at iprhub
elif dbasename == 'iprhub':
    dbase = iprPy.Database('curator', host='https://iprhub.nist.gov/', 
                                      user='lmh1',
                                      pswd='C:/users/lmh1/documents/iprhub/iprhub_password.txt',
                                      cert='C:/users/lmh1/documents/iprhub/iprhub-ca.pem')
else:
    raise ValueError('unknown dbasename ' + dbasename)

### 1.2 Access records

In [4]:
# Load every crystal prototype record into a DataFrame and report how many were found
proto_df = dbase.get_records_df(style='crystal_prototype')
print('%d prototype records loaded' % len(proto_df))

19 prototype records loaded


In [5]:
# Load every LAMMPS potential implementation record into a DataFrame and report how many were found
pot_df = dbase.get_records_df(style='potential_LAMMPS')
print('%d potential records loaded' % len(pot_df))

156 potential records loaded


In [7]:
# Load every cohesive energy relation calculation record into a DataFrame and report how many were found
raw_df = dbase.get_records_df(style='calculation_cohesive_energy_relation')
print('%d calculation records loaded' % len(raw_df))

7863 calculation records loaded


In [8]:
# Show the column names available in the raw calculation records
raw_df.columns

Index([u'LAMMPS_version', u'calc_key', u'calc_script', u'e_vs_r_plot',
       u'error', u'family', u'iprPy_version', u'load_file', u'load_options',
       u'load_style', u'maximum_r', u'minimum_r', u'number_min_states',
       u'number_of_steps_r', u'potential_LAMMPS_id', u'potential_LAMMPS_key',
       u'potential_id', u'potential_key', u'sizemults', u'status', u'symbols'],
      dtype='object')

### 1.3 Check errors

In [9]:
# Print each distinct error message once, separated by blank lines
if 'error' in raw_df:
    reported = raw_df.error[raw_df.error.notnull()]
    for message in np.unique(reported):
        print(message)
        print()

--------------------------------------------------------------------------
no active ports detected (or Open MPI was unable to use them).  This
is most certainly not what you wanted.  Check your cables, subnet
manager configuration, etc.  The openib BTL will be ignored for this
job.

  Local host: r024
--------------------------------------------------------------------------
--------------------------------------------------------------------------
no active ports detected (or Open MPI was unable to use them).  This
is most certainly not what you wanted.  Check your cables, subnet
manager configuration, etc.  The openib BTL will be ignored for this
job.

  Local host: r024
--------------------------------------------------------------------------
Traceback (most recent call last):
  File "calc_E_vs_r_scan.py", line 218, in <module>
    main(*sys.argv[1:])
  File "calc_E_vs_r_scan.py", line 50, in main
    rsteps = input_dict['number_of_steps_r'])
  File "calc_E_vs_r_scan.py", line 134

## 2. Process Data

This section processes and refines the data.

### 2.1 Identify composition

We need to identify the composition of each calculation so that we can collect duplicates and filter out artificial compounds.

- __counts__ is a dictionary counting the number of times each atype appears in a crystal prototype's unit cell (i.e. the number of symmetry equivalent sites)

In [10]:
# Map each prototype id to the per-value occurrence counts of its 'component' entries
counts = {}
for _, prototype in proto_df.iterrows():
    record = dbase.get_record(name=prototype.id, style='crystal_prototype')
    components = DM(record.content).finds('component')
    # np.unique(..., return_counts=True)[1] holds only the counts, ordered by sorted unique value
    counts[prototype.id] = np.unique(components, return_counts=True)[1]

- __comp_refine()__ takes a list of symbols and the count of how many times each symbol appears in a structure, and generates a reduced composition string.

In [11]:
def comp_refine(symbols, counts):
    """
    Generate a reduced composition string from symbols and their counts.

    Parameters
    ----------
    symbols : list of str
        The element symbols.
    counts : sequence of int
        The number of times each symbol appears; counts[i] belongs to
        symbols[i], so it must be at least as long as symbols.

    Returns
    -------
    str
        The symbols in sorted order, each followed by its count once the
        counts have been divided by their common prime factors (up to 47).
        Counts of 1 are not printed and symbols with a count of 0 are
        omitted.  An empty string is returned if no symbols are given.
    """
    primes = [2,3,5,7,11,13,17,19,23,29,31,37,41,43,47]
    
    # Pair each symbol with its count (a repeated symbol keeps its last count)
    sym_dict = {}
    for i in range(len(symbols)):
        sym_dict[symbols[i]] = counts[i]
    
    # Nothing to reduce or print for empty input
    if not sym_dict:
        return ''
    
    for prime in primes:
        # No remaining prime can divide every count once it exceeds the largest
        if max(sym_dict.values()) < prime:
            break
        
        # Divide out this prime for as long as it is common to all counts
        while all(value % prime == 0 for value in sym_dict.values()):
            for key in sym_dict:
                # Floor division keeps the counts integral on Python 2 and 3
                sym_dict[key] //= prime
    
    composition = ''
    for key in sorted(sym_dict):
        if sym_dict[key] > 0:
            composition += key
            if sym_dict[key] != 1:
                composition += str(sym_dict[key])
    
    return composition

In [12]:
# Build the composition string of every calculation from its symbols and
# the per-prototype counts of its crystal family
compositions = [comp_refine(calc.symbols, counts[calc.family])
                for _, calc in raw_df.iterrows()]
raw_df = raw_df.assign(composition=compositions)

### 2.2 Identify current ipr potentials 

In [13]:
# Extract versionstyle and versionnumber from potential implementation ids.
# The version is the last '--' delimited term of the id, e.g. 'ipr1'.
versionstyle = []
versionnumber = []
for name in pot_df['id'].values:
    version = name.split('--')[-1]
    
    # Split off ALL trailing digits so multi-digit versions (e.g. 'ipr10')
    # parse as ('ipr', 10) instead of ('ipr1', 0)
    style = version.rstrip('0123456789')
    digits = version[len(style):]
    
    if digits:
        versionstyle.append(style)
        versionnumber.append(int(digits))
    else:
        # No trailing number: keep the whole term as the style
        versionstyle.append(version)
        versionnumber.append(np.nan)

pot_df['versionstyle'] = versionstyle
pot_df['versionnumber'] = versionnumber

# Loop through unique potential id's and keep the highest numbered 'ipr'
# implementation of each
includeid = []
for pot_id in np.unique(pot_df.pot_id.values):
    check_df = pot_df[pot_df.pot_id == pot_id]
    check_df = check_df[check_df.versionstyle == 'ipr']
    check_df = check_df[check_df.versionnumber == check_df.versionnumber.max()]
    if len(check_df) == 1:
        includeid.append(check_df['id'].values[0])
    elif len(check_df) > 1:
        # Two implementations share the highest 'ipr' version number
        raise ValueError('Bad currentIPR check for '+pot_id)

# Identify current IPR potentials
raw_df['currentIPR'] = raw_df.potential_LAMMPS_id.isin(includeid)

### 2.3 Remove unwanted calculations

Here is where we filter out unwanted entries (i.e. rows).

- __df__ is the dataframe during/after processing and refining

In [14]:
# Work on a copy so that raw_df stays untouched and this cell can be re-run
df = deepcopy(raw_df)

# Ignore unfinished or error calculations
df = df[df.status == 'finished']

# Ignore any implementations that are not current IPR implementations
df = df[df.currentIPR == True]

# Ignore any that don't use the standard run parameters
df = df[np.isclose(df.minimum_r, 2.0) & 
        np.isclose(df.maximum_r, 6.0) & 
        (df.number_of_steps_r == 200)]

# Ignore false compounds (where # of unique symbols != # of symbols)
n_unique_symbols = df.symbols.apply(lambda x: len(np.unique(x)))
n_symbols = df.symbols.apply(lambda x: len(x))
df = df[n_unique_symbols == n_symbols]

# Ignore duplicate compounds: keep only the first record for each
# (potential, family, composition) combination.  This replaces a row-by-row
# scan that built a new boolean mask for every record.
# NOTE(review): assumes the three key columns contain no missing values and
# that calc_key is unique per record -- TODO confirm.
is_duplicate = df.duplicated(subset=['potential_id', 'family', 'composition'],
                             keep='first')
df = df[~is_duplicate]

df = df.reset_index(drop=True)
print(str(len(df)) + ' records after filtering')

3364 records after filtering


### 2.4 Filter out extra data

Here, we limit the DataFrame to only the data that we care about (i.e. columns).

- __headers__ lists the columns of raw_df to keep, and gives the name that each one is renamed to in df.

In [15]:
#                        raw names       new names
headers = OrderedDict([ ('potential_id', 'potential'  ),
                        ('family',       'family'     ),
                        ('composition',  'composition'),
                        ('e_vs_r_plot',  'e_vs_r_plot')])

df = pd.DataFrame(df, columns=headers.keys())
df.rename(columns=headers, inplace=True)
df

Unnamed: 0,potential,family,composition,e_vs_r_plot
0,2006--Chamati-H--Fe,A7--alpha-As,Fe,E_coh a r 0 2.4...
1,2006--Chamati-H--Fe,A5--beta-Sn,Fe,E_coh a r 0 4....
2,2006--Chamati-H--Fe,A2--W--bcc,Fe,E_coh a r 0 6.3...
3,2006--Chamati-H--Fe,A3--Mg--hcp,Fe,E_coh a r 0 1.0...
4,2006--Chamati-H--Fe,A4--C--dc,Fe,E_coh a r 0 5....
5,2006--Chamati-H--Fe,A3'--alpha-La--double-hcp,Fe,E_coh a r 0 1.0...
6,2006--Chamati-H--Fe,A6--In--bct,Fe,E_coh a r 0 5.0...
7,2006--Chamati-H--Fe,Ah--alpha-Po--sc,Fe,E_coh a r 0 7.5...
8,2006--Chamati-H--Fe,A15--beta-W,Fe,E_coh a r 0 -7....
9,2006--Chamati-H--Fe,A1--Cu--fcc,Fe,E_coh a r 0 1.0...


## 3. Define content generation

This section defines how to generate the web content.

### 3.1 Data table generation

#### Data table settings

- __table_header_file__ contains the header description for the data table file.

- __string_format__ is the format to use for printing the string prototype names.

- __float_format__ is the format to use for printing the floating point data.

In [16]:
# Text file holding the header template that is placed at the top of each data table
table_header_file = 'table header.txt'
# printf-style formats for the table's string and float fields.
# NOTE(review): these match table_gen()'s default str_fmt/float_fmt, but
# html_gen() calls table_gen() without passing them, so changing the values
# here has no effect on the generated tables.
string_format = '%-16s'
float_format = '%16.10f'

#### Load and show table_header

In [17]:
# Read the whole header template into memory and echo it for inspection
with open(table_header_file) as header_file:
    table_header = header_file.read()
print(table_header)

# Cohesive energy (eV) vs. nearest neighbor radial distance (Angstrom).
# potential = <potential>
# composition = <composition>

# NOTE: These values are for static, unrelaxed structures and use the ideal
# b/a and c/a ratios for the crystal structure, not the potential-specific
# values.

# Calculations from the NIST Interatomic Potential Repository Project:
# http://www.ctcms.nist.gov/potentials/

# Table generated <day>



#### Define table_gen()

In [18]:
def table_gen(df, potential, composition, header, str_fmt='%-16s', float_fmt='%16.10f'):
    """
    Build the text data table for one potential and composition.

    Parameters
    ----------
    df : pandas.DataFrame
        Processed data with potential, composition, family and e_vs_r_plot
        columns.
    potential : str
        The potential to select.
    composition : str
        The composition to select.
    header : str
        Header template containing <potential>, <composition> and <day>
        fields.
    str_fmt : str, optional
        printf-style format for the column labels.
    float_fmt : str, optional
        printf-style format for the numeric values.

    Returns
    -------
    str
        The filled-in header, followed by a line of column labels and one
        line per r value listing r (Angstrom) and the E_coh (eV) of each
        crystal family.
    """
    # Values substituted into the header template
    header_terms = {}
    header_terms['potential'] = potential
    header_terms['composition'] = composition
    header_terms['day'] = str(date.today())
    lines = [iprPy.tools.filltemplate(header, header_terms, '<', '>')]
    
    # Rows matching the requested composition and potential, ordered by family
    selected = (df.composition == composition) & (df.potential == potential)
    table_df = df[selected].sort_values('family')
    rows = [table_df.iloc[i] for i in range(len(table_df))]
    
    # The r values of the first family are used for every family
    rvalues = table_df.iloc[0].e_vs_r_plot.r.values
    
    # Column labels: 'r' followed by the crystal family names
    labels = [str_fmt % 'r']
    for row in rows:
        family = row.family
        # Drop the final '--' delimited term of names longer than 16 characters
        if len(family) > 16:
            family = '--'.join(family.split('--')[:-1])
        labels.append(str_fmt % family)
    lines.append('   ' + ' '.join(labels))
    
    # One table line per r value
    energies = [row.e_vs_r_plot.E_coh for row in rows]
    for j in range(len(rvalues)):
        fields = [float_fmt % uc.get_in_units(rvalues[j], 'Angstrom')]
        for E_coh in energies:
            fields.append(float_fmt % uc.get_in_units(E_coh.iloc[j], 'eV'))
        lines.append(' '.join(fields))
    
    return '\n'.join(lines)

#### Test

In [None]:
# Spot check: print the table generated for a single potential/composition pair
print(table_gen(df, '2009--Purja-Pun-G-P--Ni-Al', 'AlNi', table_header))

### 3.2 Plot generation

#### Plot settings

- __lineformats_file__ contains the line formatting information.

In [19]:
# CSV file listing the line color and dash style to use for each crystal family
lineformats_file = 'lineformats.csv'

#### Load and show figure plot settings

In [20]:
# Load the per-family line formats (family, color, line columns) and display them
lineformats_df = pd.read_csv(lineformats_file)
lineformats_df

Unnamed: 0,family,color,line
0,A1--Cu--fcc,black,solid
1,A2--W--bcc,blue,solid
2,A3--Mg--hcp,red,dashed
3,A3'--alpha-La--double-hcp,cyan,dashdot
4,A4--C--dc,magenta,solid
5,A5--beta-Sn,#EAC117,solid
6,A6--In--bct,orange,solid
7,A7--alpha-As,gray,solid
8,A15--beta-W,green,solid
9,Ah--alpha-Po--sc,brown,solid


#### Define plot_gen()

In [21]:
def plot_gen(df, potential, composition, lineformats_df):
    """
    Build the Bokeh figure for one potential and composition.

    Parameters
    ----------
    df : pandas.DataFrame
        Processed data with potential, composition, family and e_vs_r_plot
        columns.
    potential : str
        The potential to select.
    composition : str
        The composition to select.
    lineformats_df : pandas.DataFrame
        Line formats with family, color and line columns.  The first row
        matching each plotted family is used.

    Returns
    -------
    bokeh figure
        One hoverable line of E_coh (eV) versus r (Angstrom) per crystal
        family.
    """
    # Rows matching the requested composition and potential, ordered by family
    selected = (df.composition == composition) & (df.potential == potential)
    plot_df = df[selected].sort_values('family')
    
    # The r values of the first family are used for every line
    rvalues = uc.get_in_units(plot_df.iloc[0].e_vs_r_plot.r.values, 'Angstrom')
    
    title = ''.join(['Cohesive Energy vs. Interatomic Spacing for ', composition,
                     ' using the ', potential, ' potential'])
    p = bokeh.plotting.figure(title=title,
                              plot_width=800,
                              plot_height=600,
                              x_range=[2, 6],
                              y_range=[-10, 0],
                              x_axis_label='r (Angstrom)',
                              y_axis_label='Cohesive Energy (eV/atom)')
    
    # Track the lowest floored energy over all families, starting from -1
    lowylim = -1
    for index in range(len(plot_df)):
        row = plot_df.iloc[index]
        family = row.family
        Evalues = uc.get_in_units(row.e_vs_r_plot.E_coh.values, 'eV')
        lineformat = lineformats_df[lineformats_df.family == family].iloc[0]
        
        lowylim = min(lowylim, floor(Evalues.min()))
        
        curve = p.line(rvalues,
                       Evalues,
                       legend=family,
                       line_color=lineformat.color,
                       line_dash=lineformat.line,
                       line_width=2)
        
        # Each line gets its own hover tool so the tooltip names its prototype
        hover = bokeh.models.HoverTool(renderers=[curve],
                                       tooltips=[("prototype", family),
                                                 ("r (Angstrom)", "$x"),
                                                 ("E_coh (eV)", "$y")])
        p.add_tools(hover)
    
    # Tighten the y axis when no energy falls below the default lower bound of -10
    if lowylim > -10:
        p.y_range = bokeh.models.Range1d(lowylim, 0)
    
    p.legend.location = "bottom_right"
    return p

#### Test

In [None]:
# Spot check: display the plot generated for a single potential/composition pair
bokeh.io.show(plot_gen(df, '2012--Proville-L--Fe', 'Fe', lineformats_df))

### 3.3 HTML generation

#### HTML settings

- __resources__ is the bokeh Resources object used when saving the interactive plots.
- __html_info_file__ contains the html description for the calculation.

In [22]:
# Bokeh resources in 'cdn' mode; html_gen() reads this module-level name when saving plots
resources = bokeh.resources.Resources(mode='cdn')
# HTML file holding the description text placed at the top of each generated page
html_info_file = 'html info.html'

#### Load and show html_info

In [23]:
# Read the HTML description into memory and render it for inspection
with open(html_info_file) as info_file:
    html_info = info_file.read()
display(HTML(html_info))

#### Define html_gen()

In [24]:
def html_gen(df, potential, html_info, lineformats_df, table_header, rootdir=''):
    """
    Generate and save the E vs. r web content for one potential.

    For every composition of the potential this writes a data table
    (EvsR.<composition>.txt), a static plot (EvsR.<composition>.png) and an
    interactive plot (EvsR.<composition>.html) into <rootdir>/<potential>,
    creating that directory if needed.  A summary page EvsR.html with a
    composition selector is written to the same directory.

    Parameters
    ----------
    df : pandas.DataFrame
        Processed data, as expected by table_gen() and plot_gen().
    potential : str
        The potential to generate content for.
    html_info : str
        HTML description placed at the top of the summary page.
    lineformats_df : pandas.DataFrame
        Line formats passed on to plot_gen().
    table_header : str
        Header template passed on to table_gen().
    rootdir : str, optional
        Directory in which the potential's directory is created.  Default
        is '' (the current working directory).

    Notes
    -----
    Uses the module-level ``resources`` object when saving interactive plots.
    """
    
    options = OrderedDict()
    
    # Get list of unique compositions
    compositions = np.unique(df[df.potential == potential].composition)
    
    # All files for this potential go in their own directory
    rootdir = os.path.join(rootdir, potential)
    if not os.path.isdir(rootdir):
        os.makedirs(rootdir)
    
    # Loop over compositions
    for composition in compositions:
        name = 'EvsR.' + composition
        
        # Generate table
        table = table_gen(df, potential, composition, table_header)
        with open(os.path.join(rootdir, name+'.txt'), 'w') as f:
            f.write(table)
        
        # Generate plot (static image plus interactive page)
        plot = plot_gen(df, potential, composition, lineformats_df)
        bokeh.io.export_png(plot, os.path.join(rootdir, name+'.png'))
        # Title the page with the file's name (was the literal string 'name')
        bokeh.io.save(plot, os.path.join(rootdir, name+'.html'), resources=resources, title=name)
          
        # Build HTML content
        html = '\n'.join(['<h3>Cohesive Energy vs. Interatomic Spacing for %s</h3>' % composition,
                          '<p><a href="./%s.txt" target="_blank">Download data</a></p>' % name,
                          '<p>Click on plot to load interactive version</p>',
                          '<a href="./%s.html" target="_blank"><img src="./%s.png" alt="%s"></a>' % (name, name, name)])
        
        # Save as select option
        options[composition] = html
    
    # Save html, wrapping the selector in a properly closed <div>
    with open(os.path.join(rootdir, 'EvsR.html'), 'w') as f:
        f.write('\n'.join([html_info,
                           '<div>',
                           'Select a composition: ' + analysis.showSelection('EvsR', options),
                           '</div>']))

#### Test

In [None]:
# Spot check: generate the content for a single potential (rootdir defaults to '')
html_gen(df, '2009--Purja-Pun-G-P--Ni-Al', html_info, lineformats_df, table_header)

## 4. Generate for all potentials

#### Generation parameters

- __savedir__ is the directory where the files will be saved.

In [25]:
# Root directory for the generated files; html_gen() creates one subdirectory per potential inside it
savedir = '../webcontent/perpotential'

In [26]:
# Loop over all potentials
for potential in np.unique(df.potential):
    print(potential)

    # Generate html content
    html_gen(df, potential, html_info, lineformats_df, table_header, rootdir=savedir)

1985--Foiles-S-M--Ni-Cu
1987--Ackland-G-J--Ag
1987--Ackland-G-J--Au
1987--Ackland-G-J--Cu
1987--Ackland-G-J--Mo
1987--Ackland-G-J--Ni
1989--Adams-J-B--Ag
1989--Adams-J-B--Au
1989--Adams-J-B--Cu
1989--Adams-J-B--Ni
1989--Adams-J-B--Pd
1989--Adams-J-B--Pt
1992--Ackland-G-J--Ti
1995--Angelo-J-E--Ni-Al-H
1996--Farkas-D--Nb-Ti-Al
1997--Ackland-G-J--Fe
1997--Liu-X-Y--Al-Mg
1998--Liu-X-Y--Al-Mg
1999--Liu-X-Y--Al-Cu
1999--Mishin-Y--Al
1999--Mishin-Y--Ni
2000--Landa-A--Al-Pb
2000--Sturgeon-J-B--Al
2001--Mishin-Y--Cu-1
2002--Mishin-Y--Ni-Al
2003--Han-S--Cs
2003--Han-S--K
2003--Han-S--Li
2003--Han-S--Na
2003--Han-S--Rb
2003--Han-S--V
2003--Han-S--W
2003--Hoyt-J-J--Cu-Pb
2003--Li-Y-H--Ta
2003--Mendelev-M-I--Fe-2
2003--Mendelev-M-I--Fe-5
2003--Zope-R-R--Al
2003--Zope-R-R--Ti-Al
2004--Ackland-G-J--Fe-P
2004--Liu-X-Y--Al
2004--Mishin-Y--Ni-Al
2004--Zhou-X-W--Ag
2004--Zhou-X-W--Al
2004--Zhou-X-W--Au
2004--Zhou-X-W--Co
2004--Zhou-X-W--Cu
2004--Zhou-X-W--Fe
2004--Zhou-X-W--Mg
2004--Zhou-X-W--Mo
2004--Zh