# Process data for refine_structure_static calculation

This Notebook is designed for reading and converting results from the refine_structure_static calculation

Library imports

In [1]:
import glob
import os
from collections import OrderedDict
from copy import deepcopy

import pandas as pd
import numpy as np
from DataModelDict import DataModelDict as DM

import atomman as am
import atomman.unitconvert as uc

## 1. Raw Data

This section reads in or generates the raw_data associated with the calculation. 

### 1.1 Parameters

__lib_directory__ is the path to the data model record library to use 

In [2]:
lib_directory = 'C:\\Users\\lmh1\\Documents\\calculations\\ipr\\library_2016_09_15'

__raw_csv__ gives the location of the csv file for raw_data

In [3]:
raw_csv = os.path.join(lib_directory, 'refine_structure_static_raw.csv')

__build_raw__ indicates if raw_data is built from lib_directory or read in from raw_csv.
    
- = True -- Read data from all refine_structure_static records and save data to raw_csv

- = False -- Read data from raw_csv

In [4]:
build_raw = True

### 1.2 Conversion Functions

Functions used in converting a tree-like data model representation into a flattened dictionary are listed here. Flattened dictionaries are necessary for building the pandas DataFrame of all the data.

__model_2_dict__ takes a data model (as a DataModelDict) and converts it into an equivalent flat dictionary.

In [5]:
def model_2_dict(model):
    """Convert a structured data model for the calculation into an equivalent flat dictionary."""
    
    values = {}
    
    try: calculation = model.find('calculation')
    except: pass
    else:
        try: values['calc_key'] =               calculation['id']
        except: pass
        try: values['calc_type'] =              calculation['script']
        except: pass
        try: values['strain_range'] =           calculation[['run-parameter', 'strain-range']]
        except: pass
        try: values['load_options'] =           calculation[['run-parameter', 'load_options']]
        except: pass
        try: values['size_mult_a_lo'] =         calculation[['run-parameter', 'size-multipliers', 'a', 0]]
        except: pass
        try: values['size_mult_a_hi'] =         calculation[['run-parameter', 'size-multipliers', 'a', 1]]
        except: pass
        try: values['size_mult_b_lo'] =         calculation[['run-parameter', 'size-multipliers', 'b', 0]]
        except: pass
        try: values['size_mult_b_hi'] =         calculation[['run-parameter', 'size-multipliers', 'b', 1]]
        except: pass
        try: values['size_mult_c_lo'] =         calculation[['run-parameter', 'size-multipliers', 'c', 0]]
        except: pass
        try: values['size_mult_c_hi'] =         calculation[['run-parameter', 'size-multipliers', 'c', 1]]
        except: pass
    
    try: potential = model.find('potential')
    except: pass
    else:
        try: values['pot_key'] =                potential['key']
        except: pass
        try: values['pot_id'] =                 potential['id']
        except: pass

    try: system_info = model.find('system-info')
    except: pass
    else:
        try: values['load_file'] =              system_info[['artifact', 'file']]
        except: pass
        try: values['load_style'] =             system_info[['artifact', 'format']]
        except: pass
        try: values['prototype'] =              system_info[['artifact', 'family']]
        except: pass
        try: values['symbols'] =                '-'.join(system_info['symbols'])
        except: pass
        
    try: phase_state = model.find('phase-state')
    except: pass
    else:    
        try: values['temperature'] =            uc.value_unit(phase_state['temperature'])
        except: pass
        try: values['pressure_xx'] =            uc.value_unit(phase_state['pressure-xx'])
        except: pass
        try: values['pressure_yy'] =            uc.value_unit(phase_state['pressure-yy'])
        except: pass
        try: values['pressure_zz'] =            uc.value_unit(phase_state['pressure-zz'])
        except: pass
        
    try: system = model.find('relaxed-atomic-system')
    except: pass
    else:        
        try: values['a'] =                      uc.value_unit(system['cell'].find('a'))
        except: pass
        try: values['b'] =                      uc.value_unit(system['cell'].find('b'))
        except: pass
        try: values['c'] =                      uc.value_unit(system['cell'].find('c'))
        except: pass
        try: values['alpha'] =                  system['cell'].find('alpha')
        except: pass
        try: values['beta'] =                   system['cell'].find('beta')
        except: pass
        try: values['gamma'] =                  system['cell'].find('gamma')
        except: pass
        
        ucell, symbols = am.load('system_model', model, key='relaxed-atomic-system')
        atype_count = np.bincount(ucell.atoms_prop(key='atype'))
        values['composition'] = comp_refine(symbols, atype_count)
        
    try: values['E_cohesive'] =                 uc.value_unit(model.find('cohesive-energy'))
    except: pass    
    
    try: elastic_constants = model.find('elastic-constants')
    except: pass
    else: 
        try: values['C11'] =                    uc.value_unit(elastic_constants.find('C', yes={'ij':'1 1'})['stiffness'])
        except: pass
        try: values['C22'] =                    uc.value_unit(elastic_constants.find('C', yes={'ij':'2 2'})['stiffness'])
        except: pass
        try: values['C33'] =                    uc.value_unit(elastic_constants.find('C', yes={'ij':'3 3'})['stiffness'])
        except: pass
        try: values['C12'] =                    uc.value_unit(elastic_constants.find('C', yes={'ij':'1 2'})['stiffness'])
        except: pass
        try: values['C13'] =                    uc.value_unit(elastic_constants.find('C', yes={'ij':'1 3'})['stiffness'])
        except: pass
        try: values['C23'] =                    uc.value_unit(elastic_constants.find('C', yes={'ij':'2 3'})['stiffness'])
        except: pass
        try: values['C44'] =                    uc.value_unit(elastic_constants.find('C', yes={'ij':'4 4'})['stiffness'])
        except: pass
        try: values['C55'] =                    uc.value_unit(elastic_constants.find('C', yes={'ij':'5 5'})['stiffness'])
        except: pass
        try: values['C66'] =                    uc.value_unit(elastic_constants.find('C', yes={'ij':'6 6'})['stiffness'])
        except: pass
    
    try: values['error'] = model.find('error')
    except: pass
    
    
    return values

__comp_refine__ takes a list of symbols and count of how many times each symbol appears in a structure and generates a composition string.

In [6]:
def comp_refine(symbols, counts):
    """Takes a list of symbols and count of how many times each symbol appears and generates a composition string."""
    primes = [2,3,5,7,11,13,17,19,23,29,31,37,41,43,47]
    
    sym_dict = {}
    for i in xrange(len(symbols)):
        sym_dict[symbols[i]] = counts[i+1]
    
    for prime in primes:
        if max(sym_dict.values()) < prime:
            break
        
        while True:
            breaktime = False
            for value in sym_dict.values():
                if value % prime != 0:
                    breaktime = True
                    break
            if breaktime:
                break
            for key in sym_dict:
                sym_dict[key] /= prime
    
    composition=''
    for key in sorted(sym_dict):
        if sym_dict[key] > 0:
            composition += key
            if sym_dict[key] != 1:
                composition += str(sym_dict[key])
            
    return composition       

### 1.3 Code

Create raw_data

In [7]:
if build_raw:
    raw_data = []
    for fname in glob.iglob(os.path.join(lib_directory, '*', '*', '*', 'refine_structure_static', '*')):

        if os.path.splitext(fname)[1].lower() in ('.xml', '.json'):
            with open(fname) as f:
                model = DM(f)
            raw_data.append(model_2_dict(model))
    raw_data = pd.DataFrame(raw_data)
    raw_data.to_csv(raw_csv, index=False, float_format='%.13g')  

else:
    raw_data = pd.read_csv(raw_csv)

Show raw_data

In [8]:
raw_data

Unnamed: 0,C11,C12,C13,C22,C23,C33,C44,C55,C66,E_cohesive,...,prototype,size_mult_a_hi,size_mult_a_lo,size_mult_b_hi,size_mult_b_lo,size_mult_c_hi,size_mult_c_lo,strain_range,symbols,temperature
0,1.052581,0.770629,,,,,0.478046,,,-3.540000,...,A1--Cu--fcc,3,0,3,0,3,0,1.000000e-05,Cu,0.0
1,1.627509,0.428910,,,,,0.137365,,,-3.457837,...,A15--beta-W,3,0,3,0,3,0,1.000000e-05,Cu,0.0
2,0.845207,0.865159,,,,,0.569227,,,-3.510146,...,A2--W--bcc,3,0,3,0,3,0,1.000000e-05,Cu,0.0
3,1.389632,0.658471,0.548210,1.389633,0.548209,1.501021,0.254091,0.254092,0.365581,-3.538797,...,A3'--alpha-La--double-hcp,3,0,3,0,3,0,1.000000e-05,Cu,0.0
4,1.389554,0.658637,0.550494,1.389553,0.550496,1.499918,0.254845,0.254843,0.365457,-3.537620,...,A3--Mg--hcp,3,0,3,0,3,0,1.000000e-05,Cu,0.0
5,0.293430,0.423494,,,,,0.234322,,,-2.502544,...,A4--C--dc,3,0,3,0,3,0,1.000000e-05,Cu,0.0
6,1.719968,0.108320,0.299675,,,1.670952,0.012220,,-0.165984,-3.242210,...,A5--beta-Sn,3,0,3,0,3,0,1.000000e-05,Cu,0.0
7,1.389650,0.433559,0.770629,,,1.052581,0.478046,,0.140976,-3.540000,...,A6--In--bct,3,0,3,0,3,0,1.000000e-05,Cu,0.0
8,0.636311,0.374052,0.586643,0.636311,0.586643,0.294494,0.361845,0.361845,0.131129,-2.815877,...,A7--alpha-As,3,0,3,0,3,0,1.000000e-05,Cu,0.0
9,1.728156,0.116316,,,,,-0.151273,,,-3.111667,...,Ah--alpha-Po--sc,3,0,3,0,3,0,1.000000e-05,Cu,0.0


### 1.4 Check errors

In [9]:
if 'error' in raw_data:
    for error in np.unique(raw_data[pd.notnull(raw_data.error)].error):
        print error

Traceback (most recent call last):
  File "calc_refine_structure_static.py", line 213, in <module>
    main(*sys.argv[1:])    
  File "calc_refine_structure_static.py", line 30, in main
    input_dict['initial_system'], 
  File "c:\users\lmh1\documents\python-packages\datamodeldict\DataModelDict.py", line 58, in __getitem__
    return OrderedDict.__getitem__(self, key)
KeyError: 'initial_system'

Traceback (most recent call last):
  File "calc_refine_structure_static.py", line 213, in <module>
    main(*sys.argv[1:])    
  File "calc_refine_structure_static.py", line 36, in main
    delta = input_dict['strain_range'])
  File "calc_refine_structure_static.py", line 108, in quick_a_Cij
    raise RuntimeError('Failed to converge after 100 cycles')
RuntimeError: Failed to converge after 100 cycles

Traceback (most recent call last):
  File "calc_refine_structure_static.py", line 213, in <module>
    main(*sys.argv[1:])    
  File "calc_refine_structure_static.py", line 3

## 2. Processed Data

This section reads in or generates data that has been processed from raw_data. 

- Simulations are excluded based on an ignore_list

- Data columns are added/excluded/sorted based on a list

- Values are converted to appropriate units

- Column headers changed slightly

### 2.1 Parameters

__data_csv__ gives the location of the csv file for the processed data.

In [10]:
data_csv =   os.path.join(lib_directory, 'refine_structure_static.csv')

__ignore_txt__ gives the location of the file containing ignore_list, which is a list of simulations by key to exclude.

In [11]:
ignore_txt = os.path.join(lib_directory, 'refine_structure_static_ignore.txt')

__build_ignore_list__ indicates if ignore_list should be built based on values or read in from ignore_file. 

- = True -- Generate ignore_list from values of raw_data and save to ignore_file

- = False -- Read data in from ignore_file

In [12]:
build_ignore_list = True

__build_data__ indicates if data is built from raw_data and the ignore list, or read in from data_csv.
    
- = True -- Process raw_data and save to data_csv

- = False -- Read data in from data_csv

In [13]:
build_data = True

### 2.2 Data conversion parameters

__headers__ gives the list of data columns from raw_data to include in and how they should be renamed in data.

In [14]:
headers = OrderedDict([
        ('calc_key',    'key'),
        ('pot_id',      'potential'),
        ('prototype',   'prototype'),
        ('composition', 'composition'),
        ('a',           'a (A)'),
        ('b',           'b (A)'),
        ('c',           'c (A)'),
        ('E_cohesive',  'Ecoh (eV)'),
        ('C11',         'C11 (GPa)'),
        ('C22',         'C22 (GPa)'),
        ('C33',         'C33 (GPa)'),
        ('C12',         'C12 (GPa)'),
        ('C13',         'C13 (GPa)'),
        ('C23',         'C23 (GPa)'),
        ('C44',         'C44 (GPa)'),
        ('C55',         'C55 (GPa)'),
        ('C66',         'C66 (GPa)') ])

__units__ specifies the units that any numerical values should be converted to.

In [15]:
units = {'a (A)':     'angstrom',
         'b (A)':     'angstrom',
         'c (A)':     'angstrom',
         'Ecoh (eV)': 'eV',
         'C11 (GPa)': 'GPa',
         'C22 (GPa)': 'GPa',
         'C33 (GPa)': 'GPa',
         'C12 (GPa)': 'GPa',
         'C13 (GPa)': 'GPa',
         'C23 (GPa)': 'GPa',
         'C44 (GPa)': 'GPa',
         'C55 (GPa)': 'GPa',
         'C66 (GPa)': 'GPa'}

### 2.3. Code

Create ignore_list

In [16]:
if build_ignore_list:
    
    #Add simulations with errors to ignore_list
    ignore_list = list(raw_data.calc_key[~pd.isnull(raw_data.error)])
    
    #Add false compounds to ignore_list
    ignore_list.extend(raw_data.calc_key[raw_data['symbols'].apply(lambda x: len(np.unique(x.split('-')))) != 
                       raw_data['symbols'].apply(lambda x: len(x.split('-')))])
    
    #Add bct to ignore_list as it usually relaxes 
    #ignore_list.extend(raw_data.calc_key[raw_data['prototype'] == 'A6--In--bct'])    
    
    #Add duplicate compounds with same lattice parameter to ignore_list
    for i in xrange(len(raw_data)):
        trunc = raw_data.iloc[i+1:]
        matches = list(trunc[(trunc.pot_id ==    raw_data.iloc[i].pot_id) & 
                             (trunc.prototype == raw_data.iloc[i].prototype) &
                             (np.isclose(trunc.a, raw_data.iloc[i].a))].calc_key)
        ignore_list.extend(matches)
    
    #Save ignore_list to ignore_txt
    ignore_list = np.unique(ignore_list)
    with open(ignore_txt, 'w') as f:
        f.write('\n'.join(ignore_list))
        
else:
    with open(ignore_txt) as f:
        ignore_list = f.read().split()

Process data

In [17]:
if build_data:
    #Extract only columns listed by headers' keys
    data = pd.DataFrame(raw_data, columns=headers.keys())
    
    #Rename according to headers' values
    data.rename(columns=headers, inplace=True) 
    
    #Remove entries with simulation keys in the ignore list
    data = data[~data.key.isin(ignore_list)]
    data.reset_index(drop=True, inplace=True)

    #Perform unit conversions
    for column, unit in units.iteritems():
        data[column] = uc.get_in_units(data[column], unit)
    
    data.to_csv(data_csv, index=False, float_format='%.13g')

else:
    data = pd.read_csv(data_csv)

List remaining duplicate compounds for manual addition to ignore_list. This is necessary as some potentials have multiple energy minimums.

In [18]:
#Add duplicate compounds with same lattice parameter to ignore_list
print "Remaining duplicate compounds:"
for i in xrange(len(data)):
    trunc = data.iloc[i+1:]
    matches = trunc[(trunc.potential ==   data.iloc[i].potential) & 
                    (trunc.prototype ==   data.iloc[i].prototype) &
                    (trunc.composition == data.iloc[i].composition)]
    if len(matches) > 1:
        print data.iloc[i].potential, data.iloc[i].prototype, data.iloc[i].composition

Remaining duplicate compounds:
1996--Farkas-D--Nb-Ti-Al B2--CsCl AlNb
1996--Farkas-D--Nb-Ti-Al B2--CsCl AlNb
1996--Farkas-D--Nb-Ti-Al B2--CsCl AlNb
1996--Farkas-D--Nb-Ti-Al D0_3--BiF3 AlNb3
1996--Farkas-D--Nb-Ti-Al A15--Cr3Si Al3Nb
1996--Farkas-D--Nb-Ti-Al C1--CaF2--fluorite Al2Nb
1996--Farkas-D--Nb-Ti-Al B1--NaCl--rock-salt NbTi
1996--Farkas-D--Nb-Ti-Al L2_1--AlCu2Mn--heusler Al2NbTi
2003--Hoyt-J-J--Cu-Pb A15--Cr3Si Cu3Pb
2008--Fortini-A--Ru A1--Cu--fcc Ru
2008--Fortini-A--Ru A3'--alpha-La--double-hcp Ru
2008--Hepburn-D-J--Fe-C D0_3--BiF3 C3Fe
2008--Hepburn-D-J--Fe-C L1_2--AuCu3 C3Fe
2010--Fellinger-M-R--Nb A15--beta-W Nb
2010--Fellinger-M-R--Nb A4--C--dc Nb
2012--Jelinek-B--Al-Si-Mg-Cu-Fe A15--Cr3Si FeS3MgS
2012--Park-H--Mo Ah--alpha-Po--sc Mo
2012--Schopf-D--Al-Mn-Pd A15--Cr3Si AlMn3
2012--Schopf-D--Al-Mn-Pd B1--NaCl--rock-salt AlMn
2012--Schopf-D--Al-Mn-Pd B2--CsCl AlMn
2012--Schopf-D--Al-Mn-Pd C1--CaF2--fluorite AlMn2
2012--Schopf-D--Al-Mn-Pd D0_3--BiF3 AlMn3
2012--Schopf-D--Al-Mn

Show data

In [19]:
data

Unnamed: 0,key,potential,prototype,composition,a (A),b (A),c (A),Ecoh (eV),C11 (GPa),C22 (GPa),C33 (GPa),C12 (GPa),C13 (GPa),C23 (GPa),C44 (GPa),C55 (GPa),C66 (GPa)
0,2dcfc082-4adc-4b15-96d6-ac9b5bbaa5ab,1985--Foiles-S-M--Ni-Cu,A1--Cu--fcc,Cu,3.615000,,,-3.540000,168.642010,,,123.468353,,,76.591354,,
1,5178ce1a-1080-4b88-b030-0eaa7d81dcc7,1985--Foiles-S-M--Ni-Cu,A15--beta-W,Cu,4.588822,,,-3.457837,260.755619,,,68.718957,,,22.008235,,
2,b2310d32-ee3f-43b1-924c-0a61722d9124,1985--Foiles-S-M--Ni-Cu,A2--W--bcc,Cu,2.871624,,,-3.510146,135.417024,,,138.613748,,,91.200248,,
3,e81ade54-e4e1-48bc-a4e4-374e6db56699,1985--Foiles-S-M--Ni-Cu,A3'--alpha-La--double-hcp,Cu,2.556501,4.427990,8.338002,-3.538797,222.643662,222.643797,240.490114,105.498651,87.832872,87.832713,40.709868,40.710026,58.572604
4,591d2559-3a5f-41d8-9558-6488df0f8b6c,1985--Foiles-S-M--Ni-Cu,A3--Mg--hcp,Cu,2.556820,4.428544,4.163712,-3.537620,222.631128,222.630853,240.313316,105.525319,88.198848,88.199166,40.830658,40.830344,58.552706
5,fa5b020f-07d9-4dc9-b32d-35668f985ed2,1985--Foiles-S-M--Ni-Cu,A4--C--dc,Cu,5.361341,,,-2.502544,47.012738,,,67.851216,,,37.542592,,
6,bb3d8234-8d37-4184-a8fc-48b268d43056,1985--Foiles-S-M--Ni-Cu,A5--beta-Sn,Cu,4.616142,,2.435015,-3.242210,275.569232,,267.716051,17.354756,48.013228,,1.957825,,-26.593586
7,62d288fc-7d08-424c-94cc-7bf08074c369,1985--Foiles-S-M--Ni-Cu,A6--In--bct,Cu,2.556191,,3.615000,-3.540000,222.646535,,168.642010,69.463828,123.468353,,76.591354,,22.586828
8,21c11afb-6f06-4a14-8367-bc777d36bab4,1985--Foiles-S-M--Ni-Cu,A7--alpha-As,Cu,3.239125,5.610329,10.212297,-2.815877,101.948226,101.948226,47.183114,59.929780,93.990567,93.990567,57.973902,57.973902,21.009223
9,79560086-0971-4c82-9004-014eb6b39b6b,1985--Foiles-S-M--Ni-Cu,Ah--alpha-Po--sc,Cu,2.393158,,,-3.111667,276.881144,,,18.635907,,,-24.236673,,


## 3. Symmetry Operations



__crystal_family__ specifies the crystal family for each prototype. This is used in checking for transformations to other structures and averaging equivalent values.

In [20]:
crystal_family = {
    #elemental
    'A1--Cu--fcc':                'cubic',
    'A2--W--bcc':                 'cubic',
    'A3--Mg--hcp':                'hexagonal',
    'A3\'--alpha-La--double-hcp': 'hexagonal',
    'A4--C--dc':                  'cubic',
    'A5--beta-Sn':                'tetragonal',
    'A6--In--bct':                'tetragonal',
    'A7--alpha-As':               'hexagonal',
    'A15--beta-W':                'cubic',
    'Ah--alpha-Po--sc':           'cubic',
   #1:1
    'B1--NaCl--rock-salt':        'cubic',
    'B2--CsCl':                   'cubic',
    'B3--ZnS--cubic-zinc-blende': 'cubic',
    'L1_0--AuCu':                 'tetragonal',
   #1:2
    'C1--CaF2--fluorite':         'cubic',
   #1:3
    'A15--Cr3Si':                 'cubic',
    'D0_3--BiF3':                 'cubic',
    'L1_2--AuCu3':                'cubic',
   #1:1:2
    'L2_1--AlCu2Mn--heusler':     'cubic'
}

__symmetry_conditions__ lists conditions when a prototype becomes equivalent to another prototype with higher symmetry.

In [21]:
symmetry_conditions = {
    'A6--In--bct': [ 
        {
            'c/a': 2**(0.5)
        }
    ]
}

In [22]:
old_data = deepcopy(data)

In [23]:
for i in xrange(len(data)):
    family = crystal_family[data.loc[i, 'prototype']]  
     
    if family == 'cubic':
        ave_a =   np.nanmean([data.loc[i, 'a (A)'],     data.loc[i, 'b (A)'],     data.loc[i, 'c (A)']])
        ave_C11 = np.nanmean([data.loc[i, 'C11 (GPa)'], data.loc[i, 'C22 (GPa)'], data.loc[i, 'C33 (GPa)']])
        ave_C12 = np.nanmean([data.loc[i, 'C12 (GPa)'], data.loc[i, 'C13 (GPa)'], data.loc[i, 'C23 (GPa)']])
        ave_C44 = np.nanmean([data.loc[i, 'C44 (GPa)'], data.loc[i, 'C55 (GPa)'], data.loc[i, 'C66 (GPa)']])
        
        if (np.isclose(data.loc[i, 'a (A)'],     ave_a)   and 
            np.isclose(data.loc[i, 'C11 (GPa)'], ave_C11) and 
            np.isclose(data.loc[i, 'C12 (GPa)'], ave_C12) and  
            np.isclose(data.loc[i, 'C44 (GPa)'], ave_C44)):
            
            data.loc[i, 'a (A)'] = ave_a
            data.loc[i, 'C11 (GPa)'] = ave_C11
            data.loc[i, 'C12 (GPa)'] = ave_C12
            data.loc[i, 'C44 (GPa)'] = ave_C44
            
            
            data.loc[i, 'b (A)'] =     data.loc[i, 'c (A)'] =     np.nan
            data.loc[i, 'C22 (GPa)'] = data.loc[i, 'C33 (GPa)'] = np.nan
            data.loc[i, 'C13 (GPa)'] = data.loc[i, 'C23 (GPa)'] = np.nan
            data.loc[i, 'C55 (GPa)'] = data.loc[i, 'C66 (GPa)'] = np.nan
        else:
            data.loc[i, 'a (A)'] = np.nan
            
    elif family == 'hexagonal':
        ave_a =   np.nanmean([data.loc[i, 'a (A)'],     data.loc[i, 'b (A)'] / 3.**0.5])
        ave_C11 = np.nanmean([data.loc[i, 'C11 (GPa)'], data.loc[i, 'C22 (GPa)']])
        ave_C12 = np.nanmean([data.loc[i, 'C12 (GPa)'], (data.loc[i, 'C11 (GPa)'] - 2*data.loc[i, 'C66 (GPa)'])])        
        ave_C13 = np.nanmean([data.loc[i, 'C13 (GPa)'], data.loc[i, 'C23 (GPa)']])        
        ave_C44 = np.nanmean([data.loc[i, 'C44 (GPa)'], data.loc[i, 'C55 (GPa)']])
        
        if (np.isclose(data.loc[i, 'a (A)'],     ave_a)   and 
            np.isclose(data.loc[i, 'C11 (GPa)'], ave_C11) and 
            np.isclose(data.loc[i, 'C12 (GPa)'], ave_C12) and
            np.isclose(data.loc[i, 'C13 (GPa)'], ave_C13) and
            np.isclose(data.loc[i, 'C44 (GPa)'], ave_C44) and
            pd.notnull(data.loc[i, 'c (A)']) and
            pd.notnull(data.loc[i, 'C33 (GPa)'])):
            
            data.loc[i, 'a (A)'] =     ave_a
            data.loc[i, 'C11 (GPa)'] = ave_C11
            data.loc[i, 'C12 (GPa)'] = ave_C12
            data.loc[i, 'C13 (GPa)'] = ave_C13
            data.loc[i, 'C44 (GPa)'] = ave_C44
            
            data.loc[i, 'b (A)'] =     np.nan
            data.loc[i, 'C22 (GPa)'] = np.nan
            data.loc[i, 'C23 (GPa)'] = np.nan
            data.loc[i, 'C55 (GPa)'] = data.loc[i, 'C66 (GPa)'] = np.nan
        else:
            data.loc[i, 'a (A)'] = np.nan
            
    
    elif family == 'tetragonal':
        ave_a =   np.nanmean([data.loc[i, 'a (A)'],     data.loc[i, 'b (A)']])
        ave_C11 = np.nanmean([data.loc[i, 'C11 (GPa)'], data.loc[i, 'C22 (GPa)']])
        ave_C13 = np.nanmean([data.loc[i, 'C13 (GPa)'], data.loc[i, 'C23 (GPa)']])
        ave_C44 = np.nanmean([data.loc[i, 'C44 (GPa)'], data.loc[i, 'C55 (GPa)']])
        
        if (np.isclose(data.loc[i, 'a (A)'],     ave_a)   and 
            np.isclose(data.loc[i, 'C11 (GPa)'], ave_C11) and 
            np.isclose(data.loc[i, 'C13 (GPa)'], ave_C13) and
            np.isclose(data.loc[i, 'C44 (GPa)'], ave_C44) and
            pd.notnull(data.loc[i, 'c (A)']) and
            pd.notnull(data.loc[i, 'C33 (GPa)']) and
            pd.notnull(data.loc[i, 'C12 (GPa)']) and
            pd.notnull(data.loc[i, 'C66 (GPa)']) and
            not np.isclose(data.loc[i, 'a (A)'], data.loc[i, 'c (A)'])):
            
            data.loc[i, 'a (A)'] =     ave_a
            data.loc[i, 'C11 (GPa)'] = ave_C11
            data.loc[i, 'C13 (GPa)'] = ave_C13
            data.loc[i, 'C44 (GPa)'] = ave_C44
            
            data.loc[i, 'b (A)'] =     np.nan
            data.loc[i, 'C22 (GPa)'] = np.nan
            data.loc[i, 'C23 (GPa)'] = np.nan
            data.loc[i, 'C55 (GPa)'] = np.nan
        else:
            data.loc[i, 'a (A)'] = np.nan
     
    
    
data = data[pd.notnull(data['a (A)'])]        
data.reset_index(drop=True, inplace=True)        
    



In [24]:
data

Unnamed: 0,key,potential,prototype,composition,a (A),b (A),c (A),Ecoh (eV),C11 (GPa),C22 (GPa),C33 (GPa),C12 (GPa),C13 (GPa),C23 (GPa),C44 (GPa),C55 (GPa),C66 (GPa)
0,2dcfc082-4adc-4b15-96d6-ac9b5bbaa5ab,1985--Foiles-S-M--Ni-Cu,A1--Cu--fcc,Cu,3.615000,,,-3.540000,168.642010,,,123.468353,,,76.591354,,
1,5178ce1a-1080-4b88-b030-0eaa7d81dcc7,1985--Foiles-S-M--Ni-Cu,A15--beta-W,Cu,4.588822,,,-3.457837,260.755619,,,68.718957,,,22.008235,,
2,b2310d32-ee3f-43b1-924c-0a61722d9124,1985--Foiles-S-M--Ni-Cu,A2--W--bcc,Cu,2.871624,,,-3.510146,135.417024,,,138.613748,,,91.200248,,
3,e81ade54-e4e1-48bc-a4e4-374e6db56699,1985--Foiles-S-M--Ni-Cu,A3'--alpha-La--double-hcp,Cu,2.556501,,8.338002,-3.538797,222.643730,,240.490114,105.498553,87.832793,,40.709947,,
4,591d2559-3a5f-41d8-9558-6488df0f8b6c,1985--Foiles-S-M--Ni-Cu,A3--Mg--hcp,Cu,2.556821,,4.163712,-3.537620,222.630991,,240.313316,105.525518,88.199007,,40.830501,,
5,fa5b020f-07d9-4dc9-b32d-35668f985ed2,1985--Foiles-S-M--Ni-Cu,A4--C--dc,Cu,5.361341,,,-2.502544,47.012738,,,67.851216,,,37.542592,,
6,bb3d8234-8d37-4184-a8fc-48b268d43056,1985--Foiles-S-M--Ni-Cu,A5--beta-Sn,Cu,4.616142,,2.435015,-3.242210,275.569232,,267.716051,17.354756,48.013228,,1.957825,,-26.593586
7,62d288fc-7d08-424c-94cc-7bf08074c369,1985--Foiles-S-M--Ni-Cu,A6--In--bct,Cu,2.556191,,3.615000,-3.540000,222.646535,,168.642010,69.463828,123.468353,,76.591354,,22.586828
8,21c11afb-6f06-4a14-8367-bc777d36bab4,1985--Foiles-S-M--Ni-Cu,A7--alpha-As,Cu,3.239125,,10.212297,-2.815877,101.948226,,47.183114,59.929780,93.990567,,57.973902,,
9,79560086-0971-4c82-9004-014eb6b39b6b,1985--Foiles-S-M--Ni-Cu,Ah--alpha-Po--sc,Cu,2.393158,,,-3.111667,276.881144,,,18.635907,,,-24.236673,,


In [25]:
for i in xrange(len(data)):
    if data.loc[i, 'prototype'] in symmetry_conditions:
        for condition in symmetry_conditions[data.loc[i, 'prototype']]:
            match = False
            if 'c/a' in condition:
                c_a = data.loc[i, 'c (A)'] / data.loc[i, 'a (A)']
                if np.isclose(c_a, condition['c/a']):
                    match = True
            if match:                    
                data.loc[i, 'a (A)'] = np.nan    
                break
                
data = data[pd.notnull(data['a (A)'])]
data.reset_index(drop=True, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [26]:
data

Unnamed: 0,key,potential,prototype,composition,a (A),b (A),c (A),Ecoh (eV),C11 (GPa),C22 (GPa),C33 (GPa),C12 (GPa),C13 (GPa),C23 (GPa),C44 (GPa),C55 (GPa),C66 (GPa)
0,2dcfc082-4adc-4b15-96d6-ac9b5bbaa5ab,1985--Foiles-S-M--Ni-Cu,A1--Cu--fcc,Cu,3.615000,,,-3.540000,168.642010,,,123.468353,,,76.591354,,
1,5178ce1a-1080-4b88-b030-0eaa7d81dcc7,1985--Foiles-S-M--Ni-Cu,A15--beta-W,Cu,4.588822,,,-3.457837,260.755619,,,68.718957,,,22.008235,,
2,b2310d32-ee3f-43b1-924c-0a61722d9124,1985--Foiles-S-M--Ni-Cu,A2--W--bcc,Cu,2.871624,,,-3.510146,135.417024,,,138.613748,,,91.200248,,
3,e81ade54-e4e1-48bc-a4e4-374e6db56699,1985--Foiles-S-M--Ni-Cu,A3'--alpha-La--double-hcp,Cu,2.556501,,8.338002,-3.538797,222.643730,,240.490114,105.498553,87.832793,,40.709947,,
4,591d2559-3a5f-41d8-9558-6488df0f8b6c,1985--Foiles-S-M--Ni-Cu,A3--Mg--hcp,Cu,2.556821,,4.163712,-3.537620,222.630991,,240.313316,105.525518,88.199007,,40.830501,,
5,fa5b020f-07d9-4dc9-b32d-35668f985ed2,1985--Foiles-S-M--Ni-Cu,A4--C--dc,Cu,5.361341,,,-2.502544,47.012738,,,67.851216,,,37.542592,,
6,bb3d8234-8d37-4184-a8fc-48b268d43056,1985--Foiles-S-M--Ni-Cu,A5--beta-Sn,Cu,4.616142,,2.435015,-3.242210,275.569232,,267.716051,17.354756,48.013228,,1.957825,,-26.593586
7,21c11afb-6f06-4a14-8367-bc777d36bab4,1985--Foiles-S-M--Ni-Cu,A7--alpha-As,Cu,3.239125,,10.212297,-2.815877,101.948226,,47.183114,59.929780,93.990567,,57.973902,,
8,79560086-0971-4c82-9004-014eb6b39b6b,1985--Foiles-S-M--Ni-Cu,Ah--alpha-Po--sc,Cu,2.393158,,,-3.111667,276.881144,,,18.635907,,,-24.236673,,
9,f749f0db-2226-4b74-9d38-63fa1bf5fa57,1985--Foiles-S-M--Ni-Cu,A15--Cr3Si,CuNi3,4.521382,,,-4.066935,387.774115,,,101.351887,,,32.222742,,


In [27]:
for prototype in np.unique(data.prototype):
    print prototype

A1--Cu--fcc
A15--Cr3Si
A15--beta-W
A2--W--bcc
A3'--alpha-La--double-hcp
A3--Mg--hcp
A4--C--dc
A5--beta-Sn
A6--In--bct
A7--alpha-As
Ah--alpha-Po--sc
B1--NaCl--rock-salt
B2--CsCl
B3--ZnS--cubic-zinc-blende
C1--CaF2--fluorite
D0_3--BiF3
L1_0--AuCu
L1_2--AuCu3
L2_1--AlCu2Mn--heusler


## 3. HTML Tables

This section takes the processed data and generates per_potential html tables.

### 3.1 Parameters

In [28]:
per_potential_directory = 'C:\\Users\\lmh1\\Documents\\website\\per_potential'

### 3.2 Data conversion parameters

__headers__ gives the list of data columns from data to include in and how they should be renamed in html_data.

In [29]:
headers = OrderedDict([
        ('prototype',   'prototype'),
        ('Ecoh (eV)',   '<i>E</i><sub>coh</sub> (eV)'),
        ('a (A)',       '<i>a</i><sub>0</sub> (&Aring;)'),
        ('b (A)',       '<i>b</i><sub>0</sub> (&Aring;)'),
        ('c (A)',       '<i>c</i><sub>0</sub> (&Aring;)'),
        ('C11 (GPa)',   '<i>C</i><sub>11</sub> (GPa)'),
        ('C22 (GPa)',   '<i>C</i><sub>22</sub> (GPa)'),
        ('C33 (GPa)',   '<i>C</i><sub>33</sub> (GPa)'),
        ('C12 (GPa)',   '<i>C</i><sub>12</sub> (GPa)'),
        ('C13 (GPa)',   '<i>C</i><sub>13</sub> (GPa)'),
        ('C23 (GPa)',   '<i>C</i><sub>23</sub> (GPa)'),
        ('C44 (GPa)',   '<i>C</i><sub>44</sub> (GPa)'),
        ('C55 (GPa)',   '<i>C</i><sub>55</sub> (GPa)'),
        ('C66 (GPa)',   '<i>C</i><sub>66</sub> (GPa)') ])

__formating__ gives the c-style print format to use for the indivdual floating point terms

In [30]:
l_const_format = '{:.4f}'
eng_coh_format = '{:.4f}'
e_const_format = '{:.2f}'

def formatter(style, value):
    if pd.notnull(value):
        return style.format(value)
    else:
        return ''

formatters = {'<i>E</i><sub>coh</sub> (eV)':    lambda x: formatter(eng_coh_format, x),
              '<i>a</i><sub>0</sub> (&Aring;)': lambda x: formatter(l_const_format, x),
              '<i>b</i><sub>0</sub> (&Aring;)': lambda x: formatter(l_const_format, x),
              '<i>c</i><sub>0</sub> (&Aring;)': lambda x: formatter(l_const_format, x),
              '<i>C</i><sub>11</sub> (GPa)':    lambda x: formatter(e_const_format, x),
              '<i>C</i><sub>22</sub> (GPa)':    lambda x: formatter(e_const_format, x),
              '<i>C</i><sub>33</sub> (GPa)':    lambda x: formatter(e_const_format, x),
              '<i>C</i><sub>12</sub> (GPa)':    lambda x: formatter(e_const_format, x),
              '<i>C</i><sub>13</sub> (GPa)':    lambda x: formatter(e_const_format, x),
              '<i>C</i><sub>23</sub> (GPa)':    lambda x: formatter(e_const_format, x),
              '<i>C</i><sub>44</sub> (GPa)':    lambda x: formatter(e_const_format, x),
              '<i>C</i><sub>55</sub> (GPa)':    lambda x: formatter(e_const_format, x),
              '<i>C</i><sub>66</sub> (GPa)':    lambda x: formatter(e_const_format, x)}

### 3.3 Other HTML content 

Here is where additional content of the resulting html file is collected.

In [31]:
html_style = """
<style>
    .datatable {
        border: 1px solid black; 
        border-collapse: collapse; 
        padding: 5px; 
        text-align: right;

    } 
    .datatable td {
        border: 1px solid black; 
        border-collapse: collapse; 
        font: "Courier New", monospace; 
        font-size: 12px; 
        padding: 5px; 
        text-align: right;
        width: 45px;
    }
    .datatable td:nth-child(1) {
        width: 135px;
        text-align: left;
    }
    .datatable th {
        border: 1px solid black; 
        border-collapse: collapse; 
        font: "Courier New", monospace; 
        font-size: 12px; 
        padding: 5px; 
        text-align: left;
    }
</style>
"""

In [40]:
html_info = """
<h2>Static Crystal Structure Predictions</h2>
<p>
    The properties listed here are obtained from static calculations for given 
    crystal structures. The values were obtained using an algorithm that takes 
    an initial estimate for the lattice constants and evaluates the cohesive 
    energy and virial pressures for the structure. Elastic constants are calculated 
    using the changes in the virial pressures due to the application of small strains 
    (1e-5). The pressure values and elastic compliances are used to obtain a new lattice 
    parameter guess by linearly extrapolating to zero pressure. This process is repeated 
    until the lattice constants from one iteration to the next are within a relative 
    tolerance of 1e-10. The elastic constants shown coincide with the final iteration.
</p><p>
    Initial estimates for the lattice constants correspond to all the energy minima 
    identified in the cohesive energy vs interatomic spacing plots. This means that 
    it is possible that some potentials have multiple refined results for the same 
    crystal structure. Having multiple energy minimums for a structure does not 
    necessarily make the potential 'bad' as unwanted configurations may be unstable or 
    correspond to conditions that may not be relevant to the problem of interest 
    (eg. very high strains).
</p><p>
    More information about the calculation used can be found on the 
    <a href="http://www.ctcms.nist.gov/potentials/tools.html">Tools</a> page.
</p><p>
    <a href="http://www.nist.gov/public_affairs/disclaimer.cfm">NIST disclaimer</a>
</p><p>
    <b>Disclaimer:</b> These values are meant to be guidelines for comparing 
    potentials, not the absolute values for any potential's properties. The 
    presence of any structures in this list does not guarantee that those 
    structures are stable as only the box dimensions are changed, not the 
    relative positions of the atoms in the cell. Also, the lowest energy 
    structure may not be included in this list. Variations in the values may 
    occur for fully relaxed configurations, different small strain values, 
    different simulation software and different implementations of the 
    interatomic potential. The algorithm used works best when the interatomic 
    potential's elastic constants vary smoothly with changes in volume.
</p><p>
    <b>Version Information:</b> As property calculation methods are developed and updated, there 
    may be changes in the calculated values. Updates to the calculation methods 
    that affect the values will be documented and archival versions of this page 
    will be made available as a record. 
    <ul><li>
        2016-09-28. Values for simple compounds added. All identified energy minima 
        for each structure are listed. The existing elemental data was regenerated. Most values are 
        consistent with before, but some differences have been noted. Specifically, variations are 
        seen with some values for potentials where the elastic constants don't vary smoothly near 
        the equilibrium state. Additionally, the inclusion of some high-energy structures has 
        changed based on new criteria for identifying when structures have relaxed to another structure.        
    </li><li>
        2016-04-07. Values for elemental crystal structures added. Only values for the 
        global energy minimum of each unique structure given.
    </li></ul>
</p>
<hr/>
"""

In [38]:
html_note = '*<i>Multiple values for the same structure are due to multiple energy minima. More information in calculation description.</i>'

### 3.4 Code

In [41]:
for potential in np.unique(data.potential):
    potential_data = data[data.potential==potential]
    
    html = html_style + html_info
    
    #Check that a directory exists for the potential
    if not os.path.isdir(os.path.join(per_potential_directory, potential)):
        os.makedirs(os.path.join(per_potential_directory, potential))
    
    for composition in np.unique(potential_data.composition):
        composition_data = potential_data[potential_data.composition == composition]
        html += '<h3>0K Crystal Structure Properties for ' + composition + '</h3>\n'
        
        html_data = pd.DataFrame(potential_data[potential_data.composition==composition], columns=headers.keys())
        html_data.rename(columns=headers, inplace=True)
        html_data.reset_index(drop=True, inplace=True)
        
        prototypes, pcounts = np.unique(html_data.prototype, return_counts=True)
        note = False
        for prototype, pcount in zip(prototypes, pcounts):
            if pcount > 1:
                print potential, composition, prototype
                html_data.prototype.loc[html_data.prototype==prototype] = prototype+'*'
                note = True
        
        html_data.sort_values('<i>E</i><sub>coh</sub> (eV)', inplace=True)
        
        html += html_data.to_html(index=False, escape=False, formatters=formatters, classes='datatable') +'\n'
        
        if note:
            html += html_note
        html += '<hr/>\n'
        
    with open(os.path.join(per_potential_directory, potential, 'struct.info'), 'w') as html_file:
        html_file.write(html)

1987--Ackland-G-J--Cu Cu A4--C--dc
1987--Ackland-G-J--Mo Mo A1--Cu--fcc
1996--Farkas-D--Nb-Ti-Al Al2Nb C1--CaF2--fluorite
1996--Farkas-D--Nb-Ti-Al Al2NbTi L2_1--AlCu2Mn--heusler
1996--Farkas-D--Nb-Ti-Al Al3Nb A15--Cr3Si
1996--Farkas-D--Nb-Ti-Al Al3Nb D0_3--BiF3
1996--Farkas-D--Nb-Ti-Al Al3Nb L1_2--AuCu3
1996--Farkas-D--Nb-Ti-Al AlNb B2--CsCl
1996--Farkas-D--Nb-Ti-Al AlNb L1_0--AuCu
1996--Farkas-D--Nb-Ti-Al AlNb2 C1--CaF2--fluorite
1996--Farkas-D--Nb-Ti-Al AlNb3 D0_3--BiF3
1996--Farkas-D--Nb-Ti-Al AlNb3 L1_2--AuCu3
1996--Farkas-D--Nb-Ti-Al Nb A1--Cu--fcc
1996--Farkas-D--Nb-Ti-Al Nb A2--W--bcc
1996--Farkas-D--Nb-Ti-Al Nb A4--C--dc
1996--Farkas-D--Nb-Ti-Al Nb2Ti C1--CaF2--fluorite
1996--Farkas-D--Nb-Ti-Al Nb3Ti D0_3--BiF3
1996--Farkas-D--Nb-Ti-Al NbTi B1--NaCl--rock-salt
1996--Farkas-D--Nb-Ti-Al NbTi B2--CsCl
1996--Farkas-D--Nb-Ti-Al NbTi B3--ZnS--cubic-zinc-blende
1996--Farkas-D--Nb-Ti-Al NbTi2 C1--CaF2--fluorite
1996--Farkas-D--Nb-Ti-Al NbTi3 A15--Cr3Si
1996--Farkas-D--Nb-Ti-Al NbTi3 L1

## 4. Comparison Plots