In [1]:
import json
# Read the decoder table file and parse its JSON text into a dict used by the rest of the notebook
with open('decoder_table.json') as json_file:
    decoder_table = json.loads(json_file.read())

In [2]:
# This code is written for TRC Alloys to pull alloy or pure-metal data directly from the API. It was written by Bryan Dickson using Python version 3.9.6.
# If you have any advice or suggestions to improve the code, I would like to hear them,
# including how to structure the data or potential features that could be useful. Please message me at TRCalloy@nist.gov

# To use the code, some particular packages need to be installed on your computer: pandas, numpy, plotly, and requests (itertools is part of the Python standard library).
# The code should function as listed. Unless you are adding or removing features, please adjust only the variables marked for editing. Formats for adjusting some parts are described in the documentation
# on the GitHub, where examples are listed.

import requests

# Headers sent along with every request to the API URI; these should not need to be changed
headers = dict([
    ('content-type', 'application/x-www-form-urlencoded;'),
    ('Access-Control-Allow-Origin', '*'),
])

###################################################################################################
### EDIT THE SEARCH PARAMETERS LISTED BELOW, WITHIN THIS BLOCK ###
# Property, representation, and state codes are provided in the decoder table and are necessary in order to use this resource properly

# compound is a list of one or more strings; be sure to use the empirical formula, e.g. "Au", not "Gold"
compound = ["Hg"]
# x-axis variable; use the code from the decoder table, not the name of the variable, e.g. Temperature: "T"
var = "T"
# property being measured; use the code from the decoder table, not the name, e.g. Electrical Resistivity: "ER"
prop = "RSS"
# representation of the data; use the code from the decoder table, e.g. Direct: "A"; if no particular representation is wanted, use the bool False
rep = False
# state of matter for the data; use the code from the decoder table, e.g. Crystal: "C"; if all states are wanted, use the bool False
state = False
# whether or not to show error bars for points on the graph: True to show, False to hide
show_uncertainty = True
# units the output should be expressed in, given as strings (many units should be available but some may not be). MAKE SURE THESE ARE CORRECT UNITS OR THE CODE WILL NOT RUN!
# The defaults below are looked up in the decoder table's 'UnitsStandard' section; to use a custom unit, replace the lookup with the unit's string representation, e.g. Fahrenheit = 'F'
des_un_var = decoder_table['UnitsStandard'][var]
des_un_prop = decoder_table['UnitsStandard'][prop]

# Build the search payload that is sent to the API URI as JSON.
# This notebook searches by property code; other search parameters are described in the API Guide on the GitHub.
# An alternative payload that searches by author last name is kept below, commented out.
search_data = dict(property_search_code=prop)
# search_data = {"author_last_name":['Author_Last_Name']}

# Define the URL to send the request to.
# Note that this example requires an authentication key to get the data. Any key distributed with this
# example is invalid and will need to be replaced by a valid authentication key, which may be requested
# free of charge from TRCalloy@nist.gov

# Read the API key from the text file 'TRC-Alloys API Key.txt'. The path is relative, so the file is
# looked up in the current working directory. The `with` block closes the file handle, and strip()
# removes the trailing newline most editors add, which would otherwise end up inside the URL.
with open('TRC-Alloys API Key.txt') as key_file:
    api_key = key_file.read().strip()

# This is the URL call; the only part that changes is the API key inserted as the authkey parameter
url=f'https://trc.nist.gov/MetalsAlloyAPI/search?authkey={api_key}'

In [3]:
import re
# Get the compound data with a requests POST; this is the call that fetches the data from the API.
# The timeout keeps the notebook from waiting forever on an unresponsive server, and raise_for_status()
# stops here on an HTTP error status instead of caching the error body as if it were data.
compound_data_response = requests.post(url, json=search_data, headers=headers, timeout=120)
compound_data_response.raise_for_status()

# compound_data_response is a "response" object and the actual output JSON is in its text field.
# Non-ASCII characters are stripped from that text before it is written to Test.json, the file
# that is loaded back later in the notebook.
with open('Test.json', 'w') as fp:
    fp.write(re.sub(r'[^\x00-\x7f]', r'', compound_data_response.text))

In [4]:
# Read the cached Test.json file and parse its JSON text into a dict for use within the code
with open('Test.json') as json_file:
    data = json.loads(json_file.read())

In [5]:
from collections import Counter
element_ids = [] # compound IDs of the requested formulas, without duplicates
spec_par = {} # dict which connects a specimen ID to the list of its parent specimen IDs
spec_desc = {} # dict which connects a specimen ID to its initial description text

# First pass over the .json: collect the compound IDs whose Hill formula matches a requested
# formula. These IDs are what identify the elements in the later lookups.
for formula in compound:
    for citation in data['TRC_data']:
        for entry in citation['compounds']:
            if entry['formula_hill'] == formula and entry['compound_id'] not in element_ids:
                element_ids.append(entry['compound_id'])

# Second pass: build the two specimen dicts. The wanted IDs do not change while scanning, so the
# scan runs once rather than once per requested formula.
wanted_ids = set(element_ids)
for citation in data['TRC_data']:
    for specimen in citation['specimens']:
        # Keys are tested on the dict itself. A substring search on str(specimen) also matches text
        # inside values and then fails with a KeyError on the lookup.
        if set(specimen['compound_ids']) == wanted_ids and 'parent_ids' in specimen:
            # only specimens made of exactly the requested compounds get a parent entry
            spec_par[specimen['specimen_id']] = list(specimen['parent_ids'])
        description = specimen.get('description')
        if isinstance(description, dict) and 'initial' in description:
            # descriptions are recorded for every specimen, whatever its composition
            spec_desc[specimen['specimen_id']] = f"{description['initial']}"

In [11]:
import pandas as pd
import itertools
from conversion_bridge import conversion_bridge

temp_x = "N/A" # temporary value which houses the variable ID for the x-axis variable
temp_y = "N/A" # temporary value which houses the variable ID for the property
temp_o = "N/A" # temporary value which houses the variable ID for the secondary variable
x_lab = decoder_table['PRP'][var] # axis label to name x
y_lab = decoder_table['PRP'][prop] # axis label to name y
state_dict = {} # dict which maps phase IDs to phase types
dataset = [] # rows which will be converted to the pandas dataframe

# The libraries used for unit conversion emit output for each conversion automatically; there is no need to pay attention to those values.

# `other` is the secondary variable reported beside each point: pressure when the x-axis is temperature and
# temperature when the x-axis is pressure, since those are the two most common variables for modification of a sample.
if var == 'T':
    other = 'P'
elif var == 'P':
    other = 'T'
else:
    # No secondary variable is defined for any other x-axis code; fail here with a clear message
    # rather than with a NameError on the lookups below.
    raise ValueError(f"var must be 'T' or 'P' so that a secondary variable can be chosen, got {var!r}")
o_lab = decoder_table['PRP'][other]
o_un = decoder_table['UnitsStandard'][other]

def _parse_purity(description):
    """Return the number embedded in a specimen description, or None.

    Every digit and '.' character of the description is concatenated in order and the result
    is read as a float when it contains a '.', otherwise as an int. None is returned when the
    description holds no such characters or when the concatenated text is not a valid number
    (for example "99.9." produced by a sentence-ending period).
    """
    digits = "".join(ch for ch in description if ch.isdigit() or ch == ".")
    if digits == "":
        return None
    try:
        return float(digits) if "." in digits else int(digits)
    except ValueError:
        return None


for a in data['TRC_data']:
    for b in a['systems']: # system data tends to be the material compositions, so other aspects can vary within a system with each data set
        try: # some intermetallics are not explicitly recognized, so the try catches systems with missing keys
            # only continues if the metal or alloy is exactly the one requested; alloys that merely contain that element are skipped
            if Counter(b['compound_ids']) != Counter(element_ids):
                continue
            for p in b['phases']:
                state_dict[p['phase_id']] = p['type'] # this is where the phase ids and types are related, to be looked up below
            for c in b['data_sets']:
                x_axt = []
                y_axt = []
                o_axt = []
                stat_id = []
                y_unc = []
                # Everything below is reset for each data set so that a data set which lacks a variable,
                # or whose specimen is unknown, cannot inherit values from the previous data set.
                temp_x = None
                temp_y = None
                temp_o = None
                ref_temp = "N/A"
                rep_u = None
                method = ""
                init_desc = None
                pure = None
                p_un = None
                for s in c['states']:
                    stat_id.append(state_dict[s['phase_id']]) # lists the phase type(s) for the data set
                    if s['specimen_id'] not in spec_par:
                        # lists the specimen ID for easy recognition if it was not added to the spec_par dict
                        init_desc = s['specimen_id']
                        continue
                    parents = spec_par[s['specimen_id']]
                    if len(element_ids) > 1: # alloy: one description / purity / unit entry per parent specimen
                        init_desc = []
                        pure = []
                        p_un = []
                        # zip_longest pads the parent list with None up to len(element_ids) + 1 entries
                        for t, _slot in itertools.zip_longest(parents, range(len(element_ids) + 1)):
                            if t is None:
                                init_desc.append(None)
                                pure.append(None)
                                p_un.append(None)
                                continue
                            temp_desc = spec_desc[t]
                            temp_pure = _parse_purity(temp_desc)
                            init_desc.append(temp_desc)
                            pure.append(temp_pure)
                            if temp_pure is None:
                                p_un.append(None)
                            elif 'weight %' in temp_desc:
                                p_un.append('Weight %')
                            elif 'mole %' in temp_desc:
                                p_un.append('Mole %')
                            else:
                                p_un.append("Not Specified %")
                    else: # pure element: description and purity come from the first parent specimen
                        init_desc = spec_desc[parents[0]]
                        pure = _parse_purity(init_desc)
                        if 'weight %' in init_desc:
                            p_un = 'Weight %'
                        elif 'mole %' in init_desc:
                            p_un = 'Mole %'
                        elif pure is None:
                            p_un = None
                        else:
                            p_un = "Not Specified %"
                for v in c['variables']: # identifies the property, the x-axis variable and the secondary variable of this data set
                    if v['variable_name'] == prop:
                        temp_y = v['variable_id']
                        rep_u = v['representation']
                        # NOTE(review): this tests the user's filter `rep`, not the data set's own
                        # representation `rep_u` -- confirm which one is intended.
                        if str(rep).startswith(('R', 'X')):
                            y_un = '1'
                        else:
                            y_un = v['units']
                        if 'method' in v:
                            # decoded name when the upper-cased code is a key of the Method table, raw value otherwise
                            method = decoder_table['Method'].get(str(v['method']).upper(), v['method'])
                        else:
                            method = "N/A"
                        ref_temp = v.get('reference_temperature', "N/A")
                    if v['variable_name'] == var:
                        temp_x = v['variable_id']
                        if v['units'] == 'D':
                            x_un = decoder_table['UnitsSpecial'][v['units']]
                        else:
                            x_un = v['units']
                    if v['variable_name'] == other:
                        temp_o = v['variable_id']
                # Only data sets whose data tables hold both the property and the x-axis variable are used.
                # IDs are compared by equality; a substring test on str(c['data']) would let ID 1 match ID 12.
                data_ids = [d.get('variable_id') for d in c['data']]
                if temp_x is None or temp_y is None or temp_x not in data_ids or temp_y not in data_ids:
                    continue
                for d in c['data']: # goes through the data tables and their data values
                    if d['variable_id'] == temp_y:
                        for e in d['data_values']:
                            if e['value'] is None:
                                y_axt.append(None)
                                y_unc.append(None)
                            elif 'uncertainty' in e: # keeps the third element of the conversion result as the uncertainty
                                temp_data_y = conversion_bridge(e['value'], e['uncertainty'], prop, y_un, des_un_prop)
                                y_axt.append(temp_data_y[1])
                                y_unc.append(temp_data_y[2])
                            else:
                                temp_data_y = conversion_bridge(e['value'], 0, prop, y_un, des_un_prop)
                                y_axt.append(temp_data_y[1])
                                y_unc.append(None)
                    if d['variable_id'] == temp_x:
                        for f in d['data_values']:
                            temp_data_x = conversion_bridge(f['value'], 0, var, x_un, des_un_var)
                            x_axt.append(temp_data_x[1]) # these are the data values for the x-axis variable
                    if d['variable_id'] == temp_o:
                        for f in d['data_values']:
                            o_axt.append(f['value'])
                for i, j, k, l in itertools.zip_longest(x_axt, y_axt, y_unc, o_axt):
                    if i is None:
                        # a missing x value falls back to the first x value of the data set (None if there is none)
                        i = x_axt[0] if x_axt else None
                    # These are all of the individual and varying aspects being added to the pandas dataframe; not all
                    # are currently used but they are present for use if wanted.
                    dataset.append({
                        f'{x_lab}: {des_un_var}': i, # x-axis variable values
                        f'{y_lab}: {des_un_prop}': j, # y-axis property values
                        'Uncertainty': k, # uncertainty for property
                        o_lab: f'{l} {o_un}', # secondary variable value followed by its standard unit
                        'Ref Temp': ref_temp, # reference temperature of the property variable, "N/A" when absent
                        'Data Set ID': str(c['data_set_id']), # Data Set ID to which each value belongs
                        'State': stat_id, # phase type(s) of the data set
                        'Representation': rep_u, # encoded representation for each point
                        'Method': method, # method by which the data was collected
                        'Description': init_desc, # initial description of the specimen(s)
                        'Purity': pure, # purity value; for alloys a list with one entry per parent specimen
                        'Purity Units': p_un, # format for purity, corresponds directly to each purity value i.e. weight percent, mole percent, not specified
                        'Year': a['citation']['year'] # year of the paper's publication
                    })
        except KeyError:
            # A missing key invalidates only this system. `break` here would also drop every
            # remaining system of the same citation.
            continue

df = pd.DataFrame(dataset) # builds the pandas dataframe

In [12]:
import plotly.express as px
import math

x = []
y = []

x_col = f'{x_lab}: {des_un_var}' # column holding the x-axis values
y_col = f'{y_lab}: {des_un_prop}' # column holding the property values

if y_col not in df.columns:
    # An empty search result gives a frame without columns; report that instead of an opaque KeyError
    raise ValueError(f"No data was collected for compound {compound} and property {prop!r}; nothing to plot")

# Build one boolean mask of the rows to keep instead of dropping rows one at a time.
# notna() also handles None in an object column, where math.isnan would raise a TypeError.
keep = df[y_col].notna() # drops all data points which have no value for the property
if rep is not False: # keeps only the data points which have the wanted representation
    keep &= (df['Representation'] == rep)
if state is not False: # keeps only the data points whose state list is exactly [state]
    keep &= df['State'].map(lambda phases: phases == [state]).astype(bool)
df = df.loc[keep]

# The two plots differ only in the error bars, so the shared arguments are written once
scatter_args = dict(
    x=x_col,
    y=y_col,
    title=str(compound),
    color='Data Set ID',
    hover_data=[df['Ref Temp'], df.State, df.Representation, df.Method, df.Description, df.Year],
    height=600,
)
if show_uncertainty: # plots the graph with uncertainty shown as error bars
    scatter_args['error_y'] = 'Uncertainty'
fig = px.scatter(df, **scatter_args)

fig.show()

Unnamed: 0,Temperature: K,Speed of sound: m/s,Uncertainty,Pressure,Ref Temp,Data Set ID,State,Representation,Method,Description,Purity,Purity Units,Year
0,275.05,1460.30,1.46030,None kPa,,2572,[L],A,Linear variable-path acoustic interferometer,not stated,,,1928
1,275.25,1460.50,1.46050,None kPa,,2572,[L],A,Linear variable-path acoustic interferometer,not stated,,,1928
2,279.65,1458.20,1.45820,None kPa,,2572,[L],A,Linear variable-path acoustic interferometer,not stated,,,1928
3,285.05,1454.00,1.45400,None kPa,,2572,[L],A,Linear variable-path acoustic interferometer,not stated,,,1928
4,285.15,1454.10,1.45410,None kPa,,2572,[L],A,Linear variable-path acoustic interferometer,not stated,,,1928
...,...,...,...,...,...,...,...,...,...,...,...,...,...
742,1857.70,353.33,1.76665,160000 kPa,,23159,[G],A,Pulse-echo method,not stated,,,1995
743,1865.10,354.18,1.77090,160000 kPa,,23159,[G],A,Pulse-echo method,not stated,,,1995
744,1870.30,355.30,1.77650,160000 kPa,,23159,[G],A,Pulse-echo method,not stated,,,1995
745,1881.60,357.06,1.78530,160000 kPa,,23159,[G],A,Pulse-echo method,not stated,,,1995


In [8]:
# Write df to "<compound(s) joined by '-'>_<property code>_test_data.csv" without the index column
csv_name = f"{'-'.join(compound)}_{prop}_test_data.csv"
df.to_csv(csv_name, index=False)