# LIBS spectra database

In [8]:
import numpy as np
import requests

In [None]:
# Ad-hoc probe: request the hydrogen ('H') line list from the NIST ASD
# lines1.pl endpoint and keep the raw response body in `test_data` for
# inspection. Note this query uses line_out=0, whereas get_lines() below
# uses line_out=1.
xxx1 = "https://physics.nist.gov/cgi-bin/ASD/lines1.pl?spectra=" + 'H' + "&limits_type=0&low_w=0&upp_w=1000&unit=1&submit=Retrieve+Data&de=0&format=3&line_out=0&remove_js=on&no_spaces=on&en_unit=1&output=0&bibrefs=1&page_size=15&show_obs_wl=1&show_calc_wl=1&order_out=0&max_low_enrg=&show_av=3&max_upp_enrg=&tsb_value=0&min_str=&A_out=1&intens_out=on&max_str=&allowed_out=1&forbid_out=1&min_accur=&min_intens=&conf_out=on&term_out=on&enrg_out=on&J_out=on&g_out=on"
# requests never times out by default; bound the wait so a stalled server
# cannot hang the notebook forever.
xxx2 = requests.get(xxx1, timeout=30)
test_data = xxx2.text

In [2]:
# Element symbols in order of atomic number (1-118), one row per period.
# NOTE(review): entries 113, 115, 117 and 118 use the old placeholder symbols
# ('Uut', 'Uup', 'Uus', 'Uuo'); kept as-is because they are used verbatim in
# the query URL and in output file names.
el_list = [
    # period 1
    'H', 'He',
    # period 2
    'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne',
    # period 3
    'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar',
    # period 4
    'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn',
    'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr',
    # period 5
    'Rb', 'Sr', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd',
    'In', 'Sn', 'Sb', 'Te', 'I', 'Xe',
    # period 6
    'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy',
    'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt',
    'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn',
    # period 7
    'Fr', 'Ra', 'Ac', 'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf',
    'Es', 'Fm', 'Md', 'No', 'Lr', 'Rf', 'Db', 'Sg', 'Bh', 'Hs', 'Mt', 'Ds',
    'Rg', 'Cn', 'Uut', 'Fl', 'Uup', 'Lv', 'Uus', 'Uuo',
]

In [13]:
def get_lines(element, timeout=30):
    """Download the NIST ASD line table for 'element' as a 2-D string array.

    The response body is stripped of double quotes, newlines and '='
    characters, split on tabs, and the final (trailing) field is discarded.
    The remaining flat list of fields is then folded into rows.

    The column count is not read from the response; it is guessed by taking
    the first of 20, 21, 22, 12, 16 that divides the number of fields evenly.
    A field count divisible by more than one candidate is therefore folded
    with the earliest one in that order.

    Parameters
    ----------
    element : value converted with str() and inserted into the 'spectra='
        query parameter (e.g. 'Fe').
    timeout : seconds passed to requests.get(); bounds how long a stalled
        server can block the call.

    Returns
    -------
    numpy.ndarray of shape (n_rows, n_cols) holding the raw text fields.
    A response that yields zero fields comes back as an empty (0, 20) array,
    because 0 is divisible by the first candidate.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    ValueError
        If the number of fields matches none of the known column counts.
    """

    url_el = "https://physics.nist.gov/cgi-bin/ASD/lines1.pl?spectra=" + str(element) + "&limits_type=0&low_w=0&upp_w=1000&unit=1&submit=Retrieve+Data&de=0&format=3&line_out=1&remove_js=on&no_spaces=on&en_unit=1&output=0&bibrefs=1&page_size=15&show_obs_wl=1&show_calc_wl=1&order_out=0&max_low_enrg=&show_av=3&max_upp_enrg=&tsb_value=0&min_str=&A_out=1&intens_out=on&max_str=&allowed_out=1&forbid_out=1&min_accur=&min_intens=&conf_out=on&term_out=on&enrg_out=on&J_out=on&g_out=on"
    response = requests.get(url_el, timeout=timeout)
    # Fail loudly on an HTTP error instead of parsing an error page as data.
    response.raise_for_status()

    # Remove '"', newline and '=' in one pass so the whole payload becomes a
    # single tab-separated run of fields.
    cleaned = response.text.translate(str.maketrans('', '', '"\n='))
    # The text after the last tab is dropped (presumably the payload ends
    # with a trailing tab -- TODO confirm against a live response).
    data = np.array(cleaned.split('\t')[:-1])

    # --- find number of columns in data and reshape
    # --- Se, U have 12
    # --- Ar is weird
    # Candidates are tried in this fixed priority order; the first width that
    # divides the field count wins.
    for n_cols in (20, 21, 22, 12, 16):
        if len(data) % n_cols == 0:
            return np.reshape(data, (-1, n_cols))

    # No known width fits: report it explicitly rather than returning a flat,
    # unshaped array that a caller might save by mistake.
    raise ValueError(
        "{}: {} fields do not fit any known column count".format(element, len(data))
    )

In [14]:
def save_lines(path, lines):
    """Write the array 'lines' to 'path' in NumPy's binary .npy format.

    'path' is handed to numpy.save unchanged, so it may be anything that
    function accepts as its target. Returns None.
    """
    np.save(file=path, arr=lines)

In [15]:
def all_lines(els, path):
    """ save LIBS data corresponding to all elements 'els' to 'path'
        one file for each element of 'els'

        Each file name is built by plain string concatenation,
        path + str(el) + ".npy", so 'path' must already end with a path
        separator (or be an intended file-name prefix).

        An element whose download, parsing or saving raises is reported on
        stdout and skipped; the remaining elements are still processed.
        Returns None.
    """

    # TODO: optionally skip elements that already have a file under 'path'
    # (an earlier, disabled draft globbed path + "*.npy" to find them).

    for el in els:
        try:
            save_lines(path + str(el) + ".npy", get_lines(el))
        # Catch Exception rather than everything, so Ctrl-C (KeyboardInterrupt)
        # and SystemExit can still stop a long scrape.
        except Exception as err:
            # Keep the original message but append the real cause: failures
            # here are not only shape problems (e.g. network errors).
            print("Element " + str(el) + " not a standard data shape"
                  + " (" + type(err).__name__ + ": " + str(err) + ")")

In [16]:
# Full refresh: run this cell to download and overwrite the saved line data
# for every symbol in el_list (one network request per element).
all_lines(els=el_list, path="/Users/whitta/Projects/python/LIBS/database/")

In [None]:
# Partial refresh: run this cell to re-download only the listed element(s);
# edit the list to choose which ones.
all_lines(els=['Ar'], path="/Users/whitta/Projects/python/LIBS/database/")