In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import io
import re
import matplotlib.pyplot as plt
import warnings
from pandas.core.common import SettingWithCopyWarning

warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

from sqlalchemy import create_engine

class Lines_LIBS(object):
    """Download the spectral lines of one element from the NIST ASD and filter them.

    Constructing an instance performs an HTTP request (see ``retrieve_data``)
    and then runs the cleaning/filtering pipeline; the result is available as
    ``self.data_frame``.

    Parameters
    ----------
    element : str
        Value substituted into the ``spectra`` query parameter (e.g. ``'Ar'``).
    low_w, upper_w : number
        Values substituted into the ``low_w`` / ``upp_w`` query parameters
        (presumably nm, matching the ``obs_wl_air(nm)`` column -- confirm
        against the NIST form documentation).
    strongLines : bool, default True
        When true, keep only rows whose ``intens`` exceeds the default
        threshold of ``filter_strong_lines``.
    first_sp : bool, default True
        When true, keep only rows with ``sp_num == 1``.
    """

    # Reference wavelength(s) per element; a list means several lines are of
    # interest for that element. Values are compared against the
    # 'obs_wl_air(nm)' column by filter_line().
    moritz_elements = {
        'Si': 251.6,
        'Cu': 324.7,
        'Sn': 326.2,
        'Ag': 328,
        'Ni': 349.2,
        'Cd': 361.0,
        'Zr': 361.0,
        'Fe': 371.9,
        'Mo': 386.4,
        'Al': 396.1,
        'Cr': 425.4,
        'W': 430.2,
        'In': 451.1,
        'Ti': 453.4,
        'Ta': 481.2,
        'Pd': 324.2,
        'Ar': 420.0,
        'Nb': [405.9, 408],
        'Zn': [330.2, 307.2, 307.6],
        'O': 777,
    }

    def __init__(self, element, low_w, upper_w, strongLines=True, first_sp=True):
        self.element = element
        self.low_w = low_w
        self.upper_w = upper_w
        self.data_frame = pd.DataFrame()

        self.retrieve_data()
        self.clean_intensity()

        if strongLines:
            self.filter_strong_lines()

        self.filter_nan_values()
        self.reset_index()
        self.filter_columns()

        if first_sp:
            self.filter_sp()

    def retrieve_data(self, timeout=30):
        """Fetch the line list from NIST and load it into ``self.data_frame``.

        Parameters
        ----------
        timeout : float, default 30
            Seconds passed to ``requests.get`` so a stalled connection cannot
            hang the notebook indefinitely.

        Raises
        ------
        requests.HTTPError
            If the server answers with an error status.
        """
        site="https://physics.nist.gov/cgi-bin/ASD/lines1.pl?spectra={}&limits_type=0&low_w={}&upp_w={}&unit=1&submit=Retrieve+Data&de=0&format=3&line_out=0&remove_js=on&en_unit=0&output=0&bibrefs=1&page_size=15&show_obs_wl=1&show_calc_wl=1&unc_out=1&order_out=0&max_low_enrg=&show_av=2&max_upp_enrg=&tsb_value=0&min_str=&A_out=1&intens_out=on&max_str=&allowed_out=1&forbid_out=1&min_accur=&min_intens=&conf_out=on&term_out=on&enrg_out=on&J_out=on"
        site = site.format(self.element, self.low_w, self.upper_w)
        respond = requests.get(site, timeout=timeout)
        # Fail loudly on HTTP errors instead of trying to parse an error page.
        respond.raise_for_status()
        # Strip any markup, then drop the double quotes wrapped around values
        # so the remaining text can be read as tab-separated data.
        soup = BeautifulSoup(respond.content, 'lxml')
        html_data = soup.get_text().replace('"', "")
        self.data_frame = pd.read_csv(io.StringIO(html_data), sep="\t")

    @staticmethod
    def _digits_only(value):
        """Return the decimal digits of ``value`` as a string ('' if none)."""
        # A float-typed column would otherwise turn 123.0 into '1230' once the
        # decimal point is stripped, so integral floats are converted first.
        if isinstance(value, float) and value.is_integer():
            value = int(value)
        return re.sub('[^0-9]', '', str(value))

    def clean_intensity(self):
        """Reduce ``intens`` to its digits, drop rows without any, make it numeric.

        Every non-digit character is removed from each ``intens`` entry;
        entries left empty (including NaN) are dropped, and the column is
        converted with ``pd.to_numeric``.
        """
        digits = self.data_frame['intens'].map(self._digits_only)
        has_digits = digits != ''
        # Work on an explicit copy so the assignment below never targets a
        # view of the downloaded frame (no chained-assignment warning).
        cleaned = self.data_frame.loc[has_digits].copy()
        cleaned['intens'] = pd.to_numeric(digits[has_digits])
        self.data_frame = cleaned

    def filter_strong_lines(self, value=10**2):
        """Keep only rows whose ``intens`` is strictly greater than ``value``."""
        # The threshold applies to the 'intens' column, not to gA.
        self.data_frame = self.data_frame[self.data_frame['intens'] > value]

    def filter_nan_values(self, column='obs_wl_air(nm)'):
        """Keep only rows where ``column`` is > 0 (NaN compares false, so it is dropped)."""
        self.data_frame = self.data_frame[self.data_frame[column] > 0]

    def reset_index(self):
        """Renumber the rows 0..n-1, discarding the previous index."""
        self.data_frame = self.data_frame.reset_index(drop=True)

    def filter_columns(self):
        """Reduce the frame to the columns used downstream.

        For element 'H' the 'element' and 'sp_num' columns are not selected
        (presumably they are absent from that response -- confirm).
        """
        if self.element != 'H':
            self.data_frame = self.data_frame[['element','sp_num','obs_wl_air(nm)','intens','gA(s^-1)']]
        else:
            self.data_frame = self.data_frame[['obs_wl_air(nm)','intens','gA(s^-1)']]

    def filter_line(self, line, count):
        """Return the strongest of the ``count`` lines closest to ``line``.

        Parameters
        ----------
        line : number
            Target value compared against 'obs_wl_air(nm)'.
        count : int
            How many nearest rows to consider.

        Returns
        -------
        pandas.DataFrame
            A single-row frame (the row with the highest ``intens`` among the
            candidates), keeping that row's original index label.

        Raises
        ------
        IndexError
            If the frame is empty.
        """
        distance = (self.data_frame['obs_wl_air(nm)'] - line).abs()
        # argsort yields row positions ordered by distance; take the nearest.
        nearest_positions = distance.argsort().to_numpy()[:count]
        candidates = self.data_frame.iloc[nearest_positions]
        strongest = candidates.sort_values(by=['intens'], ascending=False).iloc[0]
        return pd.DataFrame(strongest).transpose()

    def filter_sp(self, sp=1):
        """Keep only rows whose ``sp_num`` equals ``sp``.

        Does nothing when the frame has no 'sp_num' column, which is the case
        after ``filter_columns`` for element 'H'.
        """
        if 'sp_num' not in self.data_frame.columns:
            return
        self.data_frame = self.data_frame[self.data_frame['sp_num'] == sp]

In [3]:
# Download the 'Ar' lines for the 200-940 window, keep only those above the
# default intensity threshold, and do not restrict to sp_num == 1.
# Note: constructing Lines_LIBS performs a network request.
line = Lines_LIBS('Ar',200,940,strongLines=True,first_sp=False) 

In [4]:
# Display the filtered line table held by the instance.
line.data_frame

Unnamed: 0,element,sp_num,obs_wl_air(nm),intens,gA(s^-1)
0,Ar,2,206.42110,123,
1,Ar,2,210.33517,112,
2,Ar,2,213.04258,155,
3,Ar,2,215.10518,129,
4,Ar,2,215.30684,120,
...,...,...,...,...,...
194,Ar,1,912.29670,35000,56700000.0
195,Ar,1,919.46380,550,5280000.0
196,Ar,1,922.44990,15000,25000000.0
197,Ar,1,929.15310,400,3260000.0


In [10]:
# Persist the filtered table; the row index is written as the first column
# because index=False is not passed.
line.data_frame.to_csv('Ar2.csv')

In [43]:
# Of the 2 rows closest to 342.2, show the one with the highest intensity.
# NOTE(review): the stored output below shows a Ti row although `line` was
# built for 'Ar' above, and the execution counts are non-sequential -- the
# output looks stale; re-run from a fresh kernel to confirm.
line.filter_line(342.2,count=2)

Unnamed: 0,element,sp_num,obs_wl_air(nm),intens,gA(s^-1)
277,Ti,1,382.819,200,


In [251]:
def moritz_lines(filepath,sp_filter=True):
    """Append the best-matching line for every reference wavelength to a CSV.

    For each element in ``Lines_LIBS.moritz_elements`` the lines between 200
    and 1000 are downloaded once (no intensity threshold), and for each of the
    element's reference wavelengths the single closest row is selected with
    ``filter_line(wavelength, 1)``.

    Parameters
    ----------
    filepath : str
        CSV file to append to. The header row is written only when the file
        does not exist yet or is empty, so repeated runs do not insert a
        second header line in the middle of the data.
    sp_filter : bool, default True
        Forwarded as ``first_sp``; when true only ``sp_num == 1`` rows are
        considered.

    Notes
    -----
    All rows are collected first and written in a single call, so a failed
    download leaves the file untouched instead of partially written.
    """
    import os  # local import: the notebook's import cell does not provide it

    selected = []
    for element, wavelengths in Lines_LIBS.moritz_elements.items():
        line = Lines_LIBS(element, 200, 1000, strongLines=False, first_sp=sp_filter)
        # Entries are either one wavelength or a list of them; normalise so
        # every element is handled by the same code path.
        if not isinstance(wavelengths, (list, tuple)):
            wavelengths = [wavelengths]
        for wavelength in wavelengths:
            selected.append(line.filter_line(wavelength, 1))

    if not selected:
        return

    needs_header = not os.path.exists(filepath) or os.path.getsize(filepath) == 0
    pd.concat(selected).to_csv(filepath, mode='a', header=needs_header, index=False)

In [252]:
# Append the selected reference lines for every element to data.csv without
# restricting to sp_num == 1. Performs one download per element.
moritz_lines('data.csv',sp_filter=False)

In [253]:
# Read the file back to inspect what was written.
pd.read_csv('data.csv')

Unnamed: 0,element,sp_num,obs_wl_air(nm),intens,gA(s^-1)
0,Si,1,251.6113,500,840000000.0
1,Cu,2,324.67898,620,
2,Sn,1,326.2331,15000,810000000.0
3,Ag,1,328.068,55000,560000000.0
4,Ni,1,349.296,5500,290000000.0
5,Cd,1,361.05077,1000,910000000.0
6,Zr,2,361.189,690,
7,Fe,1,371.84062,8500,36200000.0
8,Mo,1,386.4103,29000,437000000.0
9,Al,1,396.152,26,197000000.0


In [254]:
# Same selection as above, but only rows with sp_num == 1 are considered.
moritz_lines(filepath='filtered_data.csv',sp_filter=True)

In [255]:
# Read the sp_num-filtered file back for comparison with data.csv.
pd.read_csv('filtered_data.csv')

Unnamed: 0,element,sp_num,obs_wl_air(nm),intens,gA(s^-1)
0,Si,1,251.6113,500,840000000.0
1,Cu,1,324.754,10000,558000000.0
2,Sn,1,326.2331,15000,810000000.0
3,Ag,1,328.068,55000,560000000.0
4,Ni,1,349.296,5500,290000000.0
5,Cd,1,361.05077,1000,910000000.0
6,Zr,1,360.119,3500,
7,Fe,1,371.84062,8500,36200000.0
8,Mo,1,386.4103,29000,437000000.0
9,Al,1,396.152,26,197000000.0
