In [None]:
import numpy as np
import re
import urllib.request
from pprint import pprint
from html_table_parser.parser import HTMLTableParser
import pandas as pd
import math

In [None]:
# scraping data from Gemini Observatory website
def url_get_contents(url):
    """Fetch ``url`` and return the raw response body.

    Parameters
    ----------
    url : str
        Address to request; passed unchanged to ``urllib.request.Request``.

    Returns
    -------
    bytes
        The undecoded body; the caller is responsible for ``.decode(...)``.

    Raises
    ------
    urllib.error.URLError
        Propagated from ``urlopen`` when the request fails.
    """
    req = urllib.request.Request(url=url)
    # Use the response as a context manager so the underlying connection is
    # closed even if read() raises, instead of leaking the open handle.
    with urllib.request.urlopen(req) as response:
        return response.read()
# Download the page, decode it as UTF-8 and hand it to the HTML table parser.
GEMINI_H2_LINES_URL = 'https://www.gemini.edu/observing/resources/near-ir-resources/spectroscopy/important-h2-lines'
xhtml = url_get_contents(GEMINI_H2_LINES_URL).decode('utf-8')
p = HTMLTableParser()
p.feed(xhtml)
# Only the first table on the page is used, and its first two rows are
# dropped (presumably header rows -- TODO confirm against the live page).
data = p.tables[0][2:]

In [None]:
# Clean the scraped rows: skip rows that consist of a single empty cell, keep
# the first cell (the line label) as text, and turn every other cell into a
# float. Cells that are empty or just '.' become NaN.
rows = [
    [raw_row[0]] + [np.nan if cell in ('', '.') else float(cell) for cell in raw_row[1:]]
    for raw_row in data
    if raw_row != ['']
]
data = rows

# Build the DataFrame column by column; position k of every cleaned row feeds
# the k-th column name below.
column_names = ['Lines', 'Wavel, um', 'Freq, cm^-1', 'g_J, upper',
                'Temp, K', 'A_vals, e-7 s', 'LTE_vals']
dictionary = {name: [vals[k] for vals in data] for k, name in enumerate(column_names)}
data = pd.DataFrame(data=dictionary)

In [None]:
# functions to calculate values required by RADMC in the necessary units
# Physical constants in SI units. All three are the exact values fixed by the
# SI definitions, so no rounding error is introduced by the constants themselves.
h = 6.62607015e-34  # Planck constant, Joule seconds
kb = 1.380649e-23  # Boltzmann constant, Joules/Kelvin
c = 299792458.0  # speed of light, m/s (exact value rather than the rounded 3e8)
def energies_invcm(wavel, temp):
    """Return ``(E_lower, E_upper)`` of a transition, each divided by ``h*c``.

    Parameters
    ----------
    wavel : float or str
        Transition wavelength; multiplied by 1e-6 before use (i.e. treated
        as microns converted to metres).
    temp : float or str
        Upper-level energy expressed as a temperature; multiplied by ``kb``.

    Returns
    -------
    tuple of float
        ``(E_lower, E_upper)`` where the lower level is the upper level minus
        the photon energy ``h*c/wavelength``.

    NOTE(review): with ``h`` in J s and ``c`` in m/s (as defined in this
    notebook) dividing Joules by ``h*c`` yields m^-1, i.e. 100x the cm^-1
    value the function name suggests. ``energy_J`` inverts the conversion with
    the same factor, so the two must be changed together -- confirm the
    intended unit before using the numbers outside this notebook.
    """
    hc = h * c
    upper_joules = kb * float(temp)
    photon_joules = hc / (float(wavel) * 1e-6)  # Joules
    lower_joules = upper_joules - photon_joules
    return lower_joules / hc, upper_joules / hc

def g_vals(string):
    """Return the statistical weights ``(lower_g, upper_g)`` for a line label.

    The label is expected to contain a branch letter followed by a number in
    parentheses, e.g. ``'1-0 S(1)'`` or ``'2-1 Q(10)'``. The number is used as
    the rotational quantum number J of the lower level, and the branch letter
    gives the offset of the upper level: S -> J+2, Q -> J, O -> J-2.

    Parameters
    ----------
    string : str
        Line label to parse. Only this argument is inspected; the function no
        longer depends on the notebook globals ``data`` and ``i``.

    Returns
    -------
    tuple of float
        ``(lower_g, upper_g)``, each ``nuclear_weight * (2*J + 1)`` with
        ``nuclear_weight`` 1 for even J and 3 for odd J. ``(nan, nan)`` is
        returned when no ``S(..)``, ``Q(..)`` or ``O(..)`` pattern is found.
    """
    # Parse by pattern instead of by fixed character position / label length,
    # so multi-digit J values and stray whitespace are handled uniformly.
    match = re.search(r'([SQO])\s*\(\s*(\d+)\s*\)', string)
    if match is None:
        # Unrecognised label: propagate NaN rather than failing on an
        # undefined branch offset.
        return np.nan, np.nan

    branch_offsets = {'S': 2, 'Q': 0, 'O': -2}
    delta_j = branch_offsets[match.group(1)]
    j_lower = float(match.group(2))  # kept as float to match the previous return type

    # J and J +/- 2 share the same parity, so one weight serves both levels.
    nuclear_weight = 1 if (j_lower % 2) == 0 else 3

    lower_g = nuclear_weight * ((2 * j_lower) + 1)
    upper_g = nuclear_weight * ((2 * (j_lower + delta_j)) + 1)
    return lower_g, upper_g

In [None]:
# Keep only the rows whose 'A_vals, e-7 s' entry is not NaN (no other column
# is checked) and collect the columns of the new line table for them.
Lambda = []
Aud = []
E_lo = []
E_up = []
g_lo = []
g_up = []

# `i` is deliberately a plain module-level loop index (not a comprehension
# variable), so any helper that relies on the global row index keeps working.
for i in range(len(data)):
    a_scaled = data['A_vals, e-7 s'][i]
    if math.isnan(a_scaled):
        continue

    wavelength = data['Wavel, um'][i]
    Lambda.append(wavelength)

    # The table stores A in units of 1e-7 (per the column name); undo the scaling.
    Aud.append(a_scaled * (10**-7))

    level_lo, level_up = energies_invcm(wavelength, data['Temp, K'][i])
    E_lo.append(round(level_lo, 6))  # cm^-1
    E_up.append(round(level_up, 6))  # cm^-1

    weight_lo, weight_up = g_vals(data['Lines'][i])
    g_lo.append(weight_lo)
    g_up.append(weight_up)

# 1-based running identifier for the retained lines.
ID = list(range(1, len(Lambda) + 1))

dictionary = {
    'ID': ID,
    'Lambda [mic]': Lambda,
    'Aud [sec^-1]': Aud,
    'E_lo [cm^-1]': E_lo,
    'E_up [cm^-1]': E_up,
    'g_lo': g_lo,
    'g_up': g_up,
}
h2_data = pd.DataFrame(dictionary)

In [None]:
# Plain-text rendering without the index column, meant to be copy-pasted
# into the linelist_XXX.inp file.
linelist_text = h2_data.to_string(index=False)
print(linelist_text)

In [None]:
# calculating partition function as a function of temperature
def energy_J(energy_cm):
    """Convert a wavenumber-style energy back to Joules via ``energy_cm * h * c``.

    NOTE(review): ``h`` is in J s and ``c`` in m/s in this notebook, so the
    product is in Joules only when the input is in m^-1. That matches what
    ``energies_invcm`` actually produces, but not a true cm^-1 value --
    confirm the intended unit before reusing this elsewhere.
    """
    scaled_by_h = energy_cm * h
    return scaled_by_h * c

# Level energies of every line in Joules, in the same row order as h2_data.
E_lo_J = [energy_J(value) for value in h2_data['E_lo [cm^-1]'].to_numpy()]
E_up_J = [energy_J(value) for value in h2_data['E_up [cm^-1]'].to_numpy()]
# Statistical weights stay as DataFrame columns and are indexed by row below.
g_lo, g_up = h2_data['g_lo'], h2_data['g_up']

def partition(lo_E, up_E, lo_g, up_g, temp):
    """Return the Boltzmann-weighted contribution of one line's two levels.

    Computes ``lo_g*exp(-lo_E/(kb*temp)) + up_g*exp(-up_E/(kb*temp))``.

    Parameters
    ----------
    lo_E, up_E : float
        Lower and upper level energies, in the same energy unit as ``kb*temp``.
    lo_g, up_g : float
        Statistical weights of the lower and upper level.
    temp : float
        Temperature; must be non-zero because it appears in a denominator.
    """
    thermal_energy = kb * temp
    lower_term = lo_g * math.exp(-lo_E / thermal_energy)
    upper_term = up_g * math.exp(-up_E / thermal_energy)
    return lower_term + upper_term

# For each of 500 evenly spaced temperatures between 5 and 5000, add up the
# partition() contribution of every row of h2_data (rows summed in order).
temperature_grid = np.linspace(5, 5000, 500)
line_count = len(h2_data)
part_sum = [
    sum(
        partition(E_lo_J[k], E_up_J[k], g_lo[k], g_up[k], grid_temperature)
        for k in range(line_count)
    )
    for grid_temperature in temperature_grid
]

In [None]:
# Pair every grid temperature with its partition sum; the grid is rebuilt
# with the same linspace arguments that were used to compute part_sum.
part_sum_columns = {
    "Temp [K]": np.linspace(5, 5000, 500),
    "PartSum": part_sum,
}
part_sum_df = pd.DataFrame(data=part_sum_columns)
# To dump the whole table as text: print(part_sum_df.to_string(index=True))

In [None]:
# Print every line label next to its 'Temp, K' entry, one row per output line.
line_labels = data['Lines'].to_numpy()
upper_temperatures = data['Temp, K'].to_numpy()
for label, upper_temperature in zip(line_labels, upper_temperatures):
    print(label, upper_temperature)

In [None]:
def lower_temp(upper_temp, wave):
    """Return the lower-level energy of a transition as a temperature.

    Parameters
    ----------
    upper_temp : float or str
        Upper-level energy expressed as a temperature (multiplied by ``kb``).
    wave : float or str
        Transition wavelength; multiplied by 1e-6 before use (i.e. treated
        as microns converted to metres).

    Returns
    -------
    float
        ``(kb*upper_temp - h*c/wavelength) / kb``.
    """
    upper_joules = kb * float(upper_temp)
    wavelength_m = float(wave) * 1e-6
    photon_joules = h * c / wavelength_m  # Joules
    return (upper_joules - photon_joules) / kb

def energies(wavel, temp):
    """Return the lower-level energy of a transition in Joules.

    ``temp`` is the upper-level energy expressed as a temperature and
    ``wavel`` the transition wavelength (multiplied by 1e-6 before use). The
    result is ``kb*temp - h*c/wavelength``.
    """
    upper_level_joules = kb * float(temp)
    wavelength_m = float(wavel) * 1e-6
    transition_joules = h * c / wavelength_m  # Joules
    return upper_level_joules - transition_joules

In [None]:
# Spot check: upper-level temperature 1015 and wavelength 17.0340 (presumably
# one row of the scraped table -- TODO confirm which line this is).
lower_temp(upper_temp=1015, wave=17.0340)