In [1]:
import numpy as np
import re
import urllib.request
from pprint import pprint
from html_table_parser.parser import HTMLTableParser
import pandas as pd
import math

In [2]:
# scraping data from Gemini Observatory website
def url_get_contents(url):
    req = urllib.request.Request(url=url)
    f=urllib.request.urlopen(req)
    return f.read()
xhtml = url_get_contents('https://www.gemini.edu/observing/resources/near-ir-resources/spectroscopy/important-h2-lines').decode('utf-8')
p=HTMLTableParser()
p.feed(xhtml)
data =(p.tables[0])
data = data[2:]

In [3]:
# sifting through weird formatting from website and converting numbers to floats
rows = []
for i in range(0, len(data)):
    if data[i] != ['']:
        row = [data[i][0]]
        for j in range(1, len(data[i])):
            if data[i][j] == '' or data[i][j] == '.':
                row.append(np.nan)
            else:
                row.append(float(data[i][j]))
        rows.append(row)
data = rows

# defining a dataframe with all the data
dictionary = {'Lines':[vals[0] for vals in data],
              'Wavel, um':[vals[1] for vals in data],
              'Freq, cm^-1':[vals[2] for vals in data],
              'g_J, upper':[vals[3] for vals in data],
              'Temp, K':[vals[4] for vals in data],
              'A_vals, e-7 s':[vals[5] for vals in data],
              'LTE_vals':[vals[6] for vals in data]}
data = pd.DataFrame(data=dictionary)

In [4]:
# functions to calculate values required by RADMC in the necessary units
h = 6.62607015e-34 #Joule seconds
kb = 1.380649e-23 # Joules/Kelvin
c = 3e8 #m/s
def energies_invcm(wavel, temp):
    E_up_J = kb*float(temp)
    delta_E = h*c/(float(wavel)*1e-6) #Joules
    E_lo_J = E_up_J - delta_E
    E_lower = E_lo_J/(h*c)
    E_upper = E_up_J/(h*c)
    return E_lower, E_upper

def g_vals(string):
    if 'S' in data['Lines'][i]:
        delta_j = 2
    elif 'Q' in data['Lines'][i]:
        delta_j = 0
    elif 'O' in data['Lines'][i]:
        delta_j =-2
        
    characters = [x for x in string]
    num = np.nan
    if len(characters) == 8:
        num = float(characters[6])
    if len(characters) == 9:
        num = float((characters[6]+characters[7]))
        
    vib_state = num
    if (vib_state % 2) == 0:
        nuclear_weight = 1
    else:
        nuclear_weight = 3
    
    lower_g = (nuclear_weight*((2*vib_state)+1))
    upper_g = (nuclear_weight*((2*(vib_state+delta_j))+1))
    return lower_g, upper_g

In [7]:
# get rid of any lines without all necessary information and make new dataframe
Lambda, Aud, E_lo, E_up, g_lo, g_up = [], [], [], [], [], []
for i in range(0, len(data)):
    if math.isnan(data['A_vals, e-7 s'][i]) == False:
        Lambda.append(data['Wavel, um'][i])

        val = data['A_vals, e-7 s'][i]
        Aud.append((val*(10**-7)))
        
        E_lower, E_upper = energies_invcm(data['Wavel, um'][i], data['Temp, K'][i])
        E_lo.append(round(E_lower,6)) # cm^-1
        E_up.append(round(E_upper, 6)) # cm^-1
        
        lo, up = g_vals(data['Lines'][i])
        g_lo.append(lo)
        g_up.append(up)
ID = [i+1 for i in range(0, len(Lambda))]

dictionary = {'ID':ID, 'Lambda [mic]':Lambda, 'Aud [sec^-1]':Aud, 'E_lo [cm^-1]':E_lo, 'E_up [cm^-1]':E_up, 
              'g_lo':g_lo, 'g_up':g_up}
h2_data = pd.DataFrame(dictionary)

In [8]:
print(h2_data.to_string(index=False)) #copy paste for convenient formatting into linelist_XXX.inp file

 ID  Lambda [mic]  Aud [sec^-1]  E_lo [cm^-1]  E_up [cm^-1]  g_lo  g_up
  1       28.2210  3.000000e-11 -1.235293e+01  3.542225e+04   1.0   5.0
  2       17.0350  4.800000e-10  1.179456e+04  7.049723e+04   9.0  21.0
  3       12.2790  2.760000e-09  3.538412e+04  1.168240e+05   5.0   9.0
  4        9.6649  9.840000e-09  7.044913e+04  1.739163e+05  21.0  33.0
  5        8.0258  2.640000e-08  1.166899e+05  2.412880e+05   9.0  13.0
  6        6.9091  5.880000e-08  1.737858e+05  3.185225e+05  33.0  45.0
  7        6.1088  1.140000e-07  2.411572e+05  4.048555e+05  13.0  17.0
  8        5.5115  2.000000e-07  3.184317e+05  4.998705e+05  45.0  57.0
  9        5.0529  3.240000e-07  4.047583e+05  6.026645e+05  17.0  21.0
 10        4.6947  4.900000e-07  4.998146e+05  7.128207e+05  57.0  69.0
 11        4.4096  7.030000e-07  6.025195e+05  8.292974e+05  21.0  25.0
 12        4.1810  9.640000e-07  7.125701e+05  9.517473e+05  69.0  81.0
 13        3.9947  1.270000e-06  8.296303e+05  1.079962e+06  25.

In [14]:
# calculating partition function as a function of temperature
def energy_J(energy_cm): # convert energy from radmc units (cm^-1) back to Joules
    return energy_cm*h*c

E_lo_J = [energy_J(h2_data['E_lo [cm^-1]'][i]) for i in range(0, len(h2_data))]
E_up_J = [energy_J(h2_data['E_up [cm^-1]'][i]) for i in range(0, len(h2_data))]
g_lo = h2_data['g_lo']
g_up = h2_data['g_up']

def partition(lo_E, up_E, lo_g, up_g, temp):
    Z = (lo_g*math.exp(-lo_E/(kb*temp)))+(up_g*math.exp(-up_E/(kb*temp)))
    return Z

part_sum = []
for temperature in np.linspace(5, 5000, 500):
    z=0
    for i in range(0, len(h2_data)):
        z+=(partition(E_lo_J[i], E_up_J[i], g_lo[i], g_up[i], temperature))
    part_sum.append(z)

In [18]:
part_sum_df = pd.DataFrame(data={"Temp [K]":np.linspace(5, 5000, 500), "PartSum":part_sum})
#print(part_sum_df.to_string(index=True))

In [20]:
for i in range(0, len(data)):
    print(data['Lines'][i], data['Temp, K'][i])

0-0 S(0) 510.0
0-0 S(1) 1015.0
0-0 S(2) 1682.0
0-0 S(3) 2504.0
0-0 S(4) 3474.0
0-0 S(5) 4586.0
0-0 S(6) 5829.0
0-0 S(7) 7197.0
0-0 S(8) 8677.0
0-0 S(9) 10263.0
0-0 S(10) 11940.0
0-0 S(11) 13703.0
0-0 S(12) 15549.0
0-0 S(13) 17458.0
0-0 S(14) 19402.0
0-0 S(15) 21400.0
0-0 S(16) 23459.0
0-0 S(17) 25539.0
0-0 S(18) 27643.0
0-0 S(19) 29765.0
0-0 S(20) 31895.0
0-0 S(21) 34036.0
0-0 S(22) 36173.0
0-0 S(23) 37728.0
1-0 S(0) 6471.0
1-0 S(1) 6956.0
1-0 S(2) 7584.0
1-0 S(3) 8365.0
1-0 S(4) 9286.0
1-0 S(5) 10341.0
1-0 S(6) 11522.0
1-0 S(7) 12817.0
1-0 S(8) 14221.0
1-0 S(9) 15722.0
1-0 S(10) 17311.0
1-0 S(11) 18979.0
1-0 Q(1) 6149.0
1-0 Q(2) 6471.0
1-0 Q(3) 6956.0
1-0 Q(4) 7586.0
1-0 Q(5) 8365.0
1-0 Q(6) 9286.0
1-0 Q(7) 10341.0
1-0 O(2) 5987.0
1-0 O(3) 6149.0
1-0 O(4) 6471.0
1-0 O(5) 6956.0
1-0 O(6) 7485.0
1-0 O(7) 8365.0
1-0 O(8) 9286.0
2-1 S(0) 12095.0
2-1 S(1) 12550.0
2-1 S(2) 13150.0
2-1 S(3) 13890.0
2-1 S(4) 14764.0
2-1 S(5) 15763.0
2-1 O(2) 11635.0
2-1 O(3) 11789.0
2-1 O(4) 12095.0
2-1 O(5) 

In [26]:
def lower_temp(upper_temp, wave):
    E_up_J = kb*float(upper_temp)
    delta_E = h*c/(float(wave)*1e-6) #Joules
    E_lo_J = E_up_J - delta_E
    return E_lo_J/kb

def energies(wavel, temp):
    E_up_J = kb*float(temp)
    delta_E = h*c/(float(wavel)*1e-6) #Joules
    E_lo_J = E_up_J - delta_E
    return E_lo_J

In [27]:
lower_temp(1015, 17.0340)

169.76522131626953