## Imports

In [None]:
import os
from glob import glob
from tqdm import tqdm

import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
from scipy.integrate import simpson
from scipy.optimize import curve_fit
from scipy.signal import argrelextrema
from scipy.stats import spearmanr, median_abs_deviation, sigmaclip

from astropy.io import fits
from astropy.convolution import convolve, Gaussian1DKernel
from sklearn.neighbors import KernelDensity

%matplotlib inline

In [None]:
# Set the default matplotlib font size for every figure created afterwards.
plt.rcParams['font.size'] = 20

## Load Lines and Data

In [None]:
# Directory holding the input tables, relative to the notebook.
data_path = '../data'
# Read the two-column line list and unpack one array per column
# (named lines_vac / lines_air -- presumably vacuum and air values; confirm
# against the data file).
lines_vac, lines_air = np.loadtxt(
    f'{data_path}/hydroxyl_lines_rousselot_2000.txt',
    unpack=True,
)

In [None]:
# Directory holding the fit outputs, relative to the notebook.
results_path = '../results'
# Fit-results table; the first CSV column becomes the DataFrame index.
df = pd.read_csv(
    f'{results_path}/lines_norm_gdl_fit.csv',
    index_col=0,
)
# Keep a handle on the column index of the table.
columns = df.columns

In [None]:
# Express `spec_mjd` relative to the integer part of its smallest value.
time_centered = df['spec_mjd'].sub(int(df['spec_mjd'].min()))

In [None]:
# Boolean selections on `time_centered`; every window is open (both bounds
# are strict inequalities).
mask1_1 = time_centered.gt(504) & time_centered.lt(504.5)
mask1_2 = time_centered.gt(504.58) & time_centered.lt(505.515)
mask1_3 = time_centered.gt(505.58) & time_centered.lt(506.578)
mask1_4 = time_centered.gt(506.61) & time_centered.lt(507)
# Union of the four sub-windows (original author's note: 32%).
mask1 = mask1_1 | mask1_2 | mask1_3 | mask1_4
# Second, single window.
mask2 = time_centered.gt(543) & time_centered.lt(547)

## Doublet Mus

In [None]:
# For every line, pool the fitted `mu1`/`mu2` values of all rows, estimate
# their density with a KDE and take the two strongest density peaks as the
# two component positions.  Two numbers per line are stored in
# `doublet_metric`:
#   metric1 = |sum of the two peak offsets from `line`|
#   metric2 = density at the lower-mu peak / density at the higher-mu peak
PLOT_DOUBLET_KDE = False  # set to True to show a histogram + KDE plot per line

mu_est_lst = []      # per line: array with the two peak positions (ascending mu)
doublet_metric = []  # per line: [metric1, metric2]
for line in tqdm(lines_vac, total=lines_vac.shape[0]):
    mu = np.concatenate((df[f'{line}_mu1'].values, df[f'{line}_mu2'].values))
    mu_valid = mu[~np.isnan(mu)]

    # Evaluate the KDE on a 1000-point grid spanning the valid values and
    # find the grid indices of its local maxima.
    peak_idx = np.empty(0, dtype=int)
    if mu_valid.size:
        kde = KernelDensity(bandwidth=0.05).fit(mu_valid.reshape(-1, 1))
        mu_lin = np.linspace(mu_valid.min(), mu_valid.max(), 1000)
        dens = np.exp(kde.score_samples(mu_lin.reshape(-1, 1)))
        peak_idx = argrelextrema(dens, np.greater)[0]

    if peak_idx.size < 2:
        # Fewer than two density peaks (or no valid values at all): no pair
        # of components can be identified.  Store NaNs so the per-line lists
        # stay aligned with `lines_vac`; NaN compares False against any
        # threshold, so such a line never passes a metric cut.
        mu_est_lst.append(np.full(2, np.nan))
        doublet_metric.append([np.nan, np.nan])
        continue

    # Pick the two highest peaks by index (not by matching float values, which
    # can select extra grid points when densities tie), then order them by mu.
    top2 = np.sort(peak_idx[np.argsort(dens[peak_idx])[-2:]])
    mu_est = mu_lin[top2]
    mu_est_lst.append(mu_est)

    if PLOT_DOUBLET_KDE:
        plt.title(f'{line}')
        counts, _, _ = plt.hist(mu_valid - line, alpha=0.5, bins=200)
        plt.plot(mu_lin - line, dens / dens.max() * counts.max(), color='C0', alpha=0.5)
        for peak in mu_est:
            plt.vlines(peak - line, 0, counts.max(), color='C2', linestyle='--',
                       label=round(peak - line, 3))
        plt.vlines(0, 0, counts.max(), color='C1', linestyle=':')
        plt.legend(fontsize=10)
        plt.show()

    metric1 = np.abs((mu_est - line).sum())
    metric2 = dens[top2[0]] / dens[top2[1]]
    doublet_metric.append([metric1, metric2])

In [None]:
# Turn the per-line [metric1, metric2] pairs into a 2-D array so that the
# columns can be sliced.
doublet_metric = np.asarray(doublet_metric)

In [None]:
# Original author's note (meaning not documented -- possibly other threshold
# pairs that were tried): .5 .1, .3 .05, .15 .025, .05 .01
thresh = 1.3
first_metric = doublet_metric[:, 0]
second_metric = doublet_metric[:, 1]
# Cut 1: first metric below 0.05.
mask_metric1 = first_metric < 0.05
# Cut 2: second metric strictly inside (1/thresh, thresh).
mask_metric2 = (1/thresh < second_metric) & (second_metric < thresh)
# A row is kept only when both cuts pass.
mask_metric = np.logical_and(mask_metric1, mask_metric2)

In [None]:
# Display how many entries pass both metric cuts.
mask_metric.sum()

## Sigma

In [None]:
# For every line, pool the `sigma1` and `sigma2` columns and summarise them
# with two NaN-ignoring statistics: the median and the median absolute
# deviation (MAD).
sigma_est_lst = []
for line in lines_vac:
    sigma = np.concatenate((df[f'{line}_sigma1'].values, df[f'{line}_sigma2'].values))
    sigma_median = np.nanmedian(sigma)
    sigma_mad = median_abs_deviation(sigma, nan_policy='omit')
    sigma_est_lst.append([sigma_median, sigma_mad])

In [None]:
# Convert the per-line [median, MAD] pairs to a 2-D array (column 0 = median,
# column 1 = MAD).
sigma_est_lst = np.asarray(sigma_est_lst)

In [None]:
# Plot column 0 of `sigma_est_lst` against the line positions, with column 1
# as the vertical error bar.
# NOTE(review): x is lines_vac/10000 while the axis label says Angstrom --
# confirm which unit is intended.
plt.figure(figsize=[30, 10])
plt.errorbar(lines_vac / 10000, sigma_est_lst[:, 0], yerr=sigma_est_lst[:, 1])
# Raw strings: '\m' and '\s' are invalid escape sequences in normal string
# literals and trigger a SyntaxWarning on recent Python versions.
plt.xlabel(r'Wavelength ($\mathring{A}$)')
plt.ylabel(r'$\sigma$')

In [None]:
# Scatter column 1 of `sigma_est_lst` (the spread used as error bar) against
# the line positions.
# NOTE(review): x is lines_vac/10000 while the axis label says Angstrom, and
# the y label mentions Log(sigma) although no logarithm is applied in the
# visible code -- confirm both.
plt.figure(figsize=[30, 10])
plt.scatter(lines_vac / 10000, sigma_est_lst[:, 1])
# Raw strings avoid the invalid '\m' / '\s' escape sequences; label typo
# "Errot" corrected to "Error".
plt.xlabel(r'Wavelength ($\mathring{A}$)')
plt.ylabel(r'Error bar Log($\sigma$)')

In [None]:
# Mean of column 0 over the first 10 rows.
np.mean(sigma_est_lst[:10, 0])

In [None]:
# Mean of column 0 over the last 10 rows.
np.mean(sigma_est_lst[-10:, 0])

In [None]:
# Root-mean-square of column 1 over the first 10 rows.
np.mean(sigma_est_lst[:10, 1] ** 2) ** 0.5

In [None]:
# Root-mean-square of column 1 over the last 10 rows.
np.mean(sigma_est_lst[-10:, 1] ** 2) ** 0.5

In [None]:
# Spearman rank correlation between `lines_vac` and column 0 of `sigma_est_lst`.
spearmanr(lines_vac, sigma_est_lst[:, 0])

In [None]:
# Spearman rank correlation between `lines_vac` and column 1 of `sigma_est_lst`.
spearmanr(lines_vac, sigma_est_lst[:, 1])