In [None]:
import os
import numpy as np

import emcee
import matplotlib.pyplot as plt
import matplotlib as mpl
import desispec.io

from astropy.table import Table
from astropy.io import fits
#from astropy.io.misc.hdf5 import read_table_hdf5
from astropy import modeling
from scipy.signal import medfilt

from operator import itemgetter

plt.rcParams.update({'font.size': 14})

#using the commissioning targeting mask
from desitarget.cmx.cmx_targetmask import cmx_mask  

In [None]:
#Turn 'b', 'r', 'z' into 'brz'
def quick_brz(specobj,spectrum):
    """Stitch the 'b', 'r' and 'z' arms of one spectrum into a single 'brz' spectrum.

    Parameters
    ----------
    specobj : object
        Must expose ``wave``, ``flux`` and ``ivar`` mappings keyed by 'b', 'r'
        and 'z'. ``wave[band]`` is a 1-D wavelength grid; ``flux[band][spectrum]``
        and ``ivar[band][spectrum]`` are 1-D arrays on that same grid.
    spectrum : int
        Index of the spectrum to extract from ``flux`` / ``ivar``.

    Returns
    -------
    (x_spc, y_flx, y_err) : tuple of numpy arrays
        Combined wavelength, flux and error arrays. Where two arms overlap the
        two values are averaged sample by sample.

    Notes
    -----
    The overlap is located at the first wavelength shared by both arms and is
    assumed to run to the end of the bluer arm on an identical grid.
    An IndexError is raised if two neighbouring arms share no wavelength.
    """
    # Round so that floating-point noise does not hide identical grid points.
    bw = np.round(specobj.wave['b'], 3)
    rw = np.round(specobj.wave['r'], 3)
    zw = np.round(specobj.wave['z'], 3)

    # Wavelengths present in both neighbouring arms.
    br_overlap = np.intersect1d(bw, rw)
    rz_overlap = np.intersect1d(rw, zw)

    # Index in the bluer arm at which each overlap begins. np.where returns a
    # tuple of index arrays, so pick the first match explicitly rather than
    # calling int() on an array (deprecated since NumPy 1.25).
    br_start = int(np.where(bw == br_overlap[0])[0][0])
    rz_start = int(np.where(rw == rz_overlap[0])[0][0])
    # Number of samples from the overlap start to the end of the bluer arm.
    br_len = len(bw) - br_start
    rz_len = len(rw) - rz_start

    def stitch(b, r, z):
        # b-only | mean(b, r) | r-only | mean(r, z) | z-only
        return np.concatenate((
            b[:br_start],
            (b[br_start:] + r[:br_len]) / 2,
            r[br_len:rz_start],
            (r[rz_start:] + z[:rz_len]) / 2,
            z[rz_len:],
        ))

    # Per-arm 1-sigma errors from the inverse variance.
    # NOTE(review): ivar == 0 yields inf here, and the overlap error below is a
    # plain mean of the two arms' errors, not an inverse-variance combination.
    be = 1 / np.sqrt(specobj.ivar['b'][spectrum])
    re = 1 / np.sqrt(specobj.ivar['r'][spectrum])
    ze = 1 / np.sqrt(specobj.ivar['z'][spectrum])

    x_spc = stitch(bw, rw, zw)
    y_flx = stitch(specobj.flux['b'][spectrum],
                   specobj.flux['r'][spectrum],
                   specobj.flux['z'][spectrum])
    y_err = stitch(be, re, ze)

    return(x_spc,y_flx,y_err)

In [None]:
def find_p(data,threshold,edge_clip=30,window_size=1000):
    """Find significant local maxima in a 1-D array.

    Parameters
    ----------
    data : array-like
        1-D array of values in which to find peaks. The caller currently
        passes a ratio of fit/signal, which is why the padding value is 1.
    threshold : float
        Number of windowed standard deviations a value must exceed the
        windowed median by to count as a peak.
    edge_clip : int, optional
        Number of samples dropped from each end of ``data`` before searching.
    window_size : int, optional
        Half-width of the sliding window; statistics for index ``i`` use the
        samples in ``[i - window_size, i + window_size)``.

    Returns
    -------
    peaks : list of int
        Indices into the original (unclipped, unpadded) ``data`` of every
        sample that is greater than ``median + threshold * std`` of its window
        and equal to the maximum of the 20 samples around it.

    Notes
    -----
    Massive outliers (values at or above 20x the standard deviation of the
    whole padded array, e.g. where the flux gets right near 0) are left out of
    the windowed std but still take part in the median and can still be
    reported as peaks.
    """
    data = np.asarray(data)
    # Explicit end index so that edge_clip=0 keeps the whole array
    # (data[0:-0] would be empty).
    clipped = data[edge_clip:len(data) - edge_clip]
    # Pad both ends so every real sample has a full window around it.
    padded = np.pad(clipped, window_size, mode='constant', constant_values=1)

    # Loop-invariant outlier cut-off, computed once.
    outlier_cut = np.std(padded) * 20

    peaks = []
    # Visit only the real samples: skip the leading and trailing padding.
    for i in range(window_size, len(padded) - window_size):
        window = padded[i - window_size:i + window_size]
        median = np.median(window)

        # Std of the window *values* below the cut-off (not of the boolean mask).
        inliers = window[window < outlier_cut]
        if inliers.size == 0:
            # Nothing but outliers around this sample: no usable noise estimate.
            continue
        std = np.std(inliers)

        local_max = np.max(padded[max(i - 10, 0):i + 10])
        if padded[i] > median + threshold * std and padded[i] == local_max:
            # Convert back to an index in the array the caller passed in.
            peaks.append(i - window_size + edge_clip)

    return peaks

In [None]:
from operator import itemgetter
from itertools import *
# Levenberg-Marquardt least-squares fitter shared by every Gaussian fit in doublet_finder.
fitter = modeling.fitting.LevMarLSQFitter()
# Default Gaussian1D instance. doublet_finder assigns its own local `model` for each fit,
# so it does not read this module-level one.
model = modeling.models.Gaussian1D()

#The whole thing could probably use a rewrite for the sake of efficiency
def doublet_finder(targetid,continuum,data,y_err,x_spc):
    """Search one spectrum for pairs of absorption lines separated like a doublet.

    Steps:
      1. Split the pixels where ``continuum - data`` is positive into runs of
         consecutive indices (candidate absorption features).
      2. Keep runs of at least 3 pixels whose summed SNR exceeds 3.5 and that
         contain at least one peak reported by ``find_p(continuum / data, 5.0)``.
      3. Fit a Gaussian to the residual of each kept run and convert the
         fitted centre to a redshift using the rest wavelength 2795.5301.
      4. Report every ordered pair of lines whose redshift difference matches
         the rest-frame separation 7.1755 +/- 0.25 (scaled by 1 + z of the
         first line).

    Parameters
    ----------
    targetid : identifier of the spectrum; stored as ``str(targetid)``.
    continuum, data, y_err, x_spc : 1-D numpy arrays of equal length
        Continuum estimate, flux, flux error and wavelength.

    Returns
    -------
    list of list
        One row per doublet:
        ``[targetid, z_line1, snr_line1, snr_line2,
           amplitude_line1, stddev_line1, amplitude_line2, stddev_line2]``.
        Each absorption line is considered once, even if its pixel run
        contains several peaks, so no duplicated rows are produced.
    """
    rf_line_sep = 7.1755
    #may want to adjust this down if too many matches
    rf_err_margin = 0.25
    # Rest wavelength used to turn a fitted line centre into a redshift.
    rest_wave = 2795.5301
    # Minimum summed SNR for a run to be fitted; mutable, worth experimenting with.
    snr_min = 3.5

    residual = continuum - data
    peaks = np.asarray(find_p(continuum / data, 5.0), dtype=int)

    # Runs of consecutive pixel indices with positive residual: split the
    # sorted index array wherever the step between neighbours is not 1.
    positive = np.where(residual > 0)[0]
    groups = np.split(positive, np.where(np.diff(positive) != 1)[0] + 1)

    # (z, snr, amplitude, stddev) for every significant line that holds a peak.
    peak_lines = []
    for group in groups:
        # Runs of 1 or 2 pixels are not worthwhile and break the fit.
        if len(group) < 3:
            continue

        noise = np.sqrt(np.sum(y_err[group] ** 2))
        # NOTE(review): this averages the pixel *indices*, not the flux, so the
        # first branch only triggers for runs at the very start of the array.
        # The accompanying intent ("separate class for high signal") suggests a
        # flux-based test was meant - confirm before changing; behaviour kept.
        if np.average(group) < 5:
            snr = np.sum(residual[group] / continuum[group]) / noise
        else:
            snr = np.sum(residual[group] / 1) / noise
        if not snr > snr_min:
            continue

        # A run is a contiguous index range, so a peak lies in it exactly when
        # it falls between the run's first and last index. Checking this
        # before fitting avoids fits whose result would be discarded and
        # records each line once regardless of how many peaks it holds.
        if not np.any((peaks >= group[0]) & (peaks <= group[-1])):
            continue

        # Fit a Gaussian to the residual, seeded with the run's maximum and
        # mean wavelength.
        model = modeling.models.Gaussian1D(amplitude=np.nanmax(residual[group]),
                                           mean=np.average(x_spc[group]))
        fm = fitter(model=model, x=x_spc[group], y=residual[group])
        # Gaussian1D parameters are ordered (amplitude, mean, stddev).
        z = fm.parameters[1] / rest_wave - 1
        peak_lines.append((z, snr, fm.parameters[0], fm.parameters[2]))

    doublets = []
    for z1, snr1, amp1, std1 in peak_lines:
        # Expected redshift offset of the second line, observed frame.
        line_sep = rf_line_sep * (1 + z1)
        err_margin = rf_err_margin * (1 + z1)
        low_zbound = (line_sep - err_margin) / rest_wave
        high_zbound = (line_sep + err_margin) / rest_wave
        for z2, snr2, amp2, std2 in peak_lines:
            if low_zbound < z2 - z1 < high_zbound:
                doublets.append([str(targetid), z1, snr1, snr2, amp1, std1, amp2, std2])
    return doublets

In [None]:
# Rows returned by doublet_finder, accumulated over all spectrographs.
results=[]
# NOTE(review): never updated in this cell; kept in case another cell reads it.
cnt=0
#good_candidates = np.genfromtxt('Spec_w_goodCandidates.txt',dtype=str)

# Tile / night to process; the directory is derived from them so the two
# cannot drift apart.
tile, date = "68002", "20200315"
# TODO: derive from os.getenv("DESI_SPECTRO_REDUX") / os.getenv("SPECPROD")
# instead of a fixed location.
redux_tiles_dir = "/global/cfs/cdirs/desi/spectro/redux/andes/tiles"
dirname = os.path.join(redux_tiles_dir, tile, date)

#run over all spectrographs
for s in range(10):
    spectrograph = str(s)
    filename = "coadd-{}-{}-{}.fits".format(spectrograph, tile, date)

    #open coadd spectrum and zbest files, will want to open the h5 file at some point
    specfile = os.path.join(dirname, filename)
    zbestfile = specfile.replace('coadd', 'zbest')
    print(specfile)
    #read them in using the standard method
    specobj = desispec.io.read_spectra(specfile)
    zbest = Table.read(zbestfile, hdu=1)
    #grab targetids, ra, dec and z
    #(ra, dec and redshifts are not used below in this cell)
    fm = specobj.target_ids()
    ra = specobj.fibermap['TARGET_RA']
    dec = specobj.fibermap['TARGET_DEC']
    redshifts = zbest['Z']
    #indices of all sources whose CMX_TARGET has the SV0_QSO bit set
    qsos = np.where(specobj.fibermap["CMX_TARGET"] & cmx_mask["SV0_QSO"])[0]
    print(qsos)
    #run over all selected spectra
    for i in qsos:
        spectrum = i
        targetid = fm[i]

        #use the combined "brz" arm if the file provides it ...
        if "brz" in specobj.wave:
            x_spc = specobj.wave["brz"]
            y_flx = specobj.flux["brz"][spectrum]
            y_err = 1/np.sqrt(specobj.ivar["brz"][spectrum])
        #... else combine 'b', 'r', 'z' with the helper function
        else:
            x_spc, y_flx, y_err = quick_brz(specobj, spectrum)

        #estimate continuum using a 19-sample median filter
        cont_est = medfilt(y_flx, 19)
        #calculate residual (doublet_finder recomputes it internally)
        bc_residual = cont_est - y_flx

        doublets = doublet_finder(str(targetid), cont_est, y_flx, y_err, x_spc)

        # extend() rather than a nested `for i in ...`, which would shadow the
        # spectrum index `i` of the enclosing loop.
        results.extend(doublets)

np.savetxt('MgII_Doublets.csv', results, delimiter=",",fmt='%s')