In [36]:
#import sys
#!{sys.executable} -m pip install --user alerce

import astropy.config
astropy.config.get_cache_dir()

In [3]:
from astropy.io import fits
from astropy.table import Table
from astropy import units as u
from astropy.time import Time
from astropy.coordinates import SkyCoord, match_coordinates_sky, Angle

import numpy as np
import pandas as pd
import glob
import sys

import sqlite3
import os

# Notebook-wide configuration read by the functions defined in later cells.
# NOTE: the `global` statements below have no effect at module (cell) scope;
# the assignments alone already create module-level names.

# SQLite database that initial_check() opens to look up exposures and tiles.
global db_filename
db_filename = '/global/cfs/cdirs/desi/science/td/daily-search/transients_search.db'
# Root directory of the spectroscopic reductions; raises KeyError if the
# DESI_SPECTRO_REDUX environment variable is not set.
global exposure_path
exposure_path = os.environ["DESI_SPECTRO_REDUX"]
# Band letter inserted into the "cframe-<band>*.fits" pattern used by glob_frames().
global color_band
color_band = "r"

# Timestamp taken when this cell is executed.
global today
today = Time.now()
#import warnings
#from ALeRCE_ledgermaker import access_alerts

In [4]:
# Grabbing the file names
def all_candidate_filenames(transient_dir: str):
    """List every ``*.fits`` file located directly inside ``transient_dir``.

    Parameters
    ----------
    transient_dir : str
        Directory that holds the transient-candidate spectra.

    Returns
    -------
    list of str
        Sorted paths of the matching files, each prefixed with
        ``transient_dir``. The list is empty (and a warning is printed) when
        the directory does not exist or contains no FITS files.

    Raises
    ------
    SystemExit
        If ``transient_dir`` cannot be turned into a glob pattern
        (for example when it is not a string).
    """
    try:
        filenames_read = glob.glob(transient_dir + "/*.fits")
    except TypeError:
        # Building the pattern failed, so there is nothing sensible to search.
        print("Could not grab/find any fits in the transient spectra directory:")
        print(transient_dir)
        raise SystemExit("Exitting.")

    if not filenames_read:
        # glob.glob() returns [] instead of raising for a missing or empty
        # directory, so the problem has to be reported explicitly.
        print("Could not grab/find any fits in the transient spectra directory:")
        print(transient_dir)

    # Sorted so that slices such as paths[:2] are reproducible between runs.
    return sorted(filenames_read)

#path_to_transient = "/global/cfs/cdirs/desi/science/td/daily-search/desitrip/out"
#print(all_candidate_filenames(path_to_transient)[1])

In [12]:
# From ALeRCE_ledgermaker https://github.com/alercebroker/alerce_client
# I have had trouble importing this before so here it goes
# Now modified ***
import requests
#import matplotlib as mpl
#import matplotlib.pyplot as plt
#import pandas as pd
from alerce.core import Alerce
from alerce.exceptions import APIError

alerce_client = Alerce()

def access_alerts(lastmjd_in=None, classifier='stamp_classifier', class_names=['SN', 'AGN']):
    """Query the ALeRCE client once per class name and merge the results.

    Parameters
    ----------
    lastmjd_in : float or None
        Value forwarded as the ``lastmjd`` argument of
        ``alerce_client.query_objects``. When None, it defaults to 60 days
        before the current time (as MJD).
    classifier : str
        Classifier name forwarded to the query.
    class_names : list of str
        Classes to query; one request is issued per entry. The default list
        is never modified by this function.

    Returns
    -------
    pandas.DataFrame
        Concatenation of all query results, sorted by the ``lastmjd`` column.

    Raises
    ------
    TypeError
        If ``class_names`` is not a list.
    """
    if type(class_names) is not list:
        raise TypeError('Argument `class_names` must be a list.')

    # Test against None explicitly so that a legitimate value of 0 is honoured.
    if lastmjd_in is None:
        date_range = 60
        lastmjd_in = Time.now().mjd - date_range
        print('Defaulting to a lastmjd range of', str(date_range), 'days before today.')

    # NOTE(review): only a single page of up to 5000 objects is requested per
    # class; presumably larger result sets are truncated - confirm against the
    # ALeRCE client documentation.
    dataframes = [
        alerce_client.query_objects(classifier=classifier,
                                    class_name=class_name,
                                    order_by='oid',
                                    order_mode='DESC',
                                    page_size=5000,
                                    lastmjd=lastmjd_in,
                                    format='pandas')
        for class_name in class_names
    ]

    return pd.concat(dataframes).sort_values(by='lastmjd')

In [6]:
# From https://github.com/desihub/timedomain/blob/master/too_ledgers/decam_TAMU_ledgermaker.ipynb
from bs4 import BeautifulSoup
import json
import requests
def access_decam_data(url, overwrite=False):
    """Download reduced DECam transient data from Texas A&M.
    Cache the data to avoid lengthy and expensive downloads.

    Parameters
    ----------
    url : str
        URL for accessing the data. Its last non-empty path component names
        the cache file (``<component>.csv`` in the current directory).
    overwrite : bool
        Download new data and overwrite the cached data.

    Returns
    -------
    decam_transients : pandas.DataFrame
        Table of transient data, one row per downloaded object summary.
        Empty (and not cached) when the index page lists no summaries.
    """
    folders = url.split('/')
    thedate = folders[-1] if len(folders[-1]) > 0 else folders[-2]
    outfile = '{}.csv'.format(thedate)

    if os.path.exists(outfile) and not overwrite:
        # Access cached data.
        return pd.read_csv(outfile)

    # NOTE(security): the credentials used to be hardcoded. They can now be
    # supplied through the environment; the historical values remain only as
    # a fallback so existing workflows keep running.
    auth = (os.environ.get('DECAM_TAMU_USER', 'decam'),
            os.environ.get('DECAM_TAMU_PASSWORD', 'tamudecam'))

    # Download the DECam data index.
    # The datahub SSL certificate isn't playing well with URL requests, so fall
    # back to an unverified connection - but only for certificate problems.
    verify = True
    try:
        decam_dets = requests.get(url, auth=auth).text
    except requests.exceptions.SSLError:
        requests.packages.urllib3.disable_warnings(requests.packages.urllib3.exceptions.InsecureRequestWarning)
        verify = False
        decam_dets = requests.get(url, verify=False, auth=auth).text

    # Convert transient index page into scrapable data using BeautifulSoup.
    # Naming the parser avoids the "no parser specified" warning and keeps the
    # result independent of which optional parsers are installed.
    soup = BeautifulSoup(decam_dets, 'html.parser')

    # Summary links are relative, so the base must end with a slash.
    base_url = url if url.endswith('/') else url + '/'

    # Loop through transient object summary JSON files indexed in the main
    # transient page and collect one single-row frame per summary.
    frames = []
    for a in soup.find_all('a', href=True):
        # `in` on a Tag tests its children, i.e. the anchor text itself.
        if 'object-summary.json' in a:
            link = a['href'].replace('./', '')
            summary_text = requests.get(base_url + link, verify=verify, auth=auth).text
            frames.append(pd.DataFrame(json.loads(summary_text), index=[0]))

    if not frames:
        # Do not write an empty cache file: it could not be read back later.
        print('No object summaries found at {}; nothing cached.'.format(url))
        return pd.DataFrame()

    # Concatenate once (instead of inside the loop) and number the rows 0..n-1.
    decam_transients = pd.concat(frames, ignore_index=True)

    # Cache the data for future access.
    print('Saving output to {}'.format(outfile))
    decam_transients.to_csv(outfile, index=False)

    return decam_transients

In [7]:
def read_fits_ra_dec(filepath: str, transient_candidate = True):
    """Read the target coordinates and the observation date of one FITS file.

    Parameters
    ----------
    filepath : str
        Path of the FITS file to read.
    transient_candidate : bool
        True: read HDU 1 and take the date from the file name.
        False: read HDU 5 and take the date from its ``MJD-OBS`` header key.

    Returns
    -------
    targ_ra, targ_dec : numpy.ndarray
        Contents of the ``TARGET_RA`` and ``TARGET_DEC`` columns.
    targ_mjd : float
        Observation date as MJD.
        If the file cannot be opened or lacks the expected HDU, column or
        header key, three empty arrays are returned instead.
    """
    hdu_num = 1 if transient_candidate else 5
    targ_mjd = None

    try:
        with fits.open(filepath) as hdu1:
            data_table = Table(hdu1[hdu_num].data)

            # .data turns the table columns into plain numpy arrays.
            targ_ra = data_table['TARGET_RA'].data
            targ_dec = data_table['TARGET_DEC'].data

            if not transient_candidate:
                targ_mjd = hdu1[hdu_num].header['MJD-OBS']

    except Exception as err:
        # Deliberately best-effort: report the file and let the caller move on.
        # `Exception` (not a bare except) keeps Ctrl-C and SystemExit working.
        filename = str(filepath).split("/")[-1]
        print("Could not open or use:", filename)
        print("In path:", filepath)
        print("Reason:", repr(err))
        print("Trying the next file...")
        return np.array([]), np.array([]), np.array([])

    if transient_candidate:
        # The second-to-last "_"-separated token of the file name is read as a
        # YYYYMMDD date (presumably names look like "<...>_<YYYYMMDD>_<...>.fits").
        date_token = filepath.split("/")[-1].split("_")[-2]
        date_iso = date_token[:4] + "-" + date_token[4:6] + "-" + date_token[6:]  # dashes for Time
        targ_mjd = Time(date_iso).mjd

    return targ_ra, targ_dec, targ_mjd

In [8]:
def matching(path_in: str, max_sep: float, transient_cand = True, target_ra_dec_date = ()): #, #first_run: bool = True): # To be cleaned up...
    """Match the targets of one FITS file against recent ALeRCE alerts.

    NOTE: a second ``def matching`` later in this same cell rebinds the name,
    so this version is only reachable if that later definition is removed.

    Parameters
    ----------
    path_in : str
        FITS file to read; ignored when ``target_ra_dec_date`` is given.
    max_sep : float
        Maximum on-sky separation for a match, in arcseconds.
    transient_cand : bool
        Forwarded to ``read_fits_ra_dec``.
    target_ra_dec_date : tuple
        Optional ``(ras, decs, mjd)`` to use instead of reading ``path_in``.

    Returns
    -------
    trans_matches, alerts_matches : astropy.coordinates.SkyCoord
        Matched target positions and the alert position nearest to each.
        Two empty numpy arrays are returned when there is nothing to match.
    """
    max_sep = max_sep * u.arcsec

    if not target_ra_dec_date:
        target_ras, target_decs, obs_mjd = read_fits_ra_dec(path_in, transient_cand)
    else:
        target_ras, target_decs, obs_mjd = target_ra_dec_date

    if not target_ras.size:
        return np.array([]), np.array([])

    # A position is unusable when EITHER coordinate is NaN (the previous mask
    # only dropped rows in which both were NaN).
    unusable = np.isnan(target_ras) | np.isnan(target_decs)
    if np.any(unusable):
        print("NaNs found, removing them from array (not FITS) before match.")
        target_ras = target_ras[~unusable]
        target_decs = target_decs[~unusable]
        if not target_ras.size:
            return np.array([]), np.array([])

    # For each fits file, look at alerts from 28 days before the observation.
    alerts = access_alerts(lastmjd_in=obs_mjd - 28)
    tree_name = "kdtree_" + str(obs_mjd - 28)

    alerts_ra = alerts['meanra'].to_numpy()
    alerts_dec = alerts['meandec'].to_numpy()

    coo_trans_search = SkyCoord(target_ras*u.deg, target_decs*u.deg)
    coo_alerts = SkyCoord(alerts_ra*u.deg, alerts_dec*u.deg)

    # For every target, find the nearest alert.
    idx_alerts, d2d_trans, d3d_trans = match_coordinates_sky(coo_trans_search, coo_alerts, storekdtree = tree_name)

    sep_constraint = d2d_trans < max_sep
    trans_matches = coo_trans_search[sep_constraint]
    alerts_matches = coo_alerts[idx_alerts[sep_constraint]]

    if trans_matches.size:
        sort_dist = np.sort(d2d_trans)
        print("Minimum distance found: ", sort_dist[0])
        print("5 closest (in case there's more than one): ", sort_dist[:5])
        print()

    return trans_matches, alerts_matches

def matching(path_in: str, max_sep: float):
    """Match recent ALeRCE alerts against the targets of one FITS file.

    NOTE: this definition rebinds the name ``matching`` and therefore replaces
    the four-parameter version defined just above it in the same cell.

    Parameters
    ----------
    path_in : str
        Transient-candidate FITS file to read.
    max_sep : float
        Maximum on-sky separation for a match, in arcseconds.

    Returns
    -------
    trans_matches, alerts_matches : astropy.coordinates.SkyCoord
        For every alert closer than ``max_sep`` to a target: the nearest
        target position and the alert position. Two empty numpy arrays are
        returned when the file yields no usable coordinates.
    """
    max_sep = max_sep * u.arcsec

    target_ras, target_decs, obs_date = read_fits_ra_dec(path_in)

    if not target_ras.size:
        return np.array([]), np.array([])

    # Drop positions in which either coordinate is NaN before matching.
    unusable = np.isnan(target_ras) | np.isnan(target_decs)
    if np.any(unusable):
        print("NaNs found, removing them from array (not FITS) before match.")
        target_ras = target_ras[~unusable]
        target_decs = target_decs[~unusable]
        if not target_ras.size:
            return np.array([]), np.array([])

    filename = path_in.split("/")[-1]
    tree_name = "kdtree_" + filename[:-5] # Gets rid of the .fits at the end

    # For each fits file, look at alerts from 28 days before the observation.
    # The keyword is `lastmjd_in`; `lastmjd` raised a TypeError.
    alerts = access_alerts(lastmjd_in=obs_date - 28)
    alerts_ra = alerts['meanra'].to_numpy()
    alerts_dec = alerts['meandec'].to_numpy()

    coo_trans_search = SkyCoord(target_ras*u.deg, target_decs*u.deg)
    coo_alerts = SkyCoord(alerts_ra*u.deg, alerts_dec*u.deg)

    print(tree_name)
    # For every alert, find the nearest target.
    idx_trans, d2d_alerts, d3d_alerts = match_coordinates_sky(coo_alerts, coo_trans_search, storekdtree = tree_name)

    sep_constraint = d2d_alerts < max_sep
    alerts_matches = coo_alerts[sep_constraint]
    trans_matches = coo_trans_search[idx_trans[sep_constraint]]

    return trans_matches, alerts_matches

def check_corners(fits_path, ):
    """Measure how far apart the extreme target positions in a directory are.

    The coordinates of every candidate FITS file in ``fits_path`` are pooled,
    the four positions with minimum/maximum RA and minimum/maximum Dec are
    selected, and each is compared with its nearest other extreme position.

    Parameters
    ----------
    fits_path : str
        Directory searched with ``all_candidate_filenames``.

    Returns
    -------
    astropy.coordinates.Angle or None
        The largest of those nearest-neighbour separations, or None when no
        usable coordinates could be read.
    """
    ra_chunks = []
    dec_chunks = []

    # read_fits_ra_dec() handles one file at a time, so loop over the paths
    # instead of handing it the whole list.
    for path in all_candidate_filenames(fits_path):
        target_ra, target_dec, _ = read_fits_ra_dec(path)
        if np.size(target_ra):
            ra_chunks.append(np.asarray(target_ra, dtype=float))
            dec_chunks.append(np.asarray(target_dec, dtype=float))

    if not ra_chunks:
        return None

    all_ra = np.concatenate(ra_chunks)
    all_dec = np.concatenate(dec_chunks)

    # argmin/argmax would land on NaN entries, so discard them first.
    usable = ~(np.isnan(all_ra) | np.isnan(all_dec))
    all_ra = all_ra[usable]
    all_dec = all_dec[usable]
    if not all_ra.size:
        return None

    # Order: min RA, max RA, min Dec, max Dec.
    corner_idx = [np.argmin(all_ra), np.argmax(all_ra), np.argmin(all_dec), np.argmax(all_dec)]
    min_max = SkyCoord(all_ra[corner_idx]*u.deg, all_dec[corner_idx]*u.deg)

    # nthneighbor=2 skips the trivial match of each point with itself.
    idx, sep2d, d3d = match_coordinates_sky(min_max, min_max, nthneighbor=2)

    max_dist = sep2d.max()
    return max_dist

In [11]:
# Reading FITS and grabbing RA and DEC

if __name__ == "__main__":
    # Driver cell: run matching() on the first two candidate files and keep
    # the non-empty results. The names assigned here are module-level, so
    # other cells of the notebook can see them after this cell has run.
    
    path_to_transient = "/global/cfs/cdirs/desi/science/td/daily-search/desitrip/out"
    paths_to_fits = all_candidate_filenames(path_to_transient)
    
    # If done in serial, would it be faster to chunk all of the ra's and dec's together before comparing? Actually maybe not because of kdtree.
    # If done in parallel though, I doubt it.
    
    # One entry per file that produced at least one match.
    all_trans_matches = []
    all_alerts_matches = []
        
    # Only the first two files are processed (a quick smoke test).
    for path in paths_to_fits[:2]:
        # 5.0 is the maximum separation in arcseconds; matching() reads the
        # file, fetches the alerts and does the coordinate match itself.
        trans_matches, alerts_matches = matching(path, 5.0)
        if trans_matches.size:
            all_trans_matches.append(trans_matches)
            all_alerts_matches.append(alerts_matches)
        
    print(all_trans_matches)

APIError: {'Error code': 500, 'Message': 'Internal Server Error', 'Data': {}}

In [7]:
def glob_frames(exp_d: str, band: str = None):
    """List the ``cframe`` FITS files of one band inside an exposure directory.

    Parameters
    ----------
    exp_d : str
        Exposure directory to search.
    band : str, optional
        Band letter placed in the ``cframe-<band>*.fits`` pattern. Defaults to
        the notebook-level ``color_band`` (only one of b, r, z is needed).

    Returns
    -------
    list of str
        Sorted paths of the matching files, each prefixed with ``exp_d``.
        Empty (with a printed warning) when nothing matches.

    Raises
    ------
    SystemExit
        If the glob pattern cannot be built (for example ``exp_d`` is not a
        string).
    """
    if band is None:
        band = color_band

    try:
        # sframes are not flux calibrated, hence cframes.
        # May want to use tiles... coadd (will need later, but not now)
        filenames_read = glob.glob(exp_d + "/cframe-" + band + "*.fits")
    except TypeError:
        print("Could not grab/find any fits in the exposure directory:")
        print(exp_d)
        raise SystemExit("Exitting.")

    if not filenames_read:
        # glob.glob() returns [] rather than raising when nothing is found.
        print("Could not grab/find any fits in the exposure directory:")
        print(exp_d)

    return sorted(filenames_read)

#path_to_transient = "/global/cfs/cdirs/desi/science/td/daily-search/desitrip/out"
#print(all_candidate_filenames(path_to_transient)[1])

In [8]:
def write_matches_to_file(start_date, end_date, all_matches_dict, ledger_type):
    """Dump the per-date match records to a text file in the current directory.

    The file is named ``./matches_<ledger_type>_<start_date>-<end_date>.txt``.
    It starts with two header lines; every date that has at least one record
    then gets a ``<date>: `` line followed by one tab-indented line per
    record, whose fields are joined with ``"; "``.

    Parameters
    ----------
    start_date, end_date, ledger_type : str
        Pieces of the output file name.
    all_matches_dict : dict
        Maps a date to a list of records (each an iterable of fields).

    Returns
    -------
    None
    """
    out_path = "".join(("./matches_", ledger_type, "_", start_date, "-", end_date, ".txt"))

    with open(out_path, 'w') as handle:
        handle.write("date:\n")
        handle.write("\tframename; (RA, DEC); Table Index in FIBERMAP; Ledger ID; Alert (RA, DEC) - matched to 2\"\n") # Add fiber id, distance between TAMU and fiber obj

        for obs_date, records in all_matches_dict.items():
            # Dates without any record are left out of the file entirely.
            if not records:
                continue

            handle.write(str(obs_date) + ": \n")
            for record in records:
                fields = [str(field) for field in record]
                handle.write("\t" + "; ".join(fields) + "\n")

    return None

In [9]:
def initial_check(ledger_df = None, ledger_type = ''):
    """Coarse match: find exposures whose tile centre lies near a ledger entry.

    Exposures observed between 2020-11-30 and today are read from the SQLite
    database ``db_filename`` and grouped by observation date. For every date
    the unique tile centres are matched against the ledger with
    ``decam_matching`` using a 1.8 degree radius.

    Parameters
    ----------
    ledger_df : pandas.DataFrame or None
        Ledger of transients/alerts. When None or empty it is downloaded:
        from TAMU for ``'DECAM_TAMU'``, or from ALeRCE (alerts since 28 days
        before the first processed date) for ``'ALERCE'``.
    ledger_type : str
        ``'DECAM_TAMU'`` or ``'ALERCE'`` (case-insensitive).

    Returns
    -------
    dict
        Maps each observation date with at least one match to the list of
        database rows (``sqlite3.Row``) of the matched tiles. Empty when the
        ledger type is not supported.
    """
    ledger_kind = ledger_type.upper()
    if ledger_kind not in ('ALERCE', 'DECAM_TAMU'):
        print("Cannot use alerts broker/ledger provided. Stopping before match.")
        return {}

    # previous_date_ranges: "20201130" - "20210302"
    query_date_start = "20201130"
    query_date_end = Time.now().iso.split(" ")[0].replace("-", "")  # today as YYYYMMDD

    # Crossmatch across tiles and exposures to grab obsdate via tileid.
    # The date limits are bound as parameters instead of being pasted into the SQL text.
    query_match = ("SELECT distinct tilera, tiledec, obsdate, obsmjd, expid, exposures.tileid "
                   "from exposures INNER JOIN tiles ON exposures.tileid = tiles.tileid "
                   "where obsdate BETWEEN ? AND ?;")

    conn = sqlite3.connect(db_filename)
    try:
        conn.row_factory = sqlite3.Row # https://docs.python.org/3/library/sqlite3.html#sqlite3.Row
        cur = conn.cursor()
        cur.execute(query_match, (int(query_date_start), int(query_date_end)))
        matches_list = cur.fetchall()
        cur.close()
    finally:
        # The connection used to be left open.
        conn.close()

    # Group the rows by observation date in a single pass.
    date_dict = {}
    for db_row in matches_list:
        date_dict.setdefault(db_row['obsdate'], []).append(db_row)

    if ledger_kind == 'DECAM_TAMU' and (ledger_df is None or ledger_df.empty):
        ledger_df = access_decam_data('https://datahub.geos.tamu.edu:8000/decam/LCData_Legacy/')

    alert_matches_dict = {}

    for date, row in date_dict.items():

        date_str = str(date)
        date_str = date_str[:4]+"-"+date_str[4:6]+"-"+date_str[6:] # Adding dashes for Time
        obs_mjd = Time(date_str).mjd

        # One (tilera, tiledec) pair per tile id; the tile id is the last
        # selected column, RA and Dec are the first two.
        unique_tileid = {i[-1]:(i[0], i[1]) for i in row}
        exposure_ras, exposure_decs = zip(*unique_tileid.values())

        if ledger_kind == 'ALERCE' and (ledger_df is None or ledger_df.empty):
            ledger_df = access_alerts(lastmjd_in=obs_mjd-28) # Modified Julian Day

        # The tile centres are what gets matched here (these arguments used
        # to be the undefined names target_ras / target_decs).
        trans_matches, alert_matches = decam_matching(np.array(exposure_ras, dtype=float),
                                                      np.array(exposure_decs, dtype=float),
                                                      obs_mjd, '', max_sep = 1.8, sep_units = 'deg',
                                                      ledger_df_in = ledger_df, ledger_type_in = ledger_type)

        if not trans_matches.size:
            continue

        alert_matches_dict[date] = []

        for tup in trans_matches:
            # To retrieve from SkyCoord into numpy float, x.ra.deg, x.dec.deg
            ra = tup.ra.deg
            dec = tup.dec.deg
            # Exact comparison: relies on the coordinates coming back from
            # SkyCoord unchanged.
            match_rows = [i for i in row if (i['tilera'], i['tiledec']) == (ra, dec)]
            alert_matches_dict[date].extend(match_rows)

    return alert_matches_dict
            
            
#print(alert_matches_dict)
        
# I bet if we nest the loops we could easily parallelize between folders ;)
# That can come later if necessary, it probably won't be. 

# Also parallel https://stackoverflow.com/questions/20548628/how-to-do-parallel-programming-in-python
        
#print(all_trans_matches)
#cur.close()
#tiles_path="/global/project/projectdirs/desi/spectro/redux/daily/tiles"
#run_path="/global/u2/p/palmese/desi/timedomain/cronjobs/"
#td_path="/global/cfs/cdirs/desi/science/td/daily-search/"
#mapfile -t -d $'\n' obsdates_tileids < <( sqlite3 ${td_path}transients_search.db "$query" )

In [1]:
def closer_check(matches_dict = {}, ledger_df = None, ledger_type = '', exclusion_list = []):
    """Fine match: compare individual targets of matched exposures with the ledger.

    For every date in ``matches_dict`` the band-selected cframe files of each
    listed exposure are read, their target coordinates are pooled, and the
    pool is matched against the ledger with ``decam_matching`` using a
    2 arcsecond radius.

    Parameters
    ----------
    matches_dict : dict
        Maps a date (YYYYMMDD) to rows providing ``'obsdate'`` and ``'expid'``.
    ledger_df : pandas.DataFrame or None
        Ledger of transients/alerts; downloaded when None or empty.
    ledger_type : str
        ``'DECAM_TAMU'`` or ``'ALERCE'`` (case-insensitive).
    exclusion_list : list
        Dates to skip.

    Returns
    -------
    dict
        Maps each processed date to a list of tuples
        ``(frame file name, (RA, Dec) of the target, 1-based row of the target
        in that file's table, ledger ID, (RA, Dec) of the ledger entry)``.
        Empty when ``matches_dict`` is empty or the ledger type is unsupported.
    """
    all_exp_matches = {}

    if not matches_dict:
        print("No far matches fed in for nearby matching. Returning none.")
        return {}

    ledger_kind = ledger_type.upper()
    if ledger_kind == 'DECAM_TAMU':
        id_head, ra_head, dec_head = 'ObjectID', 'RA-OBJECT', 'DEC-OBJECT'
        if ledger_df is None or ledger_df.empty:
            ledger_df = access_decam_data('https://datahub.geos.tamu.edu:8000/decam/LCData_Legacy/')
    elif ledger_kind == 'ALERCE':
        id_head, ra_head, dec_head = 'oid', 'meanra', 'meandec'
    else:
        # Without known column names no ledger lookup is possible.
        print("Cannot use alerts broker/ledger provided. Stopping before match.")
        return {}

    for date, row in matches_dict.items():
        print("\n", date)
        if date in exclusion_list:
            continue

        all_exp_matches[date] = []
        file_indices = {}   # frame path -> (start, stop) slice of the pooled arrays
        ra_chunks = []
        dec_chunks = []
        n_targets = 0

        for i in row:
            # Exposure directories are named with the zero-padded, 8-digit exposure id.
            exp_paths = '/'.join((exposure_path, "daily/exposures", str(i['obsdate']), str(i['expid']).zfill(8)))
            for path in glob_frames(exp_paths):
                targ_ras, targ_decs, _ = read_fits_ra_dec(path, False)

                if len(targ_ras) != len(targ_decs):
                    print("Length of all ras vs. all decs do not match.")
                    print("Something went wrong!")
                    print("Continuing but not adding those to match...")
                    continue
                if not len(targ_ras):
                    continue

                # Half-open range, recorded only for files that really are
                # appended (the old bookkeeping started one index too early).
                file_indices[path] = (n_targets, n_targets + len(targ_ras))
                ra_chunks.append(np.asarray(targ_ras, dtype=float))
                dec_chunks.append(np.asarray(targ_decs, dtype=float))
                n_targets += len(targ_ras)

        if not n_targets:
            continue

        all_targ_ras = np.concatenate(ra_chunks)
        all_targ_decs = np.concatenate(dec_chunks)

        date_mjd = str(date)[:4]+"-"+str(date)[4:6] + "-" + str(date)[6:] # Adding dashes for Time
        date_mjd = Time(date_mjd).mjd

        if ledger_kind == 'ALERCE' and (ledger_df is None or ledger_df.empty):
            ledger_df = access_alerts(lastmjd_in=date_mjd-28) # Modified Julian Day

        alert_exp_matches, alerts_matches = decam_matching(all_targ_ras, all_targ_decs, date_mjd, '', max_sep = 2, sep_units = 'arcsec', ledger_df_in = ledger_df, ledger_type_in = ledger_type)

        for match_idx in range(len(alert_exp_matches)):
            match_ra = alert_exp_matches[match_idx].ra.deg
            match_dec = alert_exp_matches[match_idx].dec.deg

            alert_ra = alerts_matches[match_idx].ra.deg
            alert_dec = alerts_matches[match_idx].dec.deg

            # Ledger row whose RA equals the matched alert RA; take the first hit.
            id_hits = ledger_df.loc[ledger_df[ra_head] == alert_ra, id_head].values
            ledger_ID = id_hits[0] if len(id_hits) else None

            # The same target can sit in several frames (one per exposure),
            # so report every pooled position equal to the matched target.
            hits = np.flatnonzero((all_targ_ras == match_ra) & (all_targ_decs == match_dec))
            for location in hits:
                for k, v in file_indices.items():
                    if v[0] <= location < v[1]:
                        # + 1 because fits indexing starts at 1
                        row_in_file = int(location - v[0]) + 1
                        match_info = (k.split("/")[-1], (match_ra, match_dec), row_in_file, ledger_ID, (alert_ra, alert_dec))
                        if match_info not in all_exp_matches[date]:
                            all_exp_matches[date].append(match_info)
                        break   # ranges do not overlap, so only one file can match

    return all_exp_matches

# For testing
# NOTE(review): file_indices, loc, match_ra, match_dec, alert_ra and alert_dec
# are not assigned at module level anywhere in the visible notebook (they are
# local names inside closer_check), so on a fresh kernel this loop presumably
# fails with NameError - confirm before relying on it.
for k, v in file_indices.items():
    if loc in range(v[0], v[1]):
        print()
        # filepath, (ra,dec) for match, loc + 1 because fits indexing starts at 1, (RA, DEC) for alert table
        match_info = (k.split("/")[-1], (match_ra, match_dec), (loc + 1) % (v[1] - v[0]), (alert_ra, alert_dec)) 
        # Since it's a pain to retrieve the index from the table, it'll be easier to match it after the fact with np.where
        #all_exp_matches[date].append(match_info)

In [48]:
# Scratch query: fetch the alerts from 28 days before 2021-04-04 onwards.
# Both names become module-level variables that other cells can see.
obs_mjd = Time("2021-04-04").mjd
alerts = access_alerts(lastmjd_in=obs_mjd-28)
#print(alerts)
#alerts_ra = alerts['meanra'].to_numpy()
#alerts_dec = alerts['meandec'].to_numpy()

# Worry about accessing the data frame proper later, this is fine for now.
#print(np.where(alerts_ra == match_info[-1][-2]))
#print(np.where(alerts_dec == match_info[-1][-1]))
#print(alerts_ra[298])
#print(alerts_dec[298])
#print(alerts_dec)

TypeError: access_alerts() got an unexpected keyword argument 'lastmjd'

In [None]:
# Scratch lookup of the alert row whose mean RA equals alert_ra.
# NOTE(review): alert_ra / alert_dec are not assigned at module level in the
# visible notebook - presumably left over from an interactive session; confirm.
print(alert_ra)
print(alert_dec)
alerts.columns.values
#list(alerts.index.values)
alerts.iloc[np.where(pd.Index(alerts["meanra"]) == alert_ra)[0][0]] # grab row from alerts

In [None]:
#write_matches_to_file(query_date_start, query_date_end, all_exp_matches)

In [19]:
def read_matches_file(filename: str) -> dict:
    """Parse a text file in the layout produced by ``write_matches_to_file``.

    The first two lines (human-readable headers) are skipped. After that, a
    line that does not start with whitespace and contains ``":"`` opens a new
    date section; every other non-blank line is a record whose fields are
    separated by ``";"`` (spaces are removed).

    Parameters
    ----------
    filename : str
        Path of the file to read.

    Returns
    -------
    dict
        ``{date: {first field: [remaining fields]}}`` with all values kept as
        strings. Empty when the file cannot be opened or read.
        NOTE: records of one date that share the same first field overwrite
        each other; only the last one is kept.
    """
    info_dict = {}
    try:
        with open(filename) as f:
            all_lines = f.readlines()[2:] # Don't need header keywords - they're just there for humans
    except (OSError, TypeError, ValueError):
        # Missing/unreadable file, bad path type, or undecodable content.
        print("Could not open or read:", filename)
        return info_dict

    date = None
    for line in all_lines:
        if not line.strip():
            continue  # blank lines carry no information

        # Records are written with a leading tab, so a ':' inside a record
        # (e.g. in an identifier) must not be mistaken for a date header.
        if ":" in line and not line[0].isspace():
            date = line.split(':')[0] # Gets rid of ':' and newline character
            info_dict[date] = {}
        elif date is None:
            print("Skipping record found before any date line:", line.strip())
        else:
            data = line.lstrip('\t').rstrip('\n').replace(" ", "").split(';')
            info_dict[date][data[0]] = data[1:]

    return info_dict # now a dict of dicts

# Scratch code that redoes, at module level, the tile-row lookup and the
# group-by-date step also found inside initial_check().
# NOTE(review): matches_list and date_dict are only assigned inside
# initial_check() in the visible notebook, so on a fresh kernel this code
# presumably raises NameError - confirm before relying on it.
alert_matches_dict_2 = {i['obsdate']: [] for i in matches_list}

# For every matched position in the first entry of all_trans_matches, collect
# the rows of each date whose tile centre equals it exactly.
for tup in all_trans_matches[0]:
    ra = tup.ra.deg
    dec = tup.dec.deg
    print((ra,dec))
    for date, row in date_dict.items():   
        match_rows = [i for i in row if (i['tilera'], i['tiledec']) == (ra,dec)]
        alert_matches_dict_2[date].extend(match_rows)
print(alert_matches_dict_2)

# Group the rows by obsdate (the list is filtered once per row).
#index = {k['obsdate'] : list(filter(lambda x:x in k, k)) for k in matches_list}
index = {k['obsdate'] : list(filter(lambda x:x['obsdate'] == k['obsdate'], matches_list)) for k in matches_list}
#print(len(matches_list))
#print(list(index.keys()))
# Print every row of every date, then the total number of rows.
tot = 0
for obj,val in index.items():
    for i in val:
        print(i[:], end = ", ")
    tot += len(index[obj])
    print()
    
print(tot)
#print(index)

# https://stackoverflow.com/questions/11276473/append-to-a-dict-of-lists-with-a-dict-comprehension

~~Match pointing (center of DESI circle, 1.8 deg radius)
RA, DEC of pointing in cframe files~~

~~Once match, run through CNN
obsdate, tilenumber then runs on all fibers
Force a result for those fiber(s) where match
Grab coadd file on a match and then feed that to CNN (will ask for help)~~

~~Notes for running the classifier 
Looks for broad spectrum as opposed to narrow emission line since we're looking at \*novae - blackbody
zbest has fibermap???
Can remove (From py or ipynb)                 # Apply standard event selection.
                isTGT = fibermap['OBJTYPE'] == 'TGT'
                isGAL = zbest['SPECTYPE'] == 'GALAXY'
Just look using fibernumber/fiberid (select = one fiber)
idx = selects max across all categories for whole sample so just change that 
For applygradcam to one spectrum - rsflux[specific_index, :]
https://github.com/desihub/timedomain/blob/master/desitrip/docs/nb/cnn_classify_data_gradCAM.ipynb~~


                


DECam transients: see https://github.com/desihub/timedomain/blob/master/too_ledgers/decam_ledgermaker.ipynb
The Transient Name Server reports only the most interesting transients (brokers have more information).
Matching needs RA-OBJECT, DEC-OBJECT and Discovery-Time from each object summary, which is a dictionary (changes to be pushed by Antonella).

In [12]:
def decam_matching(target_ras_in = np.array([]), target_decs_in = np.array([]), obs_mjd_in = '', path_in = '', max_sep = 2, sep_units = 'arcsec', ledger_df_in = None, ledger_type_in = ''): # to be combined with the other matching thing in due time
    """Match target positions against a ledger of transients/alerts.

    Parameters
    ----------
    target_ras_in, target_decs_in : array-like
        Target coordinates in degrees (arrays, lists or tuples).
    obs_mjd_in : float
        Observation date (MJD); only used to label the stored KD-tree.
    path_in : str
        Unused; kept so existing calls keep working.
    max_sep : float
        Maximum separation for a match, expressed in ``sep_units``.
    sep_units : str
        ``'arcsec'``, ``'arcmin'`` or ``'deg'``; anything else falls back to
        arcseconds with a printed notice.
    ledger_df_in : pandas.DataFrame
        Ledger holding the coordinate columns of the chosen ledger type.
    ledger_type_in : str
        ``'DECAM_TAMU'`` (columns RA-OBJECT / DEC-OBJECT) or ``'ALERCE'``
        (columns meanra / meandec), case-insensitive.

    Returns
    -------
    trans_matches, alerts_matches : astropy.coordinates.SkyCoord
        Targets closer than ``max_sep`` to a ledger entry, and the nearest
        ledger entry of each. Two empty numpy arrays are returned when there
        is nothing to match or the ledger cannot be used.
    """
    # Plain float arrays so that boolean masking also works for lists/tuples.
    target_ras_in = np.asarray(target_ras_in, dtype=float)
    target_decs_in = np.asarray(target_decs_in, dtype=float)

    if not target_ras_in.size:
        return np.array([]), np.array([])

    unit_lookup = {'arcsec': u.arcsec, 'arcmin': u.arcmin, 'deg': u.deg}
    if sep_units not in unit_lookup:
        print("Separation unit specified is invalid for matching. Defaulting to arcsecond.")
    max_sep = max_sep * unit_lookup.get(sep_units, u.arcsec)

    # A position is unusable when EITHER coordinate is NaN (the previous mask
    # only dropped rows in which both were NaN).
    unusable = np.isnan(target_ras_in) | np.isnan(target_decs_in)
    if np.any(unusable):
        print("NaNs found, removing them from array (not FITS) before match.")
        target_ras_in = target_ras_in[~unusable]
        target_decs_in = target_decs_in[~unusable]
        if not target_ras_in.size:
            return np.array([]), np.array([])

    ledger_kind = ledger_type_in.upper()
    if ledger_kind == 'DECAM_TAMU':
        ra_head = 'RA-OBJECT'
        dec_head = 'DEC-OBJECT'
    elif ledger_kind == 'ALERCE':
        ra_head = 'meanra'
        dec_head = 'meandec'
    else:
        print("No ledger type specified. Will try to figure it out assuming it's a pandas dataframe.")
        print("Returning empty-handed for now until that is complete - Matthew")
        return np.array([]), np.array([])

    if ledger_df_in is None or len(ledger_df_in) == 0:
        print("Ledger is missing or empty; nothing to match against.")
        return np.array([]), np.array([])

    # Label for the stored KD-tree; tolerate a non-numeric observation date.
    try:
        tree_tag = str(obs_mjd_in - 28)
    except TypeError:
        tree_tag = "unknown"
    tree_name = "_".join(("kdtree", ledger_type_in, tree_tag))

    alerts_ra = ledger_df_in[ra_head].to_numpy()
    alerts_dec = ledger_df_in[dec_head].to_numpy()

    coo_trans_search = SkyCoord(target_ras_in*u.deg, target_decs_in*u.deg)
    coo_alerts = SkyCoord(alerts_ra*u.deg, alerts_dec*u.deg)

    # For every target, find the nearest ledger entry.
    # NOTE(review): coo_alerts is rebuilt on every call, so the tree stored
    # under tree_name is presumably never reused - confirm.
    idx_alerts, d2d_trans, d3d_trans = match_coordinates_sky(coo_trans_search, coo_alerts, storekdtree = tree_name)

    sep_constraint = d2d_trans < max_sep
    trans_matches = coo_trans_search[sep_constraint]
    alerts_matches = coo_alerts[idx_alerts[sep_constraint]]

    if trans_matches.size:
        print("Minimum distance found: ", d2d_trans.min())

    return trans_matches, alerts_matches

In [51]:
# Need to figure out a way to put in username and password via Requests
# Load the DECam/TAMU ledger; with overwrite=False a cached CSV in the current
# directory is reused when it exists, otherwise the data is downloaded.
decam_transients = access_decam_data('https://datahub.geos.tamu.edu:8000/decam/LCData_Legacy/', overwrite = False) # If True, grabs a fresh batch

In [52]:
decam_transients

Unnamed: 0,ObjectID,RA-OBJECT,DEC-OBJECT,NumberAlerts,MaxSCORE,RA-PSEUDO-HOST,DEC-PSEUDO-HOST,SEP-PSEUDO-HOST,RA-NEIGHBOR-STAR,DEC-NEIGHBOR-STAR,...,Discovery-Round,Discovery-Time,Discovery-Filter,Discovery-Magnitude,Discovery-SNR,Latest-Round,Latest-Time,Latest-Filter,Latest-Magnitude,Latest-SNR
0,A202103221407558m001825,211.982786,-0.306951,12,0.972,211.982614,-0.306946,0.6199,211.983372,-0.306315,...,0,2021-03-22T06:40:19.074,N,22.13,19.2,9,2021-04-18T05:37:55.763,N,22.86,10.2
1,A202103221408139m033502,212.057952,-3.583947,26,0.953,212.057864,-3.583960,0.3199,212.058798,-3.586276,...,0,2021-03-22T08:20:58.209,N,21.78,25.7,18,2021-05-18T06:16:52.581,N,22.10,13.9
2,A202103221408412p002445,212.171737,0.412527,47,0.998,212.171673,0.412394,0.5317,212.174697,0.411566,...,0,2021-03-22T06:36:50.928,S,20.61,36.2,19,2021-05-22T03:10:29.715,S,21.53,16.5
3,A202103221408578m005300,212.241200,-0.883300,2,0.855,212.241200,-0.883400,0.3000,212.239800,-0.884900,...,0,2021-03-22T08:17:30.880,S,22.55,15.5,1,2021-03-24T07:10:51.368,S,22.33,25.6
4,A202103221409059m023156,212.274757,-2.532478,21,0.969,212.274533,-2.532531,0.8290,212.275356,-2.535003,...,0,2021-03-22T08:14:02.747,N,22.27,10.6,18,2021-05-18T03:17:17.544,N,22.99,8.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
525,T202105301405547p043407,211.477939,4.568626,2,0.881,211.473443,4.568954,16.1787,211.477656,4.568650,...,22,2021-05-30T02:59:57.315,N,20.73,10.4,22,2021-05-30T04:39:26.875,N,22.46,12.7
526,T202105301432117p040917,218.048953,4.154764,2,0.480,218.032649,4.182142,114.6331,218.047316,4.153485,...,22,2021-05-30T05:38:41.744,S,21.74,10.8,22,2021-05-30T06:16:17.835,S,21.69,10.3
527,T202105301434113p034128,218.547346,3.691341,2,0.702,218.549157,3.689940,8.2327,218.547538,3.691805,...,22,2021-05-30T05:38:41.744,S,21.04,23.9,22,2021-05-30T06:16:17.835,S,21.19,18.7
528,T202105301452527p011841,223.219769,1.311619,3,0.997,223.218727,1.310085,6.6763,223.219019,1.313305,...,22,2021-05-30T05:17:31.450,N,20.18,38.6,22,2021-05-30T06:32:46.888,N,20.25,32.6


In [28]:
# Quick sanity check on the DECam ledger: print the ten largest ObjectID values
# (plain sort order of the ID strings).
# Example of looking up a single object by its RA, kept for reference:
#decam_transients.loc[decam_transients["RA-OBJECT"] == 211.98278629]["ObjectID"].values[0]
object_ids_sorted = sorted(decam_transients["ObjectID"].values)
print(object_ids_sorted[-10:])

['T202104271211352p000120', 'T202104271213064p001246', 'T202104271217522m005428', 'T202104291421131p004942', 'T202105031359093p063735', 'T202105031415178p020156', 'T202105031422035p013900', 'T202105031435063p004233', 'T202105031441256p023213', 'T202105031451333m023945']


In [53]:
# Run initial_check on the DECam (TAMU) ledger; the result is handed to closer_check in a later cell.
init_matches_by_date = initial_check(
    ledger_df=decam_transients,
    ledger_type='DECAM_TAMU',
)
# Reference signature of the lower-level matcher:
#matches, alert_matches = decam_matching(target_ras = [], target_decs = [], obs_mjd = '', path_in = '', max_sep = 5, sep_units = 'arcsec', ledger_df = [], ledger_type = '')

Minimum distance found:  0d03m16.5992s
Minimum distance found:  0d03m16.5992s
Minimum distance found:  0d03m16.5992s
Minimum distance found:  0d03m16.5992s
Minimum distance found:  0d03m47.5151s
Minimum distance found:  0d03m16.5992s
Minimum distance found:  0d03m16.5992s
Minimum distance found:  0d03m16.5992s
Minimum distance found:  0d03m47.5151s
Minimum distance found:  0d05m14.6746s
Minimum distance found:  0d08m46.5863s
Minimum distance found:  0d08m29.2662s
Minimum distance found:  0d10m54.8542s
Minimum distance found:  0d06m05.315s
Minimum distance found:  0d07m58.4153s
Minimum distance found:  0d07m00.7989s
Minimum distance found:  0d06m10.268s
Minimum distance found:  0d00m48.7596s
Minimum distance found:  0d10m17.9326s
Minimum distance found:  0d00m48.7596s
Minimum distance found:  0d00m48.7596s
Minimum distance found:  0d06m10.268s
Minimum distance found:  0d11m07.234s
Minimum distance found:  0d13m05.3889s
Minimum distance found:  0d13m05.7518s
Minimum distance found:  0d11

In [54]:
# Nights (YYYYMMDD) passed to closer_check as its exclusion list.
# NOTE(review): the captured log shows no match output for these dates, so they are
# presumably skipped by closer_check -- confirm against its definition.
exclusion_list = [
    20210101, 20210115, 20210205, 20210208, 20210217,
    20210218, 20210402, 20210411, 20210428, 20210530,
]
close_matches = closer_check(
    init_matches_by_date,
    ledger_df=decam_transients,
    ledger_type='DECAM_TAMU',
    exclusion_list=exclusion_list,
)


 20210101

 20210115

 20210205

 20210208

 20210217

 20210218

 20210221
Minimum distance found:  0d00m00.4727s

 20210322
Minimum distance found:  0d00m00.4727s

 20210402

 20210405
NaNs found, removing them from array (not FITS) before match.
Minimum distance found:  0d00m00.2082s

 20210406
NaNs found, removing them from array (not FITS) before match.
Minimum distance found:  0d00m00.3198s

 20210407
NaNs found, removing them from array (not FITS) before match.
Minimum distance found:  0d00m00.3034s

 20210408
NaNs found, removing them from array (not FITS) before match.
Minimum distance found:  0d00m00.5481s

 20210409
NaNs found, removing them from array (not FITS) before match.
Minimum distance found:  0d00m00.3075s

 20210410
NaNs found, removing them from array (not FITS) before match.
Minimum distance found:  0d00m00.2082s

 20210411

 20210412
NaNs found, removing them from array (not FITS) before match.
Minimum distance found:  0d00m00.3375s

 20210413
NaNs found, remov

In [55]:
# Persist the close matches to disk via write_matches_to_file.
# Debug aid: print({k: v for k, v in close_matches.items() if v}) shows only the dates with matches,
# and sum(len(v) for v in close_matches.values()) gives the total number of matches.

# `today` is set once in the setup cell (Time.now() at that moment), so this is the date
# the kernel ran that cell, formatted as YYYYMMDD.
iso_date = today.iso.split(" ")[0]
smushed_YMD = "".join(iso_date.split("-"))

# NOTE(review): "20201130" is presumably the start date of the searched range -- confirm
# against write_matches_to_file.
write_matches_to_file("20201130", smushed_YMD, close_matches, "DECAM_TAMU")

In [29]:
# Build the list of (tile_id, date, petal_num, row) entries for the CNN step.
# Reads the most recently created matches file and looks up each exposure's tile id in the
# `exposures` table of the transients DB.
#   info_list_w_dup : one entry per matched cframe file (duplicates kept)
#   info_list       : same entries, keeping only the first occurrence of each (tile_id, petal_num, row)
file_list = glob.glob("./matches/*.txt")
if not file_list:
    # max() on an empty list would only raise an opaque "empty sequence" ValueError.
    raise FileNotFoundError("No matches files (*.txt) found in ./matches/")
file_dict = read_matches_file(max(file_list, key=os.path.getctime))

# The exposure id is bound as a parameter rather than concatenated into the SQL text.
query_template = "SELECT distinct obsdate, tileid from exposures where expid == ?;" #obsdate>20210228

conn = sqlite3.connect(db_filename)
conn.row_factory = sqlite3.Row # https://docs.python.org/3/library/sqlite3.html#sqlite3.Row
cur = conn.cursor()

info_list = []
info_list_w_dup = []
exp_id_triplets = []

try:
    for date, v_dict in file_dict.items():
        for filename, info in v_dict.items():
            # File names look like "cframe-[color_band][petal_num]-[exp_id].fits".
            name_parts = filename.strip("\t").split("-")
            # Drop the trailing ".fits" and let int() remove the zero padding; slicing off a
            # fixed three characters would truncate exposure ids longer than five digits.
            exp_id = int(name_parts[-1][:-5])
            petal_num = name_parts[1][1]
            row = info[1]

            cur.execute(query_template, (exp_id,))
            exposure_row = cur.fetchone()
            if exposure_row is None:
                raise LookupError(
                    "expid {} (from file {!r}, date {}) not found in exposures table of {}".format(
                        exp_id, filename, date, db_filename))
            tile_id = exposure_row['tileid']

            # TODO: grab the targetid in modified_cnn_classify while opening zbest, using the info
            # from the matches file (zbest files are named zbest-[petal_num]-[tile_id]-[date].fits,
            # e.g. zbest-7-81088-20210404.fits; coadd files follow the same pattern).
            # Equivalent CLI call: python3 cnn_classify_data.py -d {date} -t {tile_id} -g

            info_list_w_dup.append((tile_id, date, petal_num, row))
            triplet = (tile_id, petal_num, row)
            if triplet not in exp_id_triplets:
                info_list.append((tile_id, date, petal_num, row))
                exp_id_triplets.append(triplet)
finally:
    # Release the cursor and the connection even if a lookup fails.
    cur.close()
    conn.close()

# Next up - figure out how to feed this to CNN!
print(len(info_list))
print(len(info_list_w_dup))

278
336


In [36]:
# Dump info_list to cnn_feed.txt, one entry per line: the tuple's string form with the
# surrounding parentheses and all single quotes removed.
with open('cnn_feed.txt', 'w') as feed_file:
    feed_file.writelines(
        str(entry).strip("()").replace("'", "") + '\n'
        for entry in info_list
    )

In [None]:
# Notes to self: double matches are to be expected; it could be worthwhile to compare the spectrum to both.
# Next time, set up the pipeline to find these individual spectra, or at least produce lists (or something similar) to export to another ipynb and then run as a loop.
# This may be tough because... you know... ipynb... we'll see!