# Create Nightly Epoch Lightcurves

Author: Melissa Graham

Create nightly-epoch lightcurves for "good" candidates, for **ALL** of the DDF not just 2021.

We start by getting rid of all objects with an R/B score < 0.1, because there is clearly a locus of bad detections down there.

"Good" means at least 10 objects (detections in any filter) and a mean real-bogus (mrb) score > 0.4 for all objects.

We also make a "lonely epoch" flag for any epochs with a mrb < 0.4 **AND** for which
there is no epoch with mrb > 0.4 within 14 days. These are more likely to be spurious
coincidences with artifacts and we don't want to include these epochs in our
lightcurve summary parameters like time span or amplitude.

Create files that contain the lightcurves and the summary parameters.

Use these files as a starting sample for transient science with the DECam deep drilling field data.

Create output files in the same format as made by `candidate_nightly_epochs.ipynb`:
 * candidate_lightcurves.dat
 * candidate_lightcurve_parameters.dat 

## 0. Set up

Import packages and connect to database.

In [None]:
import psycopg2
import psycopg2.extras
import getpass
import pandas

import os
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from astropy.time import Time

from copy import deepcopy
import time

By semester.

In [None]:
# Semester labels, in chronological order.
semesters = ['21A', '21B', '22A', '22B', '23A']

# ISO timestamps of the semester boundaries. There is one more boundary than
# there are semesters: semester k runs from entry k to entry k+1.
sem_dates_isot = [
    '2021-02-01T00:00:00',
    '2021-08-01T00:00:00',
    '2022-02-01T00:00:00',
    '2022-08-01T00:00:00',
    '2023-02-01T00:00:00',
    '2023-08-01T00:00:00',
]

# The same boundaries as an astropy Time array on the UTC scale.
sem_dates = Time(sem_dates_isot, format='isot', scale='utc')

In [None]:
# temp = Time([59400,59401,59401.2,59401.3], format='mjd')
# temp2 = temp.isot
# temp3 = []
# for i in temp2:
#     temp3.append(i[0:10])
# temp3

Log in as user `decat_ro`, in order to access `versiontags`.

In [None]:
# Ask for the credentials interactively so they are never stored in the notebook.
dbuser = input("DB User: ")
dbpasswd = getpass.getpass("DB Password: ")
# Hand the connection parameters to psycopg2 as keyword arguments rather than
# formatting them into a quoted DSN string: a user name or password that
# contains a quote, a backslash or a space would otherwise corrupt the DSN.
db = psycopg2.connect(dbname='decat', user=dbuser, password=dbpasswd,
                      host='decatdb.lbl.gov')

In [None]:
# Switch the connection to autocommit mode.
db.autocommit = True
# Use a DictCursor so that rows can be read by column name (e.g. row['id']).
cursor = db.cursor( cursor_factory = psycopg2.extras.DictCursor )

If you want to print table schema.

In [None]:
# tables = ['versiontags','exposures','subtractions','images',\
#           'objects','objectrbs','objectdatas','objectdata_versiontag','candidates']
# for table in tables:
#     query = "SELECT column_name, data_type FROM information_schema.columns WHERE table_name=%s"
#     cursor.execute( query, ( table, ))
#     print( f"\nTABLE: {table}\n===========================" )
#     for row in cursor:
#         print( f"{row['column_name']:24s}  :  {row['data_type']:s}" )

Things in the database are tagged with versions.
This is because we might redo something, or we might try different subtraction algorithms.
This might change in the future, but, at the moment everything that's in the database should be tagged with the "latest" tag, which means it was the last thing saved to the database.

In [None]:
# Look up the numeric id of the version tag used to select database rows.
tag = "latest"
# NOTE(review): presumably clears any transaction left open by an earlier
# failed query -- confirm it is still needed now that autocommit is on.
db.rollback()
q = "SELECT id, tag FROM versiontags WHERE tag=%(tag)s"
cursor.execute(q, {"tag": tag})
row = cursor.fetchone()
# fetchone() returns None when no row matches; report that clearly instead of
# failing with "'NoneType' object is not subscriptable" on the next line.
if row is None:
    raise ValueError(f"version tag {tag!r} not found in table versiontags")
tagid = row['id']
print(tagid)

## 1. Query

Get all the objects in ELAIS and COSMOS fields.

This takes about a minute.

In [None]:
%%time
# Select every object measurement carrying the chosen version tag that falls
# inside one of two RA/Dec boxes: 147 < RA < 153 (treated as COSMOS further
# down) or 5 < RA < 12 (treated as ELAIS further down).  Each row carries the
# position, magnitude and error, real/bogus score, filter, image mean MJD,
# candidate id and exposure proposal id.
# NOTE(review): rbs.rbtype_id=2 hard-codes which real/bogus score is joined --
# confirm this is the intended one.
q = ("SELECT od.ra, od.dec, od.mag, od.magerr, rbs.rb, "
     "i.filter, i.meanmjd, o.candidate_id, e.proposalid "
     "FROM objectdatas AS od "
     "INNER JOIN objectdata_versiontag AS odvt "
     "ON od.id=odvt.objectdata_id AND odvt.versiontag_id=%(tagid)s "
     "INNER JOIN objects AS o ON od.object_id=o.id "
     "INNER JOIN images AS i ON o.image_id=i.id "
     "INNER JOIN objectrbs as rbs ON od.id=rbs.objectdata_id AND rbs.rbtype_id=2 "
     "INNER JOIN exposures AS e ON i.exposure_id=e.id "
     "WHERE ((od.ra > 147.0 AND od.ra < 153.0 AND od.dec > -0.25 AND od.dec < 5) "
     "OR (od.ra > 5.0 AND od.ra < 12.0 AND od.dec > -46 AND od.dec < -41)) ")
cursor.execute(q, {'tagid': tagid})
# The following cell reads the DataFrame columns by integer position (0-8),
# i.e. in the order of the SELECT list above.
df = pandas.DataFrame(cursor.fetchall())

Print total number of objects.

In [None]:
# Number of rows returned by the query (one row per object).
print(df.shape[0])

Put the objects into numpy arrays.

In [None]:
# Unpack the query result into one flat numpy array per quantity.
# Column positions follow the SELECT list:
#   0 ra, 1 dec, 2 mag, 3 magerr, 4 rb, 5 filter, 6 meanmjd,
#   7 candidate_id, 8 proposalid
(raw_obj_ra, raw_obj_dec, raw_obj_mag,
 raw_obj_mage, raw_obj_rb, raw_obj_mjd) = (
    np.asarray(df[col], dtype='float') for col in (0, 1, 2, 3, 4, 6)
)
raw_obj_filt, raw_obj_candid, raw_obj_propid = (
    np.asarray(df[col], dtype='str') for col in (5, 7, 8)
)
# The DataFrame is no longer needed once the arrays exist.
del df

### 1.2. Explore object properties 2021-2023

In [None]:
# Histogram of the detection dates of all queried objects, with the semester
# boundaries (magenta), the semester labels and the current date (red).

# Create the output directory if needed so that savefig cannot fail on it.
os.makedirs('all_candidate_nightly_epochs_files', exist_ok=True)

plt.hist(raw_obj_mjd, bins=100)
for boundary_mjd in sem_dates.mjd:
    plt.axvline(boundary_mjd, color='magenta')
# Each label is drawn at the start of its semester; zip stops after the last
# semester, so the final boundary gets a line but no label.
for label, start_mjd in zip(semesters, sem_dates.mjd):
    plt.text(start_mjd, 270000, label)  # y position is hard-coded for this data set
plt.axvline(Time.now().mjd, color='red')
plt.xlabel('MJD')
plt.ylabel('# Objects')
plt.title('COSMOS & ELAIS')
plt.savefig('all_candidate_nightly_epochs_files/cnelc_plot1')
plt.show()

In [None]:
# One row of panels per semester: real/bogus score versus magnitude on the
# left, magnitude error versus magnitude on the right.

# Create the output directory if needed so that savefig cannot fail on it.
os.makedirs('all_candidate_nightly_epochs_files', exist_ok=True)

# squeeze=False keeps `ax` two-dimensional whatever the number of semesters.
fig, ax = plt.subplots(len(semesters), 2, figsize=(10, 15), sharex=True,
                       squeeze=False)
sem_edges = sem_dates.mjd
for x, label in enumerate(semesters):
    # Objects observed after the start of this semester and up to its end.
    tx = np.where((raw_obj_mjd > sem_edges[x]) & (raw_obj_mjd <= sem_edges[x+1]))[0]
    ax[x, 0].plot(raw_obj_mag[tx], raw_obj_rb[tx], 'o', ms=2, alpha=0.2, mew=0, color='grey',
                  label=label+' (N='+str(len(tx))+')')
    # Horizontal line at the R/B = 0.1 rejection threshold used below.
    ax[x, 0].axhline(0.1, color='black')
    ax[x, 0].set_ylabel('rb')
    ax[x, 0].legend(loc='upper right')
    ax[x, 1].plot(raw_obj_mag[tx], raw_obj_mage[tx], 'o', ms=2, alpha=0.2, mew=0, color='grey')
    ax[x, 1].set_ylim([0.0,0.5])
    ax[x, 1].set_ylabel('mag e')
# The x axis is shared, so only the bottom row is labelled.
ax[-1, 0].set_xlabel('mag')
ax[-1, 1].set_xlabel('mag')
plt.savefig('all_candidate_nightly_epochs_files/cnelc_plot2')

## 2. Identify good candidates only

The unique candidates for these objects.

In [None]:
# Group the raw objects by candidate id:
#   values  -- the unique candidate ids
#   indices -- position of the first object of each candidate
#   inverse -- for every object, the index of its candidate in `values`
#   counts  -- number of objects per candidate
values, indices, inverse, counts = np.unique(
    raw_obj_candid,
    return_index=True,
    return_inverse=True,
    return_counts=True,
)

In [None]:
# Report the sizes of the four arrays returned by np.unique, then the mean
# number of objects per candidate.
print('len(values), len(indices), len(inverse), len(counts)',
      *(len(arr) for arr in (values, indices, inverse, counts)))
print(' ')
objs_per_cand = len(raw_obj_candid)/len(values)
print('On average, %4.2f objects per candidate' % objs_per_cand)

In [None]:
# Distribution of log10(objects per candidate), on a logarithmic y axis.
# The return value of plt.hist is kept so the next cell can read the bin tallies.
log_counts = np.log10(counts)
hist = plt.hist(log_counts, bins=100, log=True)
plt.show()

In [None]:
# hist[0] holds the per-bin tallies; show the first (lowest log10 count) bin.
bin_tallies = hist[0]
print(bin_tallies[0])

In [None]:
dx = np.where(counts[inverse] == 1)[0]
print(len(dx))

<br> 

Delete all objects with R/B < 0.1, as that is clearly a locus of "BAD".

Delete all objects of candidates with fewer than 10 objects.

Making the object arrays smaller speeds up the processing later on.

In [None]:
# Indices of the objects to drop: those whose candidate has fewer than 10
# objects, and those with a real/bogus score below 0.1.
sparse_candidate = counts[inverse] < 10
low_rb = raw_obj_rb < 0.1
dx = np.where(sparse_candidate | low_rb)[0]
print(len(dx))

In [None]:
# Build the cleaned object arrays by removing the rejected indices from every
# raw array; the raw arrays themselves are left untouched.
(obj_ra, obj_dec, obj_mag, obj_mage, obj_rb,
 obj_filt, obj_mjd, obj_candid, obj_propid) = (
    np.delete(raw, dx)
    for raw in (raw_obj_ra, raw_obj_dec, raw_obj_mag, raw_obj_mage, raw_obj_rb,
                raw_obj_filt, raw_obj_mjd, raw_obj_candid, raw_obj_propid)
)
# Drop the rejection index and the grouping arrays of the raw sample; the
# grouping is recomputed for the cleaned sample in the next cell.
del dx, values, indices, counts

In [None]:
# Regroup the cleaned objects by candidate id (same outputs as before:
# unique ids, first-occurrence indices, object-to-candidate map, counts).
values, indices, inverse, counts = np.unique(
    obj_candid,
    return_index=True,
    return_inverse=True,
    return_counts=True,
)

In [None]:
# Sizes of the regrouped arrays and the mean number of objects per candidate
# in the cleaned sample.
print('len(values), len(indices), len(inverse), len(counts)',
      *(len(arr) for arr in (values, indices, inverse, counts)))
print(' ')
objs_per_cand = len(obj_candid)/len(values)
print('On average, %4.2f objects per candidate' % objs_per_cand)

In [None]:
# Distribution of log10(objects per candidate) after the object-level cuts.
log_counts = np.log10(counts)
plt.hist(log_counts, bins=100, log=True)
plt.show()

Calculate the mean real-bogus for all remaining candidates.

In [None]:
# Per-candidate arrays: ids and object counts come straight from np.unique
# (cand_nob is the same array object as `counts`, not a copy); the mean
# real/bogus score is filled in by the next cell.
cand_id, cand_nob = values, counts
cand_mrb = np.zeros(cand_id.shape[0], dtype='float')

This takes about 10 minutes.

In [None]:
%%time
# Mean real/bogus score of every candidate, computed in a single pass.
#
# `inverse` (returned by the np.unique call on obj_candid) gives, for every
# object, the index of its candidate in cand_id.  np.bincount can therefore
# accumulate the per-candidate sums directly, instead of scanning the whole
# obj_candid array once per candidate.
#
# cand_nob needs no update: it is the `counts` array from the same np.unique
# call and so already holds the number of objects of each candidate.
#
# The result can differ from a per-candidate np.nanmean in the last floating
# point digits, because the scores are summed in a different order.
t0 = time.time()
rb_is_valid = ~np.isnan(obj_rb)
rb_sum = np.bincount(inverse, weights=np.where(rb_is_valid, obj_rb, 0.0),
                     minlength=len(cand_id))
rb_num = np.bincount(inverse, weights=rb_is_valid, minlength=len(cand_id))
# NaN scores are left out of the mean; a candidate whose scores are all NaN
# ends up with a NaN mean (0/0), hence the silenced warning.
with np.errstate(invalid='ignore', divide='ignore'):
    cand_mrb[:] = rb_sum / rb_num
del rb_is_valid, rb_sum, rb_num
print(len(cand_id), time.time() - t0, 'sec')

Delete the candidates for which the mean real-bogus score is <0.4.

Also delete any candidates that have < 10 objects (now that the R/B<0.1 objects are gone).

In [None]:
# Indices of the candidates to drop: mean real/bogus below 0.4, or fewer than
# 10 objects left.
low_mean_rb = cand_mrb < 0.4
too_few_objects = cand_nob < 10
dx = np.where(low_mean_rb | too_few_objects)[0]
print(len(dx))

In [None]:
# The "good" candidate sample: ids, object counts and mean real/bogus scores
# with the rejected candidates removed.
cand_ids, cand_nobjs, cand_meanrb = (
    np.delete(per_cand, dx) for per_cand in (cand_id, cand_nob, cand_mrb)
)

In [None]:
# Number of good candidates that remain.
n_good = len(cand_ids)
print(n_good)

## 3. Make the lightcurve files for good objects

Epochs with mean real-bogus 0.1 to 0.4 **ARE** included as points in the lightcurve file
**AND DO** contribute to the lightcurve parameters
**BUT NOT** if they are a lonely epoch.

Lonely epoch: mean r/b <0.4 and no other r/b > 0.4 detection within 14 days.

In [None]:
# Build the nightly-epoch lightcurves: for every good candidate, every
# calendar date with a detection and every filter (g, r, i), combine the
# detections into one epoch.  The results are collected in Python lists and
# converted to the ne_* numpy arrays at the end.
t1 = time.time()

temp_ne_candid = []
temp_ne_field = []
temp_ne_cal = []
temp_ne_mjd = []
temp_ne_fil = []
temp_ne_nobj = []
temp_ne_mag = []
temp_ne_mage = []
temp_ne_mrb = []

# Defined up front so the clean-up `del` below also works for an empty sample.
tally_ne = 0

for c, candid in enumerate(cand_ids):
    tally_ne = 0

    # Rough estimate of the remaining run time, printed at a few checkpoints.
    if c in (10, 100, 1000):
        t2 = time.time()
        print(c, ((t2-t1)/float(c))*(float(len(cand_ids)-c)),' remain')

    # Slice this candidate's objects once and work on the slices from here on.
    cx = np.where(obj_candid == candid)[0]
    c_mjd = obj_mjd[cx]
    c_filt = obj_filt[cx]
    c_mag = obj_mag[cx]
    c_mage = obj_mage[cx]
    c_rb = obj_rb[cx]

    # The field follows from the mean RA: 5 < RA < 12 is ELAIS, else COSMOS.
    mean_ra = np.mean(obj_ra[cx])
    if (mean_ra > 5.0) & (mean_ra < 12.0):
        field = 'ELAIS'
    else:
        field = 'COSMOS'

    # Unique calendar dates (first 10 characters of the ISO timestamp) and,
    # for each, the MJD of the first object found on that date.
    cal_dates = [stamp[0:10] for stamp in Time(c_mjd, format='mjd').isot]
    ucals, first_of_date = np.unique(cal_dates, return_index=True)
    umjds = c_mjd[first_of_date]
    del cal_dates, first_of_date
    # NOTE(review): dates are split at the calendar boundary of the ISO
    # timestamp while the window below is +/-0.4 d.  Detections on either side
    # of a date boundary within 0.4 d of each other would be combined into two
    # overlapping epochs -- confirm whether this occurs in the data.

    for ucal, mjd in zip(ucals, umjds):
        near_night = np.abs(c_mjd - mjd) < 0.4

        for fil in ('g', 'r', 'i'):
            # Detections of this night, in this filter, with a finite magnitude.
            fx = np.where(near_night & (c_filt == fil) & np.isfinite(c_mag))[0]
            if len(fx) == 0:
                continue

            if len(fx) >= 2:
                # Several detections: average them.  The error combines the
                # mean reported error with the scatter of the magnitudes.
                epoch_mjd = np.mean(c_mjd[fx])
                epoch_mag = np.mean(c_mag[fx])
                epoch_mage = np.sqrt(np.mean(c_mage[fx])**2 + np.std(c_mag[fx])**2)
                epoch_mrb = np.mean(c_rb[fx])
            else:
                # A single detection is passed through unchanged.
                epoch_mjd = c_mjd[fx[0]]
                epoch_mag = c_mag[fx[0]]
                epoch_mage = c_mage[fx[0]]
                epoch_mrb = c_rb[fx[0]]

            temp_ne_field.append(field)
            temp_ne_candid.append(candid)
            temp_ne_cal.append(ucal)
            temp_ne_mjd.append(epoch_mjd)
            temp_ne_fil.append(fil)
            temp_ne_nobj.append(len(fx))
            temp_ne_mag.append(epoch_mag)
            temp_ne_mage.append(epoch_mage)
            temp_ne_mrb.append(epoch_mrb)
            tally_ne += 1

            del fx

    if tally_ne == 0:
        # Report the id from cand_ids (the array being looped over); the
        # unfiltered cand_id array is indexed differently.
        print('warning: ', candid, ' no nightly epochs with detections')
    del cx

ne_field  = np.asarray( temp_ne_field, dtype='str' )
ne_candid = np.asarray( temp_ne_candid, dtype='str' )
ne_nobj   = np.asarray( temp_ne_nobj, dtype='int' )
ne_mjd    = np.asarray( temp_ne_mjd, dtype='float' )
ne_cal    = np.asarray( temp_ne_cal, dtype='str' )
ne_fil    = np.asarray( temp_ne_fil, dtype='str' )
ne_mag    = np.asarray( temp_ne_mag, dtype='float' )
ne_mage   = np.asarray( temp_ne_mage, dtype='float' )
ne_mrb    = np.asarray( temp_ne_mrb, dtype='float' )

del temp_ne_field,temp_ne_candid,temp_ne_nobj,temp_ne_mjd,temp_ne_cal
del temp_ne_fil,temp_ne_mag,temp_ne_mage,temp_ne_mrb,tally_ne

t2 = time.time()
print('elapsed: ',t2-t1)

### 3.1. Create the "lonely" flag for low-R/B epochs

In [None]:
# Flag "lonely" epochs (ne_loneflag = 1), separately for each candidate and
# filter.  An epoch is flagged when
#   * it is the only epoch of that lightcurve and its mean R/B is < 0.4, or
#   * the lightcurve has several epochs but none with mean R/B > 0.4, or
#   * its mean R/B is < 0.4 and the nearest epoch with mean R/B > 0.4 in the
#     same lightcurve is more than 14 days away.
t1 = time.time()

ne_loneflag = np.zeros(len(ne_mrb), dtype='int')

for c, cand in enumerate(cand_ids):
    # Rough estimate of the remaining run time, printed at a few checkpoints.
    if c in (10, 100, 1000):
        t2 = time.time()
        print(c, ((t2-t1)/float(c))*(float(len(cand_ids)-c)),' remain')

    is_cand = ne_candid == cand
    for filt in ('g', 'r', 'i'):
        in_lightcurve = is_cand & (ne_fil == filt)
        tx1 = np.where(in_lightcurve)[0]                       # all epochs
        tx2 = np.where(in_lightcurve & (ne_mrb > 0.4))[0]      # high-R/B epochs
        n_all, n_high = len(tx1), len(tx2)

        if n_all == 1:
            only = tx1[0]
            if ne_mrb[only] < 0.4:
                ne_loneflag[only] = 1
        elif n_all > 1 and n_high == 0:
            # No high-R/B epoch at all: every epoch is lonely.
            ne_loneflag[tx1] = 1
        elif n_all > 1 and n_high > 0:
            # Check each low-R/B epoch against the nearest high-R/B epoch.
            for x1 in tx1[ne_mrb[tx1] < 0.4]:
                gap = np.min(np.abs(ne_mjd[x1] - ne_mjd[tx2]))
                if gap > 14:
                    ne_loneflag[x1] = 1
        del tx1, tx2

t2 = time.time()
print('elapsed: ',t2-t1)
del t1, t2

In [None]:
# How many epochs were flagged as lonely, and what fraction of all epochs.
tx = np.where(ne_loneflag == 1)[0]
n_lonely = len(tx)
print('Number and fraction of lonely epochs: ', n_lonely, n_lonely/len(ne_loneflag))

### 3.2. Write the nightly-epoch lightcurves to file

In [None]:
# Write one line per nightly epoch to the lightcurve file.
fnm = 'all_candidate_nightly_epochs_files/candidate_lightcurves.dat'
# Create the output directory if needed so that open() cannot fail on it.
os.makedirs(os.path.dirname(fnm), exist_ok=True)

tnow = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())
header = (
    '# Melissa Graham, candidate_nightly_epochs.ipynb \n',
    '# UTC = '+tnow+' \n',
    '# \n',
    '# Columns \n',
    '#  0 field  \n',
    '#  1 id -- candidate identifier \n',
    '#  2 calendar date \n',
    '#  3 MJD \n',
    '#  4 filter \n',
    '#  5 number of objects combined \n',
    '#  6 magnitude (mean of objects combined) \n',
    '#  7 magnitude error \n',
    '#  8 real/bogus (mean of objects combined) \n',
    '#  9 lonely epoch flag \n',
    '# \n',
)
del tnow

row_format = ('%-9s %-14s %-8s %12.6f '
              '%1s %3i %6.3f %6.3f %6.4f %1i \n')

# The context manager closes the file even if a write raises.
with open(fnm, 'w') as fout:
    fout.writelines(header)
    for row in zip(ne_field, ne_candid, ne_cal, ne_mjd,
                   ne_fil, ne_nobj, ne_mag, ne_mage, ne_mrb, ne_loneflag):
        fout.write(row_format % row)

print('Wrote to: ',fnm)
del fnm

### 3.3. Calculate the lightcurve summary parameters and write to file

Recall that "lonely epochs" are not included in the calculation of the summary parameters.

In [None]:
# Lightcurve summary parameters per candidate, over all filters and per
# filter.  Lonely epochs are excluded; a candidate or filter without any
# usable epoch keeps the initial value of zero in every parameter.
t1 = time.time()

n_cand = len(cand_ids)

### timespan (last date - first date of detection)
nelc_tspan = np.zeros( n_cand, dtype='float' )
nelc_tspan_g = np.zeros( n_cand, dtype='float' )
nelc_tspan_r = np.zeros( n_cand, dtype='float' )
nelc_tspan_i = np.zeros( n_cand, dtype='float' )

### minimum magnitude (brightest detection)
nelc_minmag = np.zeros( n_cand, dtype='float' )
nelc_minmag_g = np.zeros( n_cand, dtype='float' )
nelc_minmag_r = np.zeros( n_cand, dtype='float' )
nelc_minmag_i = np.zeros( n_cand, dtype='float' )

### amplitude (maximum - minimum detection)
nelc_lcamp = np.zeros( n_cand, dtype='float' )
nelc_lcamp_g = np.zeros( n_cand, dtype='float' )
nelc_lcamp_r = np.zeros( n_cand, dtype='float' )
nelc_lcamp_i = np.zeros( n_cand, dtype='float' )

### number of epochs with detections
nelc_nde = np.zeros( n_cand, dtype='int' )
nelc_nde_g = np.zeros( n_cand, dtype='int' )
nelc_nde_r = np.zeros( n_cand, dtype='int' )
nelc_nde_i = np.zeros( n_cand, dtype='int' )

# The per-filter output arrays, grouped so the three filters share one code path.
per_filter = (
    ('g', nelc_nde_g, nelc_tspan_g, nelc_minmag_g, nelc_lcamp_g),
    ('r', nelc_nde_r, nelc_tspan_r, nelc_minmag_r, nelc_lcamp_r),
    ('i', nelc_nde_i, nelc_tspan_i, nelc_minmag_i, nelc_lcamp_i),
)

tempfield = []

### for every candidate
for c, candid in enumerate(cand_ids):
    # Rough estimate of the remaining run time, printed at a few checkpoints.
    if c in (10, 100, 1000):
        t2 = time.time()
        print(c, ((t2-t1)/float(c))*(float(len(cand_ids)-c)),' remain')

    # Epochs of this candidate; computed once and reused for every selection.
    is_cand = (ne_candid == candid) & (ne_nobj >= 1)

    ### at first, skip the loneflag constraint to fill tempfield and check mag issues
    cx = np.where(is_cand)[0]
    if len(cx) > 0:
        tempfield.append(ne_field[cx[0]])
    else:
        # A candidate can end up without any nightly epoch (no finite g/r/i
        # detection).  Derive its field from the object positions, with the
        # same RA rule used when the epochs were built, instead of failing
        # on cx[0].
        ox = np.where(obj_candid == candid)[0]
        mean_ra = np.mean(obj_ra[ox])
        tempfield.append('ELAIS' if 5.0 < mean_ra < 12.0 else 'COSMOS')
        print('warning: ', candid, ' no nightly epochs, summary parameters left at zero')
        del ox, mean_ra
    ### where the nobj >= 1, the mag should never be nan
    if np.isnan(ne_mag[cx]).any():
        print('warning, ', candid, ' has a nobj >=1, mag=nan epoch')
    del cx

    ### but now, apply the loneflag constraint
    ### (to include lonely epochs, drop the ne_loneflag term below)
    usable = is_cand & (ne_loneflag == 0)

    # All filters together: epochs are counted as unique calendar dates.
    cx = np.where(usable)[0]
    if len(cx) > 0:
        nelc_nde[c]    = len(np.unique(ne_cal[cx]))
        nelc_tspan[c]  = np.max( ne_mjd[cx] ) - np.min( ne_mjd[cx] )
        nelc_minmag[c] = np.min( ne_mag[cx] )
        nelc_lcamp[c]  = np.max( ne_mag[cx] ) - np.min( ne_mag[cx] )
    del cx

    # Per filter: epochs are counted as the number of epochs in that filter.
    for fil, nde, tspan, minmag, lcamp in per_filter:
        fx = np.where(usable & (ne_fil == fil))[0]
        if len(fx) > 0:
            nde[c]    = len(fx)
            tspan[c]  = np.max( ne_mjd[fx] ) - np.min( ne_mjd[fx] )
            minmag[c] = np.min( ne_mag[fx] )
            lcamp[c]  = np.max( ne_mag[fx] ) - np.min( ne_mag[fx] )
        del fx

cand_field = np.asarray(tempfield, dtype='str')
del tempfield

t2 = time.time()
print('elapsed: ',t2-t1)

In [None]:
# Write one line of summary parameters per good candidate.
fnm = 'all_candidate_nightly_epochs_files/candidate_lightcurve_parameters.dat'
# Create the output directory if needed so that open() cannot fail on it.
os.makedirs(os.path.dirname(fnm), exist_ok=True)

tnow = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())
header = (
    '# Melissa Graham, candidate_nightly_epochs.ipynb \n',
    '# UTC = '+tnow+' \n',
    '# \n',
    '# Columns \n',
    '#  0 field  \n',
    '#  1 id -- candidate identifier \n',
    '#  2 timespan (days between first and last detection) \n',
    '#  3 timespan in g \n',
    '#  4 timespan in r \n',
    '#  5 timespan in i \n',
    '#  6 minimum magnitude (not necessarily the peak) \n',
    '#  7 minimum magnitude g \n',
    '#  8 minimum magnitude r \n',
    '#  9 minimum magnitude i \n',
    '# 10 amplitude (magnitudes between brightest and faintest detection) \n',
    '# 11 amplitude g \n',
    '# 12 amplitude r \n',
    '# 13 amplitude i \n',
    '# 14 number of epochs (number of unique nights detected) \n',
    '# 15 number of epochs in g \n',
    '# 16 number of epochs in r \n',
    '# 17 number of epochs in i \n',
    '# \n',
)
del tnow

row_format = '%-8s %-14s %7.2f %7.2f %7.2f %7.2f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %3i %3i %3i %3i \n'

# The context manager closes the file even if a write raises.
with open(fnm, 'w') as fout:
    fout.writelines(header)
    for row in zip(cand_field, cand_ids,
                   nelc_tspan, nelc_tspan_g, nelc_tspan_r, nelc_tspan_i,
                   nelc_minmag, nelc_minmag_g, nelc_minmag_r, nelc_minmag_i,
                   nelc_lcamp, nelc_lcamp_g, nelc_lcamp_r, nelc_lcamp_i,
                   nelc_nde, nelc_nde_g, nelc_nde_r, nelc_nde_i):
        fout.write(row_format % row)

print('Wrote to: ',fnm)
del fnm