In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pyarrow.parquet as pq
import pandas as pd
from rich import print
import warnings
import datetime
from astropy.time import Time
import os

In [2]:
def counts2mag(cps, band):
    """Convert a count rate into a magnitude using a per-band zero point.

    Parameters
    ----------
    cps : scalar or array-like
        Count rate(s); passed directly to ``np.log10``.
    band : str
        ``'FUV'`` selects a zero point of 18.82; any other value selects 20.08.

    Returns
    -------
    scalar or ndarray
        ``-2.5 * log10(cps) + zero_point``. Inputs for which the logarithm is
        undefined produce ``nan``/``inf`` silently instead of raising or warning.
    """
    zero_point = 20.08 if band != 'FUV' else 18.82
    # Suppress both numpy's floating point "invalid" errors and any Python-level
    # warnings so that non-positive rates quietly propagate as nan/inf.
    with np.errstate(invalid='ignore'), warnings.catch_warnings():
        warnings.simplefilter("ignore")
        return -2.5 * np.log10(cps) + zero_point

In [3]:
def angularSeparation(ra1, dec1, ra2, dec2):
    """Angular separation between two positions via the haversine formula.

    All four inputs are interpreted as degrees (they are multiplied by pi/180
    before any trigonometry) and may be scalars or numpy arrays that broadcast
    against each other. The result is returned in degrees.
    """
    d2r = np.pi/180.
    ra2deg = 1./d2r

    # Convert every coordinate to radians up front.
    dec1_rad, dec2_rad = dec1*d2r, dec2*d2r
    ra1_rad, ra2_rad = ra1*d2r, ra2*d2r

    # Squared half-angle sines of the declination and right-ascension differences.
    sin2_half_ddec = np.sin((dec2_rad-dec1_rad)/2.)**2.
    sin2_half_dra = np.sin((ra2_rad-ra1_rad)/2.)**2.

    haversine = sin2_half_ddec+np.cos(dec1_rad)*np.cos(dec2_rad)*sin2_half_dra

    # Central angle in radians, converted back to degrees.
    return 2*np.arcsin(np.sqrt(haversine))*ra2deg

In [4]:
# Image header table; make_variable_target_table reads its ECLIPSE, BAND and EXPT_0 columns.
header_data = pd.read_csv('../ref/mislike_image_header_table.csv')
# Path of the parquet source catalog that the table-building functions query by (eclipse, obj_id).
catalog_filename = '../ref/catalog_nd_daostarfinder.parquet'
# ParquetFile handle on the same catalog. NOTE(review): not referenced by any other cell visible here.
catalog_file = pq.ParquetFile(catalog_filename)

In [5]:
def make_target_table(objid_list,morphology=''):
    """Look up catalog rows for a list of object ids and tag them with a morphology.

    Parameters
    ----------
    objid_list : iterable
        Object ids to look up. The last five characters of each id's string
        form are parsed as the eclipse number used in the catalog filter.
    morphology : str, optional
        Value written to the ``morphology`` column of every returned row.

    Returns
    -------
    pandas.DataFrame
        The ``eclipse``, ``obj_id``, ``ra``, ``dec`` and ``morphology`` columns
        of every matching catalog row, in input order. An empty DataFrame
        (no columns) is returned when ``objid_list`` is empty.
    """
    columns = ["eclipse","obj_id","ra","dec","morphology"]
    # Collect the per-object frames and concatenate once at the end: growing a
    # DataFrame with pd.concat inside the loop is quadratic and depends on how
    # pandas treats the empty seed frame.
    matches = []
    for objid in objid_list:
        eclipse = int(str(objid)[-5:])
        this_star = pq.read_table(catalog_filename,filters =
                                  [('eclipse','=',eclipse),
                                   ('obj_id','=',objid)]).to_pandas()
        this_star['morphology'] = morphology
        matches.append(this_star[columns])
    if not matches:
        return pd.DataFrame()
    return pd.concat(matches)

In [6]:
def _collect_sorted_targets(sorted_dir, categories=('flare','eclipse','trend','unk variable')):
    """Build one target table from a directory of manually sorted review images.

    ``sorted_dir`` must end with a path separator and contain one sub-directory
    per entry in ``categories``. For every file in a sub-directory, the text
    between the first and second '-' of the file name is parsed as an integer
    obj_id; file names without a '-' are skipped. Each category's ids are passed
    to ``make_target_table`` with the upper-cased first letter of the category
    name as the morphology flag.

    Returns the concatenation of the non-empty per-category tables, or an empty
    DataFrame if no targets were found.
    """
    tables = []
    for category in categories:
        targets = []
        for fn in os.listdir(f"{sorted_dir}{category}/"):
            try:
                targets.append(int(fn.split('-')[1]))
            except IndexError:
                # No '-' in the file name, so it is not a target image.
                continue
        table = make_target_table(targets,morphology=category.upper()[0])
        # Skip empty tables so they cannot influence column dtypes in the concat.
        if not table.empty:
            tables.append(table)
    return pd.concat(tables) if tables else pd.DataFrame()


# NOTE(review): these are absolute, machine-specific paths; consider a configurable root.
gfcat_corpus = _collect_sorted_targets("/Users/cm/GFCAT/gfcat/test_gifs/sorted/")
print(f'{len(gfcat_corpus)} sources found in general search')

gfcat_targeted = _collect_sorted_targets("/Users/cm/GFCAT/gfcat/test_known_jpg/sorted/")
print(f'{len(gfcat_targeted)} sources found in targeted search')

gfcat_fuv_matched = _collect_sorted_targets("/Users/cm/GFCAT/gfcat/test_fuv_matched/sorted/")
print(f'{len(gfcat_fuv_matched)} sources found in matched FUV targets')



In [7]:
def make_variable_target_table(obj_ids):
    """Build a table of positions and NUV magnitudes for the given object ids.

    For each id, the matching catalog row is read from ``catalog_filename``
    (the eclipse number is the last five characters of the id's string form),
    the ``aperture_sum_n_12_8`` value is divided by the ``EXPT_0`` value of the
    NUV header row for that eclipse to get a count rate, and the rate is
    converted to a magnitude with ``counts2mag``. The rate uncertainty is
    ``sqrt(aperture_sum_n_12_8) / EXPT_0``.

    Parameters
    ----------
    obj_ids : iterable
        Object ids to look up.

    Returns
    -------
    pandas.DataFrame
        Columns ``obj_id``, ``ra``, ``dec``, ``eclipse``, ``NUVmag``,
        ``NUVmag_err_1`` (bright-side error) and ``NUVmag_err_2`` (faint-side
        error). An empty DataFrame is returned when ``obj_ids`` is empty.

    Raises
    ------
    IndexError
        If an id has no catalog row, or its eclipse has no NUV header row.
    """
    columns = ["obj_id","ra","dec","eclipse","NUVmag","NUVmag_err_1","NUVmag_err_2"]
    # Accumulate per-object frames and concatenate once, rather than growing a
    # DataFrame with pd.concat on every iteration.
    rows = []
    for obj_id in obj_ids:
        eclipse = int(str(obj_id)[-5:])
        this_star = pq.read_table(catalog_filename,filters =
                                  [('eclipse','=',eclipse),
                                   ('obj_id','=',obj_id)]).to_pandas()
        if this_star.empty:
            raise IndexError(
                f"obj_id {obj_id} (eclipse {eclipse}) not found in {catalog_filename}")

        # Single combined mask instead of chaining two boolean .loc lookups.
        is_nuv_header = (header_data['ECLIPSE']==eclipse) & (header_data['BAND']=='NUV')
        expt = header_data.loc[is_nuv_header,'EXPT_0']
        if expt.empty:
            raise IndexError(f"no NUV header row found for eclipse {eclipse}")

        # Only the first matching row of each lookup is used.
        counts = this_star["aperture_sum_n_12_8"].tolist()[0]
        exptime = expt.tolist()[0]
        cps = counts/exptime
        cps_err = np.sqrt(counts)/exptime

        mag = counts2mag(cps,'NUV')
        mag_err_1 = mag-counts2mag(cps+cps_err,'NUV')
        mag_err_2 = counts2mag(cps-cps_err,'NUV')-mag # this one is always larger

        rows.append(this_star.assign(cps=cps,
                                     cps_err=cps_err,
                                     NUVmag=mag,
                                     NUVmag_err_1=mag_err_1,
                                     NUVmag_err_2=mag_err_2)[columns])
    if not rows:
        return pd.DataFrame()
    return pd.concat(rows)


In [8]:
# define manually adjusted source positions
# Columns (whitespace separated): eclipse, obj_id, ra, dec, morphology flag.
mpt = """06882 99999906882 310.03898  -0.86965 F 
15112 99999915112 173.68275  45.53918 F
29166 99999929166 355.72871   1.65753 F
29390 99999929390  10.92245 -42.34763 U
29643 99999929643  53.06506 -29.51007 U
43201 99999943201 217.15856  57.75332 U
03858 99999903858 135.96049  52.35710 F
04028 99999904028 189.00277  62.49956 F
07940 99999907940   6.43415  17.08139 F
09709 99999909709 164.46273  -3.76613 U
13533 99999913533  16.65372  45.38989 F
13656 99999913656  24.76303 -17.94913 F
16497 99999916497 219.53099  33.10083 F
18081 99999918081 358.46939 -10.54433 F
18380 99999918380   7.70821 -71.84528 F
18753 99999918753  51.26207 -17.21407 F
23505 99999923505  13.11492   0.09855 F
26488 99999926488 212.80185   6.83090 F
28619 99999928619 326.15289   0.70565 F
30122 99999930122  53.00912 -27.04739 F
30668 99999930668 133.73297   2.57409 F
31274 99999931274 120.70829  36.45803 F
36705 99999936705 146.54298  15.54300 F
40521 99999940521  29.19705  -0.35408 F
40930 99999940930  70.63517  18.43965 F
42499 99999942499 165.10157  12.73207 F
43238 99999943238 243.41123  54.66503 F
43828 99999943828 338.34099 -60.93026 F
44684 99999944684 308.13204   7.97132 F
45629 99999945629 160.64052  76.02885 F"""

# Split the text into a 2-D array of string fields once, then convert each
# column to its target dtype (column order matches the field order above).
_mpt_fields = np.array([row.split() for row in mpt.split('\n')])
_mpt_column_dtypes = {'eclipse':'int',
                      'obj_id':'int64',
                      'ra':'float',
                      'dec':'float',
                      'morphology':'str'}
gfcat_adjusted = pd.DataFrame(
    {column:np.array(_mpt_fields[:,position],dtype=dtype)
     for position,(column,dtype) in enumerate(_mpt_column_dtypes.items())})

In [9]:
# compile both targeted and untargeted sources
#gfcat_draft = pd.concat([make_variable_target_table(gfcat_corpus['obj_id'].values),
#                         make_variable_target_table(gfcat_targeted['obj_id'].values),
#                         make_variable_target_table(gfcat_fuv_matched['obj_id'].values),
#                         gfcat_adjusted])
#print(len(gfcat_draft))

In [31]:
# Load the table of variable source-visits saved from an earlier pass of the project
# (path is relative to the notebook's working directory).
gfcat_old_finds = pd.read_csv('missing_gfcat_visits_221217.csv',index_col=None)
# Show the trailing 12 rows; this same slice is what gets merged into gfcat_combined
# as the "repointed" entries. NOTE(review): the 12 is hard-coded in several cells.
gfcat_old_finds.iloc[-12:]

Unnamed: 0,eclipse,obj_id,ra,dec,morphology
823,15133,99999920000.0,204.17073,51.9146,E
824,4276,99999900000.0,149.84515,2.96435,F
825,24237,99999920000.0,358.90042,0.69566,F
826,21750,99999920000.0,202.73628,65.71721,U
827,9939,99999910000.0,215.08597,52.26377,E
828,7471,99999910000.0,346.70626,-0.61413,T
829,7545,99999910000.0,21.95165,-28.89494,F
830,30574,99999930000.0,127.88794,32.46371,U
831,28449,99999930000.0,351.76689,7.3603,T
832,15251,99999920000.0,194.85851,27.35632,U


In [32]:
# Stack every list of identified source-visits into one table. The concat order matters
# downstream: de-duplication keeps the first occurrence of each obj_id.
gfcat_combined = pd.concat([gfcat_corpus,gfcat_targeted,gfcat_adjusted,gfcat_fuv_matched,
                            gfcat_old_finds.iloc[-12:]]) # just the repointed ones
print(f"Starting with {len(gfcat_combined)} identified variable source-visits")
# NOTE(review): scratch dump to an absolute, user-specific path; this fails on other machines.
gfcat_combined.to_csv('/Users/cm/Downloads/temp.csv')

In [33]:
# np.unique returns the sorted unique obj_ids (`un`) and, with return_index=True, the
# position of the first occurrence of each one in gfcat_combined (`ix`).
# `un` is not used again in the cells shown here.
un,ix=np.unique(gfcat_combined['obj_id'],return_index=True)
# Keep one row per obj_id, ordered by ascending obj_id. reset_index() without drop=True
# gives a fresh 0..N-1 index and keeps the previous row labels as an 'index' column.
gfcat_visits = gfcat_combined.iloc[ix].reset_index()
gfcat_visits

Unnamed: 0,index,eclipse,obj_id,ra,dec,morphology
0,0,26808,2.072681e+07,185.750875,29.544538,T
1,0,21378,3.912138e+07,204.877337,30.654018,U
2,0,26045,4.102604e+07,149.468637,2.338955,U
3,0,23102,4.272310e+07,257.253864,58.873073,F
4,0,17070,4.461707e+07,247.108147,40.861124,F
...,...,...,...,...,...,...
1541,5,43201,9.999994e+10,217.158560,57.753320,U
1542,26,43238,9.999994e+10,243.411230,54.665030,F
1543,27,43828,9.999994e+10,338.340990,-60.930260,F
1544,28,44684,9.999994e+10,308.132040,7.971320,F


In [40]:
# Screen each eclipse for duplicate detections of the same source and drop the unwanted
# row(s) from gfcat_visits in place. Only eclipses with exactly two rows are examined.
# NOTE(review): this cell is not idempotent. gfcat_visits is mutated with inplace drops and
# then re-assigned by the concat at the bottom, so re-running it without re-running the
# cell that builds gfcat_visits appends the old finds a second time.
for e in np.unique(gfcat_combined.iloc[ix]['eclipse']):
    visits = gfcat_visits[gfcat_visits['eclipse']==e]
    # the following judgements to drop were made during manual reviews
    #if e in [1754,46775,2101,3094,4274,6042,6079,6435,6536,6745,
    #         7261,7315,7471,7541,8042,8226,8634,8645,8839,8845,9055,
    #         9427,9461,9756,9939,1038,10678,10726,14098,14102,15731,
    #         15883,16508,16530,16561,16642,16853,16940,17099,17126,
    #         17168,17228,17264,18124,18897,18905,18984,19013,19139,
    #         19579,19591,19618,21232,22111,22598,23828,26770,26808,
    #         27753,28248,28449,28445,28483,28638,28648,29072,29105,
    #         29126,29127,29166,29184,29643,30520,30521,30668,31178,
    #         31334,32012,32260,32294,32986,33020,33820,33994,33998,]:
    #    gfcat_visits.drop(visits.index[:],inplace=True)
    #if e in [7601,18674,19643,]:
    #    gfcat_visits.drop(visits.index[1],inplace=True)
    #if e in [16121,16654,16848,18620,24845]:
    #    gfcat_visits.drop(visits.index[0],inplace=True)
    #if e in [23279]:
    #    gfcat_visits.drop(visits.index[1:],inplace=True)
    if len(visits)==2:
        # Separation (degrees) of each of the two rows from the pair's mean position,
        # i.e. from the midpoint rather than from each other.
        # NOTE(review): a plain mean of RA is not safe for pairs straddling 0/360 deg.
        d = angularSeparation(visits['ra'].values.mean(),
                              visits['dec'].values.mean(),
                              visits['ra'].values,visits['dec'].values)
        # Eclipse-specific manual decisions, applied regardless of separation.
        # NOTE(review): these are independent `if`s and do not skip the proximity test below.
        # If one of these pairs also passes that test, a second drop is attempted on labels
        # that may already have been removed; confirm these pairs are wider than the threshold.
        if e==4276: # repointed and then both bands were very non-linear
            gfcat_visits.drop(visits.index[:],inplace=True)
        if e==7784:
            gfcat_visits.drop(visits.index[0],inplace=True)
        if e==21750: # drop the repointed one
            gfcat_visits.drop(visits.index[1],inplace=True)
        if e==27109:
            pass # This is an actual binary system in which both sources are resolvable and flare!
        if e==46828: # keep the better one
            gfcat_visits.drop(visits.index[0],inplace=True)
        # 12.5/60/60 is 12.5 arcseconds expressed in degrees; both rows must lie within that
        # distance of the midpoint for the pair to be treated as a likely duplicate.
        if all(d<12.5/60/60):
            # Manually adjusted positions carry obj_ids whose string form begins with '999999'.
            adj_test = [obj_id.startswith('999999') for obj_id in visits['obj_id'].astype('str').values]
            if any(adj_test):
                # drop the non-adjusted one, which will always be the first one by index because
                # of how we constructed the table => ugly shortcut; don't be like me
                gfcat_visits.drop(visits.index[0],inplace=True)
            elif e==9625:
                gfcat_visits.drop(visits.index[0],inplace=True)
            elif e==19732:
                gfcat_visits.drop(visits.index[0],inplace=True)
            elif e==22274:
                gfcat_visits.drop(visits.index[0],inplace=True)
            elif e==27109:
                pass # two flaring stars in the same visit right next to each other!
            elif e==35599:
                gfcat_visits.drop(visits.index[1],inplace=True)
            else:
                # Close pair with no manual decision recorded: print it for review.
                print(visits)
                #raise
# This results in 5 probable duplicates. I'm going to run these all the way through QA plot generation
# and then screen them manually. There are probably also other sources with unique obj_id that are basically
# the same source, and those will also be obvious during QA and I can filter them as well.

# Now add back the variable sources found in previous iterations of the project for completeness
# (every row of gfcat_old_finds except the trailing 12, which were already merged via gfcat_combined)
gfcat_visits = pd.concat([gfcat_visits,gfcat_old_finds.iloc[:-12]])

In [41]:
# Display the assembled visit table: the de-duplicated new finds followed by the earlier finds.
gfcat_visits

Unnamed: 0,index,eclipse,obj_id,ra,dec,morphology
0,0.0,26808,20726808.0,185.750875,29.544538,T
1,0.0,21378,39121378.0,204.877337,30.654018,U
2,0.0,26045,41026045.0,149.468637,2.338955,U
3,0.0,23102,42723102.0,257.253864,58.873073,F
4,0.0,17070,44617070.0,247.108147,40.861124,F
...,...,...,...,...,...,...
818,,46807,,183.176317,-26.926904,U
819,,46818,,184.069175,-30.364380,U
820,,46828,,183.710681,-32.431776,F
821,,46828,,183.706376,-32.431746,F


In [42]:
# Write only the identifying and positional columns of the visit table to CSV.
# index=False is the documented way to omit the row index (the argument is a bool);
# the previous index=None only worked because None is falsy.
gfcat_visits[['eclipse','obj_id','ra','dec','morphology']].to_csv(
    'gfcat_visit_table_positions.csv',index=False)

In [67]:
# Report every eclipse that has more than one row in gfcat_visits where at least one row
# lies within 17.5 arcseconds (17.5/60/60 degrees) of the group's mean position.
for eclipse_id in np.unique(gfcat_visits['eclipse']):
    same_eclipse = gfcat_visits[gfcat_visits['eclipse']==eclipse_id]
    if len(same_eclipse)<=1:
        continue
    # Separation (degrees) of each row from the mean position of the group.
    offsets = angularSeparation(same_eclipse['ra'].values.mean(),same_eclipse['dec'].mean(),
                                same_eclipse['ra'].values,same_eclipse['dec'].values)
    if not any(offsets<17.5/60/60):
        continue
    print(offsets)
    print(same_eclipse)
    print()