# Time Correlation Filter Function

The purpose of this notebook is to plan and test a time correlation function that filters sources according to how well their optical and radio data line up in time.

In [1]:
#here are the necessary imports
import os
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from io import StringIO
from vasttools.pipeline import Pipeline
from vasttools.query import Query
import Projecttools as pro #brand new module for frequently used code!

%matplotlib inline

In [2]:
# Load the FINK 2020 sources that were cross-matched to VAST sources.
# NOTE(review): pickle files can execute arbitrary code on load -- only read
# this file if it comes from a trusted location.
cms = pd.read_pickle('Fink_2020_sources_matched_to_VAST_all_sources.pickle')

# presumably adds the 'family' column to cms in place (its return value is not
# used and the next line groups on 'family') -- confirm in Projecttools
pro.family_sort(cms)

# Number of sources per family, largest family first
family_counts = cms.groupby('family').size()
family_counts.sort_values(ascending=False)

family
AGN                827
Unknown            516
Galaxy             167
Solar System        81
Radio               70
Supernova           51
Multiwavelength     39
Star                21
dtype: int64

In [3]:
# Pipeline() is called with no arguments; per the original note it locates the
# pipeline base directory on its own, so nothing needs to be specified here.
pipe = Pipeline()

# Name of the VAST pipeline run to analyse
RUN_NAME = 'tiles_corrected'
my_run = pipe.load_run(RUN_NAME)



In [4]:
# The eta-V analysis is very slow to run, so the thresholds it produced
# earlier are hard-coded here instead of being recomputed on every run.
eta_thresh, v_thresh = 2.315552652171963, 0.2878888414273631

In [5]:
# Filter the matched catalogue with the eta/V thresholds for the loaded run
# (the exact selection criteria live in Projecttools.eta_v_candidate_filter).
cms_candidates = pro.eta_v_candidate_filter(
    cms,
    my_run,
    eta_thresh,
    v_thresh,
)

# Number of candidates per family, largest family first
candidate_family_counts = cms_candidates.groupby('family').size()
candidate_family_counts.sort_values(ascending=False)

There are 213 candidate sources:


family
AGN                93
Unknown            53
Solar System       30
Galaxy             15
Radio               9
Star                5
Multiwavelength     4
Supernova           4
dtype: int64

I will be testing this function on the eta-V filtered sources. For this to work, I need both the radio and the optical data for each source. Since the FINK broker limits how many sources can be queried at a time, I will have to either take a random sample of sources from the filtered catalogue or apply the function to a sample of each family separately.

In [6]:
# Hand-picked ZTF object IDs from the curated list of interesting sources
# (their lightcurves are shown in the accompanying PowerPoint).
Special_IDS_1 = [
    'ZTF18abqsfre',
    'ZTF19acxyuza',
    'ZTF19aalpgfb',
    'ZTF20aakbllj',
    'ZTF19ablozzh',
    'ZTF18acyerqq',
    'ZTF19aaapnwa',
    'ZTF19aauczln',
    'ZTF19aarqoey',
    'ZTF18acnnguv',
    'ZTF18aczeoif',
    'ZTF19abbnhro',
]

In [7]:
# Keep only the candidates whose objectId is in the curated list, then
# renumber the rows from 0 so that row ranges can be pulled out by label.
# reset_index() keeps the previous index as a regular 'index' column.
cms_candidates_selection = (
    cms_candidates
    .query('objectId == @Special_IDS_1')
    .reset_index()
)
len(cms_candidates_selection)

12

In [8]:
# Take a batch of rows from the selection by index label:
#     frame.loc[start:stop:step]
# .loc slices by label and includes the stop label, so 0:11:1 picks the rows
# labelled 0 through 11 (12 rows).
first_label, last_label, label_step = 0, 11, 1
candidate_sample = cms_candidates_selection.loc[first_label:last_label:label_step]
len(candidate_sample)

12

In [9]:
# Cutout image columns to request from FINK alongside the alert data
cutouts = [
    'b:cutoutScience_stampData',
    'b:cutoutTemplate_stampData',
    'b:cutoutDifference_stampData',
]

# Request the data for a single object from the FINK portal.
# A timeout is set so the cell cannot hang forever if the portal stops
# responding (without one, requests waits indefinitely).
r = requests.post(
    'https://fink-portal.org/api/v1/objects',
    json={
        'objectId': 'ZTF18abqsfre',
        'output-format': 'json',
        'withcutouts': 'True',
        'cols': ','.join(cutouts),
        'withupperlim': 'True',  # important for lightcurve plotting
    },
    timeout=120,
)

# Stop here with a clear HTTP error if the portal rejected the request,
# rather than failing later while trying to parse an error page as JSON.
r.raise_for_status()

In [10]:
# fsd = "FINK source data": decode the response body and parse the JSON text
# into a DataFrame.
response_text = r.content.decode()
fsd = pd.read_json(StringIO(response_text))

In [120]:
# Display the FINK data returned for the single object requested above
# ('ZTF18abqsfre'). This should contain the optical time data for that object;
# the 'd:tag' column marks each row as valid, upperlim or badquality.
fsd

Unnamed: 0,b:cutoutDifference_stampData,b:cutoutScience_stampData,b:cutoutTemplate_stampData,d:DR3Name,d:Plx,d:cdsxmatch,d:e_Plx,d:gcvs,d:mulens,d:nalerthist,...,v:rate(g-r),v:dg,v:rate(dg),v:dr,v:rate(dr),v:lastdate,v:firstdate,v:lapse,v:constellation,d:tag
0,"[[-5.9875321388, -6.7324624062, 3.4277901649, ...","[[90.1131591797, 95.1599273682, 82.206489563, ...","[[87.1571350098, 88.1126861572, 87.6664505005,...",Gaia DR3 2562630793080464000,0.0933,Seyfert_1,0.0902,Unknown,0.0,8.0,...,,-0.078635,-0.013218,0.0,0.0,2022-09-02 09:56:00.001,2019-06-28 10:41:05.997,1161.968681,Pisces,valid
1,,,,,,,,,,,...,,,,,,,,,,upperlim
2,,,,,,,,,,,...,,,,,,,,,,upperlim
3,,,,,,,,,,,...,,,,,,,,,,upperlim
4,"[[0.6013054848, 13.9731464386, -4.9625067711, ...","[[92.7742233276, 85.338180542, 98.8190383911, ...","[[93.5476150513, 96.9389190674, 94.7161712646,...",Gaia DR3 2562630793080464000,0.0933,Seyfert_1,0.0902,Unknown,0.0,10.0,...,-0.023308,0.032750,0.005426,0.0,0.0,2022-08-27 11:09:26.997,2019-06-28 10:41:05.997,1156.019688,Pisces,valid
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136,,,,,,,,,,,...,,,,,,,,,,badquality
137,,,,,,,,,,,...,,,,,,,,,,badquality
138,,,,,,,,,,,...,,,,,,,,,,badquality
139,,,,,,,,,,,...,,,,,,,,,,badquality


In [124]:
# vsd = "VAST source data": one measurements table per curated object ID, in
# the same order as Special_IDS_1. For each ID, the first matching row's
# 'matched_id' (cast to int) is used to fetch the source from the run.
vsd = [
    my_run.get_source(
        cms_candidates_selection.loc[
            cms_candidates_selection['objectId'] == object_id, 'matched_id'
        ].astype(int).values[0]
    ).measurements
    for object_id in Special_IDS_1
]

As you can see below, the data is there, I just need to format it correctly.

In [125]:
# Show the raw list of per-source measurement tables built above
vsd

[    source                       island_id  \
 0  3187458          RACS_0126+00A_island_1   
 1  3187458   SB9667_VAST_0126+00A_island_1   
 2  3187458  SB10323_VAST_0102+00A_island_5   
 3  3187458                SB11185_island_5   
 4  3187458                SB11275_island_2   
 5  3187458                SB11445_island_2   
 6  3187458                SB11517_island_2   
 7  3187458  SB14867_VAST_0126+00A_island_1   
 8  3187458                SB15640_island_6   
 
                          component_id  local_rms         ra        ra_err  \
 0          RACS_0126+00A_component_1a     14.866  19.577106  2.045875e-05   
 1   SB9667_VAST_0126+00A_component_1a      1.459  19.577015  1.883213e-06   
 2  SB10323_VAST_0102+00A_component_5b      3.638  19.575748  3.168933e-05   
 3                SB11185_component_5b      3.296  19.575937  1.529328e-05   
 4                SB11275_component_2a      1.414  19.576441  1.696701e-06   
 5                SB11445_component_2a      1.230  19.577040

In [29]:
#similarly, vsd should have the radio time data for each object
#vsd=my_run.get_source(cms_candidates_selection[cms_candidates_selection['objectId'] == Special_IDS_1]['matched_id'].astype(int).values[0])

In [73]:
#for i in vsi:
#    z=vsd.filter(vsd.source == i)   