# FINK Portal Database Query for Crossmatched Sources

Since I've been able to download the full FINK data from the crossmatched catalogue using batching before, I no longer need to query it individually within each notebook. To save on space and having to redo it for each notebook, I've created this notebook to house the code necessary to query the portal and save the resulting dataframe as a pickle file to be imported into the other notebooks.

In [2]:
#here are the necessary imports
import os
import sys
import gc
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from io import StringIO
from vasttools.pipeline import Pipeline
from vasttools.query import Query
import Projecttools as pro #brand new module for frequently used code!

%matplotlib inline

In [3]:
#This reads in the crossmatch catalogue
cms = pd.read_pickle('Fink_2020_sources_matched_to_VAST_all_sources.pickle')

#These are the FINK IDs that are selected from the catalogue.
Idlist=cms['objectId'][10:20].reset_index()
Idlist.drop('index', inplace=True, axis=1)
Idlist=Idlist['objectId']

In [22]:
num_elem=len(Idlist)#length of id list
num_chunks=num_elem//30+1 #number of chunks, based on how you want to divide them up. in this case, 30 IDS per chunk
list_chunks=(np.array_split(np.arange(num_elem), num_chunks))#np.arange(num_elem) makes an ordered array, from 0 to (num_elem - 1).
                                                            #np.array_split splits said ordered array according to the number of chunks specified by num_chunks
                                                            #each chunk is an element in the array 'list_chunks'
for i in list(range(len(list_chunks))):
    list_chunks[i]=list_chunks[i].tolist()

#defining column array for cutouts
cutouts=[
'b:cutoutScience_stampData',
'b:cutoutTemplate_stampData',
'b:cutoutDifference_stampData'
]

for chunk_idx in list_chunks: #for each chunk in list_chunks
    start,end=chunk_idx[0],chunk_idx[-1]+1 #define the starting and ending indexes for the given chunk

    #this is the request made to the fink portal to pull out the info for each source
    r = requests.post(
        'https://fink-portal.org/api/v1/objects',
        json={
        'objectId': ','.join(Idlist[start:end].to_list()), #This is where the 'chunk_idx[-1] +1' comes into play. the 'end' variable when slicing the list is inclusive of the index.
        'output-format': 'json',
        'withcutouts': 'True',
        'cols': ','.join(cutouts),
        'withupperlim': 'True' #important for lightcurve plotting
        }
    )
    df_tmp=pd.read_json(StringIO(r.content.decode()))#define a temporary dataframe that holds the queried sources from the chunk
    
    #saves the temporary dataframe to a folder as a .pkl file. the naming is based on which batch we're looking at
    df_tmp.to_pickle('/home/jovyan/work/Project_VAST_FINK/FINK_Batches_2/Batch_{}.pkl'.format(list_chunks.index(chunk_idx)+1))
    
    #clears memory from jupyter to help it not get stuck.
    gc.collect()
    
list_df=[] #empty array to hold fink sources.

#now, we're loading back in all the batches we saved and appending/concatonating them all back together into one dataframe: fsd
for chunk_idx in list_chunks:
    df_tmp=pd.read_pickle('/home/jovyan/work/Project_VAST_FINK/FINK_Batches_2/Batch_{}.pkl'.format(list_chunks.index(chunk_idx)+1+46))
    list_df.append(df_tmp)
#fsd=pd.concat(list_df)

#fsd.to_pickle('/home/jovyan/work/Project_VAST_FINK/FINK_Batches_2/FSD_Partial.pkl')

ValueError: Expected object or value

In [None]:
list_df=[] #empty array to hold fink sources.

x=np.arange(1,11)
#now, we're loading back in all the batches we saved and appending/concatonating them all back together into one dataframe: fsd
for i in x:
    df_tmp=pd.read_pickle('/home/jovyan/work/Project_VAST_FINK/FINK_Batches/Batch_{}.pkl'.format(i))
    list_df.append(df_tmp)
    gc.collect()

fsd=pd.concat(list_df)