In [1]:
import os
import glob

import DESutils
import GPRutils
import vK2KGPR
import plotGPR
import vK2KGPR
import vonkarmanFT as vk

import numpy as np
import astropy.units as u
import astropy.table as tb
from astropy.time import Time
import matplotlib.pyplot as plt
# plt.style.use('~/GitHub/custom-matplotlib/custom.mplstyle')

from IPython import embed
from importlib import reload
import warnings
warnings.filterwarnings("ignore")

Created TAP+ (v1.2.1) - Connection:
	Host: gea.esac.esa.int
	Use HTTPS: True
	Port: 443
	SSL Port: 443
Created TAP+ (v1.2.1) - Connection:
	Host: geadata.esac.esa.int
	Use HTTPS: True
	Port: 443
	SSL Port: 443


# This notebook will run through how to solve for, save, and load GPR solutions for a DES exposure. All functions used here have been commented if you want to know more.

You should open runGPR.py and follow along. That is the script you can use to submit jobs easily with folio, and it does all of these steps for you. Even though that script exists, this tutorial might still be useful as a more guided tour of the important parts of the code.

NB: Some lines of code will be commented out because they take a while to run.

In [2]:
# First things first: we need to know which exposure numbers exist.

# This function creates a .pkl file for the band dictionary and the list of completed exposures for a particular zone.
# The list and dictionary take a while to create, so they are stored as .pkl files.
# runGPR.py uses this function when you pass the --zone command-line argument.

completed_exposures, band_dictionary = DESutils.createBandDict("/data3/garyb/tno/y6/zone134")
completed_exposures, band_dictionary

([244218,
  248717,
  348819,
  355302,
  355303,
  361577,
  361580,
  361582,
  362365,
  362366,
  364209,
  364210,
  364211,
  364213,
  364214,
  364215,
  365879,
  367482,
  367483,
  367484,
  367488,
  369801,
  369802,
  369803,
  369804,
  370199,
  370200,
  370204,
  370600,
  370601,
  370602,
  370609,
  371367,
  371368,
  371369,
  371649,
  372006,
  372060,
  372063,
  372064,
  372437,
  372522,
  372869,
  373245,
  374797,
  474260,
  474261,
  474262,
  474263,
  474264,
  474265,
  476846,
  476847,
  477204,
  477205,
  478378,
  478379,
  479272,
  479308,
  484481,
  484482,
  484483,
  484490,
  484491,
  484499,
  573396,
  573398,
  573796,
  576861,
  576862,
  576863,
  576864,
  576865,
  576866,
  579815,
  579816,
  586534,
  592152,
  674340,
  675158,
  675159,
  675645,
  676791,
  676792,
  676799,
  676800,
  676801,
  680497,
  681166,
  686427,
  686457,
  686459,
  689611,
  689612,
  689613,
  691478,
  696547,
  696552,
  784503,
  788112,


In [3]:
# Create a dataContainer object. The exposure data is loaded into it in the next cell.
dataC = GPRutils.dataContainer()

In [4]:
# Load data for a particular DES exposure. See the docs for all args and kwargs.
dataC.load(
    244218,  # Exposure number (the first entry of completed_exposures above)
    maxDESErr=250*u.mas**2,  # The max ERRAWIN_WORLD value we used for the paper
    useRMS=True,  # Replace ERRAWIN_WORLD values with empirically calculated RMS values
)

# Crucially, this sets up the dataC.TV and dataC.Pred tables, which will be used to store
# all of the sigma-clipping masks, fbar_s values, etc.

DES Exposure : 244218
DES Passband : Y
DES nSources : 59264
INFO: Query finished. [astroquery.utils.tap.core]
Maximum allowed stars is 15000, but
    there were 11384 matches within 0.5 arcsec.


In [5]:
# Next we can initialize a vonKarman2KernelGPR object, which has all of the "science" functions.
GP = vK2KGPR.vonKarman2KernelGPR(
    dataC,  # The dataContainer instance. It holds all of the data, so this object needs it.
    printing=True,  # Whether to print each optimization step to stdout. This does not have to be True if you only want each step written to a file.
    outDir="./",  # An existing directory in which to create the outfile that stores the optimization steps. The outfile is important for generating statistics on optimization time, number of steps, etc.
    curl=True  # Use the curl-free approach to GPR
)

# This step will initialize the outfile.

In [6]:
# This finds the fitted kernel parameters and stores them as dataC.fitCorrParams

# GP.fitCorr()

In [7]:
# Here we jackknife the dataset to find fbar_s values with the fitted parameters.
# It's important that fC=True because this function needs to access and generate different columns
# in the dataC.TV table at different parts of this algorithm.

# dataC.JackKnife(GP, dataC.fitCorrParams, fC=True)

In [8]:
# The dual optimization scheme using L_BFGS_B optimization was never formally written into vK2KGPR.py.
# See runGPR.py starting at line 230.

In [9]:
# Here we Jackknife the dataset to find fbar_s values with the final parameters.

# dataC.JackKnife(GP, dataC.params)

In [10]:
# This function will save all of the attributes and tables into a FITS file for you at the
# designated directory. The name conventions of the FITS file are automatically handled.
# The filenames will look like 
# GPR.{EXPNUM}.{BAND}RC.fits
# If useRMS is False then the R will not be there.
# If curl is false then the C will not be there.
# The naming scheme here is the exact same for the outfile above except the extension is .out instead of .fits.

# The directory used here should be the same one used above in the initialization of the vonKarman2KernelGPR object.

# dataC.saveFITS(".")

In [11]:
# The last thing the runGPR.py script does is save the time of each step and print the times to the outfile.

# One key takeaway

When I have written code that analyzes the data, I have assumed that the FITS file and the out file will always be in the same directory, so don't get rid of that out file.

In [1]:
# The last thing to do is load the FITS file that we just saved.
# When we load it, all attributes and tables should be exactly the same as they were when 
# they were initially created. So if you want to access the table of training+validation set data
# (or the Prediction set data), you can just do dataC.TV (or dataC.Pred) as before.

# dataC = GPRutils.loadFITS("./GPR.244218.YRC.fits")

In [2]:
# One last note. When you use this code, the dataC.TV and dataC.Pred tables will
# have columns for RA and Dec (these are the NEW_RA and NEW_DEC columns from Pedro's
# _final.fits files). These columns will not be present in FITS files from exposures
# that have already been run, because I made this change afterwards.