# Examining SN Hubble Residuals vs. Host Galaxy Properties

#### Goal:
This notebook will (eventually) fit and standardize simulated supernova observations using the SNooPy package to determine how the Hubble residuals vary with host galaxy properties. 

#### Current Issues:
- Light curves are currently being fit with DES filters, not LSST filters
- Simulated light curves do not have error values. We are using 1% of the principal value as a placeholder
- Due to an issue with the simulated catalogs, redshift values cannot be matched to each supernova. We use a random value to allow for continued development while the issue is addressed.

## Setup

Using the `GCRCatalogs` package we create catalog reader instances to access the necessary catalogs. For clarity we provide a few brief summaries:

- **Cosmo DC2:** Contains host galaxy properties used as inputs to simulated images
- **Truth Variable Summary Catalog:** Specifies metadata (type, object id, host id) of variable objects
- **Truth Variable Light Curve Catalog:** Contains the "observed" light curves including MJD and magnitudes
- **Truth Static Catalog:** A snapshot in time of the Truth Variable Light Curve Catalog containing additional target data (RA, Dec, redshift, etc.)

In [None]:
import os
from glob import glob
from random import random

import matplotlib.pyplot as plt
import numpy as np
#import snpy
from astropy.table import Table
from IPython.display import clear_output
from matplotlib import pyplot as plt

import GCRCatalogs


In [None]:
# Catalog readers (module-level; some are read directly by the functions below)
# Cosmo DC2: host galaxy properties used as inputs to the simulated images
cosmo_gc = GCRCatalogs.load_catalog('cosmoDC2_v1.1.4_small')
# Truth variable summary: metadata (type, object id, host id) of variable objects
truth_variable_summary_gc = GCRCatalogs.load_catalog('dc2_truth_run1.2_variable_summary')
# Truth variable light curves: the "observed" light curves (MJD and magnitudes)
truth_variable_gc = GCRCatalogs.load_catalog('dc2_truth_run1.2_variable_lightcurve')
# Truth static: snapshot with additional target data (RA, Dec, redshift, etc.)
truth_static_gc = GCRCatalogs.load_catalog('dc2_truth_run1.2_static')

# Directory in which intermediate files (the snoopy inputs) are stored
snoopy_dir = './snoopy_data'


## Creating Snoopy Inputs

Snoopy requires light curves to be specified as a series of input files. We create those files here.

In [None]:
def get_supernovae_data():
    """Query the Truth Variable Summary Catalog for supernovae

    Only rows matching ``sn == 1`` and ``galaxy_id != -1`` are selected
    (presumably -1 marks a supernova without a host galaxy; verify against
    the catalog schema).

    Returns:
        A dictionary of arrays with keys 'galaxy_id', 'uniqueId', 'ra', and 'dec'
    """

    columns = ['galaxy_id', 'uniqueId', 'ra', 'dec']
    row_selection = ['sn == 1', 'galaxy_id != -1']
    return truth_variable_summary_gc.get_quantities(columns, native_filters=row_selection)


In [None]:
def write_snoopy_file(out_path, **kwargs):
    """Write a snoopy input file to out_path

    The file starts with a header line ``name redshift ra dec``. Every band
    that has at least one observation then contributes a ``filter <name>``
    line followed by one ``mjd mag error`` line per observation.

    Args:
        out_path         (str): Path of the file to write (overwritten if it exists)
        name             (str): The name of the target
        redshift       (float): The redshift of the target
        ra             (float): The ra of the target
        dec            (float): The dec of the target
        <u, g, r, i, z> (dict): The date ('mjd') and magnitude ('mag')
            sequences of the band. Bands may be omitted.

    Raises:
        KeyError: If name, redshift, ra, or dec is not given
    """

    # Collect the lines and join once instead of growing a string in a loop
    lines = [f'{kwargs["name"]} {kwargs["redshift"]} {kwargs["ra"]} {kwargs["dec"]}']

    # Todo: Specify LSST filters including y
    # NOTE(review): the `*_s` names look like SNooPy's SDSS filter set - confirm
    band_names = {'u': 'u_s',
                  'g': 'g_s',
                  'r': 'r_s',
                  'i': 'i_s',
                  'z': 'z_s'}

    for band, name in band_names.items():
        if band not in kwargs:
            continue

        observations = kwargs[band]

        # A filter header without data rows carries no information, so bands
        # with no observations are left out. len() is used instead of
        # truthiness so that numpy arrays are handled as well.
        if len(observations['mjd']) == 0:
            continue

        lines.append(f'filter {name}')
        for mjd, mag in zip(observations['mjd'], observations['mag']):
            # Todo: Specify the actual error
            # Placeholder error of 1% of the magnitude; abs() keeps the
            # uncertainty non-negative even for negative magnitudes
            lines.append(f'{mjd} {mag} {abs(0.01 * mag)}')

    with open(out_path, 'w') as ofile:
        ofile.write('\n'.join(lines) + '\n')
    

In [None]:
def create_snoopy_inputs(out_dir, supernovae):
    """Generate snoopy input files for supernovae in the truth catalogue

    One file named ``<uniqueId>.txt`` is written to out_dir for every
    supernova that has at least one observation in the u, g, r, i, or z band.

    Args:
        out_dir     (str): The directory to write input files to (created if missing)
        supernovae (dict): Catalog data returned by get_supernovae_data

    Returns:
        A list of created file paths
    """

    # Integer codes of the light curve catalog's ``filter`` column. The y band
    # (code 5) is left out because write_snoopy_file has no y filter yet;
    # counting it would produce input files without any data rows for
    # supernovae observed in y only.
    filter_codes = {'u': 0, 'g': 1, 'r': 2, 'i': 3, 'z': 4}

    os.makedirs(out_dir, exist_ok=True)
    created_paths = []

    iter_data = zip(supernovae['ra'], supernovae['dec'], supernovae['uniqueId'], supernovae['galaxy_id'])
    for ra, dec, sn_id, gal_id in iter_data:

        # Todo: Specify actual redshift, e.g. from
        # truth_static_gc.get_quantities(['redshift'], filters=[f'object_id == {gal_id}'])
        redshift = 3 * random()

        # Truth variable catalog is SQL based so individual queries go by quickly
        light_curves = {
            band: truth_variable_gc.get_quantities(
                ['mjd', 'mag'],
                filters=[f'filter == {code}'],
                native_filters=['sn == 1', f'uniqueId == {sn_id}'],
            )
            for band, code in filter_codes.items()
        }

        # Skip SN with no observations in any of the bands that get written
        if not any(len(light_curve['mjd']) for light_curve in light_curves.values()):
            continue

        # Todo: Store host properties in a meta data table, e.g. from
        # cosmo_gc.get_quantities(['baseDC2/obs_sfr', 'stellar_mass'], filters=[f'galaxy_id == {gal_id}'])

        out_path = os.path.join(out_dir, f'{sn_id}.txt')
        write_snoopy_file(
            out_path,
            name=sn_id,
            redshift=redshift,
            ra=ra,
            dec=dec,
            **light_curves
        )

        created_paths.append(out_path)

    return created_paths


In [None]:
# Load the supernovae listed in the truth variable summary catalog
supernovae = get_supernovae_data()

# Truncate every column to its first five entries to keep test runs short
for column in supernovae:
    supernovae[column] = supernovae[column][:5]

# Write one snoopy input file per supernova and report how many were created
snoopy_paths = create_snoopy_inputs(snoopy_dir, supernovae)
print(f'{len(snoopy_paths)} input files created')


## Curve Fitting

Having saved the light curves as individual files, we run snoopy for each light curve.


In [None]:
# NOTE: this cell needs `snpy` in scope; its import is currently commented out
# in the imports cell at the top of the notebook.

# Parallel lists: one entry per successfully fitted supernova
dist_mod = []
redshift = []

for path in snoopy_paths:
    try:
        s = snpy.get_sn(path)
        s.choose_model('max_model')
        s.fit()

    except Exception as e:
        # Fitting is best effort: report the failing file and move on
        print('error', e, path)
        continue

    # The redshift is the second field of the header line of the input file
    with open(path) as f:
        redshift_this = float(f.readline().split()[1])

    # Append to both lists together so they stay aligned
    dist_mod.append(s.get_distmod(cosmo='LambdaCDM'))
    redshift.append(redshift_this)


In [None]:
# Distance modulus of every fitted supernova against its redshift, using the
# lists filled by the curve fitting cell
plt.semilogy(redshift, dist_mod, '.')
plt.xlabel('redshift')
plt.ylabel(r'$\mu$')
