In [1]:
import sys
sys.path.insert(0, '/global/common/software/lsst/common/miniconda/current/lib/python3.6/site-packages')

In [2]:
#If running in Colab, to switch to GPU, go to the menu and select Runtime -> Change runtime type -> Hardware accelerator -> GPU.

#In addition, uncomment and run the following code:
# !pip install pzflow

#Intro to pzflow

#This notebook demonstrates building a normalizing flow with pzflow to learn the joint probability distribution of some 2-D data.#

#You do not need to have any previous knowledge of normalizing flows to get started with pzflow, however if you are interested, here are some good sources:

#    Eric Jang's tutorial: part 1, part 2
#    Here is a comprehensive list of papers, blogs, videos, and packages
#    Two good intro papers using Coupling Layers: NICE, Real NVP
#    The paper on Neural Spline Couplings

import jax.numpy as np
import matplotlib.pyplot as plt

import pzflow
from pzflow import Flow
from pzflow.bijectors import Chain, ColorTransform, InvSoftplus, StandardScaler, RollingSplineCoupling
#from pzflow.examples import galaxy_data
from pzflow.distributions import Uniform, Joint, Normal
import pandas as pd
import GCRCatalogs
import timeit
import numpy as onp

import seaborn as sns
#change the plot styling
sns.set_context("talk",font_scale=1.5)

sns.set_style('white', {'axes.linewidth': 0.5})
plt.rcParams['xtick.major.size'] = 15
plt.rcParams['ytick.major.size'] = 15

plt.rcParams['xtick.minor.size'] = 10
plt.rcParams['ytick.minor.size'] = 10
plt.rcParams['xtick.minor.width'] = 2
plt.rcParams['ytick.minor.width'] = 2

plt.rcParams['xtick.major.width'] = 2
plt.rcParams['ytick.major.width'] = 2
plt.rcParams['xtick.bottom'] = True
plt.rcParams['xtick.top'] = True
plt.rcParams['ytick.left'] = True
plt.rcParams['ytick.right'] = True

plt.rcParams['xtick.minor.visible'] = True
plt.rcParams['ytick.minor.visible'] = True
plt.rcParams['xtick.direction'] = 'in'
plt.rcParams['ytick.direction'] = 'in'

plt.rcParams.update({
    "text.usetex": False,
    "font.family": "sans-serif",
    "font.sans-serif": ["Helvetica"]})
## for Palatino and other serif fonts use:
plt.rcParams.update({
    "text.usetex": False,
    "font.family": "serif",
    "font.serif": ["Palatino"],
})


%matplotlib inline



In [3]:
import sys
sys.path.insert(0, '/global/common/software/lsst/common/miniconda/current/lib/python3.6/site-packages')

In [4]:
#merge Kpc sets 
set1 = pd.read_csv("/global/cscratch1/sd/agaglian/DC2full_pzRedshifts_tenHealpix_sdss_updMag_Rkpc.tar.gz")
set2 = pd.read_csv("/global/cscratch1/sd/agaglian/DC2full_pzRedshifts_tenHealpix_sdss_updMag_Rkpc_secondSet.tar.gz")

In [10]:
set_comb = pd.concat([set1, set2])
set_comb.to_csv("/global/cscratch1/sd/agaglian/DC2full_pzRedshifts_twentyHealpix_sdss_updMag_Rkpc_Final.tar.gz",index=False)

In [12]:
set_comb

Unnamed: 0,galaxy_id,DC2redshift,PZflowredshift,Mag_true_g_sdss_z0,Mag_true_r_sdss_z0,Mag_true_i_sdss_z0,Mag_true_z_sdss_z0,R_kpc
0,8751222988,0.758340,0.804479,-17.489836,-17.634062,-17.702017,-17.734333,9.460621
1,8752517690,1.014355,1.036861,-18.488428,-18.759039,-18.960004,-19.081531,19.197772
2,8757406419,1.796789,1.827007,-19.581698,-19.709780,-19.808768,-19.871696,18.690062
3,8766076619,2.438104,2.515672,-20.711130,-20.837253,-20.932871,-21.001268,53.626015
4,8763087211,2.058508,2.194695,-20.823303,-20.905174,-20.965090,-20.997666,44.047350
...,...,...,...,...,...,...,...,...
24548277,6138061063,2.027011,2.001752,-18.355928,-18.362360,-18.378444,-18.380008,60.727286
24548278,6129737713,1.323697,1.364383,-17.492582,-17.377902,-17.330597,-17.308857,
24548279,6129777172,1.422939,0.881403,-18.550947,-18.685375,-18.797890,-18.869108,8.714039
24548280,6130584111,1.528024,1.530636,-18.657193,-18.735358,-18.779700,-18.797636,


In [None]:
# Implement a magnitude cut where LSST will never see a galaxy for each of the chunks of data 
df_set = []
#for s in [10000, 10001, 10002, 10003, 10003, 10004, 10005, 10006, 10007, 10008, 10009]:
for s in [9556, 9557, 9558, 9559, 9560, 9683, 9683, 10068, 10069, 10070, 10071, 10072, 10195, 10196, 10197, 10198, 10199]:    
    file1 = pd.read_csv("/global/cscratch1/sd/agaglian/additional_healpixels/DC2full_pzRedshifts_div1000_%i.csv"%s)
    file1_cut = file1[file1['mag_true_r_lsst'] < 28]
    df_set.append(file1_cut)
    file1_cut.to_csv("/global/cscratch1/sd/agaglian/additional_healpixels/DC2full_pzRedshifts_div1000_%i_cut.tar.gz"%s,index=False)

In [None]:
# Implement a magnitude cut where LSST will never see a galaxy for each of the chunks of data 
df_set = []
for s in [10000, 10001, 10002, 10003, 10003, 10004, 10005, 10006, 10007, 10008, 10009]:
    file1 = pd.read_csv("/global/cscratch1/sd/agaglian/pzflow_oversampled/DC2full_pzRedshifts_1itertest_div%i_cut.tar.gz"%s)
    #file1_cut = file1[file1['mag_true_r_lsst'] < 28]
    df_set.append(file1)

In [None]:
# Combine chunks into one dataframe
df_comb = pd.concat(df_set, ignore_index=True)

In [None]:
# Make cuts at 0.05 > z > 3.0 to make sure we remove the edge artifacts
df_comb = df_comb[df_comb['PZflowredshift'] > 0.05]
df_comb = df_comb[df_comb['PZflowredshift'] < 3.0]

In [None]:
# Plot to see what the colors sv pzflowredshifts look like 
%matplotlib inline 
plt.figure(figsize=(10,7))
plt.plot(df_comb['PZflowredshift'], df_comb['mag_true_g_lsst'] - df_comb['mag_true_i_lsst'], 'o', ms=0.05, alpha=0.1)
plt.xlabel("PZFlowRedshift")
plt.axvline(x=0.05, c='tab:red')
plt.axvline(x=3.0, c='tab:red')
plt.ylabel(r"$g-i$")

In [None]:
# And SFR (which before we were sampling with PZFlow, but are no longer)
%matplotlib inline 
plt.figure(figsize=(10,7))
plt.plot(df_comb['PZflowredshift'], df_comb['SFRtot'], 'o', ms=0.05, alpha=0.1)
plt.xlabel("PZFlowRedshift")
plt.ylim((0, 100))
plt.ylabel(r"SFRtot")

In [None]:
# And a comparison to the original (discrete) SFR vs z plot
%matplotlib inline 
plt.figure(figsize=(10,7))
plt.plot(df_comb['DC2redshift'], df_comb['SFRtot'], 'o', ms=0.05, alpha=0.1)
plt.xlabel("DC2Redshift")
plt.ylim((0, 100))
plt.ylabel(r"SFRtot")

In [None]:
#looks good, save it! 
df_comb.to_csv("/global/cscratch1/sd/agaglian/DC2full_pzRedshifts_tenHealpix_secondSet_Final.tar.gz",index=False)

In [None]:
df_comb = pd.read_csv("/global/cscratch1/sd/agaglian/DC2full_pzRedshifts_secondSet_tenHealpix_Final.tar.gz")

In [None]:
#load the original dataframe and get the absolute magnitudes in SDSS filters from cosmoDC2 (we were using LSST apparent magnitudes for pzflow sampling)
df_comb = pd.read_csv("/global/cscratch1/sd/agaglian/DC2full_pzRedshifts_tenHealpix_secondSet_Final.tar.gz",usecols=['galaxy_id', 'PZflowredshift'])
cosmo=GCRCatalogs.load_catalog("cosmoDC2_v1.1.4")
features = ['galaxy_id', 'Mag_true_g_sdss_z0', 'Mag_true_r_sdss_z0', 'Mag_true_i_sdss_z0', 'Mag_true_z_sdss_z0']
gal = cosmo.get_quantities(features, filters=[(lambda x: np.in1d(x, df_comb['galaxy_id'].values), 'galaxy_id')])#, return_iterator=True)
df_sdss = pd.DataFrame(gal)
df_sdss_comb = df_sdss.merge(df_comb, on='galaxy_id', ignore_index=True)
df_sdss_comb.to_csv("/global/cscratch1/sd/agaglian/DC2full_pzRedshifts_tenHealpix_secondSet_Final_sdss.tar.gz",index=False)

In [None]:
# Load our df of SDSS photometry
df_sdss_comb = pd.read_csv("/global/cscratch1/sd/agaglian/DC2full_pzRedshifts_tenHealpix_secondSet_Final_sdss.tar.gz")

In [None]:
# Look at the % change in redshift w PZFlow
plt.figure(figsize=(10,7))
plt.plot(df_sdss_comb['DC2redshift'], (df_sdss_comb['PZflowredshift'] - df_sdss_comb['DC2redshift']), 'o', ms=0.03, alpha=0.5);
plt.xlabel(r"$z_{\rm DC2}$")
plt.ylabel(r"$(z_{\rm PZFlow} - z_{DC2})$")
plt.ylim((-1, 1))
#plt.yscale("log")

In [None]:
#calculate all luminosity distances, old and new
import astropy.units as u
from astropy.cosmology import FlatLambdaCDM
cosmo = FlatLambdaCDM(H0=70, Om0=0.3, Tcmb0=2.725)
dLum_old = cosmo.luminosity_distance(df_sdss_comb['DC2redshift']).to(u.pc).value
dLum_new = cosmo.luminosity_distance(df_sdss_comb['PZflowredshift']).to(u.pc).value

In [None]:
# Check out how our luminosity distance changes between PZFlow redshift and cosmoDC2 redshift
plt.figure(figsize=(10,7))
plt.hist(np.abs(dLum_old - dLum_new)/dLum_old*100, bins=np.logspace(-6, 5, 50))
plt.xlabel(r"$(D_{\rm New} - D_{\rm Old}) / D_{\rm Old} (\%)$")
plt.ylabel(r"$N$")
plt.xscale("log")
plt.yscale("log")

In [None]:
# Look at the % change in redshift w PZFlow
plt.figure(figsize=(10,7))
plt.hist((df_sdss_comb['PZflowredshift'] - df_sdss_comb['DC2redshift'])/df_sdss_comb['DC2redshift']*100, bins=np.linspace(-100, 100));
plt.xlabel("% Redshift Change")
plt.ylabel(r"$N$")
plt.yscale("log")

In [None]:
# Update the absolute SDSS magnitudes for matching with the GHOST catalog 
for band in ['g', 'r', 'i', 'z']:
    #Mv = m - 2.5 log[ (d/10)2 ]. 
    
    mapp = df_sdss_comb['Mag_true_%s_sdss_z0'%band] + 2.5 * np.log10((dLum_old/10)**2)
    
    #now convert back with the NEW redshift
    Mtrue = mapp - 2.5 * np.log10((dLum_new/10)**2)
    
    #update the values in the dataframe
    df_sdss_comb['Mag_true_%s_sdss_z0_upd'%band] = Mtrue

In [None]:
# See how the magnitudes differ after updating
# actually by quite a bit...
df_sdss_comb[['Mag_true_g_sdss_z0', 'Mag_true_g_sdss_z0_upd']]

In [None]:
# Loop through and calculate the percent change in the absolute magnitude in each band
for band in ['g', 'r', 'i', 'z']:
    plt.figure(figsize=(10,7))
    plt.hist((df_sdss_comb['Mag_true_%s_sdss_z0'%band].values - df_sdss_comb['Mag_true_%s_sdss_z0_upd'%band].values)/df_sdss_comb['Mag_true_%s_sdss_z0'%band].values, bins=np.linspace(-1, 1, 50))
    plt.xlabel(r"$(M_{%s, \rm Old} - M_{%s, \rm New}) / M_{%s, \rm Old}$"%(band, band, band))
    plt.yscale("log")
    plt.ylabel(r"$N$")

In [None]:
# Update the absolute magnitudes in the catalog and then save them!
for band in ['g', 'r', 'i', 'z']:
    del df_sdss_comb['Mag_true_%s_sdss_z0'%band]
    df_sdss_comb['Mag_true_%s_sdss_z0'%band] = df_sdss_comb['Mag_true_%s_sdss_z0_upd'%band]
    del df_sdss_comb['Mag_true_%s_sdss_z0_upd'%band]
df_sdss_comb.to_csv("/global/cscratch1/sd/agaglian/DC2full_pzRedshifts_tenHealpix_secondSet_sdss_updMag.tar.gz",index=False)

In [None]:
np.sort(df_sdss_comb['galaxy_id'].values)

In [None]:
df_Image = pd.read_csv("/global/cscratch1/sd/agaglian/FullImageMomentsCatalog_noR.tar.gz")

In [None]:
onp.sort(df_Image.loc[df_Image['cosmoDC2_ID'] > 0, 'cosmoDC2_ID'].values)

In [None]:
onp.sort(df_Image.loc[df_Image['cosmodc2_id_truth'] > 0, 'cosmodc2_id_truth'].values)

In [None]:
df_Image[df_Image['cosmoDC2_ID'] == -2134901886]

In [None]:
len(df_Image)

In [None]:
np.sort(df_Image['cosmoDC2_ID'].values)

In [None]:
len(df_Image[df_Image['cosmoDC2_ID'].values < 0])/len(df_Image)

In [None]:
#combine with the image catalog and get physical radius values
df_Image = pd.read_csv("/global/cscratch1/sd/agaglian/FullImageMomentsCatalog.tar.gz", usecols=['cosmoDC2_ID', 'RSQ_pixel_gal'])
df_Image['galaxy_id'] = df_Image['cosmoDC2_ID']
del df_Image['cosmoDC2_ID']
df_sdss_wImg = df_Image.merge(df_sdss_comb, on='galaxy_id')

In [None]:
#df_sdss_wImg['RSQ_arcsec_gal'] = df_sdss_wImg['RSQ_pixel_gal']*0.2 # 0.2''/px is the scale for LSST
#dLum = cosmo.luminosity_distance(df_sdss_wImg['PZflowredshift']).to(u.pc).value #get luminosity distance again

#first take sqrt(RSQ) to get R in px, then convert to arcsec, then convert to physical distance using the following formula:
#(angle in arcsec)/206265 = d/D ---> d = (angle in arcsec * luminosity distance)/206265 (divide by 1.e3 to get kpc)
df_sdss_wImg['RSQ_kpc_gal'] = np.sqrt(df_sdss_wImg['RSQ_pixel_gal'])*0.2 * 
    cosmo.luminosity_distance(df_sdss_wImg['PZflowredshift']).to(u.pc).value/206265./1.e3

In [None]:
# get rid of this column and save the full catalog
del df_sdss_wImg['RSQ_pixel_gal']
df_sdss_wImg.to_csv("/global/cscratch1/sd/agaglian/DC2full_pzRedshifts_tenHealpix_sdss_updMag_Rkpc.tar.gz",index=False)

In [None]:
#check out the catalog
df_sdss_healpix = pd.read_csv("/global/cscratch1/sd/agaglian/DC2full_pzRedshifts_tenHealpix_sdss_updMag_Rkpc.tar.gz")

In [None]:
df_sdss_healpix 

In [None]:
cosmo=GCRCatalogs.load_catalog("cosmoDC2_v1.1.4")
features = ['galaxy_id', 'Mag_true_g_sdss_z0', 'Mag_true_r_sdss_z0', 'Mag_true_i_sdss_z0', 'Mag_true_z_sdss_z0']
gal = cosmo.get_quantities(features, filters=[(lambda x: np.in1d(x, np.array([1375530850])), 'galaxy_id')])#, return_iterator=True)

In [None]:
df_Image = pd.read_csv("/global/cscratch1/sd/agaglian/FullImageMomentsCatalog.tar.gz")
df_Image.dtypes

In [None]:
df_Image['cosmo

In [None]:
df_Image.dtypes

In [None]:
df_Image['galaxy_id'] = df_Image['cosmodc2_id_truth']

In [None]:
matched = pd.read_csv("/global/cscratch1/sd/agaglian/DC2full_pzRedshifts_tenHealpix_sdss_updMag_Rkpc.tar.gz")

In [None]:
plt.hist(matched['Mag_true_g_sdss_z0'])

In [None]:
#usecols=['cosmoDC2_ID', 'RSQ_pixel_gal'], 

In [None]:
df_sdss_comb.dtypes

In [None]:
df_sdss_comb = pd.read_csv("/global/cscratch1/sd/agaglian/DC2full_pzRedshifts_tenHealpix_sdss_updMag.tar.gz")

In [None]:
len(df_sdss_comb)

In [None]:
#combined = 
df_sdss_comb.merge(df_Image, on='galaxy_id')

In [None]:
combined.dropna(subset=['DC2redshift'])

In [None]:
len(df_sdss_comb)

In [None]:
len(df_Image)