In [0]:
# Input locations used by this notebook (absolute paths on the shared data volume).
# Isochrone directory; not referenced by any cell visible in this notebook section.
ISOCHRONES_PATH = '/datascope/subaru/data/cmdfit/isochrones/dartmouth/import/afep0_cfht_sdss_hsc'
# Observed catalog, read below with SubaruHSC.text_observation_reader().
OBS_PATH = '/datascope/subaru/data/cmdfit/dSph/umi_tpall3e_g24.cat'
# Simulated sample (HDF5), read below with Hdf5SimulationReader; pmap.h5 is later saved in the same directory.
SIM_PATH = '/datascope/subaru/data/cmdfit/run/umi/sim/bin_chab_250k_001/sample.h5'

In [0]:
import os, sys
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.patches import Ellipse, Circle
from scipy.special import logsumexp
from scipy.interpolate import interp1d

In [0]:
plt.rc('font', size=6)  # set the default matplotlib font size for all figures in this notebook

In [0]:
%load_ext autoreload

In [0]:
%autoreload 2

# Load observations

In [0]:
from pfs.ga.targeting.instrument import *
from pfs.ga.targeting.diagram import CMD, CCD, ColorAxis, MagnitudeAxis
from pfs.ga.targeting.photometry import Photometry, Magnitude, Color

In [0]:
# Read the observed catalog at OBS_PATH with the text reader supplied by SubaruHSC.
obs = SubaruHSC.text_observation_reader().read(OBS_PATH)
# Preview the first rows of the loaded table.
obs.data.head()

# Load simulation

In [0]:
from pfs.ga.targeting.instrument import *
from pfs.ga.targeting.diagram import CMD, CCD, ColorAxis, MagnitudeAxis
from pfs.ga.targeting.photometry import Photometry, Magnitude, Color

In [0]:
# Ursa Minor: diagram definitions (this variant is the active one, `if True`).
# Defines the notebook-level `hsc`, `cmd` and `ccd` used by all later cells.
if True:
    hsc = SubaruHSC.photometry()
    # Color-magnitude diagram: Color([g, i]) axis vs. g magnitude axis.
    cmd = CMD([
            ColorAxis(
                Color([hsc.magnitudes['g'], hsc.magnitudes['i']]),
                limits=(-1, 4)),
            MagnitudeAxis(
                hsc.magnitudes['g'],
                limits=(15.5, 24.5)
            )
        ])
    # Color-color diagram: reuses the CMD color axis and adds a Color([g, nb515]) axis.
    ccd = CCD([
        cmd.axes[0],
        ColorAxis(
            Color([hsc.magnitudes['g'], hsc.magnitudes['nb515']]),
            limits=(-0.5, 0.5)
        )
    ])

In [0]:
# Fornax: diagram definitions (disabled, `if False`; flip together with the Ursa Minor cell).
# Identical to the Ursa Minor variant except for the lower limit of the Color([g, nb515]) axis (-0.75).
if False:
    hsc = SubaruHSC.photometry()
    # Color-magnitude diagram: Color([g, i]) axis vs. g magnitude axis.
    cmd = CMD([
            ColorAxis(
                Color([hsc.magnitudes['g'], hsc.magnitudes['i']]),
                limits=(-1, 4)),
            MagnitudeAxis(
                hsc.magnitudes['g'],
                limits=(15.5, 24.5)
            )
        ])
    # Color-color diagram: reuses the CMD color axis and adds a Color([g, nb515]) axis.
    ccd = CCD([
        cmd.axes[0],
        ColorAxis(
            Color([hsc.magnitudes['g'], hsc.magnitudes['nb515']]),
            limits=(-0.75, 0.5)
        )
    ])

In [0]:
from pfs.ga.targeting.io import Hdf5SimulationReader

In [0]:
# Configure the HDF5 simulation reader and load the simulated sample into `sim`.
r = Hdf5SimulationReader()
# Column mapping handed to the reader; currently empty.
cm = {}
# Disabled mapping that would map the 'hsc_g2'/'hsc_i2' columns (with every prefix) to 'hsc_g'/'hsc_i':
# for prefix in ['', 'obs_', 'err_', 'flux_', 'obs_flux_', 'err_flux_', 'counts_', 'obs_counts_', 'err_counts_']:
#     cm[prefix + 'hsc_g2'] = prefix + 'hsc_g'
#     cm[prefix + 'hsc_i2'] = prefix + 'hsc_i'
r.column_mapping = cm
# Register the HSC photometry with the reader before reading.
r.append_photometry(SubaruHSC.photometry())
sim = r.read(SIM_PATH)

In [0]:
# List every entry of the simulated data set together with the shape of its array.
for name in sim.data.keys():
    column = sim.data[name]
    print(name, column.shape)

In [0]:
# Quick look at the simulated sample; only the left panel (CMD) is drawn.
f, axs = plt.subplots(1, 2, figsize=(6, 4), dpi=120)

# s=np.s_[::100] passes an every-100th-element slice to keep the scatter plot light.
cmd.plot_simulation(axs[0], sim, s=np.s_[::100], size=0.05)
# The color-color panel is switched off, so the right axes stays empty.
if False:
    ccd.plot_simulation(axs[1], sim, s=np.s_[::100], size=0.05)

f.tight_layout()

# Define selections

In [0]:
from pfs.ga.targeting import ProbabilityMap
from pfs.ga.targeting.selection import ProbabilityCut, ProbabilitySampling, MagnitudeSelection, ColorSelection, LinearSelection

In [0]:
# Ursa Minor selection
if True:
    def get_selection_mask(catalog, nb=True, blue=False, probcut=None, observed=None, bright=16, faint=23.5):
        """Build the target-selection mask for `catalog` (Ursa Minor cuts).

        Uses the notebook-level diagrams: `ccd.axes[0]` is the Color([g, i]) axis,
        `ccd.axes[1]` the Color([g, nb515]) axis and `cmd.axes[1]` the g magnitude
        axis.

        Parameters
        ----------
        catalog : catalog object accepted by the selections' `apply` (the notebook
            passes `obs` and `sim`).
        nb : bool
            When true, additionally require the narrow-band criteria.
        blue : bool
            When true, OR in everything selected by ColorSelection(g-i, None, 0.12).
        probcut : optional
            Object exposing `apply(catalog, observed=...)`; its mask is AND-ed in.
        observed : forwarded unchanged to every `apply` call.
        bright, faint : limits handed to MagnitudeSelection on the g magnitude axis.

        Returns
        -------
        The combined mask produced by the selections' `apply` calls.

        NOTE(review): the selection constructors presumably take
        (axis, lower, upper) with None meaning "no bound" -- confirm against
        pfs.ga.targeting.selection.
        """
        def _apply(selection):
            # Every selection is evaluated on the same catalog with the same `observed` flag.
            return selection.apply(catalog, observed=observed)

        # Broadband colors
        mask = _apply(ColorSelection(ccd.axes[0], 0.12, 2.0))

        # Narrow band: any one of three alternatives must hold.
        if nb:
            blue_window = _apply(ColorSelection(ccd.axes[0], 0.12, 0.5))
            nb_color = _apply(ColorSelection(ccd.axes[1], 0.1, None))
            red_limit = _apply(ColorSelection(ccd.axes[0], None, 1.65))
            linear_cut = _apply(LinearSelection(ccd.axes, [-0.25, 1.0], -0.15, None))
            # `&` binds tighter than `|`; the parentheses only make that explicit.
            mask &= (blue_window | (nb_color & red_limit) | linear_cut)

        # Probability-based cut (map) - nonzero membership probability
        if probcut is not None:
            mask &= _apply(probcut)

        # Allow blue (added after the cuts above, so they do not apply to these sources)
        if blue:
            mask |= _apply(ColorSelection(ccd.axes[0], None, 0.12))

        # Always impose faint and bright magnitude cuts
        mask &= _apply(MagnitudeSelection(cmd.axes[1], bright, faint))

        return mask

In [0]:
# Fornax selection
if False:
    def get_selection_mask(catalog, nb=True, blue=False, probcut=None, observed=None, bright=16, faint=23.5):
        """Build the target-selection mask for `catalog` (Fornax cuts).

        Same structure as the Ursa Minor variant; only the broadband color
        window differs (lower limit -0.75 instead of 0.12).  Uses the
        notebook-level diagrams: `ccd.axes[0]` is the Color([g, i]) axis,
        `ccd.axes[1]` the Color([g, nb515]) axis and `cmd.axes[1]` the g
        magnitude axis.

        Parameters
        ----------
        catalog : catalog object accepted by the selections' `apply`.
        nb : bool
            When true, additionally require the narrow-band criteria.
        blue : bool
            When true, OR in everything selected by ColorSelection(g-i, None, 0.12).
        probcut : optional
            Object exposing `apply(catalog, observed=...)`; its mask is AND-ed in.
        observed : forwarded unchanged to every `apply` call.
        bright, faint : limits handed to MagnitudeSelection on the g magnitude axis.

        Returns
        -------
        The combined mask produced by the selections' `apply` calls.

        NOTE(review): the selection constructors presumably take
        (axis, lower, upper) with None meaning "no bound" -- confirm against
        pfs.ga.targeting.selection.
        """
        def _apply(selection):
            # Every selection is evaluated on the same catalog with the same `observed` flag.
            return selection.apply(catalog, observed=observed)

        # Broadband colors
        mask = _apply(ColorSelection(ccd.axes[0], -0.75, 2.0))

        # Narrow band: any one of three alternatives must hold.
        if nb:
            blue_window = _apply(ColorSelection(ccd.axes[0], 0.12, 0.5))
            nb_color = _apply(ColorSelection(ccd.axes[1], 0.1, None))
            red_limit = _apply(ColorSelection(ccd.axes[0], None, 1.65))
            linear_cut = _apply(LinearSelection(ccd.axes, [-0.25, 1.0], -0.15, None))
            # `&` binds tighter than `|`; the parentheses only make that explicit.
            mask &= (blue_window | (nb_color & red_limit) | linear_cut)

        # Probability-based cut (map) - nonzero membership probability
        if probcut is not None:
            mask &= _apply(probcut)

        # Allow blue (added after the cuts above, so they do not apply to these sources)
        if blue:
            mask |= _apply(ColorSelection(ccd.axes[0], None, 0.12))

        # Always impose faint and bright magnitude cuts
        mask &= _apply(MagnitudeSelection(cmd.axes[1], bright, faint))

        return mask

In [0]:
# Show the observed catalog on the CMD with the broadband-only selection (nb=False)
# and the blue sources added back (blue=True).
f, ax = plt.subplots(1, 1)

mask = get_selection_mask(obs, observed=True, nb=False, blue=True)
cmd.plot_catalog(ax, obs, observed=True, mask=mask)

# Compare simulation to observations

In [0]:
# This is for Ursa Minor
if True:
    # Original weights
    print('data.w', sim.data['w'])

    # Fraction of simulated stars per population label, measured from sim.data['g'].
    w = np.bincount(sim.data['g']) / sim.data['g'].size
    print('w', w, np.sum(w[:-2]), np.sum(w[-2:]))

    # New weights, boost thick disk and halo
    # The last two labels are rescaled separately from all the others before
    # the hand-tuned per-population boosts below are applied.
    w1 = np.r_[w[:-2] / 0.4 * 0.5, w[-2:] / 0.6 * 0.5]
    w1[2:4] *= 100
    # w1[0:6] *= 3  # thin disk 1-3
    # w1[4:6] *= 1.2  # thin disk 3
    w1[6:8] *= 15  # thick disk
    w1[8:10] *= 28   # halo
    ##### good for histograms w1[10:12] *= 18  # dSph
    ##### good for ghost plot
    w1[10:12] *= 50  # dSph
    w1 /= w1.sum()
    print('w1', w1, np.sum(w1[:-2]), np.sum(w1[-2:]))

    # New categories
    # Draw from a seeded generator so that g1, and every count and figure
    # derived from it, is identical after Restart & Run All.
    RESAMPLE_SEED = 42
    rng = np.random.default_rng(RESAMPLE_SEED)
    g1 = rng.choice(np.arange(w1.size, dtype=int), sim.data['g'].size, p=w1)
    print('g1', g1.shape)

    # Verify new categories
    w2 = np.bincount(g1) / g1.size
    print('w2', w2, np.sum(w2[:-2]), np.sum(w2[-2:]))

In [0]:
# This is for Fornax
if False:
    # Original weights
    print('data.w', sim.data['w'])

    # Fraction of simulated stars per population label, measured from sim.data['g'].
    w = np.bincount(sim.data['g']) / sim.data['g'].size
    print('w', w, np.sum(w[:-2]), np.sum(w[-2:]))

    # Hand-tuned rescaling of the population weights, then renormalize to sum to 1.
    w1 = w.copy()
    # Fixed: this line was `w1[:-6] * 0.7`, a discarded expression, so the
    # foreground weights were never actually scaled.
    w1[:-6] *= 0.7      # MW foreground
    w1[-6:-4] *= 2.0    # broad RGB population
    w1[-4:-2] *= 0.3    # old RGB population
    w1[-2:] *= 1.0      # member MS population
    w1 /= w1.sum()

    # Draw new population labels from a seeded generator so that g1, and every
    # count and figure derived from it, is identical after Restart & Run All.
    RESAMPLE_SEED = 42
    rng = np.random.default_rng(RESAMPLE_SEED)
    g1 = rng.choice(np.arange(w1.size, dtype=int), sim.data['g'].size, p=w1)
    print('g1', g1.shape)

    # Verify new categories
    w2 = np.bincount(g1) / g1.size
    print('w2', w2, np.sum(w2[:-2]), np.sum(w2[-2:]))

In [0]:
# Calculate the best combination of weights by fitting the color histogram of the observations

In [0]:
# Display the original population labels next to the resampled ones (g1).
sim.data['g'], g1

In [0]:
# Number of objects inside cuts
# Observations: broadband-only selection (nb=False).
mask = get_selection_mask(obs, nb=False, observed=True)
n_obs = mask.sum()
print('obs', n_obs)

# Earlier manual version of the observed count, kept for reference:
#(x, x_err), (y, y_err) = obs.get_diagram_values(cmd.axes, observed=True, mask=mask)
#mask = (0.1 < x) & (x < 2) & (y < 23.5)
#print('obs', n_obs)

# Simulation: same selection, then passed through apply_categories with the resampled labels g1.
mask = get_selection_mask(sim, nb=False, observed=True)
mask = sim.apply_categories(mask, g=g1)
n_sim = mask.sum()
print('sim', n_sim)

# Earlier manual version of the simulated count, kept for reference:
# (x, x_err), (y, y_err) = sim.get_diagram_values(cmd.axes, observed=True)
# x = sim.apply_categories(x, g=g1)
# y = sim.apply_categories(y, g=g1)
# mask = (0.1 < x) & (x < 2) & (y < 23.5)
# n_sim = mask.sum()
# print('sim', n_sim)

# Ratio of selected simulated objects to selected observed objects.
n_sim / n_obs

In [0]:
# Notebook-level switch: whether the narrow-band criteria are used in the
# selections below (also read by plot_histogram).
NB = False

# Three CMD panels: observations, simulation with updated weights, simulation with original weights.
f, axs = plt.subplots(1, 3, figsize=(6, 4), dpi=120)

# Slice passed to the observation plot (every element).
s = np.s_[::1]

mask = get_selection_mask(obs, nb=NB, observed=True)
cmd.plot_observation(axs[0], obs, size=0.05, mask=mask, s=s)
axs[0].set_title('OBS')

# Slice passed to both simulation plots (every 5th element).
s = np.s_[::5]

# Simulation using the resampled labels g1.
mask = get_selection_mask(sim, nb=NB, observed=True)
mask = sim.apply_categories(mask, g=g1)
cmd.plot_simulation(axs[1], sim, observed=True, apply_categories=True, mask=mask, g=g1, s=s, size=0.05)
axs[1].set_title('SIM updated weights')

# Simulation using the labels stored in the file.
mask = get_selection_mask(sim, nb=NB, observed=True)
mask = sim.apply_categories(mask, g=sim.data['g'])
cmd.plot_simulation(axs[2], sim, observed=True, apply_categories=True, mask=mask, g=sim.data['g'], s=s, size=0.05)
axs[2].set_title('SIM original weights')

# Same grid and color range on all panels for easy comparison.
for ax in axs:
    ax.grid()
    ax.set_xlim(-1, 2.2)

f.tight_layout()

In [0]:
# Number of population labels found by np.bincount in the re-weighting cell.
w.shape

In [0]:
def plot_histogram(ax, obs, sim, axis, plot_populations=True):
    """Overlay density histograms of one diagram axis for observations and simulation.

    Reads the notebook-level names `NB`, `g1`, `w1` and `get_selection_mask`.

    Parameters
    ----------
    ax : matplotlib axes to draw on.
    obs : observed catalog (must provide `get_diagram_values`).
    sim : simulation (must provide `get_diagram_values` and `apply_categories`).
    axis : diagram axis whose values are histogrammed.
    plot_populations : bool
        When true, also draw one curve per simulated population, scaled by
        the sum of that population's two entries in `w1`.

    Notes
    -----
    Each histogram uses 100 bins over the range of its own data, so the
    curves do not share bin edges.  Labels 2*i and 2*i+1 are plotted together
    under a single name.
    """
    def draw(values, label, lw, scale=None):
        # Density-normalized histogram drawn as a step curve at the bin centers.
        density, edges = np.histogram(values, bins=100, density=True)
        centers = 0.5 * (edges[1:] + edges[:-1])
        heights = density if scale is None else scale * density
        ax.step(centers, heights, lw=lw, label=label)

    # Observations inside the selection
    ((obs_x, obs_x_err),) = obs.get_diagram_values([axis], observed=True)
    obs_mask = get_selection_mask(obs, nb=NB, observed=True)
    draw(obs_x[obs_mask], 'OBS', 1)
    print(obs_x.min(), obs_x.max())

    # Simulation inside the selection, using the resampled labels g1
    ((sim_x, sim_x_err),) = sim.get_diagram_values([axis], observed=True)
    sim_mask = get_selection_mask(sim, nb=NB, observed=True)
    sim_mask = sim.apply_categories(sim_mask, g=g1)
    sim_x = sim.apply_categories(sim_x, g=g1)
    selected_x = sim_x[sim_mask]
    draw(selected_x, 'SIM', 1)

    if not plot_populations:
        return

    # Labels of the selected objects; the first column of the mask is used to index g1.
    selected_g = g1[sim_mask[:, 0]]
    population_names = ['thin1', 'thin2', 'thin3', 'thick', 'halo', 'dSph']
    # Alternative naming: ['thin1', 'thin2', 'thin3', 'thick', 'halo', 'dSph1', 'dSph2', 'dSph3']
    for i, name in enumerate(population_names):
        first, second = 2 * i, 2 * i + 1
        members = (selected_g == first) | (selected_g == second)
        draw(selected_x[members], name, 0.5, scale=(w1[first] + w1[second]))

In [0]:
# Histogram of the CMD color axis (Color([g, i])) for observations vs. simulation.
f, ax = plt.subplots(1, 1, figsize=(3.5, 2.4), dpi=240)

plot_histogram(ax, obs, sim, cmd.axes[0])

ax.set_xlim(-1, 2.2)
ax.set_xlabel('HSC $g - i$')
ax.legend()

In [0]:
# Histogram of the CMD magnitude axis (g) for observations vs. simulation.
f, ax = plt.subplots(1, 1, figsize=(3.5, 2.4), dpi=240)

plot_histogram(ax, obs, sim, cmd.axes[1])

ax.set_xlabel('HSC $g$')
ax.legend()

In [0]:
# Histogram of the i magnitude; this axis is built ad hoc because it is not part of `cmd`.
f, ax = plt.subplots(1, 1, figsize=(3.5, 2.4), dpi=240)

plot_histogram(ax, obs, sim, MagnitudeAxis(hsc.magnitudes['i']))

ax.set_xlabel('HSC $i$')
ax.legend()

# Create probability map

In [0]:
from pfs.ga.targeting import ProbabilityMap
from pfs.ga.targeting.selection import ProbabilityCut, ProbabilitySampling

In [0]:
# Ursa Minor dwarf
if True:
    # Original weights
    print('data.w', sim.data['w'])

    # Fraction of simulated stars per population label, measured from sim.data['g'].
    w = np.bincount(sim.data['g']) / sim.data['g'].size
    print('w', w, np.sum(w[:-2]), np.sum(w[-2:]))

    # New weights, boost thick disk and halo
    # The last two labels are rescaled separately from all the others before
    # the hand-tuned per-population boosts below are applied.
    w1 = np.r_[w[:-2] / 0.4 * 0.5, w[-2:] / 0.6 * 0.5]
    w1[2:4] *= 100
    # w1[0:6] *= 3  # thin disk 1-3
    # w1[4:6] *= 1.2  # thin disk 3
    w1[6:8] *= 15  # thick disk
    w1[8:10] *= 28   # halo
    ##### good for histograms w1[10:12] *= 18  # dSph
    ##### good for ghost plot
    w1[10:12] *= 50  # dSph
    w1 /= w1.sum()
    print('w1', w1, np.sum(w1[:-2]), np.sum(w1[-2:]))

    # New categories
    # Draw from a seeded generator so that g1 is identical after Restart & Run All.
    RESAMPLE_SEED = 42
    rng = np.random.default_rng(RESAMPLE_SEED)
    g1 = rng.choice(np.arange(w1.size, dtype=int), sim.data['g'].size, p=w1)
    print('g1', g1.shape)

    # Verify new categories
    w2 = np.bincount(g1) / g1.size
    print('w2', w2, np.sum(w2[:-2]), np.sum(w2[-2:]))

In [0]:
# Fornax dwarf
if False:
    # Original weights
    print('data.w', sim.data['w'])

    # Fraction of simulated stars per population label, measured from sim.data['g'].
    w = np.bincount(sim.data['g']) / sim.data['g'].size
    print('w', w, np.sum(w[:-2]), np.sum(w[-2:]))

    # Hand-tuned rescaling of the population weights, then renormalize to sum to 1.
    w1 = w.copy()
    # Fixed: this line was `w1[:-6] * 0.7`, a discarded expression, so the
    # foreground weights were never actually scaled.
    w1[:-6] *= 0.7      # MW foreground
    w1[-6:-4] *= 2.0    # broad RGB population
    w1[-4:-2] *= 0.3    # old RGB population
    w1[-2:] *= 1.0      # member MS population
    w1 /= w1.sum()

    # Draw new population labels from a seeded generator so that g1 is
    # identical after Restart & Run All.
    RESAMPLE_SEED = 42
    rng = np.random.default_rng(RESAMPLE_SEED)
    g1 = rng.choice(np.arange(w1.size, dtype=int), sim.data['g'].size, p=w1)
    print('g1', g1.shape)

    # Verify new categories
    w2 = np.bincount(g1) / g1.size
    print('w2', w2, np.sum(w2[:-2]), np.sum(w2[-2:]))

In [0]:
# The simulation has 10 + 2 * N sub-populations (each population has a separate entry for its binaries).
# The first 10 are the 5 MW populations (3 thin disk + 1 thick disk + 1 halo), two entries each.
# When creating the map, merge all foreground populations into one group and
# the members + member binaries into another (see merge_list below).

# Map extents as [[color min, color max], [magnitude min, magnitude max]] on cmd.axes.
# Ursa Minor
if True:
    extents = [[0.1, 2.0], [17.0, 23.5]]

# Fornax
if False:
    extents = [[-0.75, 2.0], [17.0, 23.5]]

# Build the map on the CMD axes with 100 x 100 bins, using the updated
# population weights w1 from the re-weighting cell above.
pmap = ProbabilityMap(cmd.axes)
pmap.from_simulation(sim, bins=[100, 100], extents=extents,
    merge_list=[np.s_[:10], np.s_[10:]], population_weights=w1, observed=True, mask=None)
# NOTE(review): presumably applies a maximum filter to the binned map -- confirm in ProbabilityMap.
pmap.maximum_filter()

In [0]:
# Display the extents stored on the map.
pmap.extents

In [0]:
# Plot map index 0 and index 1 side by side; with the merge_list used above,
# these are presumably the foreground group and the member group -- confirm.
f, axs = plt.subplots(1, 2, figsize=(6, 4), dpi=120)

l0 = cmd.plot_probability_map(axs[0], pmap, 0)
l1 = cmd.plot_probability_map(axs[1], pmap, 1)

f.tight_layout()

In [0]:
# Save probability maps
# Written as 'pmap.h5' into the directory that contains the simulation file (SIM_PATH).

pmap.save(os.path.join(os.path.dirname(SIM_PATH), 'pmap.h5'))

# Membership probability based on the map

In [0]:
# Look up the membership value of every observed object in the map.
# NOTE(review): `lp_member` is presumably a log-probability, with `mask_member`
# flagging objects that fall inside the map -- confirm in ProbabilityMap.
lp_member, mask_member = pmap.lookup_lp_member(obs)

# Sanity check: shapes, NaN count overall and within the mask, and the number of masked-in objects.
lp_member.shape, np.isnan(lp_member).sum(), np.isnan(lp_member[mask_member]).sum(), mask_member.shape, mask_member.sum()

In [0]:
# Observed CMD (left) and color-color diagram (right), colored by the first
# component along the last axis of lp_member.
f, axs = plt.subplots(1, 2, figsize=(6, 4), dpi=120)

cmd.plot_observation(axs[0], obs, c=lp_member[...,0])
ccd.plot_observation(axs[1], obs, c=lp_member[...,0])

f.tight_layout()

# Selection based on probability cut

In [0]:
# NOTE(review): the second argument (1) presumably selects the member group of
# the map and -3 is a log-probability threshold -- confirm in ProbabilityCut.
pcut = ProbabilityCut(pmap, 1, -3)  # cut at e**-3

In [0]:
# Apply the probability cut to the observed catalog (overwrites the notebook-level `mask`).
mask = pcut.apply(obs, observed=True)

In [0]:
# Observed CMD (left) and color-color diagram (right), both plotted with the probability-cut mask.
f, axs = plt.subplots(1, 2, figsize=(6, 4), dpi=120)

cmd.plot_observation(axs[0], obs, mask=mask)
ccd.plot_observation(axs[1], obs, mask=mask)

f.tight_layout()