# Comparing G16 vs ML for the Referee

In [18]:
# imports
import numpy as np
import os, sys

from dla_cnn.io import load_ml_dr7, load_ml_dr12, load_garnett16
from dla_cnn.catalogs import match_boss_catalogs

sys.path.append(os.path.abspath("../Analysis/py"))
import defs as analy_defs

## Setup

In [13]:
# Redshift tolerance handed to the catalog matcher when pairing ML and G16 absorbers
dztoler = 0.015
# Threshold applied to the G16 'pDLAD' column, taken from the shared analysis definitions
g16_pcut = analy_defs.g16_pcut

## Load the catalogs

### ML

In [10]:
# Read the BOSS DR12 ML catalog; only the second returned item (the absorber table) is used
_, dr12_abs = load_ml_dr12()
zem_col = dr12_abs['zem']
zabs_col = dr12_abs['zabs']

# Redshift-based masks
# NOTE(review): 3000./3e5 looks like 3000 km/s over c applied directly as a dz offset,
# with no (1+z) factor -- confirm this is the intended proximity cut.
zprox = zabs_col < (zem_col - 3000./3e5)
zcut = zabs_col > 2.15
zem = (zem_col > zabs_col) & (zem_col > 2.15)  # G16 cut on zem

# Absorber-quality masks
high_conf = dr12_abs['conf'] > 0.9
no_bals = dr12_abs['flg_BAL'] == 0
dlas = dr12_abs['NHI'] >= 20.3

# Combine every mask and keep only the rows that pass all of them
dr12_cut = dlas & no_bals & high_conf & zem & zcut & zprox
dr12_dla = dr12_abs[dr12_cut]
len(dr12_dla)

15922

### Load Garnett

In [14]:
# Read the G16 catalog
g16_abs = load_garnett16()
# Row masks: 'pDLAD' at or above the configured threshold, and 'log.NHI' of at least 20.3
g_conf = g16_abs['pDLAD'] >= g16_pcut
g_dlas = g16_abs['log.NHI'] >= 20.3
# Keep rows passing both masks
g_cut = g_dlas & g_conf
g16_dlas = g16_abs[g_cut]
len(g16_dlas)

  return getattr(self.data, oper)(other)


18173

## Match ML to G16

In [20]:
# Cross-match the selected ML DLAs against the selected G16 DLAs.
# Non-negative entries of the result are used below as row indices into g16_dlas;
# negative entries are treated as "no match".
dr12_to_g16 = match_boss_catalogs(dr12_dla, g16_dlas, dztoler=dztoler)
matched = dr12_to_g16 >= 0
g16_idx = dr12_to_g16[matched]
# Report the total from the table that was actually matched, not from an upstream mask
print("We matched {:d} of {:d} DLAs between high quality ML and G16 within dz={:g}".format(
    np.sum(matched), len(dr12_dla), dztoler))

# Re-apply both confidence cuts to the matched pairs. The G16 side uses the same
# threshold variable and the same inclusive comparison (>= g16_pcut) as the G16 catalog
# selection, so a row sitting exactly on the threshold is not counted as low confidence.
# NOTE: this rebinds `high_conf` (also used for the ML selection mask) to a per-match mask.
high_conf = (dr12_dla['conf'][matched] > 0.9) & (g16_dlas['pDLAD'][g16_idx] >= g16_pcut)
print("Of these, {:d} are high confidence in both".format(np.sum(high_conf)))

We matched 10205 of 15922 DLAs between high quality ML and G16 within dz=0.015
Of these, 10205 are high confidence in both


### Consider large dNHI

In [21]:
# ML 'NHI' values for the matched systems
NHI = dr12_dla['NHI'][matched]
# Offset of each matched pair: ML value minus the G16 'log.NHI' of its partner
dNHI = NHI - g16_dlas['log.NHI'][g16_idx]

In [23]:
# Flag matched pairs whose NHI offset exceeds 0.5 in either direction, then count them
large_dNHI = (dNHI > 0.5) | (dNHI < -0.5)
np.sum(large_dNHI)

133