# Fiddling about with DR5 results (a bit)

In [1]:
# imports
import numpy as np
import pdb, imp
from astropy.coordinates import SkyCoord, match_coordinates_sky
from linetools import utils as ltu
from pyigm.surveys.dlasurvey import DLASurvey

## Read JSON file

In [25]:
# Load the ML predictions for DR5 from JSON (path is relative to this notebook's directory)
ml_dr5 = ltu.loadjson('../../results/dr5_v1_predictions.json')

In [26]:
# Number of prediction records loaded (one record per plate/fiber, judging by the entries shown below)
len(ml_dr5)

7478

In [27]:
# Inspect one record: an overall classification with its confidence, plus a list of candidate DLAs
ml_dr5[0]

{u'classification': u'NO_DLA',
 u'classification_confidence': 96,
 u'dlas': [],
 u'fiber': 5,
 u'mjd': 0,
 u'num_dlas': 0,
 u'plate': 266}

In [28]:
# In this run, a record classified NO_DLA that still lists one low-confidence DLA candidate
ml_dr5[2]

{u'classification': u'NO_DLA',
 u'classification_confidence': 93,
 u'dlas': [{u'column_density': 19.800457000732422,
   u'dla_confidence': 0.3738497495651245,
   u'rest': 927.2395328982095,
   u'spectrum': 3236.21719131183}],
 u'fiber': 254,
 u'mjd': 0,
 u'num_dlas': 1,
 u'plate': 270}

In [29]:
# In this run, a record classified HAS_DLA that lists several candidates with differing confidences
ml_dr5[5]

{u'classification': u'HAS_DLA',
 u'classification_confidence': 97,
 u'dlas': [{u'column_density': 20.375356674194336,
   u'dla_confidence': 0.8868335485458374,
   u'rest': 986.0872066647199,
   u'spectrum': 3441.6051700972403},
  {u'column_density': 20.587078094482422,
   u'dla_confidence': 0.8954387307167053,
   u'rest': 1083.0557379057761,
   u'spectrum': 3780.0411585173047},
  {u'column_density': 20.485034942626953,
   u'dla_confidence': 0.795092761516571,
   u'rest': 1165.6127221361494,
   u'spectrum': 4068.1784975219425},
  {u'column_density': 20.31381607055664,
   u'dla_confidence': 0.28717735409736633,
   u'rest': 1107.7819356232567,
   u'spectrum': 3866.339621093609}],
 u'fiber': 345,
 u'mjd': 0,
 u'num_dlas': 4,
 u'plate': 271}

## Load up DR5

In [2]:
# Load the SDSS-DR5 DLA survey through pyigm; the loader's own log reports about 60s of stats work
sdssdr5 = DLASurvey.load_SDSS_DR5()

SDSS-DR5: Loading DLA file /Users/xavier/local/Python/pyigm/pyigm/data/DLA/SDSS_DR5/dr5_alldla.fits.gz
SDSS-DR5: Loading QSOs file /Users/xavier/local/Python/pyigm/pyigm/data/DLA/SDSS_DR5/dr5_dlagz_s2n4.fits
SDSS-DR5: Performing stats (~60s)
SDSS-DR5: Loaded


In [19]:
# Peek at the first four rows of the sightline table (PLATE and FIB identify each spectrum)
sdssdr5.sightlines[0:4]

PLATE,FIB,RA,DEC,FLG_BAL,IQSO,MAG,S2N,Z_START,Z_END,ZEM,DX
Unnamed: 0_level_1,Unnamed: 1_level_1,deg,deg,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
int32,int32,float64,float64,int16,int32,float64,float64,float64,float64,float64,float64
266,5,146.93861,-0.68701194,1,0,19.341999054,4.94595003128,2.39664643878,2.74649000168,2.8287498951,1.17426266257
266,92,146.22601,-0.72509875,0,4,19.0820007324,8.54980564117,2.20000004768,2.25759506226,2.29049992561,0.184012498912
270,254,152.23239,-0.97123272,0,9,19.0230007172,7.49763822556,2.30636157714,3.0556242466,3.09659004211,2.56581152274
271,391,154.14992,0.14750838,0,16,18.0650005341,18.982629776,2.20000004768,2.25551605225,2.28839993477,0.177634457341


In [48]:
# Compare the sizes: if the sightline table is longer than the prediction list,
# the two cannot be paired index-for-index and must be matched on plate/fiber
len(sdssdr5.sightlines), len(ml_dr5)

(7482, 7478)

### Setup coords

In [22]:
# Coordinates stored on the survey object, used below as the DLA positions
# NOTE(review): presumably one coordinate per DLA -- confirm against pyigm
dla_coord = sdssdr5.coord

In [23]:
# Sky coordinates of every sightline, built from the table's RA/DEC columns (listed in deg in the table header)
sl_coord = SkyCoord(ra=sdssdr5.sightlines['RA'], dec=sdssdr5.sightlines['DEC'])

In [37]:
# Nearest-neighbour match of each DLA coordinate onto the sightline coordinates:
# idx indexes into sl_coord and d2d is the on-sky separation.
# d3d is not used in the cells shown here.
idx, d2d, d3d = match_coordinates_sky(dla_coord, sl_coord, nthneighbor=1)

In [38]:
# Sanity check on the match: the largest DLA-to-sightline separation, in arcsec
np.max(d2d.to('arcsec'))

<Angle 0.3689950092602071 arcsec>

### Flag sightlines the ML model classified as HAS_DLA

In [40]:
# Start every sightline as "not flagged by the ML model"; the matching loop flips the hits to True
sdssdr5.sightlines['HAS_ML'] = np.zeros(len(sdssdr5.sightlines), dtype=bool)

In [47]:
# Walk the ML predictions and the sightline table in step.  This relies on both
# being in the same plate/fiber order; sightlines with no ML prediction are
# reported and skipped.
plates = sdssdr5.sightlines['PLATE']
fibers = sdssdr5.sightlines['FIB']
n_sightlines = len(sdssdr5.sightlines)
jj = 0
for obj in ml_dr5:
    # Advance to the sightline with this prediction's plate/fiber, never past the end
    while (jj < n_sightlines) and ((obj['plate'] != plates[jj]) or (obj['fiber'] != fibers[jj])):
        print("Skipping plate={:d}, fiber={:d}".format(plates[jj], fibers[jj]))
        jj += 1
    if jj >= n_sightlines:
        # Fail with a useful message instead of an opaque out-of-range index error
        raise IndexError("No remaining sightline matches ML prediction "
                         "plate={}, fiber={}; are the two lists in the same "
                         "order?".format(obj['plate'], obj['fiber']))
    # Machine learning
    if obj['classification'] == u'HAS_DLA':
        sdssdr5.sightlines['HAS_ML'][jj] = True
    jj += 1

Skipping plate=359, fiber=279
Skipping plate=1950, fiber=602
Skipping plate=1980, fiber=484
Skipping plate=2083, fiber=33


## Fraction of Hits

In [60]:
# Calculate fraction of HAS_DLA as a function of NHI
def chk_has_nhi(dlas, idx, NHI=20.3):
    """Count DLA-hosting sightlines and how many of them carry the HAS_ML flag.

    Parameters
    ----------
    dlas : survey object
        Must expose ``NHI`` (one value per DLA) and a ``sightlines`` table
        with a ``HAS_ML`` column.
    idx : integer array
        For each DLA, the row of ``dlas.sightlines`` it is matched to.
    NHI : float, optional
        Threshold; only DLAs with ``dlas.NHI >= NHI`` are considered.

    Returns
    -------
    ngood, njxp
        The number of sightlines that host at least one selected DLA and are
        flagged ``HAS_ML``, and the number of sightlines that host at least
        one selected DLA.
    """
    n_sightlines = len(dlas.sightlines)
    # Mark each sightline hosting at least one DLA above the threshold
    hosts_dla = np.zeros(n_sightlines, dtype=bool)
    above_cut = dlas.NHI >= NHI
    hosts_dla[idx[above_cut]] = True
    # Of those, keep the ones the ML flagged
    flagged = dlas.sightlines['HAS_ML'] & hosts_dla
    return np.sum(flagged), np.sum(hosts_dla)

### NHI >= 20.3

In [62]:
# Default threshold (NHI >= 20.3): ML-flagged sightlines vs. all sightlines hosting such a DLA
ngood, ntot = chk_has_nhi(sdssdr5, idx)
ngood, ntot

(629, 667)

### NHI >= 21.0

In [63]:
# Same count restricted to DLAs with NHI >= 21
ngood21, ntot21 = chk_has_nhi(sdssdr5, idx, NHI=21.)
ngood21, ntot21

(121, 121)

In [65]:
# Fraction of DLA-hosting sightlines (NHI >= 20.3) that the ML flagged.  Computed
# from the counts rather than retyped literals so it tracks the data; the explicit
# float conversion prevents integer division on integer counts.
float(ngood) / float(ntot)

0.9430284857571214