### this notebook saves the metrics (static ones; and lss ones) for v1.6 dbs.

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl
import os
import pandas as pd
%matplotlib inline

In [2]:
data_dir = '/global/cscratch1/sd/awan/lsst_output/post_wp_output_v1.6_-0.1cuts/summary_data/'
outdir = '/global/homes/a/awan/LSST/lsstRepos/ObsStrat/postwp/paper-data/summary_csv_v1.6_-0.1cuts/'
os.makedirs(outdir, exist_ok=True)

#### read in the data for the static metrics and store as a dictionary

In [3]:
# read in the data 
data = {}
yr_label = {}
for yr in [1, 3, 6, 10]:
    files = [f for f in os.listdir( data_dir ) if f.endswith('csv') and f.__contains__('y%s_' % yr) and f.startswith('eg_') ]
    for file in files:
        print( file )
        key = 'yr%s_%s' % (yr, file.split('_')[4])
        data[ key ] = pd.read_csv('%s/%s' % (data_dir, file))
        yr_label[ key ] =  r'Y%s (i$>$%s) ' % (yr, file.split('_')[4].split('limi')[-1])
        
print( yr_label )
print(data.keys())
print(data[list(data.keys())[0]].keys() )

eg_footprint_stats_y1_limi24.65_nodither_nside256.csv
eg_footprint_stats_y3_limi25.25_nodither_nside256.csv
eg_footprint_stats_y6_limi25.619999999999997_nodither_nside256.csv
eg_footprint_stats_y10_limi25.9_nodither_nside256.csv
{'yr1_limi24.65': 'Y1 (i$>$24.65) ', 'yr3_limi25.25': 'Y3 (i$>$25.25) ', 'yr6_limi25.619999999999997': 'Y6 (i$>$25.619999999999997) ', 'yr10_limi25.9': 'Y10 (i$>$25.9) '}
dict_keys(['yr1_limi24.65', 'yr3_limi25.25', 'yr6_limi25.619999999999997', 'yr10_limi25.9'])
Index(['dbname', 'Area (deg2)', '$i$-band depth: median',
       ' $i$-band depth: std'],
      dtype='object')


In [4]:
# read in the static metrics
for key in data:
    yr_tag = key.split('_')[0].split('yr')[-1]
    lim_tag = key.split('_')[1].split('limi')[-1]
    print(yr_tag, lim_tag)

    data[key] = data[key].rename(columns={'Area (deg2)': 'Y%s effective survey area ' % (yr_tag),
                                          '$i$-band depth: median': 'median Y%s $i$-band coadded depth in effective survey area' % (yr_tag),
                                          ' $i$-band depth: std': 'std in Y%s $i$-band coadded depth in effective survey area' % (yr_tag)} )
    #fname = 'lss_metrics_%s.csv' % key
    #data[key].to_csv('%s/%s' % (outdir, fname), index=False)

1 24.65
3 25.25
6 25.619999999999997
10 25.9


#### read in the `list.csv` file: gives us the order of the dbs

In [5]:
given_list = pd.read_csv('%s/given_order_v1.6.csv' % outdir, )
given_list

Unnamed: 0,name
0,barebones_nexp2_v1.6_10yrs.db
1,barebones_v1.6_10yrs.db
2,baseline_nexp1_v1.6_10yrs.db
3,baseline_nexp2_scaleddown_v1.6_10yrs.db
4,baseline_nexp2_v1.6_10yrs.db
5,combo_dust_nexp2_v1.6_10yrs.db
6,combo_dust_v1.6_10yrs.db
7,ddf_heavy_nexp2_v1.6_10yrs.db
8,ddf_heavy_v1.6_10yrs.db
9,dm_heavy_nexp2_v1.6_10yrs.db


In [6]:
# check some things:
len( list( given_list['name'] ) ), len( np.unique( given_list['name'] ) )

(30, 30)

In [7]:
yr_key = [f for f in data.keys() if f.__contains__('yr1_')][0]
yr_key

'yr1_limi24.65'

#### find the indices to get the order

In [8]:
# find the indices to get the order
ind = []
for db in given_list['name']:
    ind_temp = np.where( data[yr_key]['dbname'].values == db.split('.db')[0] )[0]

    if len(ind_temp) == 0:
        print('%s not found in data dict' % db)
        #ind_temp = np.array([ 0 ])
        
    if ind_temp in ind:
        print('%s ind already in. %s' % (ind_temp, db))
    ind +=  list( ind_temp )
    
ind = np.array(ind)
print(ind)

[29 23 18 24 12 22  9  4 19  2 11 21 13 26 16  0 28  5 17 14  3  1 20 15
 27  7 25 10  8  6]


In [9]:
# check we have the right indices
print( data[yr_key]['dbname'].values[28],  given_list['name'][0] )

# check we have the right number of inds
print( len(ind), len(np.unique(ind)) )

mw_heavy_v1.6_10yrs barebones_nexp2_v1.6_10yrs.db
30 30


#### save the data in the order in which to save things

In [10]:
data.keys()

dict_keys(['yr1_limi24.65', 'yr3_limi25.25', 'yr6_limi25.619999999999997', 'yr10_limi25.9'])

In [11]:
for yr_lim_tag in data:
    for colname in data[yr_lim_tag]:
        if colname != 'dbname':
             given_list[colname] = data[yr_lim_tag][colname].values[ind]

In [12]:
given_list

Unnamed: 0,name,Y1 effective survey area,median Y1 $i$-band coadded depth in effective survey area,std in Y1 $i$-band coadded depth in effective survey area,Y3 effective survey area,median Y3 $i$-band coadded depth in effective survey area,std in Y3 $i$-band coadded depth in effective survey area,Y6 effective survey area,median Y6 $i$-band coadded depth in effective survey area,std in Y6 $i$-band coadded depth in effective survey area,Y10 effective survey area,median Y10 $i$-band coadded depth in effective survey area,std in Y10 $i$-band coadded depth in effective survey area
0,barebones_nexp2_v1.6_10yrs.db,15163.15,25.2,0.19,15326.81,25.81,0.15,15288.94,26.14,0.14,15282.23,26.4,0.13
1,barebones_v1.6_10yrs.db,15198.09,25.21,0.19,15382.68,25.86,0.15,15355.87,26.19,0.14,15339.35,26.46,0.13
2,baseline_nexp1_v1.6_10yrs.db,15165.04,25.11,0.18,15390.34,25.73,0.13,15353.99,26.07,0.13,15334.0,26.34,0.12
3,baseline_nexp2_scaleddown_v1.6_10yrs.db,14863.26,25.06,0.17,15257.1,25.69,0.13,15177.94,26.03,0.13,15178.68,26.3,0.12
4,baseline_nexp2_v1.6_10yrs.db,14836.77,25.03,0.17,15246.82,25.67,0.13,15188.64,26.01,0.12,15159.64,26.27,0.12
5,combo_dust_nexp2_v1.6_10yrs.db,15765.87,24.97,0.18,17556.45,25.56,0.13,17194.56,25.91,0.13,17087.49,26.18,0.12
6,combo_dust_v1.6_10yrs.db,17161.14,25.03,0.19,18024.2,25.63,0.14,17771.62,25.98,0.14,17693.36,26.24,0.13
7,ddf_heavy_nexp2_v1.6_10yrs.db,14262.06,24.98,0.14,15029.02,25.56,0.1,14827.7,25.89,0.1,14765.8,26.16,0.09
8,ddf_heavy_v1.6_10yrs.db,14944.15,25.06,0.16,15235.75,25.64,0.12,15129.58,25.97,0.11,15089.35,26.23,0.1
9,dm_heavy_nexp2_v1.6_10yrs.db,14330.89,24.99,0.16,15177.68,25.65,0.13,15150.25,26.0,0.12,15150.09,26.27,0.11


In [13]:
cols = given_list.columns.tolist()
cols

['name',
 'Y1 effective survey area ',
 'median Y1 $i$-band coadded depth in effective survey area',
 'std in Y1 $i$-band coadded depth in effective survey area',
 'Y3 effective survey area ',
 'median Y3 $i$-band coadded depth in effective survey area',
 'std in Y3 $i$-band coadded depth in effective survey area',
 'Y6 effective survey area ',
 'median Y6 $i$-band coadded depth in effective survey area',
 'std in Y6 $i$-band coadded depth in effective survey area',
 'Y10 effective survey area ',
 'median Y10 $i$-band coadded depth in effective survey area',
 'std in Y10 $i$-band coadded depth in effective survey area']

#### realized that there's too much finaging when sharing data so its best to reorder columns to make sure the order of the columns is the same.

In [14]:
new_cols = ['name',
            'median Y1 $i$-band coadded depth in effective survey area',
            'median Y3 $i$-band coadded depth in effective survey area',
            'median Y6 $i$-band coadded depth in effective survey area',
            'median Y10 $i$-band coadded depth in effective survey area',
            'std in Y1 $i$-band coadded depth in effective survey area',
            'std in Y3 $i$-band coadded depth in effective survey area',
            'std in Y6 $i$-band coadded depth in effective survey area',
            'std in Y10 $i$-band coadded depth in effective survey area',
            'Y1 effective survey area ',
            'Y3 effective survey area ',
            'Y6 effective survey area ',
            'Y10 effective survey area ',]
new_cols

['name',
 'median Y1 $i$-band coadded depth in effective survey area',
 'median Y3 $i$-band coadded depth in effective survey area',
 'median Y6 $i$-band coadded depth in effective survey area',
 'median Y10 $i$-band coadded depth in effective survey area',
 'std in Y1 $i$-band coadded depth in effective survey area',
 'std in Y3 $i$-band coadded depth in effective survey area',
 'std in Y6 $i$-band coadded depth in effective survey area',
 'std in Y10 $i$-band coadded depth in effective survey area',
 'Y1 effective survey area ',
 'Y3 effective survey area ',
 'Y6 effective survey area ',
 'Y10 effective survey area ']

In [15]:
given_list = given_list[new_cols]
given_list

Unnamed: 0,name,median Y1 $i$-band coadded depth in effective survey area,median Y3 $i$-band coadded depth in effective survey area,median Y6 $i$-band coadded depth in effective survey area,median Y10 $i$-band coadded depth in effective survey area,std in Y1 $i$-band coadded depth in effective survey area,std in Y3 $i$-band coadded depth in effective survey area,std in Y6 $i$-band coadded depth in effective survey area,std in Y10 $i$-band coadded depth in effective survey area,Y1 effective survey area,Y3 effective survey area,Y6 effective survey area,Y10 effective survey area
0,barebones_nexp2_v1.6_10yrs.db,25.2,25.81,26.14,26.4,0.19,0.15,0.14,0.13,15163.15,15326.81,15288.94,15282.23
1,barebones_v1.6_10yrs.db,25.21,25.86,26.19,26.46,0.19,0.15,0.14,0.13,15198.09,15382.68,15355.87,15339.35
2,baseline_nexp1_v1.6_10yrs.db,25.11,25.73,26.07,26.34,0.18,0.13,0.13,0.12,15165.04,15390.34,15353.99,15334.0
3,baseline_nexp2_scaleddown_v1.6_10yrs.db,25.06,25.69,26.03,26.3,0.17,0.13,0.13,0.12,14863.26,15257.1,15177.94,15178.68
4,baseline_nexp2_v1.6_10yrs.db,25.03,25.67,26.01,26.27,0.17,0.13,0.12,0.12,14836.77,15246.82,15188.64,15159.64
5,combo_dust_nexp2_v1.6_10yrs.db,24.97,25.56,25.91,26.18,0.18,0.13,0.13,0.12,15765.87,17556.45,17194.56,17087.49
6,combo_dust_v1.6_10yrs.db,25.03,25.63,25.98,26.24,0.19,0.14,0.14,0.13,17161.14,18024.2,17771.62,17693.36
7,ddf_heavy_nexp2_v1.6_10yrs.db,24.98,25.56,25.89,26.16,0.14,0.1,0.1,0.09,14262.06,15029.02,14827.7,14765.8
8,ddf_heavy_v1.6_10yrs.db,25.06,25.64,25.97,26.23,0.16,0.12,0.11,0.1,14944.15,15235.75,15129.58,15089.35
9,dm_heavy_nexp2_v1.6_10yrs.db,24.99,25.65,26.0,26.27,0.16,0.13,0.12,0.11,14330.89,15177.68,15150.25,15150.09


In [16]:
# save the metrics
fname = 'static_metrics.csv'
given_list.to_csv('%s/%s' % (outdir, fname), index=False)

#### now read and save lss-fom and ngal

In [17]:
# clean up the give_list dataframe
for key in given_list.keys():
    if key.__contains__('Y'):
        given_list = given_list.drop(key, axis=1)
        
given_list

Unnamed: 0,name
0,barebones_nexp2_v1.6_10yrs.db
1,barebones_v1.6_10yrs.db
2,baseline_nexp1_v1.6_10yrs.db
3,baseline_nexp2_scaleddown_v1.6_10yrs.db
4,baseline_nexp2_v1.6_10yrs.db
5,combo_dust_nexp2_v1.6_10yrs.db
6,combo_dust_v1.6_10yrs.db
7,ddf_heavy_nexp2_v1.6_10yrs.db
8,ddf_heavy_v1.6_10yrs.db
9,dm_heavy_nexp2_v1.6_10yrs.db


#### read in the lss fom values

In [18]:
data_dir_base = '/global/cscratch1/sd/awan/lsst_output/post_wp_output_v1.6_-0.1cuts/lss_fom/'

In [19]:
data_dir = '%s/os_bias_overplots/' % data_dir_base
[f for f in os.listdir(data_dir) if f.__contains__('fomdata')] 

['2020-12-01_fomdata_Y10_i<25.2_snfom_egfootprint_30cadences_0.66<z<1.0_th-r<25.60_100<ell<300_logtcuts.csv',
 '2020-12-01_fomdata_Y1_i<23.95_snfom_egfootprint_30cadences_0.66<z<1.0_th-r<24.35_100<ell<300_logtcuts.csv']

In [20]:
fname = [f for f in os.listdir(data_dir) if f.__contains__('fomdata') if f.__contains__('_Y1_')][0]
print(fname)
data_file = '%s/%s' % (data_dir, fname)
lss_fom = pd.read_csv(data_file)

# find the indices to get the order
ind = []
for db in given_list['name']:
    ind_temp = np.where( lss_fom['dbname'].values == db.split('.db')[0] )[0]

    if len(ind_temp) == 0:
        print('%s not found in data dict' % db)
        #ind_temp = np.array([ 0 ])
        
    if ind_temp in ind:
        print('%s ind already in. %s' % (ind_temp, db))
    ind +=  list( ind_temp )
    
ind = np.array(ind)
print(ind)

given_list['Y1 lss fom'] = lss_fom['FoM'].values[ind]

2020-12-01_fomdata_Y1_i<23.95_snfom_egfootprint_30cadences_0.66<z<1.0_th-r<24.35_100<ell<300_logtcuts.csv
[29 23 18 24 12 22  9  4 19  2 11 21 13 26 16  0 28  5 17 14  3  1 20 15
 27  7 25 10  8  6]


In [21]:
fname = [f for f in os.listdir(data_dir) if f.__contains__('fomdata') if f.__contains__('_Y10_')][0]
print(fname)
data_file = '%s/%s' % (data_dir, fname)
lss_fom = pd.read_csv(data_file)

# find the indices to get the order
ind = []
for db in given_list['name']:
    ind_temp = np.where( lss_fom['dbname'].values == db.split('.db')[0] )[0]

    if len(ind_temp) == 0:
        print('%s not found in data dict' % db)
        #ind_temp = np.array([ 0 ])
        
    if ind_temp in ind:
        print('%s ind already in. %s' % (ind_temp, db))
    ind +=  list( ind_temp )
    
ind = np.array(ind)
print(ind)

given_list['Y10 lss fom'] = lss_fom['FoM'].values[ind]

2020-12-01_fomdata_Y10_i<25.2_snfom_egfootprint_30cadences_0.66<z<1.0_th-r<25.60_100<ell<300_logtcuts.csv
[29 23 18 24 12 22  9  4 19  2 11 21 13 26 16  0 28  5 17 14  3  1 20 15
 27  7 25 10  8  6]


In [22]:
given_list

Unnamed: 0,name,Y1 lss fom,Y10 lss fom
0,barebones_nexp2_v1.6_10yrs.db,0.893868,0.975746
1,barebones_v1.6_10yrs.db,0.924743,0.98002
2,baseline_nexp1_v1.6_10yrs.db,0.935198,0.979563
3,baseline_nexp2_scaleddown_v1.6_10yrs.db,0.915822,0.974355
4,baseline_nexp2_v1.6_10yrs.db,0.912427,0.973666
5,combo_dust_nexp2_v1.6_10yrs.db,0.916119,1.030839
6,combo_dust_v1.6_10yrs.db,0.9577,1.049415
7,ddf_heavy_nexp2_v1.6_10yrs.db,0.898389,0.962697
8,ddf_heavy_v1.6_10yrs.db,0.922243,0.973398
9,dm_heavy_nexp2_v1.6_10yrs.db,0.895523,0.972498


#### now read in the ngal values

In [23]:
ngal_data_dir = data_dir_base

ngal_data = {}
for yr in [1, 10]:
    ngal_data[yr] =  np.zeros(len(given_list.name))
    folders = [f for f in os.listdir( ngal_data_dir ) if f.__contains__('_Y%s_' % yr) and f.__contains__('i<') ]
    for i, folder in enumerate( folders ):
        out = pd.read_csv('%s/%s/ngal-final_total/ngal_NoDither.txt' % (ngal_data_dir, folder), header=None, delimiter=' ') 
        dbname, ngal = out[0].values[0], out[1].values[0]
        ind = np.where( given_list['name'].values == '%s.db' % dbname)[0]
        ngal_data[yr][ind] = ngal
    print('read in %s folders for Y%s' % (i+1, yr) )

read in 30 folders for Y1
read in 30 folders for Y10


In [24]:
for yr in ngal_data:
    given_list['Y%s Ngal (0.66<z<1.0)' % yr] = ngal_data[yr]

In [25]:
given_list

Unnamed: 0,name,Y1 lss fom,Y10 lss fom,Y1 Ngal (0.66<z<1.0),Y10 Ngal (0.66<z<1.0)
0,barebones_nexp2_v1.6_10yrs.db,0.893868,0.975746,444000000.0,959000000.0
1,barebones_v1.6_10yrs.db,0.924743,0.98002,445000000.0,964000000.0
2,baseline_nexp1_v1.6_10yrs.db,0.935198,0.979563,443000000.0,961000000.0
3,baseline_nexp2_scaleddown_v1.6_10yrs.db,0.915822,0.974355,434000000.0,950000000.0
4,baseline_nexp2_v1.6_10yrs.db,0.912427,0.973666,433000000.0,948000000.0
5,combo_dust_nexp2_v1.6_10yrs.db,0.916119,1.030839,459000000.0,1060000000.0
6,combo_dust_v1.6_10yrs.db,0.9577,1.049415,500000000.0,1100000000.0
7,ddf_heavy_nexp2_v1.6_10yrs.db,0.898389,0.962697,415000000.0,919000000.0
8,ddf_heavy_v1.6_10yrs.db,0.922243,0.973398,436000000.0,942000000.0
9,dm_heavy_nexp2_v1.6_10yrs.db,0.895523,0.972498,417000000.0,947000000.0


In [26]:
fname = 'lss_metrics.csv'
given_list.to_csv('%s/%s' % (outdir, fname), index=False)