### this notebook saves the metrics (static ones; and lss ones) for v1.5 dbs.

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl
import os
import pandas as pd
%matplotlib inline

In [2]:
data_dir = '/global/cscratch1/sd/awan/lsst_output/post_wp_output_v1.5_-0.1cuts/summary_data/'
outdir = '/global/homes/a/awan/LSST/lsstRepos/ObsStrat/postwp/paper-data/summary_csv_v1.5_-0.1cuts/'
os.makedirs(outdir, exist_ok=True)

#### read in the data for the static metrics and store as a dictionary

In [3]:
# read in the data 
data = {}
yr_label = {}
for yr in [1, 3, 6, 10]:
    files = [f for f in os.listdir( data_dir ) if f.endswith('csv') and f.__contains__('y%s_' % yr) and f.startswith('eg_') ]
    for file in files:
        print( file )
        key = 'yr%s_%s' % (yr, file.split('_')[4])
        data[ key ] = pd.read_csv('%s/%s' % (data_dir, file))
        yr_label[ key ] =  r'Y%s (i$>$%s) ' % (yr, file.split('_')[4].split('limi')[-1])
        
print( yr_label )
print(data.keys())
print(data[list(data.keys())[0]].keys() )

eg_footprint_stats_y1_limi24.65_nodither_nside256.csv
eg_footprint_stats_y3_limi25.25_nodither_nside256.csv
eg_footprint_stats_y6_limi25.619999999999997_nodither_nside256.csv
eg_footprint_stats_y10_limi25.9_nodither_nside256.csv
{'yr1_limi24.65': 'Y1 (i$>$24.65) ', 'yr3_limi25.25': 'Y3 (i$>$25.25) ', 'yr6_limi25.619999999999997': 'Y6 (i$>$25.619999999999997) ', 'yr10_limi25.9': 'Y10 (i$>$25.9) '}
dict_keys(['yr1_limi24.65', 'yr3_limi25.25', 'yr6_limi25.619999999999997', 'yr10_limi25.9'])
Index(['dbname', 'Area (deg2)', '$i$-band depth: median',
       ' $i$-band depth: std'],
      dtype='object')


In [4]:
# read in the static metrics
for key in data:
    yr_tag = key.split('_')[0].split('yr')[-1]
    lim_tag = key.split('_')[1].split('limi')[-1]
    print(yr_tag, lim_tag)

    data[key] = data[key].rename(columns={'Area (deg2)': 'Y%s effective survey area ' % (yr_tag),
                                          '$i$-band depth: median': 'median Y%s $i$-band coadded depth in effective survey area' % (yr_tag),
                                          ' $i$-band depth: std': 'std in Y%s $i$-band coadded depth in effective survey area' % (yr_tag)} )
    #fname = 'lss_metrics_%s.csv' % key
    #data[key].to_csv('%s/%s' % (outdir, fname), index=False)

1 24.65
3 25.25
6 25.619999999999997
10 25.9


#### get the order of the dbs

In [5]:
df_in_order = pd.read_csv('%s/given_order_v1.5.csv' % outdir, )
df_in_order

Unnamed: 0,name
0,agnddf_v1.5_10yrs
1,alt_dust_v1.5_10yrs
2,alt_roll_mod2_dust_sdf_0.20_v1.5_10yrs
3,baseline_2snaps_v1.5_10yrs
4,baseline_v1.5_10yrs
...,...
80,wfd_depth_scale0.90_v1.5_10yrs
81,wfd_depth_scale0.95_noddf_v1.5_10yrs
82,wfd_depth_scale0.95_v1.5_10yrs
83,wfd_depth_scale0.99_noddf_v1.5_10yrs


In [6]:
# check some things:
len( list( df_in_order['name'] ) ), len( np.unique( df_in_order['name'] ) )

(85, 85)

In [7]:
yr_key = [f for f in data.keys() if f.__contains__('yr1_')][0]
yr_key

'yr1_limi24.65'

#### find the indices to get the order

In [8]:
# find the indices to get the order
ind = []
for db in df_in_order['name']:
    ind_temp = np.where( data[yr_key]['dbname'].values == db.split('.db')[0] )[0]

    if len(ind_temp) == 0:
        print('%s not found in data dict' % db)
        #ind_temp = np.array([ 0 ])
        
    if ind_temp in ind:
        print('%s ind already in. %s' % (ind_temp, db))
    ind +=  list( ind_temp )
    
ind = np.array(ind)
print(ind)

[56 61 60 27  2 28  7 71 43  1 81 31 32 24  3 67 76 68 22 38 82 48 47 10
 54 84 11 70 62 16 41 63  8 18 45 30 12 55 17 72 34 25 33 40 69 50 74 37
 53 52 58 64 79 29 75 42 23 19 35  9 49  0 57 78 14  6 77 73 83 66 15 65
 21 51  5 26 20 46 80 36 44 39 13  4 59]


In [9]:
# check we have the right indices
print( data[yr_key]['dbname'].values[28],  df_in_order['name'][0] )

# check we have the right number of inds
print( len(ind), len(np.unique(ind)) )

bulges_bs_v1.5_10yrs agnddf_v1.5_10yrs
85 85


#### save the data in the order in which to save things

In [10]:
data.keys()

dict_keys(['yr1_limi24.65', 'yr3_limi25.25', 'yr6_limi25.619999999999997', 'yr10_limi25.9'])

In [11]:
for yr_lim_tag in data:
    for colname in data[yr_lim_tag]:
        if colname != 'dbname':
             df_in_order[colname] = data[yr_lim_tag][colname].values[ind]

In [12]:
df_in_order

Unnamed: 0,name,Y1 effective survey area,median Y1 $i$-band coadded depth in effective survey area,std in Y1 $i$-band coadded depth in effective survey area,Y3 effective survey area,median Y3 $i$-band coadded depth in effective survey area,std in Y3 $i$-band coadded depth in effective survey area,Y6 effective survey area,median Y6 $i$-band coadded depth in effective survey area,std in Y6 $i$-band coadded depth in effective survey area,Y10 effective survey area,median Y10 $i$-band coadded depth in effective survey area,std in Y10 $i$-band coadded depth in effective survey area
0,agnddf_v1.5_10yrs,15351.10,25.12,0.16,15374.29,25.72,0.14,15282.17,26.07,0.13,15263.18,26.33,0.13
1,alt_dust_v1.5_10yrs,17050.93,25.00,0.17,17670.23,25.65,0.15,17522.46,26.00,0.14,17407.74,26.26,0.14
2,alt_roll_mod2_dust_sdf_0.20_v1.5_10yrs,17087.60,24.99,0.16,16583.87,25.56,0.17,16522.49,25.93,0.15,16887.06,26.21,0.13
3,baseline_2snaps_v1.5_10yrs,15023.62,25.04,0.15,15227.41,25.66,0.13,15134.41,26.00,0.12,15097.53,26.26,0.12
4,baseline_v1.5_10yrs,15297.02,25.12,0.16,15329.49,25.72,0.13,15256.94,26.06,0.12,15238.11,26.32,0.12
...,...,...,...,...,...,...,...,...,...,...,...,...,...
80,wfd_depth_scale0.90_v1.5_10yrs,15228.04,25.10,0.16,15332.06,25.73,0.13,15282.02,26.07,0.12,15250.96,26.33,0.12
81,wfd_depth_scale0.95_noddf_v1.5_10yrs,15306.04,25.16,0.17,15347.90,25.78,0.14,15309.14,26.12,0.13,15280.81,26.39,0.13
82,wfd_depth_scale0.95_v1.5_10yrs,15201.81,25.11,0.16,15318.84,25.74,0.13,15256.00,26.08,0.13,15229.87,26.35,0.12
83,wfd_depth_scale0.99_noddf_v1.5_10yrs,15341.97,25.17,0.17,15356.14,25.80,0.15,15313.07,26.15,0.14,15293.24,26.42,0.13


In [13]:
cols = df_in_order.columns.tolist()
cols

['name',
 'Y1 effective survey area ',
 'median Y1 $i$-band coadded depth in effective survey area',
 'std in Y1 $i$-band coadded depth in effective survey area',
 'Y3 effective survey area ',
 'median Y3 $i$-band coadded depth in effective survey area',
 'std in Y3 $i$-band coadded depth in effective survey area',
 'Y6 effective survey area ',
 'median Y6 $i$-band coadded depth in effective survey area',
 'std in Y6 $i$-band coadded depth in effective survey area',
 'Y10 effective survey area ',
 'median Y10 $i$-band coadded depth in effective survey area',
 'std in Y10 $i$-band coadded depth in effective survey area']

#### realized that there's too much finagling when sharing data so its best to reorder columns to make sure the order of the columns is the same.

In [14]:
new_cols = ['name',
            'median Y1 $i$-band coadded depth in effective survey area',
            'median Y3 $i$-band coadded depth in effective survey area',
            'median Y6 $i$-band coadded depth in effective survey area',
            'median Y10 $i$-band coadded depth in effective survey area',
            'std in Y1 $i$-band coadded depth in effective survey area',
            'std in Y3 $i$-band coadded depth in effective survey area',
            'std in Y6 $i$-band coadded depth in effective survey area',
            'std in Y10 $i$-band coadded depth in effective survey area',
            'Y1 effective survey area ',
            'Y3 effective survey area ',
            'Y6 effective survey area ',
            'Y10 effective survey area ',]
new_cols

['name',
 'median Y1 $i$-band coadded depth in effective survey area',
 'median Y3 $i$-band coadded depth in effective survey area',
 'median Y6 $i$-band coadded depth in effective survey area',
 'median Y10 $i$-band coadded depth in effective survey area',
 'std in Y1 $i$-band coadded depth in effective survey area',
 'std in Y3 $i$-band coadded depth in effective survey area',
 'std in Y6 $i$-band coadded depth in effective survey area',
 'std in Y10 $i$-band coadded depth in effective survey area',
 'Y1 effective survey area ',
 'Y3 effective survey area ',
 'Y6 effective survey area ',
 'Y10 effective survey area ']

In [15]:
df_in_order = df_in_order[new_cols]
df_in_order

Unnamed: 0,name,median Y1 $i$-band coadded depth in effective survey area,median Y3 $i$-band coadded depth in effective survey area,median Y6 $i$-band coadded depth in effective survey area,median Y10 $i$-band coadded depth in effective survey area,std in Y1 $i$-band coadded depth in effective survey area,std in Y3 $i$-band coadded depth in effective survey area,std in Y6 $i$-band coadded depth in effective survey area,std in Y10 $i$-band coadded depth in effective survey area,Y1 effective survey area,Y3 effective survey area,Y6 effective survey area,Y10 effective survey area
0,agnddf_v1.5_10yrs,25.12,25.72,26.07,26.33,0.16,0.14,0.13,0.13,15351.10,15374.29,15282.17,15263.18
1,alt_dust_v1.5_10yrs,25.00,25.65,26.00,26.26,0.17,0.15,0.14,0.14,17050.93,17670.23,17522.46,17407.74
2,alt_roll_mod2_dust_sdf_0.20_v1.5_10yrs,24.99,25.56,25.93,26.21,0.16,0.17,0.15,0.13,17087.60,16583.87,16522.49,16887.06
3,baseline_2snaps_v1.5_10yrs,25.04,25.66,26.00,26.26,0.15,0.13,0.12,0.12,15023.62,15227.41,15134.41,15097.53
4,baseline_v1.5_10yrs,25.12,25.72,26.06,26.32,0.16,0.13,0.12,0.12,15297.02,15329.49,15256.94,15238.11
...,...,...,...,...,...,...,...,...,...,...,...,...,...
80,wfd_depth_scale0.90_v1.5_10yrs,25.10,25.73,26.07,26.33,0.16,0.13,0.12,0.12,15228.04,15332.06,15282.02,15250.96
81,wfd_depth_scale0.95_noddf_v1.5_10yrs,25.16,25.78,26.12,26.39,0.17,0.14,0.13,0.13,15306.04,15347.90,15309.14,15280.81
82,wfd_depth_scale0.95_v1.5_10yrs,25.11,25.74,26.08,26.35,0.16,0.13,0.13,0.12,15201.81,15318.84,15256.00,15229.87
83,wfd_depth_scale0.99_noddf_v1.5_10yrs,25.17,25.80,26.15,26.42,0.17,0.15,0.14,0.13,15341.97,15356.14,15313.07,15293.24


In [16]:
# save the metrics
fname = 'static_metrics.csv'
df_in_order.to_csv('%s/%s' % (outdir, fname), index=False)

#### now read and save lss-fom and ngal

In [17]:
# clean up the give_list dataframe
for key in df_in_order.keys():
    if key.__contains__('Y'):
        df_in_order = df_in_order.drop(key, axis=1)
        
df_in_order

Unnamed: 0,name
0,agnddf_v1.5_10yrs
1,alt_dust_v1.5_10yrs
2,alt_roll_mod2_dust_sdf_0.20_v1.5_10yrs
3,baseline_2snaps_v1.5_10yrs
4,baseline_v1.5_10yrs
...,...
80,wfd_depth_scale0.90_v1.5_10yrs
81,wfd_depth_scale0.95_noddf_v1.5_10yrs
82,wfd_depth_scale0.95_v1.5_10yrs
83,wfd_depth_scale0.99_noddf_v1.5_10yrs


#### read in the lss fom values

In [18]:
data_dir_base = '/global/cscratch1/sd/awan/lsst_output/post_wp_output_v1.5_-0.1cuts/lss_fom/'

In [19]:
data_dir = '%s/os_bias_overplots/' % data_dir_base
[f for f in os.listdir(data_dir) if f.__contains__('fomdata')] 

['2020-12-01_fomdata_Y10_i<25.2_snfom_egfootprint_85cadences_0.66<z<1.0_th-r<25.60_100<ell<300_logtcuts.csv',
 '2020-12-01_fomdata_Y1_i<23.95_snfom_egfootprint_85cadences_0.66<z<1.0_th-r<24.35_100<ell<300_logtcuts.csv']

In [20]:
fname = [f for f in os.listdir(data_dir) if f.__contains__('fomdata') if f.__contains__('_Y1_')][0]
print(fname)
data_file = '%s/%s' % (data_dir, fname)
lss_fom = pd.read_csv(data_file)

# find the indices to get the order
ind = []
for db in df_in_order['name']:
    ind_temp = np.where( lss_fom['dbname'].values == db.split('.db')[0] )[0]

    if len(ind_temp) == 0:
        print('%s not found in data dict' % db)
        #ind_temp = np.array([ 0 ])
        
    if ind_temp in ind:
        print('%s ind already in. %s' % (ind_temp, db))
    ind +=  list( ind_temp )
    
ind = np.array(ind)
print(ind)

df_in_order['Y1 lss fom'] = lss_fom['FoM'].values[ind]

2020-12-01_fomdata_Y1_i<23.95_snfom_egfootprint_85cadences_0.66<z<1.0_th-r<24.35_100<ell<300_logtcuts.csv
[56 61 60 27  2 28  7 71 43  1 81 31 32 24  3 67 76 68 22 38 82 48 47 10
 54 84 11 70 62 16 41 63  8 18 45 30 12 55 17 72 34 25 33 40 69 50 74 37
 53 52 58 64 79 29 75 42 23 19 35  9 49  0 57 78 14  6 77 73 83 66 15 65
 21 51  5 26 20 46 80 36 44 39 13  4 59]


In [21]:
fname = [f for f in os.listdir(data_dir) if f.__contains__('fomdata') if f.__contains__('_Y10_')][0]
print(fname)
data_file = '%s/%s' % (data_dir, fname)
lss_fom = pd.read_csv(data_file)

# find the indices to get the order
ind = []
for db in df_in_order['name']:
    ind_temp = np.where( lss_fom['dbname'].values == db.split('.db')[0] )[0]

    if len(ind_temp) == 0:
        print('%s not found in data dict' % db)
        #ind_temp = np.array([ 0 ])
        
    if ind_temp in ind:
        print('%s ind already in. %s' % (ind_temp, db))
    ind +=  list( ind_temp )
    
ind = np.array(ind)
print(ind)

df_in_order['Y10 lss fom'] = lss_fom['FoM'].values[ind]

2020-12-01_fomdata_Y10_i<25.2_snfom_egfootprint_85cadences_0.66<z<1.0_th-r<25.60_100<ell<300_logtcuts.csv
[56 61 60 27  2 28  7 71 43  1 81 31 32 24  3 67 76 68 22 38 82 48 47 10
 54 84 11 70 62 16 41 63  8 18 45 30 12 55 17 72 34 25 33 40 69 50 74 37
 53 52 58 64 79 29 75 42 23 19 35  9 49  0 57 78 14  6 77 73 83 66 15 65
 21 51  5 26 20 46 80 36 44 39 13  4 59]


In [22]:
df_in_order


Unnamed: 0,name,Y1 lss fom,Y10 lss fom
0,agnddf_v1.5_10yrs,0.943630,0.980773
1,alt_dust_v1.5_10yrs,0.966895,1.045453
2,alt_roll_mod2_dust_sdf_0.20_v1.5_10yrs,0.968856,1.031297
3,baseline_2snaps_v1.5_10yrs,0.917741,0.975307
4,baseline_v1.5_10yrs,0.937553,0.980542
...,...,...,...
80,wfd_depth_scale0.90_v1.5_10yrs,0.936639,0.980681
81,wfd_depth_scale0.95_noddf_v1.5_10yrs,0.942152,0.982210
82,wfd_depth_scale0.95_v1.5_10yrs,0.937664,0.980511
83,wfd_depth_scale0.99_noddf_v1.5_10yrs,0.940929,0.982148


#### now read in the ngal values

In [23]:
ngal_data_dir = data_dir_base

ngal_data = {}
for yr in [1, 10]:
    ngal_data[yr] =  np.zeros(len(df_in_order.name))
    folders = [f for f in os.listdir( ngal_data_dir ) if f.__contains__('_Y%s_' % yr) and f.__contains__('i<') ]
    for i, folder in enumerate( folders ):
        out = pd.read_csv('%s/%s/ngal-final_total/ngal_NoDither.txt' % (ngal_data_dir, folder), header=None, delimiter=' ') 
        dbname, ngal = out[0].values[0], out[1].values[0]
        ind = np.where( df_in_order['name'].values == '%s' % dbname)[0]
        ngal_data[yr][ind] = ngal
    print('read in %s folders for Y%s' % (i+1, yr) )

read in 85 folders for Y1
read in 85 folders for Y10


In [24]:
for yr in ngal_data:
    df_in_order['Y%s Ngal (0.66<z<1.0)' % yr] = ngal_data[yr]

In [25]:
df_in_order

Unnamed: 0,name,Y1 lss fom,Y10 lss fom,Y1 Ngal (0.66<z<1.0),Y10 Ngal (0.66<z<1.0)
0,agnddf_v1.5_10yrs,0.943630,0.980773,449000000.0,9.560000e+08
1,alt_dust_v1.5_10yrs,0.966895,1.045453,497000000.0,1.090000e+09
2,alt_roll_mod2_dust_sdf_0.20_v1.5_10yrs,0.968856,1.031297,498000000.0,1.050000e+09
3,baseline_2snaps_v1.5_10yrs,0.917741,0.975307,438000000.0,9.440000e+08
4,baseline_v1.5_10yrs,0.937553,0.980542,447000000.0,9.540000e+08
...,...,...,...,...,...
80,wfd_depth_scale0.90_v1.5_10yrs,0.936639,0.980681,445000000.0,9.550000e+08
81,wfd_depth_scale0.95_noddf_v1.5_10yrs,0.942152,0.982210,448000000.0,9.590000e+08
82,wfd_depth_scale0.95_v1.5_10yrs,0.937664,0.980511,444000000.0,9.540000e+08
83,wfd_depth_scale0.99_noddf_v1.5_10yrs,0.940929,0.982148,449000000.0,9.600000e+08


In [26]:
fname = 'lss_metrics.csv'
df_in_order.to_csv('%s/%s' % (outdir, fname), index=False)