In [29]:
import datetime
import json
import os
import pickle
import sys
from glob import glob
from pprint import pprint

import matplotlib
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
from hydra import initialize, compose
from matplotlib import colors
from tqdm import tqdm

# Set paths to various directories based on the machine this code is being executed on.
# Try the `config` directory first; if loading it fails, fall back to `../../config`.
try:
    with initialize(version_base=None, config_path='config'):
        config = compose(config_name='config.yaml')  # overrides=['machine=uzay']
except Exception:
    # `Exception` rather than a bare `except:` so that KeyboardInterrupt and SystemExit
    # propagate instead of silently triggering the fallback path.
    with initialize(version_base=None, config_path='../../config'):
        config = compose(config_name='config.yaml')  # overrides=['machine=uzay']

# pull the machine-specific directories out of the config, one name per line
machine = config.machine
array_dir = machine.array_dir
data_dir = machine.data_dir
figure_dir = machine.figure_dir
pickle_dir = machine.pickle_dir
repo_dir = machine.repo_dir

# enable use of local modules by making the repo directory importable
if repo_dir not in sys.path:
    sys.path.append(repo_dir)

# set matplotlib style from the style sheet shipped in the repo
plt.style.use(f'{repo_dir}/mejiro/mplstyle/science.mplstyle')

# deferred until after the `sys.path` update above so the local package can be found
from mejiro.utils import util

# survey settings as a plain dict (keys used below: 'area', 'runs', 'total_population')
survey_params = util.hydra_to_dict(config.survey)

In [30]:
pipeline_params = util.hydra_to_dict(config.pipeline)

# uncomment the override below to force the development pipeline directory
debugging = pipeline_params['debugging']
# debugging = True

# debugging runs use the configured pipeline directory with a `_dev` suffix
pipeline_dir = f'{config.machine.pipeline_dir}_dev' if debugging else config.machine.pipeline_dir

print(f'pipeline_dir: {pipeline_dir}')

pipeline_dir: /data/bwedig/mejiro/pipeline


# Detectable and candidate lens counts

Number of detectable strong lenses and of candidate strong lenses per square degree of simulated area.

In [31]:
# Combine the CSVs from each run (under the '00' subdirectory of the pipeline directory)
# into one CSV per population.
# NOTE: this rebinds `data_dir`, which was first set from `config.machine.data_dir`.
data_dir = os.path.join(pipeline_dir, '00')

# detectable population: remove any existing combined file, then rebuild it
# (presumably so a stale combined CSV is not folded back into itself — TODO confirm)
detectable_csv = os.path.join(data_dir, 'detectable_pop.csv')
util.delete_if_exists(detectable_csv)
detectable_df = util.combine_all_csvs(data_dir, 'detectable', detectable_csv)

# total (candidate) population: only built when the survey config asks for it,
# so `total_df` is undefined otherwise
if survey_params['total_population']:
    total_csv = os.path.join(data_dir, 'total_pop.csv')
    util.delete_if_exists(total_csv)
    total_df = util.combine_all_csvs(data_dir, 'total', total_csv)

Wrote combined CSV to /data/bwedig/mejiro/pipeline/00/detectable_pop.csv
Wrote combined CSV to /data/bwedig/mejiro/pipeline/00/total_pop.csv


In [32]:
# rows_with_inf_snr = detectable_df[np.isinf(detectable_df['snr'])]
# print(rows_with_inf_snr)

In [33]:
# Drop lenses whose SNR is infinite and report how many were discarded.
# NOTE: `detectable_df` is rebound here, so re-running this cell reports 0 removed rows.
rows_before_detectable = len(detectable_df)

infinite_snr = np.isinf(detectable_df['snr'])
detectable_df = detectable_df.loc[~infinite_snr]

rows_removed_detectable = rows_before_detectable - len(detectable_df)
print(f'Rows removed from detectable_df: {rows_removed_detectable}')

Rows removed from detectable_df: 50


In [34]:
# total simulated area (sq. deg.) = area of a single run x number of runs
survey_area, runs = survey_params['area'], survey_params['runs']
total_area = survey_area * runs

print(f'{runs} run(s) of {survey_area} sq. deg. each gives {total_area:.2f} sq. deg. total')

342 run(s) of 0.5 sq. deg. each gives 171.00 sq. deg. total


In [35]:
print(f'{len(detectable_df)} detectable strong lenses')

# candidate counts exist only when the total population was combined earlier
if survey_params['total_population']:
    print(f'{len(total_df)} total candidate strong lenses')

    num_detectable, num_candidates = len(detectable_df), len(total_df)
    fraction_detectable = num_detectable / num_candidates
    print(f'Fraction of candidate strong lenses that are detectable: {fraction_detectable:.5f}')

16214 detectable strong lenses
1952101 total candidate strong lenses
Fraction of candidate strong lenses that are detectable: 0.00831


In [36]:
# areal density of detectable lenses over the full simulated area
det_per_sq_deg = len(detectable_df.index) / total_area
print(f'Detectable strong lenses per sq. deg.: {det_per_sq_deg:.2f}')

# same density for the candidate population, when it was simulated
if survey_params['total_population']:
    total_per_sq_deg = len(total_df.index) / total_area
    print(f'Candidate strong lenses per sq. deg.: {round(total_per_sq_deg)}')

Detectable strong lenses per sq. deg.: 94.82
Candidate strong lenses per sq. deg.: 11416


In [37]:
# presumably the sky area covered by a single exposure, in sq. deg. — TODO confirm and move to config
area_per_exposure = 0.281

det_per_exposure = det_per_sq_deg * area_per_exposure
print(f'Detectable strong lenses per exposure: {det_per_exposure:.2f}')

Detectable strong lenses per exposure: 26.64


In [38]:
# lenses with SNR above this threshold are reported as "characterizable"
snr_threshold = 200
# presumably the sky area covered by a single exposure, in sq. deg. — TODO confirm and move to config
area_per_exposure = 0.281

is_high_snr = detectable_df['snr'] > snr_threshold
high_snr_df = detectable_df[is_high_snr]

# densities of the high-SNR subset, per sq. deg. and per exposure
high_snr_det_per_sq_deg = len(high_snr_df) / total_area
high_snr_det_per_exposure = high_snr_det_per_sq_deg * area_per_exposure

print(f'{len(high_snr_df)} characterizable strong lenses (SNR > {snr_threshold})')
print(f'{high_snr_det_per_sq_deg:.2f} characterizable per sq. deg.')
print(f'Characterizable strong lenses per exposure: {high_snr_det_per_exposure:.2f}')

48 characterizable strong lenses (SNR > 200)
0.28 characterizable per sq. deg.
Characterizable strong lenses per exposure: 0.08


In [39]:
# HLWAS
# Scale the per-sq.-deg. densities up to the survey area. The area used to be a bare
# 1700 repeated inside both f-strings; naming it once keeps the two forecasts in sync.
hlwas_area = 1700  # sq. deg.; presumably the HLWAS footprint — TODO confirm and move to config

print(f'{round(det_per_sq_deg * hlwas_area)} detectable strong lenses in HLWAS')
print(f'{round(high_snr_det_per_sq_deg * hlwas_area)} characterizable strong lenses in HLWAS')

161192 detectable strong lenses in HLWAS
477 characterizable strong lenses in HLWAS


# How much work are the various filters doing?

`filter_1` is Einstein radius and half-light radius, and `filter_2` is SNR. The other detectability criteria are combined into the difference between candidate and detectable.

In [40]:
# if survey_params['total_population']:
#     filters = util.unpickle_all(data_dir, 'filtered_sample_')

#     num_filter_1 = np.sum([d['num_filter_1'] for d in filters])
#     num_filter_2 = np.sum([d['num_filter_2'] for d in filters])

#     percent_filter_1 = num_filter_1 / len(total_df) * 100
#     percent_filter_2 = num_filter_2 / len(total_df) * 100

#     print(f'{num_filter_1} ({percent_filter_1:.2f}%) candidate strong lenses caught in filter 1')
#     print(f'{num_filter_2} ({percent_filter_2:.2f}%) candidate strong lenses caught in filter 2')

# Computation time

## Overall

In [41]:
# Load the execution-time record stored in the pipeline directory. It maps each script
# name (e.g. '00') to a duration string such as '0:21:26' or '3 days, 6:53:20'.
# `json` is imported with the other modules in the first cell rather than mid-notebook.
json_path = os.path.join(pipeline_dir, 'execution_times.json')

with open(json_path, 'r') as f:
    execution_times = json.load(f)

pprint(execution_times)

{'00': '3 days, 6:53:20',
 '01': '0:00:57',
 '02': '0:21:26',
 '03': '19:21:21',
 '04': '0:32:39',
 '05': '0:01:37'}


In [42]:
# Sum the execution time of every script, in seconds.
# Durations look like 'H:MM:SS', optionally prefixed with 'N day, ' or 'N days, '.
total_time = 0

for script_name, times in execution_times.items():
    hours, minutes, seconds = times.split(':')
    days = 0
    if 'day' in hours:
        # e.g. '3 days, 6' -> day_text='3 days', hours='6'
        day_text, hours = hours.split(', ')
        days = int(day_text.split()[0])
    total_time += (days * 24 + int(hours)) * 3600 + int(minutes) * 60 + int(seconds)

print(
    f'Total pipeline execution time: {total_time} seconds or {datetime.timedelta(seconds=total_time)} ({total_time / 3600:.2f} hours)')

Total pipeline execution time: 357080 seconds or 4 days, 3:11:20 (99.19 hours)


In [43]:
# Share of the total pipeline time spent in each script, as a formatted percentage.
# Durations look like 'H:MM:SS', optionally prefixed with 'N day, ' or 'N days, '.
percentage_dict = {}

for script_name, times in execution_times.items():
    hours, minutes, seconds = times.split(':')
    days = 0
    if 'day' in hours:
        # e.g. '3 days, 6' -> day_text='3 days', hours='6'
        day_text, hours = hours.split(', ')
        days = int(day_text.split()[0])
    script_seconds = (days * 24 + int(hours)) * 3600 + int(minutes) * 60 + int(seconds)

    percentage = script_seconds / total_time * 100
    percentage_dict[script_name] = f'{percentage:.2f}%'

pprint(percentage_dict)

{'00': '79.53%',
 '01': '0.02%',
 '02': '0.36%',
 '03': '19.51%',
 '04': '0.55%',
 '05': '0.03%'}


## Survey simulation

In [44]:
# Convert the duration recorded for script '00' to seconds.
# Durations look like 'H:MM:SS', optionally prefixed with 'N day, ' or 'N days, '.
hours, minutes, seconds = execution_times['00'].split(':')
days = 0
if 'day' in hours:
    # e.g. '3 days, 6' -> day_text='3 days', hours='6'
    day_text, hours = hours.split(', ')
    days = int(day_text.split()[0])
survey_sim_seconds = (days * 24 + int(hours)) * 3600 + int(minutes) * 60 + int(seconds)

# normalise by the total simulated area to get a per-square-degree cost
print(
    f'Survey simulation time per square degree: {survey_sim_seconds / total_area:.2f} seconds or {datetime.timedelta(seconds=round(survey_sim_seconds / total_area))} ({survey_sim_seconds / total_area / 60:.2f} minutes)')

Survey simulation time per square degree: 1660.82 seconds or 0:27:41 (27.68 minutes)


## Image simulation

In [45]:
# Total time spent in the image-simulation scripts, in seconds, and the cost per image.
# Durations look like 'H:MM:SS', optionally prefixed with 'N day, ' or 'N days, '.
total_image_sim = 0
image_sim_scripts = ['01', '02', '03', '04']

for script_name in image_sim_scripts:
    hours, minutes, seconds = execution_times[script_name].split(':')
    days = 0
    if 'day' in hours:
        # Split on ', ' so both '1 day, H' and 'N days, H' parse. The previous
        # split('day, ') found no separator in 'N days, H' and raised ValueError.
        day_text, hours = hours.split(', ')
        days = int(day_text.split()[0])
    total_image_sim += (days * 24 + int(hours)) * 3600 + int(minutes) * 60 + int(seconds)

# one image per detectable lens, so normalise by the number of detectable lenses
print(f'Image simulation time per image: {total_image_sim / len(detectable_df):.2f} seconds')

Image simulation time per image: 4.50 seconds


In [46]:
# Share of the image-simulation time spent in each image-simulation script.
# Durations look like 'H:MM:SS', optionally prefixed with 'N day, ' or 'N days, '.
image_sim_percentage_dict = {}

for script_name in image_sim_scripts:
    hours, minutes, seconds = execution_times[script_name].split(':')
    days = 0
    if 'day' in hours:
        # Split on ', ' so both '1 day, H' and 'N days, H' parse. The previous
        # split('day, ') found no separator in 'N days, H' and raised ValueError.
        day_text, hours = hours.split(', ')
        days = int(day_text.split()[0])
    script_seconds = (days * 24 + int(hours)) * 3600 + int(minutes) * 60 + int(seconds)

    percentage = script_seconds / total_image_sim * 100
    image_sim_percentage_dict[script_name] = f'{percentage:.2f}%'

pprint(image_sim_percentage_dict)

{'01': '0.08%', '02': '1.76%', '03': '95.48%', '04': '2.68%'}


# Subhalo statistics

In [47]:
# stats_list = util.unpickle_all(os.path.join(config.machine.dir_02, 'stats'), 'subhalo_stats_')

In [48]:
# original_einstein_radii = [d['original_einstein_radius'] for d in stats_list]
# adjusted_einstein_radii = [d['adjusted_einstein_radius'] for d in stats_list]
# percent_change_einstein_radii = [d['percent_change_einstein_radius'] for d in stats_list]
# effective_lensing_masses = [d['effective_lensing_mass'] for d in stats_list]
# adjusted_lensing_masses = [d['adjusted_lensing_mass'] for d in stats_list]
# percent_change_lensing_masses = [d['percent_change_lensing_mass'] for d in stats_list]
# total_masses_subhalos_within_einstein_radius = [d['total_mass_subhalos_within_einstein_radius'] for d in stats_list]
# total_subhalo_masses = [d['total_subhalo_mass'] for d in stats_list]
# percent_subhalo_masses_within_einstein_radius = [d['percent_subhalo_mass_within_einstein_radius'] for d in stats_list]

In [49]:
# f, ax = plt.subplots(1, 3, figsize=(12, 3))
# ax[0].hist(percent_change_einstein_radii)
# ax[0].set_xlabel('Percent Change in Einstein Radius')
# ax[0].set_ylabel('Number of Lenses')
# ax[1].hist(percent_change_lensing_masses)
# ax[1].set_xlabel('Percent Change in Lensing Mass')
# ax[1].set_ylabel('Number of Lenses')
# ax[2].hist(percent_subhalo_masses_within_einstein_radius)
# ax[2].set_xlabel('Percent of Subhalo Mass within Einstein Radius')
# ax[2].set_ylabel('Number of Lenses')
# plt.tight_layout()
# plt.show()

In [50]:
# mean = np.mean(percent_change_einstein_radii)
# stdev = np.std(percent_change_einstein_radii)

# print(f'Mean percent change in Einstein radius: {mean:.2f}')
# print(f'Standard deviation of percent change in Einstein radius: {stdev:.2f}')