In [1]:
import os
import shutil
import logging
from tqdm import tqdm
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Configure the root logger to emit records at INFO level and above, then
# create a logger named after the current module (``__name__``).
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

import numpy as np
import bilby

import sifce.parser as sparse

In [2]:
# Parse the configuration file "test_config.cfg" with sifce's parser module.
# The call returns two values, unpacked here as `arguments` (consumed by the
# next cell) and `parser` (not used in the visible cells).
# NOTE(review): the path is relative — presumably resolved against the
# notebook's working directory; confirm.
arguments, parser = sparse.parse(config_file="test_config.cfg")

In [3]:
# Build the simulation set from the parsed configuration arguments.
simset = sparse.construct_simulationset_from_parsed_arguments(arguments)
# Override the 'luminosity_distance' entry of the distribution with a bilby
# delta-function prior peaked at 1, so sampling it always yields that value.
# NOTE(review): the unit of this distance is not visible here (presumably
# Mpc) — confirm.
simset.distribution['luminosity_distance'] = bilby.core.prior.DeltaFunction(1)

In [4]:
# Number of samples requested from the distribution.
n_int = 10000
# NOTE(review): presumably resets simset.simulations_dataframe to an empty
# frame before sampling — confirm against the SimulationSet implementation.
simset.make_clean_simulations_dataframe()
# Draw n_int samples from the distribution.
# NOTE(review): no random seed is set in the visible cells, so the draws
# presumably differ between runs — seed explicitly if reproducibility matters.
simset.sample_distribution(n_int)

In [5]:
# Shrink the dataframe's memory footprint column by column:
#   * columns whose name contains "snr" (except "reference_snr_index") and the
#     "luminosity_distance" column are stored as float32;
#   * every other column is stored as a pandas categorical.
sim_df = simset.simulations_dataframe
for column in sim_df.keys():
    is_float_column = (
        ("snr" in column or column == "luminosity_distance")
        and column != "reference_snr_index"
    )
    target_dtype = np.float32 if is_float_column else "category"
    sim_df[column] = sim_df[column].astype(target_dtype)

# Zero-row frame with the same columns and dtypes, passed to later cells as
# the `dummy_df` argument.
dummy_df = sim_df.drop(sim_df.index)

In [6]:
# For every row of the simulations dataframe, call
# simset.calc_snrs_sky(row, 5000, average=True, dummy_df=dummy_df) and keep the
# returned object; the results are concatenated in a later cell.
# NOTE(review): 5000 is presumably the number of sky samples drawn per row —
# confirm against calc_snrs_sky and consider naming the constant.
scatter_dfs = []
# iterrows() is a generator with no length, so tqdm needs an explicit `total`
# to render a percentage and an ETA instead of a bare iteration counter.
for idx, row in tqdm(
    simset.simulations_dataframe.iterrows(),
    total=len(simset.simulations_dataframe),
):
    scatter_dfs.append(
        simset.calc_snrs_sky(row, 5000, average=True, dummy_df=dummy_df)
    )

10000it [04:06, 40.60it/s]


In [7]:
# for key in scatter_dfs[0].keys():
#     category_list = []
#     for df in scatter_dfs:
#         series = df[key]
#         if series.dtype == 'category':
#             category_list += [series]
#     if category_list != []:
#         key_category = pd.api.types.union_categoricals(category_list)
#         for df in scatter_dfs:
#             df[key] = pd.Categorical(df[key], categories = key_category.categories)

In [8]:
# Stack the per-row results into a single dataframe with a fresh 0..N-1 index
# and store it on the simulation set, replacing the previous dataframe.
simset.simulations_dataframe = pd.concat(scatter_dfs, ignore_index=True)
# Print the combined frame (pandas truncates the text repr) and its
# per-column memory usage in bytes; deep=True also counts the contents of
# object-dtype columns.
print(simset.simulations_dataframe)
print(simset.simulations_dataframe.memory_usage(deep=True))

     mass_1 mass_2   spin_1x   spin_1y   spin_1z   spin_2x   spin_2y  \
0      40.0   40.0 -0.400483  0.495246  0.230451 -0.160854  0.307652   
1      40.0   40.0  0.630849 -0.087811  0.165885 -0.034368  0.020622   
2      40.0   40.0 -0.320622  0.124728  0.046654 -0.291738  0.243959   
3      40.0   40.0 -0.016529  0.283064 -0.005669 -0.347821  0.225340   
4      40.0   40.0 -0.218271 -0.440790 -0.095947  0.155326  0.220148   
...     ...    ...       ...       ...       ...       ...       ...   
9995   40.0   40.0 -0.007520  0.151260  0.031651 -0.015540  0.078699   
9996   40.0   40.0  0.372832  0.134106 -0.010933 -0.416596 -0.178943   
9997   40.0   40.0 -0.045167 -0.220246  0.175573 -0.447083 -0.525640   
9998   40.0   40.0  0.026618  0.058266 -0.004380 -0.206619  0.779992   
9999   40.0   40.0 -0.441281 -0.473925 -0.086226  0.304739  0.276564   

       spin_2z  theta_jn     phase  ...  cal_idx    opt_snr_H1     mf_snr_H1  \
0     0.642910  0.325499  5.180014  ...      NaN  15284

In [9]:
# num_samples = np.linspace(50, 5000, 200)

# means = np.empty((n_int, num_samples.shape[0]))
              
# for j in range(n_int):
#     for i, num in enumerate(num_samples):
#         intrinsic_separated_df = simset.simulations_dataframe.loc[simset.simulations_dataframe['intrinsic_index'] == j]
#         opt_snrs_subset = intrinsic_separated_df.head(int(num))['opt_snr_net']
#         opt_snr_mean = np.mean(opt_snrs_subset.values)
#         means[j, i] = opt_snr_mean

# deviations = np.empty((n_int, num_samples.shape[0]))
# for j in range(n_int):
#     deviations[j, :] = (means[j, :] - means[j, -1]) / means[j, :]

In [10]:
# plt.figure(figsize=(16, 8))
# for j in range(n_int):
#     plt.plot(num_samples, deviations[j], label=f"Intrinsic Label {j}")
# plt.ylim(-0.1, 0.1)
# plt.legend()
# plt.title("Fractional Deviation of SNR Mean for Subset vs Full Set")
# plt.xlabel("Number Samples in Subset")
# plt.ylabel("Fractional Deviation")
# plt.show()


In [11]:


# Call simset.sample_distances(row, dummy_df=dummy_df) on rows of the
# simulations dataframe and collect the returned objects for concatenation in
# the next cell.
distance_scale_dfs = []
for idx, row in tqdm(simset.simulations_dataframe.iterrows()):
    distance_scale_dfs += [simset.sample_distances(row, dummy_df=dummy_df)]
    # NOTE(review): this unconditional break exits after the first row, so only
    # one row is ever processed and the list holds a single element. It looks
    # like a debugging leftover — confirm whether the full loop is intended
    # (and whether its output fits in memory) before removing it.
    break

0it [00:00, ?it/s]


In [12]:
# Concatenate the collected distance-sampled results into one dataframe with a
# fresh 0..N-1 index and store it on the simulation set, replacing the
# previous dataframe.
simset.simulations_dataframe = pd.concat(distance_scale_dfs, ignore_index=True)

In [13]:
# Display the per-column memory usage in bytes (the index is listed first);
# deep=True also counts the contents of object-dtype columns.
simset.simulations_dataframe.memory_usage(deep=True)

Index                      128
mass_1                    1116
mass_2                    1116
spin_1x                 346232
spin_1y                 346232
spin_1z                 346232
spin_2x                 346232
spin_2y                 346232
spin_2z                 346232
theta_jn                346232
phase                   346232
ra                      346232
dec                     346232
psi                     346232
geocent_time            346232
luminosity_distance       8000
redshift                  1108
chirp_mass                1108
total_mass                1108
mass_ratio                1108
symmetric_mass_ratio      1108
mass_1_source             1108
mass_2_source             1108
chirp_mass_source         1108
total_mass_source         1108
a_1                     346232
a_2                     346232
tilt_1                  346232
tilt_2                  346232
phi_12                  346232
phi_jl                  346232
reference_frequency       1116
intrinsi