In [None]:
import numpy as np
from scipy.stats import binned_statistic
from tqdm import tqdm, trange
import seaborn
import matplotlib.pyplot as plt
import os
import sys


# box = sys.argv[1]
# Name of the simulation box to process (hard-coded; the command-line
# variant is kept in the commented line above).
box = 'Box_n50_0_1400'
curr_run_fname = '/oak/stanford/orgs/kipac/aemulus/aemulus_nu/%s/'%(box)
rockstar_dir = curr_run_fname+'output/rockstar/'

# savelist.txt is read as whitespace-separated tokens. The context manager
# closes the handle even if the read raises.
with open(rockstar_dir+'savelist.txt', 'r') as f:
    savelist = f.read().split()

# Number of tokens found in the save list.
N_snapshots = len(savelist)


import pickle

In [None]:
# Per-box pickle with the N-vs-M data. The box name is recovered as the
# second-to-last '/'-separated component of curr_run_fname.
NvM_fname = '/oak/stanford/orgs/kipac/users/delon/aemulusnu_massfunction/'+curr_run_fname.split('/')[-2]+'_NvsM.pkl'
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load files from a trusted source.
with open(NvM_fname, 'rb') as NvM_f:
    NvMs = pickle.load(NvM_f)


In [None]:
# Split the loaded structure into two dicts sharing the keys of NvMs:
# one holding each entry's 'N' field, the other its 'bin_idx' field.
Ns, bin_idxs = (
    {key: NvMs[key][field] for key in NvMs}
    for field in ('N', 'bin_idx')
)


In [None]:
import random

In [None]:
# Total number of entries across all per-key arrays in Ns.
tot_n_bins = sum(len(counts) for counts in Ns.values())

In [None]:
# Offset of each key's first entry when the per-key arrays in Ns are laid
# end to end in iteration order.
displacement = {}
running_offset = 0
for a in Ns:
    displacement[a] = running_offset
    running_offset += len(Ns[a])

In [None]:
# Flatten the per-key arrays of Ns into one vector, key by key.
# The `for a in Ns` clause must come first: with the clauses reversed,
# `Ns[a]` is evaluated only once, against whatever `a` happens to be bound
# to in the enclosing scope, instead of once per key.
total_data = np.array([N_bin for a in Ns for N_bin in Ns[a]])

# Translate every per-key bin index into a position in total_data.
# Entries equal to 0 are dropped and the rest are shifted down by one
# (presumably 0 marks values below the first bin edge -- TODO confirm).
total_bin_idx = np.array([
    bin_idx + displacement[a] - 1
    for a in bin_idxs
    for bin_idx in bin_idxs[a]
    if bin_idx != 0
])

# Shuffle a copy: np.random.shuffle works in place, and total_bin_idx
# should keep its original order.
shuffled = total_bin_idx.copy()
np.random.shuffle(shuffled)
# NOTE(review): no random seed is set, so the partition differs between runs.

N_SUBS = 2**14
sample_size = len(shuffled) // N_SUBS  # Number of points in each subsample

# Cut the shuffled indices into N_SUBS consecutive, disjoint slices.
subsamples = []
for i in trange(N_SUBS):
    start_idx = i * sample_size
    # The last subsample absorbs the remainder of the integer division.
    end_idx = len(shuffled) if i == N_SUBS - 1 else start_idx + sample_size
    subsamples.append(shuffled[start_idx:end_idx])

In [None]:
# Show the length of the flattened index array and the sum of the
# flattened counts, one per line.
print(len(total_bin_idx), sum(total_data), sep='\n')

In [None]:
import numpy as np
from multiprocessing import Pool
from tqdm import tqdm

# Define a function for calculating jackknife estimator for a subsample
def calc_jackknife_estimator(subsample, data=None):
    """Return the outer product of the counts left after removing a subsample.

    Parameters
    ----------
    subsample : sequence of int
        Positions into ``data``. Every occurrence of a position removes
        one count from that entry, so repeated positions are counted
        once per occurrence.
    data : array_like, optional
        1-D vector of counts. When omitted, the module-level
        ``total_data`` is used, which keeps existing one-argument calls
        working.

    Returns
    -------
    numpy.ndarray
        ``np.outer(r, r)`` where ``r`` is ``data`` minus the per-position
        occurrence counts of ``subsample``; shape ``(len(data), len(data))``.
    """
    if data is None:
        data = total_data
    data = np.asarray(data)

    removed = np.zeros_like(data)
    positions = np.asarray(subsample)
    # An empty subsample removes nothing. Skipping it also avoids indexing
    # with an empty array whose dtype is not an integer type.
    if positions.size:
        # np.add.at is unbuffered, so duplicate positions accumulate
        # correctly, unlike `removed[positions] += 1`.
        np.add.at(removed, positions, 1)

    remaining = data - removed
    return np.outer(remaining, remaining)

# Running sum of the per-subsample outer products. It is a float array, so
# integer results from the workers are added without any dtype clash.
jackcov = np.zeros((tot_n_bins, tot_n_bins)) # Initialize jackknife estimator

# Define a function for parallel processing using multiprocessing
def process_subsample(subsample):
    """Pool map target: forward one subsample to calc_jackknife_estimator."""
    return calc_jackknife_estimator(subsample)

# Create a Pool of worker processes for multiprocessing
with Pool() as pool:
    # Add each result to the accumulator as soon as it arrives, instead of
    # first collecting every (tot_n_bins x tot_n_bins) matrix in a list.
    # This keeps only one result matrix alive at a time.
    partial_products = pool.imap_unordered(process_subsample, subsamples)
    for partial in tqdm(partial_products, total=len(subsamples)):
        jackcov += partial


In [None]:
# Step 4: Jackknife Estimator
# Rebinding instead of `/=` also works when jackcov holds integers, where
# in-place true division would raise.
# NOTE: re-running this cell without recomputing jackcov divides it again.
jackcov = jackcov / N_SUBS # Average outer products

# Step 5: Bias Correction
jackcov_bias = np.outer(total_data, total_data) # Calculate outer product of original histogram counts
bias = (N_SUBS-1)/N_SUBS * (jackcov - jackcov_bias) # Calculate bias

# Step 6: Unbiased Estimator
jackcov_unbiased = jackcov - bias # Subtract bias from jackknife estimator

print("Unbiased Estimator of Covariance Matrix between Mass Histogram Bins:")
print(jackcov_unbiased.shape)


jackknife_covs_fname = '/oak/stanford/orgs/kipac/users/delon/aemulusnu_massfunction/'+curr_run_fname.split('/')[-2]+'_jackknife_covs.pkl'
# NOTE(review): the object written is `jackcov` (the averaged outer
# products), not `jackcov_unbiased` -- confirm which one readers of this
# file expect.
# The context manager closes the file even if pickling raises.
with open(jackknife_covs_fname, 'wb') as jackknife_covs_f:
    pickle.dump(jackcov, jackknife_covs_f)

In [None]:
# Display the main diagonal of the averaged outer-product matrix.
np.diagonal(jackcov, offset=0)

In [None]:
# jackknife_covs_fname = '/oak/stanford/orgs/kipac/users/delon/aemulusnu_massfunction/'+curr_run_fname.split('/')[-2]+'_jackknife_covs.pkl'
# jackknife_covs_f = open(jackknife_covs_fname, 'wb')
# pickle.dump(jackknife, jackknife_covs_f)
# jackknife_covs_f.close()