In [1]:
from pathlib import Path
from itertools import product

import numpy as np
import healpy as hp
from sklearn.decomposition import PCA

import matplotlib.pyplot as plt
import matplotlib.transforms as transforms

from tqdm import tqdm  # For progress bars

from handle_data import get_map_dtype, get_planck_obs_data, get_planck_noise_data

In [2]:
import logging

In [3]:
# Show DEBUG-level messages from the logger named "handle_data"
# (the module the data-access helpers are imported from).
logger = logging.getLogger(name="handle_data")
logger.setLevel(level=logging.DEBUG)

In [4]:
# Root of the local CMB data store. Note that it carries its own trailing slash.
DATA_ROOT = "/data/jim/CMB_Data/"
# DATA_ROOT already ends in "/", so the sub-paths are appended without a
# second separator (avoids ".../CMB_Data//Assets/..." style paths).
ASSETS_DIRECTORY = f"{DATA_ROOT}Assets/Planck/"
PLANCK_NOISE_DIR = f"{DATA_ROOT}Planck_Noise/"

# Detector labels, in GHz, that are processed by this notebook
DETECTORS = [30, 44, 70, 100, 143, 217, 353, 545, 857]
# Number of noise realizations used per detector
N_PLANCK_SIMS = 50

In [5]:
def get_lmax_for_nside(nside):
    """Return the maximum multipole (ell) used for a map of the given nside.

    The relation lmax = 3*nside - 1 is a choice made by this notebook and
    should be treated as a tunable parameter.
    """
    lmax = nside * 3 - 1
    return lmax

# Setup

# Planck Sims

In [None]:
# Request every noise realization for every detector up front.
# Only the call itself matters here; the returned filename is not used.
# NOTE(review): presumably get_planck_noise_data fetches/caches the map when
# it is missing locally -- confirm against handle_data.
for det in DETECTORS:
    for sim_num in range(N_PLANCK_SIMS):
        src_map_fn = get_planck_noise_data(
            detector=det,
            assets_directory=ASSETS_DIRECTORY,
            realization=sim_num,
            progress=True,
        )
print("All maps acquired!")

In [13]:
def get_ps_data(detector):
    """Fit and save a PCA summary of the noise power spectra for one detector.

    For each of the N_PLANCK_SIMS noise realizations the map is read, scaled
    by 1e6, and its power spectrum is computed with ``hp.anafast`` up to
    ``lmax = get_lmax_for_nside(nside)``. A PCA is then fit to the log10 of
    the stacked spectra, and its mean, components and explained variance are
    written to ``noise_pca_{detector}GHz.npz`` (a relative path, so it lands
    in the current working directory).

    Parameters
    ----------
    detector : int
        Detector label. 30, 44 and 70 are processed with nside 1024; every
        other value is processed with nside 2048.

    Returns
    -------
    None
        The results are only written to disk.
    """
    nside = 1024 if detector in (30, 44, 70) else 2048
    lmax = get_lmax_for_nside(nside)  # 3*nside - 1, see helper above

    def _realization_cl(realization):
        # Power spectrum of a single noise realization for this detector
        map_fn = get_planck_noise_data(detector=detector,
                                       assets_directory=ASSETS_DIRECTORY,
                                       realization=realization,
                                       progress=True)
        # NOTE(review): the 1e6 factor is presumably a K -> uK conversion;
        # confirm the units of the maps on disk.
        scaled_map = hp.read_map(map_fn) * 1e6
        return hp.anafast(scaled_map, lmax=lmax)

    # This is the slow part: power spectra for 100 maps at 100 GHz take ~50 minutes
    src_cls = [_realization_cl(i) for i in tqdm(range(N_PLANCK_SIMS))]

    # Work in log10 space; in linear space the low ells dominate the spectra
    log_src_cls = np.log10(src_cls)

    # There are too few maps to pin down the full distribution, and a full
    #   covariance matrix would be overkill anyway. PCA provides a compact
    #   summary of the directions carrying most of the variance.
    pca = PCA()
    pca.fit(log_src_cls)

    # Persist the mean, the components (eigenvectors) and the variance
    #   (eigenvalues); together they stand in for the full covariance matrix
    np.savez(f"noise_pca_{detector}GHz.npz",
             mean=pca.mean_,
             components=pca.components_,
             variance=pca.explained_variance_)

100%|██████████| 50/50 [23:19<00:00, 27.99s/it]


In [14]:
# Build and save the PCA summary for each detector in turn.
# Each call walks all N_PLANCK_SIMS realizations, so this cell is slow.
for det in DETECTORS:
    get_ps_data(detector=det)