### **2D Power Spectrum with Model Fits:**

This notebook features an example calculation of the 2D power spectrum using the `baseDC2_snapshot_z*_v0.1` catalogs (where `*` stands for the snapshot redshift), together with one- and two-parameter model fits.

#### **Import Files and Setup Configuration:**

Initializes cosmology parameters from the loaded catalog which will be used extensively throughout the notebook. Computes $\sigma_\chi$ defined as $\sigma_\chi = \frac{c}{H(z)}\sigma_z,$ and $\sigma_z \approx 0.05(1+z_{redshift})$.

In [None]:
import GCRCatalogs
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import pyccl
import sys

from scipy.constants import speed_of_light
from scipy.stats import chi2 as chi2func

sns.set(style='ticks')

%config IPCompleter.greedy = True
%config InlineBackend.figure_format = 'retina'

# Load Appropriate Catalog
# (swap in one of the commented-out lines to use a different snapshot)
# cat = GCRCatalogs.load_catalog("baseDC2_snapshot_z0.15_v0.1_small")
# cat = GCRCatalogs.load_catalog("baseDC2_snapshot_z0.15_v0.1")
cat = GCRCatalogs.load_catalog("baseDC2_snapshot_z1.01_v0.1")

# Global constants to be used in various computations.
# These names are read directly by the functions defined in later cells.
pi = np.pi
COSMO = cat.cosmology
Z_RED_SHIFT = cat.redshift
# CCL cosmology built from the same parameters as the catalog cosmology;
# used for the linear/non-linear matter power spectrum in the model fits
COSMO_CCL = pyccl.Cosmology(h=COSMO.h, sigma8=COSMO.sigma8,
                            Omega_g=COSMO.Ogamma0, Omega_k=COSMO.Ok0,
                            Omega_c=COSMO.Odm0, Omega_b=COSMO.Ob0,
                            n_s=COSMO.n_s, Neff=COSMO.Neff)

# Compute sigma chi: sigma_chi = c/H(z) * sigma_z with
# sigma_z = 0.05*(1 + z)
SIGMA_Z = 0.05*(1+Z_RED_SHIFT)
# NOTE(review): presumably H(z) in km/s/Mpc (the astropy default) -- confirm
H_Z = COSMO.H(Z_RED_SHIFT).value

# speed_of_light is in m/s, so dividing by 10**3 converts it to km/s;
# SIGMA_CHI is then in Mpc if H_Z is in km/s/Mpc
SIGMA_CHI = (speed_of_light/10**3)/H_Z*SIGMA_Z

#### **Load Position Data, Apply Preliminary Cuts, and Filter with Gaussian Kernel:**

Load the position data inside the user-defined box bounds and remove all galaxies with $r_{mag} > 24.5$. Then apply a Gaussian kernel to the z position data, centered at $\bar{z}$ with standard deviation $\sigma_\chi$, so that each galaxy is kept with probability $e^{-\frac{(z-\bar{z})^2}{2\sigma_\chi^2}}$.

**Small Catalog Limits\*:**

| X | Y | Z |
| --- | --- | --- |
| [0, 354.7] | [0, 425.0] | [0, 846.5] |

**Full Catalog Limits:**

| X | Y | Z |
| --- | --- | --- |
| [0, 4231.7] | [0, 4232.25] | [0, 4231.6] |

\* The small catalog has 40 position entries with X, Y, or Z around 4000 Mpc, but after removing these entries, the above boundaries are identified.

In [None]:
def load_galaxy_positions(cat, x_bounds, y_bounds, z_bounds, mag_cut):
    """
    Loads galaxy position data in Mpc from a specified catalog, restricted
    to a rectangular box, and keeps only the galaxies whose apparent r-band
    magnitude is below mag_cut

    The apparent magnitude is obtained by adding the distance modulus,
    evaluated at the redshift of the catalog that was passed in, to the
    absolute magnitude stored in the catalog.

    Parameters:
    cat (GCRCatalog): catalog to load data from
    x_bounds (float tuple): 2 element tuple with (x_min, x_max)
    y_bounds (float tuple): 2 element tuple with (y_min, y_max)
    z_bounds (float tuple): 2 element tuple with (z_min, z_max)
    mag_cut (float): apparent magnitude cut value (max value)

    Returns:
    x_data (float array): np array containing x positions of galaxies in Mpc
    y_data (float array): np array containing y positions of galaxies in Mpc
    z_data (float array): np array containing z positions of galaxies in Mpc
    """

    # Setup box bounds in Mpc
    min_x, max_x = x_bounds
    min_y, max_y = y_bounds
    min_z, max_z = z_bounds

    print("Loading Catalog Data: ")

    # The box cut is applied by the catalog reader through its filters
    cat_vals = cat.get_quantities(["position_x", "position_y", "position_z",
                                   "Mag_true_r_lsst_z0"],
                                  filters=["position_z > {}".format(min_z),
                                           "position_z < {}".format(max_z),
                                           "position_x < {}".format(max_x),
                                           "position_x > {}".format(min_x),
                                           "position_y < {}".format(max_y),
                                           "position_y > {}".format(min_y)])

    print("Applying Apparent Magnitude Cut: ")

    # Convert absolute to apparent magnitude. The redshift is taken from the
    # catalog argument itself rather than from a notebook-level global, so
    # the function stays correct for whichever catalog is passed in.
    r_Mag = cat_vals["Mag_true_r_lsst_z0"]
    r_mag = r_Mag+cat.cosmology.distmod(cat.redshift).value

    # Keep only the entries with apparent magnitude below mag_cut
    # (i.e. drop everything fainter than the cut)
    bright_enough = r_mag < mag_cut

    x_data = cat_vals["position_x"][bright_enough]
    y_data = cat_vals["position_y"][bright_enough]
    z_data = cat_vals["position_z"][bright_enough]

    return x_data, y_data, z_data


def mask_galaxy_positions(x_data, y_data, z_data, sigma_chi=None):
    """
    Applies a gaussian mask to randomly select galaxies based on their
    z position

    Each galaxy is kept with probability exp(-(z-z_bar)^2/(2*sigma_chi^2)),
    where z_bar is the mean of z_data. The selection draws from numpy's
    global random state, so results differ from call to call unless the
    seed is fixed by the caller.

    Parameters:
    x_data (float array): np array containing x positions of galaxies in Mpc
    y_data (float array): np array containing y positions of galaxies in Mpc
    z_data (float array): np array containing z positions of galaxies in Mpc
    sigma_chi (float, optional): standard deviation of the gaussian kernel
    in Mpc. Defaults to the notebook-level SIGMA_CHI when not given

    Returns:
    x_masked (float array): np array containing x positions of masked
    galaxies in Mpc
    y_masked (float array): np array containing y positions of masked
    galaxies in Mpc
    z_masked (float array): np array containing z positions of masked
    galaxies in Mpc
    """

    # Fall back to the global kernel width only when none was supplied
    if sigma_chi is None:
        sigma_chi = SIGMA_CHI

    # Apply Gaussian kernel: accept a galaxy when a uniform draw in [0, 1)
    # falls below the kernel value at its z position
    z_bar = np.mean(z_data)
    cutoffs = np.random.uniform(0, 1, len(z_data))
    keep_prob = np.exp(-(z_data-z_bar)**2/(2*sigma_chi**2))

    selected = cutoffs < keep_prob

    return x_data[selected], y_data[selected], z_data[selected]

# Load position data
# Box bounds are in Mpc; the z range is a 2000 Mpc slab of the box
x_bounds = (0, 4232)
y_bounds = (0, 4232)
z_bounds = (1116, 3116)
# Apparent r-band magnitude limit: only galaxies with r_mag below this
# value are kept
mag_cut = 24.5

x_data, y_data, z_data = load_galaxy_positions(cat, x_bounds, y_bounds,
                                               z_bounds, mag_cut)

# Apply Gaussian mask
# (random selection along z; the result changes from run to run)
x_masked, y_masked, z_masked = mask_galaxy_positions(x_data, y_data, z_data)

# Compute N_BAR (average number of galaxies per Mpc^2)
# The area is taken from the extent of the masked sample itself, not from
# x_bounds/y_bounds. 1/N_BAR is used later as the constant noise term
# added to the model power spectrum.
x_width = np.max(x_masked)-np.min(x_masked)
y_width = np.max(y_masked)-np.min(y_masked)
N_BAR = len(x_masked)/(x_width*y_width)

#### **Plot Loaded and Masked Position Data:**

Plot position data with an appropriately normalized, overlaid Gaussian, $e^{-\frac{(z-\bar{z})^2}{2\sigma_\chi^2}}$, indicating galaxy selection.

In [None]:
# Histogram the positions on a 2x3 grid: the top row shows the loaded
# sample, the bottom row the same sample after the Gaussian z-mask
fig = plt.figure(figsize=(10, 6.6))
n_bins = 75

# One row per panel, listed in subplot order:
# (values, title, x-axis label, extra keyword arguments for plt.hist)
hist_panels = (
    (x_data, "X Distribution", "X Coordinate [Mpc]", {"color": "navy"}),
    (y_data, "Y Distribution", "Y Coordinate [Mpc]", {}),
    (z_data, "Z Distribution", "Z Coordinate [Mpc]", {"color": "silver"}),
    (x_masked, "X Distribution (Mask)", "X Coordinate [Mpc]",
     {"color": "navy"}),
    (y_masked, "Y Distribution (Mask)", "Y Coordinate [Mpc]", {}),
    (z_masked, "Z Distribution (Mask)", "Z Coordinate [Mpc]",
     {"color": "silver"}),
)

for panel_num, panel in enumerate(hist_panels, start=1):
    panel_vals, panel_title, panel_xlabel, panel_kwargs = panel

    plt.subplot(2, 3, panel_num)
    plt.hist(panel_vals, bins=n_bins, alpha=0.5, **panel_kwargs)
    plt.title(panel_title, fontweight="bold")
    plt.xlabel(panel_xlabel)
    plt.ylabel("Entries ")

sigma_chi_str = r"$\sigma_\chi$ = {}".format(np.round(SIGMA_CHI, 1))

# Overlay the selection Gaussian on the last panel drawn (masked Z), which
# is still the current axes at this point
min_z, max_z = z_bounds

z_vals = np.linspace(min_z, max_z, 200)
z_bar = np.mean(z_data)

# Unit-area Gaussian centered on the mean z of the unmasked sample
gauss_norm = 1/(np.sqrt(2*pi)*SIGMA_CHI)
exp_z = gauss_norm*np.exp(-(z_vals-z_bar)**2/(2*SIGMA_CHI**2))

# Rescale from a probability density to expected counts per histogram bin,
# correcting for the part of the Gaussian that lies outside the z bounds
# (the correction treats the Gaussian as centered within the bounds)
bin_width = (max_z - min_z)/n_bins

std_offset = (max_z-min_z)/(2*SIGMA_CHI)
rel_area = scipy.stats.norm.cdf(std_offset)-scipy.stats.norm.cdf(-std_offset)
exp_z *= (bin_width*len(z_masked))/rel_area

plt.plot(z_vals, exp_z, '--', color='k')

plt.tight_layout()
plt.savefig("gaussian_kernel_position.png", dpi=300)

#### **Get Matter Power-Spectrum From Data:**

Code cell below defines a method for calculating the 2D power spectrum for x and y data with a specified resolution size *N* by computing the over-density field, 2-point correlation function, and then applying a 2D FFT. Additionally, a method is defined for averaging power spectrum values in bins of wavenumbers.

In [None]:
def calculate_pow_spec(x_data, y_data,  N):
    """
    Calculates the 2D power spectrum from data with N*N grid. Removes zero
    order Fourier mode.

    The galaxies are counted on an N*N grid covering the box
    [floor(min x), ceil(max x)] x [floor(min y), ceil(max y)], converted to
    an over-density field, Fourier transformed with a 2D FFT, and the
    squared modulus is normalised by (box area)/N^4.

    Parameters:
    x_data (np float array): Array containing x positions of galaxies in Mpc
    y_data (np float array): Array containing y positions of galaxies in Mpc
    N (int): Integer representing the grid size for the power spec calculation

    Returns:
    k_vals (np float array): array of length N*N-1 containing the wave
    numbers |k| of every Fourier mode (row-major order, zero mode dropped)
    in Mpc^-1
    p_k (np float array): array of length N*N-1 containing the returned
    power spectrum values in Mpc^2, in the same order as k_vals
    """

    # Box edges, snapped outward to whole Mpc
    x_min = np.floor(np.min(x_data))
    x_max = np.ceil(np.max(x_data))
    x_width = x_max-x_min

    y_min = np.floor(np.min(y_data))
    y_max = np.ceil(np.max(y_data))
    y_width = y_max-y_min

    # Initialize Grid. The binning range is given explicitly so that the
    # cells really have the size delta_x * delta_y used for the wave
    # numbers below (the default range would be the data min/max instead).
    grid_matrix = np.histogram2d(x_data, y_data, bins=N,
                                 range=[[x_min, x_max], [y_min, y_max]])[0]

    # Get the number of galaxies and the cell size along each axis
    n_g = len(x_data)

    delta_x = x_width/N
    delta_y = y_width/N

    # Convert grid to represent over_density
    p_bar = n_g/N**2  # No. galaxies expected per bin
    delta = grid_matrix/p_bar-1.0

    # Wave numbers along each grid axis. histogram2d puts x on axis 0 and
    # y on axis 1, so kx must vary along axis 0 and ky along axis 1 to line
    # up with the output of fft2.
    kx_axis = 2.0*np.pi*np.fft.fftfreq(N, d=delta_x)
    ky_axis = 2.0*np.pi*np.fft.fftfreq(N, d=delta_y)
    kx_mat, ky_mat = np.meshgrid(kx_axis, ky_axis, indexing="ij")

    # Construct matrix of wave numbers
    k_mat = np.sqrt(kx_mat**2+ky_mat**2)

    # Perform fourier transform
    delta_k = np.fft.fft2(delta)

    # Calculate power spectrum: |delta_k|^2 * area / N^4
    pow_spec = np.abs(delta_k)**2
    p_k = pow_spec.flatten()*(x_width*y_width)/(N**2*N**2)

    # Flatten the wave numbers in the same (row-major) order as p_k
    k_vals = k_mat.flatten()

    # Remove zero order mode on return (element [0, 0] is first when
    # flattened)
    return k_vals[1:], p_k[1:]


def average_pow_spec(k_vals, p_vals, n_bins):
    """
    Averages power spectrum into n_bins equal-width bins based on k

    The bins span the range from the smallest to the largest value in
    k_vals. Bins that contain no modes are dropped from all three outputs.

    Parameters:
    k_vals (np float array): array containing the unaveraged wave numbers in
    Mpc^-1
    p_vals (np float array): array containing the unaveraged power spectrum
    values in Mpc^2, in the same order as k_vals
    n_bins (int): number of equal-width k bins to average into

    Returns:
    averaged_k (np float array): array containing the centre wave number of
    each non-empty bin in Mpc^-1
    averaged_p (np float array): array containing the mean power spectrum
    value of each non-empty bin in Mpc^2
    n_modes (np int array): array containing the number of modes in each
    non-empty bin

    """

    k_vals = np.asarray(k_vals, dtype=float)
    p_vals = np.asarray(p_vals, dtype=float)

    # Count the modes per bin, then sum the power per bin on the very same
    # edges so that the two histograms are aligned
    n_modes, bin_edges = np.histogram(k_vals, bins=n_bins)
    p_sums, _ = np.histogram(k_vals, bins=bin_edges, weights=p_vals)

    # Centre of each bin = midpoint of its two edges
    bin_centres = 0.5*(bin_edges[:-1]+bin_edges[1:])

    # Remove bins with no modes (also avoids dividing by zero below)
    occupied = n_modes > 0
    averaged_k = bin_centres[occupied]
    n_modes = n_modes[occupied]
    averaged_p = p_sums[occupied]/n_modes

    return averaged_k, averaged_p, n_modes


def progressBar(cur_val, final_val):
    """
    Redraws a one-line text progress bar on standard output

    The line starts with a carriage return so that successive calls
    overwrite each other instead of scrolling.

    Parameters:
    cur_val (int/float): current iteration/value calculation is on
    final_val (int/float): final iteration/value that calculation will take
    """

    width = 20
    fraction = float(cur_val) / final_val

    # The '>' head takes one slot, hence one dash fewer than the filled part
    n_dashes = int(round(fraction * width)-1)
    bar = ('-' * n_dashes + '>').ljust(width)
    percent_done = int(round(fraction * 100))

    sys.stdout.write("\rProgress: [{0}] {1}%".format(bar, percent_done))
    sys.stdout.flush()

#### **Compute and Average Power Spectrum Over a Range of N:**

Calculate the power spectrum for a specified gridsize $N^2$ and then average into n_bins according to wavenumbers.

In [None]:
# Grid size: the power spectrum is computed on an N x N grid
N = 8192

# Power spectrum of the masked sample for every Fourier mode of the grid
# (zero mode removed by calculate_pow_spec)
k_bar_total, p_k_bar_total = calculate_pow_spec(x_masked, y_masked,  N)

# Re-average the per-mode values into N_avg bins of wavenumber; bins
# without any modes are dropped, so fewer than N_avg points may come back
N_avg = 1024
k_bar_avg, p_k_bar_avg, n_modes_avg = average_pow_spec(k_bar_total, p_k_bar_total,
                                                   N_avg)

#### **Define Power Spectrum Models**

The code cell below contains definitions for computing the 2D power spectrum for one and two bias parameter models defined as follows:

1. Define the power spectrum $P(k_\parallel, k_\perp)$ using the linear/non-linear matter power spectrum function using CCL, where $k = \sqrt{k_\parallel^2+k_\perp^2}$
2. Integrate over a range of values for $k_\parallel$ using the integral defined below to compute a theoretical estimate for the 2D matter power spectrum using a one or two bias parameter model
    * **Single Parameter Model:**
$$P(k_\perp) = \frac{b_1^2}{2\pi}\int\limits_{0}^{\infty}P(k_\parallel, k_\perp)e^{-k_\parallel^2\sigma_\chi^2}dk_\parallel+\frac{1}{\overline{n}}$$

    * **Two Parameter Model:**
$$P(k_\perp) = \frac{1}{2\pi}\int\limits_{0}^{\infty}(b_1+b_1'k)^2P(k_\parallel, k_\perp)e^{-k_\parallel^2\sigma_\chi^2}dk_\parallel+\frac{1}{\overline{n}}$$


In [None]:
def get_pow_2d(k_perp, b, is_linear):
    """
    Computes the theoretical 2d galaxy power spectrum with one or two parameter
    linear/non-linear model depending on input parameters

    For every k_perp the 3D matter power spectrum from CCL, multiplied by
    the squared bias and by exp(-(k_par*SIGMA_CHI)^2), is integrated over
    k_par from 0 to 0.3, scaled by 1/(2*pi), and the constant 1/N_BAR is
    added. Reads the notebook-level COSMO, COSMO_CCL, Z_RED_SHIFT,
    SIGMA_CHI and N_BAR.

    NOTE(review): the integral covers k_par >= 0 only while keeping the
    1/(2*pi) prefactor; confirm this normalisation is intended (any
    constant factor is absorbed into the fitted bias).
    NOTE(review): the upper limit 0.3 stands in for infinity and presumably
    relies on the Gaussian factor being negligible there for the SIGMA_CHI
    in use -- confirm when SIGMA_CHI changes.

    Parameters:
    k_perp (float array): array containing the wave numbers over which to
    compute the power spectrum
    b (float or list): float (or length 1 sequence) if single bias
    parameter, list/array [b1, b1'] if calculating with two parameter model
    is_linear (boolean): boolean representing whether or not to use the
    CCL linear MPS (alternative is non-linear MPS)

    Returns:
    p_theory (float array): array containing the theoretical power spectrum
    values associated with the inputed k_perp

    Raises:
    ValueError: if b does not hold exactly one or two parameters

    """

    # Imported here because the notebook only imports scipy.stats explicitly
    from scipy.integrate import quad

    # Accept a scalar, a list, or the array handed over by the optimizer
    bias = np.atleast_1d(np.asarray(b, dtype=float)).ravel()
    if bias.size not in (1, 2):
        raise ValueError("b must contain one or two bias parameters, "
                         "got {}".format(bias.size))
    two_param = bias.size == 2

    # Hoisted out of the integrand: neither depends on k
    if is_linear:
        matter_power = pyccl.linear_matter_power
    else:
        matter_power = pyccl.nonlin_matter_power
    scale_factor = COSMO.scale_factor(Z_RED_SHIFT)

    def integrand_func(k_par, k_perp):

        k = np.sqrt(k_perp**2+k_par**2)
        P_k = matter_power(COSMO_CCL, k, scale_factor)

        if two_param:
            bias_sq = (bias[0]+bias[1]*k)**2
        else:
            bias_sq = bias[0]**2

        return bias_sq*P_k*np.exp(-(k_par*SIGMA_CHI)**2)

    def integrate_one(k_perp_val):
        return 1/(2*np.pi)*quad(integrand_func, 0, 0.3,
                                args=(k_perp_val,))[0]

    # otypes is given so that an empty k_perp yields an empty result
    # instead of an error
    integrator = np.vectorize(integrate_one, otypes=[float])

    p_theory = integrator(k_perp)+1/N_BAR

    return p_theory

#### **Fit Power Spectrum using MLE:**

Determines the optimal parameter/parameters to fit the above defined models by maximum likelihood estimation given the following likelihood definition
* $\log{(\mathcal{L})} = -\left(\frac{\chi^2}{2}+ \sum\limits_{i=1}^{nbins} \log{(\sigma_i)}\right)$
* $ \chi^2= \sum \frac{(P_{theory}(k_\perp) - P_{data}(k_\perp))^2}{\sigma^2}$
* $\sigma = P_{theory}(k_\perp)\sqrt{\frac{2}{N_{modes}}}$

Includes a function to fit up to various values of $k_{max}$ to quantify the effectiveness of the model over larger intervals.

In [None]:
def neg_log_like(b, k_perp, p_k, n_modes, is_linear):
    """
    Evaluates the negative log-likelihood of the binned power spectrum for
    a given set of bias parameters; this is the objective minimised when
    fitting the model

    The per-bin variance is taken from the model itself,
    2*P_theory^2/n_modes, and the result is chi^2/2 plus the sum of
    log(sigma) over all bins.

    Parameters:
    b (float or list): float if single bias parameter, list [b1, b1'] if
    calculating with two parameter model
    k_perp (float array): array containing the perpendicular wave numbers over which to
    compute the power spectrum
    p_k (float array): array containing power spectrum data associated with k_perp
    n_modes (int array): array containing number of Fourier modes in each k_perp bin
    is_linear (boolean): boolean representing whether or not to use the
    CCL linear MPS (alternative is non-linear MPS)

    Returns:
    neg_log_like (float): the negative of the log-likelihood of the data given the
    model specified by input parameters

    """

    model = get_pow_2d(k_perp, b, is_linear)

    # Variance predicted by the model for each bin
    variance = 2*model**2/n_modes

    chi_sq_total = np.sum((p_k-model)**2/variance)
    log_sigma_total = np.sum(np.log(np.sqrt(variance)))

    return chi_sq_total/2+log_sigma_total


def calc_chi_sq(b, k, p_k, n_modes, linear):
    """
    Returns the chi_sq values for a given bias parameter associated with linear
    or non-linear power spectrum for each data point associated with k

    Parameters:
    b (float or list): float if single bias parameter, list [b1, b1'] if
    calculating with two parameter model
    k (float array): array containing the perpendicular wave numbers over which to
    compute the power spectrum
    p_k (float array): array containing power spectrum data associated with k
    n_modes (int array): array containing number of Fourier modes in each k bin
    linear (boolean): boolean representing whether or not to use the
    CCL linear MPS (alternative is non-linear MPS)

    Returns:
    chi_sq (float array): array containing the computed chi_2 of each data point for
    input parameter specified model

    """

    # Use the model selected by the argument, not whatever notebook-level
    # is_linear happens to be set to
    p_theory = get_pow_2d(k, b, linear)

    # Compute theoretical variance
    var_t = 2*p_theory**2/n_modes

    chi_sq = (p_k-p_theory)**2/var_t

    return chi_sq

def fit_to_kmax(k_max_arr, k_perp, p_k, n_modes, b_guess, is_linear):
    """
    A function to fit data with specified model over a wavenumber interval up to
    a particular kmax and determine statistics of each fit (chi^2 cdf and reduced
    chi^2)

    For every k_max the data points with k_perp < k_max are selected and
    the negative log-likelihood is minimised with Nelder-Mead starting from
    b_guess. If no data point lies below a given k_max, the corresponding
    outputs are filled with NaN.

    NOTE(review): the chi^2 cdf and the reduced chi^2 use the number of
    data points as degrees of freedom, without subtracting the number of
    fitted parameters -- confirm this is intended.

    Parameters:
    k_max_arr (float array): array containing the maximum k_perps over which to compute
    the power spectrum on subsets of k_perp
    k_perp (float array): array containing the perpendicular wave numbers over which to
    compute the power spectrum
    p_k (float array): array containing power spectrum data associated with k_perp
    n_modes (int array): array containing number of Fourier modes in each k_perp bin
    b_guess (float or list): float if single bias parameter, list [b1, b1'] if
    calculating with two parameter model; used as the starting point of the
    minimisation and to specify how many parameters the model has
    is_linear (boolean): boolean representing whether or not to use the
    CCL linear MPS (alternative is non-linear MPS)

    Returns:
    b_arr (object array): array whose entries are the optimal parameter arrays for
    each fit up to k_max, e.g. for one parameter each entry is of the form [b1] and
    for two parameters each entry is of the form [b1, b1']
    chi_sq_cdf_arr (float array): returns the computed chi_sq cdf associated with each model
    fit to k_max
    reduced_chi_sq_arr (float array): returns the reduced chi_sq associated with each model
    fit to k_max
    """

    # Imported here because the notebook only imports scipy.stats explicitly
    from scipy.optimize import minimize

    n_fits = len(k_max_arr)
    n_params = np.size(b_guess)

    # Create empty arrays to fill with optimal parameters. The statistics
    # are always stored as floats, whatever the dtype of k_max_arr is.
    b_arr = np.empty_like(k_max_arr, dtype=object)
    chi_sq_cdf_arr = np.zeros_like(k_max_arr, dtype=float)
    reduced_chi_sq_arr = np.zeros_like(k_max_arr, dtype=float)

    for ind, k_max in enumerate(k_max_arr):
        progressBar(ind, n_fits)

        # Pick appropriate subset of values less than k_max
        below_k_max = k_perp < k_max
        k_subset = k_perp[below_k_max]
        p_k_subset = p_k[below_k_max]
        n_modes_subset = n_modes[below_k_max]

        if k_subset.size == 0:
            # Nothing to fit below this k_max
            b_arr[ind] = np.full(n_params, np.nan)
            chi_sq_cdf_arr[ind] = np.nan
            reduced_chi_sq_arr[ind] = np.nan
            progressBar(ind+1, n_fits)
            continue

        # Maximize log likelihood
        res = minimize(neg_log_like, b_guess, method='Nelder-Mead',
                       args=(k_subset, p_k_subset, n_modes_subset,
                             is_linear))

        # Compute total chi^2
        chi_2 = np.sum(calc_chi_sq(res.x, k_subset, p_k_subset,
                                   n_modes_subset, is_linear))

        # Fill arrays
        b_arr[ind] = res.x
        chi_sq_cdf_arr[ind] = chi2func.cdf(chi_2, df=len(k_subset))
        reduced_chi_sq_arr[ind] = chi_2/len(k_subset)

        progressBar(ind+1, n_fits)

    return b_arr, chi_sq_cdf_arr, reduced_chi_sq_arr

#### **Fit One Parameter Models:**

Fits power spectrum data to previously defined one parameter model over a range of $k_{max}$ values.

In [None]:
# Define lowest and highest k_max value to fit up to
k_max_low = 0.05
k_max_high = 2

# 100 evenly spaced k_max values; one fit is run per value
k_max_arr = np.linspace(k_max_low, k_max_high, 100)

# A scalar guess selects the single-bias-parameter model
b_guess = 1.7
# Fit with the CCL linear matter power spectrum
is_linear = True

b_arr, cdf_arr, reduced_chi_sq = fit_to_kmax(k_max_arr, k_bar_avg, p_k_bar_avg,
                                           n_modes_avg, b_guess, is_linear)

#### **Plot One Parameter Model Fit Data:**

Make various plots about the best fit data. The first set of plots shows the results of fits up to various levels of $k_{max}$ by plotting the model parameter, reduced $\chi^2$ and $\chi^2$ cdf values, as a function of $k_\perp^{max}$. The second plot looks at a particular model fit plotted over the data with an included residuals plot.

In [None]:
# Summary of the one parameter fits as a function of k_max: best-fit bias,
# reduced chi^2 and chi^2 cdf, side by side
plt.figure(figsize=(16, 4.5))

# Every entry of b_arr is a parameter array returned by the optimizer;
# pull out the plain b_1 value so matplotlib receives numbers
b_1_arr = [b[0] for b in b_arr]

# LaTeX labels are written as raw strings so the backslashes are not
# treated as (invalid) Python escape sequences
k_max_label = r"$k_\perp^{max} \ [Mpc^{-1}]$"

# Model Parameter Plot
plt.subplot(1, 3, 1)
plt.plot(k_max_arr, b_1_arr, 'o', markersize=3, markeredgecolor="crimson",
         markerfacecolor="None", label=r"$b_1$", markeredgewidth=1)
plt.title(r"Optimal Parameter Values for Various $k_\perp^{max} $  ",
          fontweight="bold")
plt.xlabel(k_max_label)
plt.ylabel("Model Parameter")
plt.legend(loc="upper left", fontsize=12)

# Reduced Chi_2 Plot
plt.subplot(1, 3, 2)
plt.plot(k_max_arr, reduced_chi_sq, 'o', markersize=3,
         markerfacecolor="None", markeredgecolor="k", markeredgewidth=1)
plt.title(r"Reduced $\chi^2$ Values for Various $k_\perp^{max}$ ",
          fontweight="bold")
plt.xscale("log")
plt.yscale("log")
plt.xlabel(k_max_label)
plt.ylabel(r"Reduced $\chi^2$")

# Chi_2 CDF Plot
plt.subplot(1, 3, 3)
plt.plot(k_max_arr, cdf_arr, 'o', markersize=3,
         markerfacecolor="None", markeredgecolor="k", markeredgewidth=1)
plt.title(r"$\chi^2$ CDF Values for Various $k_\perp^{max}$ ",
          fontweight="bold")
plt.xscale("log")
plt.xlabel(k_max_label)
plt.ylabel(r"$\chi^2$ CDF")

plt.tight_layout()
plt.show()

#### **Analyze Particular Model Fit:**

Calculates the optimal one parameter model fit over a specified subset of k values and plots the optimal fit with residuals over a specified subset of k values.

In [None]:
# Fit over a specified k interval
k_min_fit = 0.0
k_max_fit = 0.1

# Select the averaged data points strictly inside (k_min_fit, k_max_fit)
fit_indices = np.where((k_bar_avg < k_max_fit) & (k_bar_avg > k_min_fit))
k_fit = k_bar_avg[fit_indices]
p_k_fit = p_k_bar_avg[fit_indices]
n_modes_fit = n_modes_avg[fit_indices]

# Scalar guess -> single bias parameter model; is_linear is whatever was
# set in the cell that ran the k_max scan
b_guess = 1.7
res = scipy.optimize.minimize(neg_log_like, b_guess, method='Nelder-Mead',
                              args=(k_fit, p_k_fit, n_modes_fit, 
                                    is_linear))




# Compute variances
# The model is evaluated on every averaged k (not only the fitted range)
# and the variance per bin is 2*P_theory^2/n_modes
p_theory = get_pow_2d(k_bar_avg, res.x, is_linear)
var_p_k_bar_avg = 2*p_theory**2/n_modes_avg

# Plot over a specified k interval
k_min_plot = 0.0
k_max_plot = 10.0

# Subsets used by the plotting cell that follows
subset_indices = np.where((k_bar_avg < k_max_plot) & (k_bar_avg > k_min_plot))
k_subset = k_bar_avg[subset_indices]
p_k_subset  = p_k_bar_avg[subset_indices]
n_modes_subset = n_modes_avg[subset_indices]
p_theory_subset = p_theory[subset_indices]
var_subset = var_p_k_bar_avg[subset_indices]

In [None]:
# Plot data with residuals
# Keep the handle of the figure that is created here instead of looking up
# figure number 1, which may be a different, still-open figure
fig_1 = plt.figure(figsize=(12, 14))
frame_1 = fig_1.add_axes((0.0, 0.3, 1.0, 0.6))

ax = plt.gca()

# Model curves on a fine k grid; both use the parameters of the fit in
# res, only the matter power spectrum (linear / non-linear) differs
k_theoretical = np.linspace(np.min(k_subset), np.max(k_subset), 4096)
plt.plot(k_theoretical, get_pow_2d(k_theoretical, res.x, is_linear=True),
         color="k", linestyle="dashed", label="PyCCL 2D Linear MPS")

plt.plot(k_theoretical, get_pow_2d(k_theoretical, res.x, is_linear=False),
         color="r", linestyle=":", label="PyCCL 2D Non-Linear MPS")

plt.errorbar(k_subset, p_k_subset, yerr=np.sqrt(var_subset), marker='.',
             color="lightslategrey", linestyle="none", markersize=6,
             ecolor='k', elinewidth=1, capsize=2, markeredgewidth=1,
             label="Averaged Power Spectrum")
# Constant 1/N_BAR term of the model
ax.axhline(1/N_BAR, label=r"$\frac{1}{\overline{n}}$", linewidth=1,
           color="k", linestyle="dashed")

plt.title("PyCCL 2D Power Spectrum Models with Residual Plot",
          fontweight="bold")
plt.xlabel(r"$k_\perp [Mpc^{-1}]$")
plt.ylabel(r"$\left\langle P_{2D}(k)\right\rangle [Mpc^{2}]$")

leg = plt.legend(loc="upper right", fontsize=12)
plt.xscale("log")
plt.yscale("log")

model_str = (r"$P_{2D}^{Galaxy}(k_\perp)= \frac{1}{2\pi}\int_{0}^{\infty}b_1^2$"
             r"$P(k_\parallel, k_\perp)e^{-k_\parallel^2\sigma_\chi^2}dk_\parallel+$"
             r"$\frac{1}{\overline{n}}$")
# Position is given in data coordinates
plt.text(1.1, 28, model_str, fontsize=12)

# Remember the x range so the residual panel can share it
limits = ax.get_xlim()

# Add residual plot
frame_2 = fig_1.add_axes((0.0, 0.1, 1.0, 0.2))

ax = plt.gca()

# Fractional residual of the data with respect to the model
p_measure = p_k_subset
residual = (p_measure-p_theory_subset)/p_theory_subset

# Compute error associated with residual
sigma_t = np.sqrt(2/n_modes_subset)*p_theory_subset

plt.errorbar(k_subset, residual, yerr=sigma_t/p_theory_subset, marker='.',
             color="lightslategrey", linestyle="none", markersize=6,
             ecolor='k', elinewidth=1, capsize=2, markeredgewidth=1)

ax.axhline(0, linestyle=":", color="k", linewidth=0.8)
ax.set_xlim(limits)

plt.xlabel(r"$k_\perp [Mpc^{-1}]$")
# The residual is divided by the model, so it carries no units
plt.ylabel("Fractional Residuals")
plt.xscale("log")

#### **Fit Power Spectrum with Two Parameters:**

Fits power spectrum data to previously defined two parameter model over a range of $k_{max}$ values.

In [None]:
# Define lowest and highest k_max value to fit up to
k_max_low = 0.05
k_max_high = 2

# 100 evenly spaced k_max values; one fit is run per value
k_max_arr = np.linspace(k_max_low, k_max_high, 100)

# A two element guess [b1, b1'] selects the two parameter model
b_guess = [1.7, 1.0]
# Fit with the CCL linear matter power spectrum
is_linear = True

# NOTE: this overwrites b_arr, cdf_arr and reduced_chi_sq from the one
# parameter scan
b_arr, cdf_arr, reduced_chi_sq = fit_to_kmax(k_max_arr, k_bar_avg, p_k_bar_avg,
                                           n_modes_avg, b_guess, is_linear)
# Split the per-fit parameter arrays into one list per parameter
b_1_arr = [b[0] for b in b_arr]
b_1_p_arr = [b[1] for b in b_arr]

#### **Plot Two Parameter Model Fit Data:**

Make various plots about the best fit data. The first set of plots shows the results of fits up to various levels of $k_{max}$ by plotting the model parameters, reduced $\chi^2$ and $\chi^2$ cdf values, as a function of $k_\perp^{max}$. The second plot looks at a particular model fit plotted over the data with an included residuals plot.

In [None]:
# Summary of the two parameter fits as a function of k_max: best-fit
# parameters, reduced chi^2 and chi^2 cdf, side by side
plt.figure(figsize=(16, 4.5))

# Split the per-fit parameter arrays into one list per parameter
b_1_arr = [b[0] for b in b_arr]
b_1_p_arr = [b[1] for b in b_arr]

# LaTeX labels are written as raw strings so the backslashes are not
# treated as (invalid) Python escape sequences
k_max_label = r"$k_\perp^{max} \ [Mpc^{-1}]$"

# Model Parameter Plot
plt.subplot(1, 3, 1)
plt.plot(k_max_arr, b_1_arr, 'o', markersize=3, markeredgecolor="crimson",
         markerfacecolor="None", label=r"$b_1$", markeredgewidth=1)
plt.plot(k_max_arr, b_1_p_arr, 'o', markersize=3, markeredgecolor="k",
         markerfacecolor="None", label=r"$b_1'$", markeredgewidth=1)
plt.title(r"Optimal Parameter Values for Various $k_\perp^{max} $  ",
          fontweight="bold")
plt.xlabel(k_max_label)
plt.ylabel("Model Parameter")
plt.legend(loc="upper left", fontsize=12)

# Reduced Chi_2 Plot
plt.subplot(1, 3, 2)
plt.plot(k_max_arr, reduced_chi_sq, 'o', markersize=3,
         markerfacecolor="None", markeredgecolor="k", markeredgewidth=1)
plt.title(r"Reduced $\chi^2$ Values for Various $k_\perp^{max}$ ",
          fontweight="bold")
plt.xscale("log")
plt.yscale("log")
plt.xlabel(k_max_label)
plt.ylabel(r"Reduced $\chi^2$")

# Chi_2 CDF Plot
plt.subplot(1, 3, 3)
plt.plot(k_max_arr, cdf_arr, 'o', markersize=3,
         markerfacecolor="None", markeredgecolor="k", markeredgewidth=1)
plt.title(r"$\chi^2$ CDF Values for Various $k_\perp^{max}$ ",
          fontweight="bold")
plt.xscale("log")
plt.xlabel(k_max_label)
plt.ylabel(r"$\chi^2$ CDF")

plt.tight_layout()

# Save before showing: once plt.show() has displayed the figure it may no
# longer be the current figure, and savefig would then write a blank image
plt.savefig("optimal_param.png", dpi=300)
plt.show()

#### **Analyze Particular Model Fit:**

Calculates the optimal two parameter model fit over a specified subset of k values and plots the optimal fit with residuals over a specified subset of k values.

In [None]:
# Fit over a specified k interval
k_min_fit = 0.0
k_max_fit = 0.1

# Select the averaged data points strictly inside (k_min_fit, k_max_fit)
fit_indices = np.where((k_bar_avg < k_max_fit) & (k_bar_avg > k_min_fit))
k_fit = k_bar_avg[fit_indices]
p_k_fit = p_k_bar_avg[fit_indices]
n_modes_fit = n_modes_avg[fit_indices]

# Two element guess [b1, b1'] -> two parameter model; is_linear is
# whatever was set in the cell that ran the k_max scan
b_guess = [1.7, 1.0]
res = scipy.optimize.minimize(neg_log_like, b_guess, method='Nelder-Mead',
                              args=(k_fit, p_k_fit, n_modes_fit, 
                                    is_linear))

# Compute variances
# The model is evaluated on every averaged k (not only the fitted range)
# and the variance per bin is 2*P_theory^2/n_modes
p_theory = get_pow_2d(k_bar_avg, res.x, is_linear)
var_p_k_bar_avg = 2*p_theory**2/n_modes_avg

# Define interval to plot fit over
k_min_plot = 0.0
k_max_plot = 10.0

# Subsets used by the plotting cell that follows
subset_indices = np.where((k_bar_avg < k_max_plot) & (k_bar_avg > k_min_plot))
k_subset = k_bar_avg[subset_indices]
p_k_subset  = p_k_bar_avg[subset_indices]
n_modes_subset = n_modes_avg[subset_indices]
p_theory_subset = p_theory[subset_indices]
var_subset = var_p_k_bar_avg[subset_indices]

In [None]:
# Plot data with model fit
# Keep the handle of the figure that is created here instead of looking up
# figure number 1, which may be a different, still-open figure
fig_1 = plt.figure(figsize=(12, 14))
frame_1 = fig_1.add_axes((0.0, 0.3, 1.0, 0.6))

ax = plt.gca()

# Model curves on a fine k grid; both use the parameters of the fit in
# res, only the matter power spectrum (linear / non-linear) differs
k_theoretical = np.linspace(np.min(k_subset), np.max(k_subset), 4096)
plt.plot(k_theoretical, get_pow_2d(k_theoretical, res.x, is_linear=True),
         color="k", linestyle="dashed", label="PyCCL 2D Linear MPS")

plt.plot(k_theoretical, get_pow_2d(k_theoretical, res.x, is_linear=False),
         color="r", linestyle=":", label="PyCCL 2D Non-Linear MPS")

plt.errorbar(k_subset, p_k_subset, yerr=np.sqrt(var_subset), marker='.',
             color="lightslategrey", linestyle="none", markersize=6,
             ecolor='k', elinewidth=1, capsize=2, markeredgewidth=1,
             label="Averaged Power Spectrum")
# Constant 1/N_BAR term of the model
ax.axhline(1/N_BAR, label=r"$\frac{1}{\overline{n}}$", linewidth=1,
           color="k", linestyle="dashed")

plt.title("PyCCL 2D Power Spectrum Models with Residual Plot",
          fontweight="bold")
plt.xlabel(r"$k_\perp [Mpc^{-1}]$")
plt.ylabel(r"$\left\langle P_{2D}(k)\right\rangle [Mpc^{2}]$")

leg = plt.legend(loc="upper right", fontsize=12)
plt.xscale("log")
plt.yscale("log")

model_str = (r"$P_{2D}^{Galaxy}(k_\perp)= \frac{1}{2\pi}\int_{0}^{\infty}(b_1+b_1'k)^2$"
             r"$P(k_\parallel, k_\perp)e^{-k_\parallel^2\sigma_\chi^2}dk_\parallel+$"
             r"$\frac{1}{\overline{n}}$")
# Position is given in data coordinates
plt.text(0.7, 28, model_str, fontsize=12)

# Remember the x range so the residual panel can share it
limits = ax.get_xlim()

# Compute/plot residuals
frame_2 = fig_1.add_axes((0.0, 0.1, 1.0, 0.2))

ax = plt.gca()

# Fractional residual of the data with respect to the model
p_measure = p_k_subset
residual = (p_measure-p_theory_subset)/p_theory_subset

# Compute error associated with residual
sigma_t = np.sqrt(2/n_modes_subset)*p_theory_subset

plt.errorbar(k_subset, residual, yerr=sigma_t/p_theory_subset, marker='.',
             color="lightslategrey", linestyle="none", markersize=6,
             ecolor='k', elinewidth=1, capsize=2, markeredgewidth=1)

ax.axhline(0, linestyle=":", color="k", linewidth=0.8)
ax.set_xlim(limits)

plt.xlabel(r"$k_\perp [Mpc^{-1}]$")
# The residual is divided by the model, so it carries no units
plt.ylabel("Fractional Residuals")
plt.xscale("log")