### Imports
- numpy: For scientific computing
- matplotlib: For visualizing data
- h5py: Pythonic interface to the HDF5 binary data format

In [96]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import h5py

### Helper Functions to Aid in Data Analysis

In [97]:
def mean(data):
    """Return the arithmetic mean of the values in ``data``.

    Args:
        data: A non-empty iterable of numeric values (list, tuple,
            generator, NumPy array, ...). Values are read by iteration,
            so the input does not need to support integer indexing.

    Returns:
        The sum of the values divided by the number of values.

    Raises:
        ValueError: If ``data`` yields no values.
    """
    count = 0        # Track sample size
    summation = 0    # Keep a running sum
    for num in data:
        # Add the iterated value itself instead of data[count]: positional
        # indexing fails for generators and sets, and performs a label
        # lookup (not a positional one) on mappings and similar containers.
        summation += num
        count += 1

    if count == 0:
        # Fail with a clear message instead of an opaque ZeroDivisionError
        raise ValueError("mean() requires at least one data point")

    return summation / count

def median(data):
    """Return the median (middle value) of the values in ``data``.

    Args:
        data: A non-empty iterable of mutually comparable numeric values.
            The input is not modified; a sorted copy is used internally.

    Returns:
        For an odd number of values, the middle element of the sorted
        values, returned unchanged. For an even number of values, the
        average of the two middle elements as a float.

    Raises:
        ValueError: If ``data`` yields no values.
    """
    sorted_data = sorted(data)   # Sorted copy; also materializes generators
    count = len(sorted_data)     # Measure the copy so data need not support len()

    if count == 0:
        # Without this guard the even branch would read sorted_data[-1]
        # and fail with an unhelpful IndexError.
        raise ValueError("median() requires at least one data point")

    mid = count // 2             # Middle index (upper middle when count is even)
    if count % 2 == 1:
        return sorted_data[mid]

    left = float(sorted_data[mid - 1])   # Lower of the two middle points
    right = float(sorted_data[mid])      # Upper of the two middle points
    return (left + right) / 2.0

def std(data):
    """Return the population standard deviation of the values in ``data``.

    The variance is the sum of squared deviations from the mean divided by
    the number of values N (not N - 1).

    Args:
        data: A non-empty iterable of numeric values. It is copied into a
            list first, so one-shot iterables such as generators work.

    Returns:
        The square root of the variance, as computed by ``np.sqrt``.

    Raises:
        ValueError: If ``data`` yields no values.
    """
    # The values are needed twice (once for the mean, once for the
    # deviations), so materialize them up front.
    values = list(data)
    count = len(values)
    if count == 0:
        raise ValueError("std() requires at least one data point")

    data_mean = mean(values)    # Get the mean
    squared_diff = float(0.0)   # Running sum of diff^2 (float to prevent type error)
    for num in values:
        # Use the iterated value rather than indexing by position, which
        # fails or looks up by label on containers that are not sequences.
        squared_diff += (num - data_mean) ** 2

    variance = squared_diff / count   # Population variance
    return np.sqrt(variance)

### Read Data Set into Variables
- The condensed version of the original data set contains ten 1024 x 1024 images along with their respective tau values.

In [98]:
# Open the HDF5 file read-only. The context manager closes the file on exit,
# even if one of the reads below raises.
with h5py.File("data/ten_snapshots.hdf5", "r") as h5f:
    # Print the key values for reference
    keys = h5f.keys()
    print(f"Keys: {keys}\n")

    # Copy the data out while the file is still open. Slicing with [:] reads
    # a dataset into an in-memory NumPy array; a bare h5f["ksz_maps"] handle
    # would become unusable as soon as the file is closed.
    images = h5f["ksz_maps"][:]
    tau_values = h5f["tau_values"][:].tolist()

print(f"Images List Shape: {images.shape}")
print(f"Tau Values List:   {tau_values}")

Keys: <KeysViewHDF5 ['ksz_maps', 'tau_values']>

Images List Shape: (10, 1024, 1024)
Tau Values List:   [0.04934259014229634, 0.06491457314571873, 0.05260325395518275, 0.05625422805928001, 0.06860118745208645, 0.05420070520958668, 0.05896275116315667, 0.05879498791108795, 0.055460806198125545, 0.06162772510729051]


### Analyze the Tau Values
- Calculate the mean, median and standard deviation of the tau values.

In [99]:
# Summarize the tau distribution with the helper functions defined above
tau_mean, tau_median, tau_std = (
    mean(tau_values),
    median(tau_values),
    std(tau_values),
)

# Report the three statistics, one per line
print(
    f"Tau Mean:   {tau_mean}",
    f"Tau Median: {tau_median}",
    f"Tau STD:    {tau_std}",
    sep="\n",
)


Tau Mean:   0.05807628083438117
Tau Median: 0.05752460798518398
Tau STD:    0.005503299679404095
