# Ham Data Pickle Generator

This notebook loads ALL ham CDF data and saves it to a pickle file for lightning-fast loading in future sessions.

In [None]:
import cdflib
import numpy as np
import pickle
import os
from glob import glob
from datetime import datetime

In [None]:
# Locate every ham CDF file under the data directory
cdf_dir = '../data/cdf_files/Hamstrings'
ham_pattern = os.path.join(cdf_dir, 'hamstring_*_v*.cdf')
cdf_files = glob(ham_pattern)
cdf_files.sort()  # glob order is arbitrary; sort so files are processed in name order
print(f"Found {len(cdf_files)} ham CDF files")

In [None]:
# Read every variable out of every ham CDF file.
# Results are kept per file: all_times collects one Epoch read per file, and
# all_data maps each variable name to a list with one entry per successful read.
# NOTE(review): all_times grows for every file, while a variable that fails
# to load is skipped, so its list can end up shorter than all_times.
all_times = []
all_data = {}
n_files = len(cdf_files)

for file_no, cdf_path in enumerate(cdf_files, start=1):
    print(f"Loading {file_no}/{n_files}: {os.path.basename(cdf_path)}")

    with cdflib.CDF(cdf_path) as cdf:
        info = cdf.cdf_info()

        # The time variable is collected separately from everything else
        all_times.append(cdf.varget('Epoch'))

        # Every remaining zVariable goes into its per-variable list
        for var_name in info.zVariables:
            if var_name != 'Epoch':
                try:
                    values = cdf.varget(var_name)
                    all_data.setdefault(var_name, []).append(values)
                except Exception as e:
                    # Best effort: report the failure and move on to the next variable
                    print(f"  Warning: Could not load {var_name}: {e}")

print(f"\nLoaded {len(all_data)} variables from {len(cdf_files)} files")

In [None]:
# Load all ham variables
# This is the slow part - only needs to be done once!
# NOTE(review): get_data, trange, and ham are not defined or imported in the
# cells visible in this notebook — presumably they come from a project import
# and an earlier trange assignment; confirm they are in scope before running.

get_data(trange, ham.n_core)

In [None]:
# Report how many timestamps were loaded and the time span they cover
print("\n=== Ham Data Summary ===")
times = ham.datetime_array
n_times = 0 if times is None else len(times)
print(f"datetime_array length: {n_times}")
if n_times > 0:
    # Only print the range when there is at least one timestamp to index
    print(f"Time range: {times[0]} to {times[-1]}")

In [None]:
# Save to pickle
# NOTE(review): save_simple_snapshot is not defined in the visible cells;
# presumably it writes the loaded ham data to the given filename and
# load_simple_snapshot reads it back — confirm against the project helper.
save_simple_snapshot('all_ham_data.pkl')
print("\nDone! Use load_simple_snapshot('all_ham_data.pkl') in future sessions.")