# PSP Automated Magnetic Hole Finder
## By: Robert Alexander + Jaye Verniero


### 0.1) Import Packages, Define Helper Functions, and Set Save Directory

In [17]:
# Cell 1: Imports and Initial Configuration

# --- Plotbot Core Imports ---
try:
    from plotbot import print_manager as pm
    # Configure Print Manager Verbosity (adjust as needed for debugging)
    pm.show_error = True
    pm.show_warnings = True
    pm.show_status = True     # Good for seeing high-level progress
    pm.show_debug = False     # Set to True for very detailed internal Plotbot logs
    pm.show_datacubby = False # Set to True to debug DataCubby interactions
    pm.show_processing = False# Set to True for Plotbot data processing steps
    print("✅ Successfully imported and configured Plotbot's print_manager.")
except ImportError:
    print("⚠️ Could not import Plotbot's print_manager. Some log messages may be missing.")
    pm = None

# Import for direct data loading/populating DataCubby & the specific global instances
from plotbot import get_data as plotbot_get_data 
from plotbot import mag_rtn  # For MAG_RTN data (1 sample/cycle)
# from plotbot import mag_rtn_4sa # If you also need 4sa MAG data in snapshots
# from plotbot import proton      # Example: if you want proton data
# from plotbot import epad        # Example: if you want electron PAD data

# Import for snapshotting
from plotbot.data_snapshot import save_data_snapshot, load_data_snapshot

# --- Local Application Imports ---
# For running your analysis algorithm
from magnetic_hole_finder.magnetic_hole_finder_core import HoleFinderSettings, detect_magnetic_holes_and_generate_outputs

# --- Standard Library Imports ---
import os
import json # For settings files, though core function handles its own now
from datetime import datetime
from collections import Counter # For handling the returned counter from analysis

# --- Third-Party Data Science Libraries (primarily for ad-hoc notebook use if needed) ---
import numpy as np 
import pandas as pd
# import matplotlib.pyplot as plt # If doing custom plots in the notebook

# --- Warnings Handling ---
from warnings import simplefilter
import warnings
simplefilter(action='ignore', category=DeprecationWarning) # Ignore general deprecation warnings
warnings.filterwarnings("ignore", message="invalid value encountered in divide") # Example specific warning
warnings.filterwarnings("ignore", message="invalid value encountered in scalar divide") # Example
warnings.filterwarnings("ignore", category=DeprecationWarning, module="IPython.core.pylabtools") # For IPython

# --- Final Import Confirmation ---
current_time_consolidated = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(f'{current_time_consolidated} - 📚 All libraries imported and environment configured.')

✅ Successfully imported and configured Plotbot's print_manager.
2025-05-06 19:34:44 - 📚 All libraries imported and environment configured.


In [18]:
import os # Ensure os is imported

# Define the main directory where all outputs will go
# This is what the user might change.
BASE_SAVE_DIRECTORY = os.path.abspath("MH_Scan_Output") # Path relative to project root
os.makedirs(BASE_SAVE_DIRECTORY, exist_ok=True) # Create it if it doesn't exist
print(f'🛟 Algorithm output base directory set to: {BASE_SAVE_DIRECTORY}')

🛟 Algorithm output base directory set to: /Users/robertalexander/GitHub/Plotbot/MH_Scan_Output


In [25]:
# Cell 3: Configure HoleFinderSettings for the Run

# Instantiate settings with defaults from the class
mh_run_settings = HoleFinderSettings()

# === Customize parameters for THIS SPECIFIC RUN ===
# Values shown here are examples if you want to override the defaults 
# defined in the HoleFinderSettings class. If the default is fine, you don't need to set it here.

# --- Core Algorithm Parameters ---
mh_run_settings.INSTRUMENT_SAMPLING_RATE = 292.9 
mh_run_settings.use_calculated_sampling_rate = True
mh_run_settings.depth_percentage_threshold = 0.25
mh_run_settings.smoothing_window_seconds = 8.0
mh_run_settings.derivative_window_seconds = 0.2
mh_run_settings.min_max_finding_smooth_window = 0.3
mh_run_settings.mean_threshold = 0.8
mh_run_settings.search_in_progress_output = True  # For verbose logging during detection
mh_run_settings.additional_seconds_for_min_search = 0.2
mh_run_settings.asymetric_peak_threshold = 0.25
mh_run_settings.symmetrical_peak_scan_window_in_secs = 2.0
mh_run_settings.Bave_scan_seconds = 0.1
mh_run_settings.Bave_window_seconds = 20.0
mh_run_settings.wide_angle_threshold = 15.0
mh_run_settings.small_threshold_cross_flag_samples = 10
mh_run_settings.small_threshold_cross_adjustment_samples = 10

# --- Algorithm Breaking Condition Flags ---
mh_run_settings.break_for_shallow_hole = True
mh_run_settings.break_for_assymettry = False 
mh_run_settings.break_for_wide_angle = False 
mh_run_settings.break_for_small_threshold_cross = False
mh_run_settings.break_for_complex_hole = False 
mh_run_settings.threshold_for_derivative_0_crossings_flag = 1000
mh_run_settings.break_for_derivative_crossings = False

# --- Output Generation Control Flags (for outputs handled by the core .py function) ---
mh_run_settings.OUTPUT_MAIN_PLOT = True 
mh_run_settings.SAVE_MAIN_PLOT = True   
mh_run_settings.PLOT_HOLE_MINIMUM_ON_MAIN_PLOT = True 
mh_run_settings.PLOT_THRESH_CROSS_ON_MAIN_PLOT = True 

mh_run_settings.OUTPUT_ZERO_CROSSING_PLOT = False # For the specific plot in zero_crossing_analysis

mh_run_settings.IZOTOPE_MARKER_FILE_OUTPUT_MAX_AND_MIN = True 
mh_run_settings.IZOTOPE_MARKER_FILE_OUTPUT_GENERAL = False
mh_run_settings.MARKER_FILE_VERSION = 3
mh_run_settings.MARKER_FILES_WITH_ANNOTATED_MARKERS = False
mh_run_settings.MARKER_FILES_WITH_HOLE_NUMBERS = False

mh_run_settings.EXPORT_AUDIO_FILES = True
mh_run_settings.AUDIO_SAMPLING_RATE = 22000

mh_run_settings.download_only = False # Set to True to only download

print("HoleFinderSettings configured for this run. Current settings:")
# Pretty print the settings for verification
# import json
# print(json.dumps(mh_run_settings.__dict__, indent=4))

HoleFinderSettings configured for this run. Current settings:


### Load The Snapshot!

In [20]:
load_data_snapshot('Magnetic_Hole_Multi_Encounter_Snapshot.pkl') 

Starting load from data_snapshots/Magnetic_Hole_Multi_Encounter_Snapshot.pkl
Detected compression extension: .pkl
Using no compression
Data snapshot loaded from file. Keys: ['mag_RTN_segment_1', 'mag_RTN_segment_2', 'mag_RTN_segment_3', 'mag_RTN_segment_4', 'mag_RTN_segments_meta']
Processing 1 segment groups for merging...
  Merging segments for mag_RTN (4 segments)
    Processing segment 1 (mag_RTN_segment_1) for mag_RTN
      Reconstructed 'psp_fld_l2_mag_RTN' (shape (1054684, 3)) for segment 1.
    LOAD_SNAPSHOT_DEBUG: About to call update_global_instance for segment 1 of mag_RTN.
        segment_data_for_cubby.times is None: False
        segment_data_for_cubby.times len: 1054684
        segment_data_for_cubby.data keys: ['psp_fld_l2_mag_RTN', 'bn', 'bmag', 'pmag', 'br', 'bt', 'all']
    Successfully processed/merged segment 1 into mag_RTN
        Segment 1 original datetime_array range: 2021-04-28 00:00:00.000528768 to 2021-04-28 00:59:59.998349696
    Processing segment 2 (mag_R

True

In [26]:
trange_E9 = ['2021-08-10 00:00:00', '2021-08-10 02:00:00']
trange_E10 = ['2021-11-22 00:30:00', '2021-11-22 03:30:00']
trange_E11 = ['2022-02-25 12:00:00', '2022-02-25 13:00:00']
trange_E15_1 = ['2023-03-16 02:15:00', '2023-03-16 02:30:00']
trange_E15_2 = ['2023-03-17 20:30:00', '2023-03-17 21:45:00']
trange_E17 = ['2023-09-28 06:32:00', '2023-09-28 06:45:00']

TIME_RANGE_TO_ANALYZE = trange_E17
# Or any other trange you want to test

print(f"Starting analysis for trange: {TIME_RANGE_TO_ANALYZE}...")
print(f"Outputs will be saved within base directory: {BASE_SAVE_DIRECTORY}")

# The main call to the refactored orchestrator function
# It now handles sub_save_dir creation, detection, and all standard outputs internally.
analysis_results = detect_magnetic_holes_and_generate_outputs(
    TIME_RANGE_TO_ANALYZE,
    BASE_SAVE_DIRECTORY, # Pass the top-level save directory
    mh_run_settings      # Pass the configured settings object
)

# The function returns the primary scientific results for optional inspection
if analysis_results:
    magnetic_holes, hole_minima, hole_maxima_pairs, times_clipped, bmag, magnetic_hole_details, returned_hole_counter = analysis_results
    print(f"\n✅ Analysis complete. {returned_hole_counter.get('confirmed', 0)} holes confirmed.")
    # You can still do a quick print of the counter here
    print("\n--- Magnetic Hole Detection Summary (from returned counter) ---")
    for key, value in returned_hole_counter.items():
        print(f"{key}: {value}")
    print("---------------------------------------------------------------")
    print(f"All outputs (plots, markers, settings JSON) saved in the run-specific subdirectory.")
else:
    print("Analysis aborted or returned no results (check logs for errors).")


Starting analysis for trange: ['2023-09-28 06:32:00', '2023-09-28 06:45:00']...
Outputs will be saved within base directory: /Users/robertalexander/GitHub/Plotbot/MH_Scan_Output
Starting analysis for trange: ['2023-09-28 06:32:00', '2023-09-28 06:45:00']. Download_only mode: False
Entering setup_output_directory with base_save_dir: /Users/robertalexander/GitHub/Plotbot/MH_Scan_Output
Encounter directory path: /Users/robertalexander/GitHub/Plotbot/MH_Scan_Output/E17
Creating encounter directory: /Users/robertalexander/GitHub/Plotbot/MH_Scan_Output/E17
Final subdirectory path: /Users/robertalexander/GitHub/Plotbot/MH_Scan_Output/E17/E17_PSP_FIELDS_2023-09-28_063200_to_064500_Bmag_Holes
Exiting setup_output_directory, returning: /Users/robertalexander/GitHub/Plotbot/MH_Scan_Output/E17/E17_PSP_FIELDS_2023-09-28_063200_to_064500_Bmag_Holes
✅ Outputs for this run will be saved in: /Users/robertalexander/GitHub/Plotbot/MH_Scan_Output/E17/E17_PSP_FIELDS_2023-09-28_063200_to_064500_Bmag_Holes
E

ValueError: min_periods 1 must be <= window 0

In [22]:
# Cell 3: Prepare for Snapshotting - Define what to include
trange_E9 = ['2021-08-10 00:00:00', '2021-08-10 02:00:00']
trange_E10 = ['2021-11-22 00:30:00', '2021-11-22 03:30:00']
trange_E11 = ['2022-02-25 12:00:00', '2022-02-25 13:00:00']
trange_E15_1 = ['2023-03-16 02:15:00', '2023-03-16 02:30:00']
trange_E15_2 = ['2023-03-17 20:30:00', '2023-03-17 21:45:00']
trange_E17 = ['2023-09-28 06:32:00', '2023-09-28 06:45:00']


# --- 1. Create a list of all the tranges you want to process for this snapshot ---
tranges_for_snapshot = [ 
    trange_E9,
    trange_E10,
    # trange_E11,
    # trange_E15_1,
    # trange_E15_2,
    # trange_E17,
]

# --- 2. Define which Plotbot global data instances you want to populate and save ---
classes_for_snapshot = [
    mag_rtn, 
    # proton, 
    # epad,   
]

# --- 3. Define a name for your snapshot file (can be None for fully auto name) ---
snapshot_name = "Magnetic_Hole_Multi_Encounter_Snapshot" # RENAMED (or None)

print(f"Snapshot will process {len(tranges_for_snapshot)} time range(s).") # Updated print
print(f"Snapshot will include data for: {[type(inst).__name__ for inst in classes_for_snapshot]}") # Updated print


# Cell 4: Execute Snapshot Creation
# snapshot_name, classes_for_snapshot, tranges_for_snapshot defined in the previous cell

print(f"\nAttempting to populate data and save snapshot (name: {snapshot_name or 'auto'})...") 

pm.show_datacubby = True
pm.show_data_snapshot = True

snapshot_filepath = save_data_snapshot(
    filename=snapshot_name, 
    classes=classes_for_snapshot,
    trange_list=tranges_for_snapshot,
    # compression="medium",
)

# The save_data_snapshot function now handles detailed success/failure printing.
# You can have a minimal confirmation in the notebook.
if snapshot_filepath:
    print(f"Notebook: Snapshot process finished. See logs above. Path: {snapshot_filepath}")
else:
    print("Notebook: Snapshot process finished (likely failed or nothing to save). See logs above.")

Snapshot will process 2 time range(s).
Snapshot will include data for: ['mag_rtn_class']

Attempting to populate data and save snapshot (name: Magnetic_Hole_Multi_Encounter_Snapshot)...
[SNAPSHOT SAVE] Attempting to populate and save data for 1 class type(s) across 2 time range(s).
[SNAPSHOT SAVE] Target classes: ['mag_RTN']
[SNAPSHOT SAVE] Initiating data population for 1 class type(s) across 2 time range(s)...
  Populating/updating data for: mag_RTN
    Processing trange: ['2021-08-10 00:00:00', '2021-08-10 02:00:00'] for mag_RTN
Getting data for time range: 2021-08-10 00:00:00 to 2021-08-10 02:00:00
Initial check for variable: <class 'plotbot.data_classes.psp_mag_classes.mag_rtn_class'>
Data types to process: {'mag_RTN'}
[CUBBY] 
=== Retrieving mag_rtn from data_cubby ===
[CUBBY] GRAB CALLER: /Users/robertalexander/GitHub/Plotbot/plotbot/get_data.py:254
[CUBBY] GRAB SUCCESS - Retrieved mag_rtn with type <class 'plotbot.data_classes.psp_mag_classes.mag_rtn_class'>
[CUBBY] GRAB OUTPUT

### DATA LENGTH CHECKER

In [23]:
import pickle
import os
import numpy as np # For checking array properties
import pandas as pd # For datetime/timestamp checks

# Make sure Plotbot's custom classes are importable
# This might require sys.path adjustments if your notebook isn't in the root
# import sys
# sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..'))) # Example if notebook is in a subdir

from plotbot.data_classes.psp_mag_classes import mag_rtn_4sa_class # Adjust import as needed
from plotbot.plot_manager import plot_manager # if plot_manager objects are part of the class
from plotbot.ploptions import ploptions # if ploptions objects are part of the class


filepath = "data_snapshots/full_mission_mag_rtn_4sa.pkl"
loaded_data = None

try:
    with open(filepath, 'rb') as f:
        loaded_data = pickle.load(f)
    print(f"Successfully loaded: {filepath}")
except Exception as e:
    print(f"Error loading snapshot: {e}")

if loaded_data:
    print(f"Type of loaded data: {type(loaded_data)}")
    if isinstance(loaded_data, dict):
        print(f"Keys in snapshot: {list(loaded_data.keys())}")
        for key, obj_instance in loaded_data.items():
            print(f"--- Inspecting: {key} (Type: {type(obj_instance)}) ---")
            if hasattr(obj_instance, 'datetime_array') and obj_instance.datetime_array is not None:
                print(f"  datetime_array len: {len(obj_instance.datetime_array)}")
                if len(obj_instance.datetime_array) > 0:
                    print(f"  datetime_array first element type: {type(obj_instance.datetime_array[0])}")
            else:
                print(f"  datetime_array: Not found or None")

            if hasattr(obj_instance, 'time') and obj_instance.time is not None:
                # Check if it's a numpy array to get shape, otherwise len
                if isinstance(obj_instance.time, np.ndarray):
                    print(f"  time (TT2000) shape: {obj_instance.time.shape}, size: {obj_instance.time.size}")
                    if obj_instance.time.size > 0:
                         print(f"  time first element type: {type(obj_instance.time[0]) if obj_instance.time.ndim > 0 else type(obj_instance.time.item())}")

                elif hasattr(obj_instance.time, '__len__'):
                    print(f"  time (TT2000) len: {len(obj_instance.time)}")
                    if len(obj_instance.time) > 0:
                         print(f"  time first element type: {type(obj_instance.time[0])}")
                else: # Scalar or other
                    print(f"  time (TT2000): {obj_instance.time} (Type: {type(obj_instance.time)})")
            else:
                print(f"  time (TT2000): Not found or None")

            if hasattr(obj_instance, 'field') and obj_instance.field is not None:
                 if isinstance(obj_instance.field, np.ndarray):
                    print(f"  field shape: {obj_instance.field.shape}")
                 elif hasattr(obj_instance.field, '__len__'): # e.g. list of arrays
                    print(f"  field (list) len: {len(obj_instance.field)}")
                    if len(obj_instance.field) > 0 and hasattr(obj_instance.field[0], 'shape'):
                        print(f"  field component 0 shape: {obj_instance.field[0].shape}")

            else:
                print(f"  field: Not found or None")

            if hasattr(obj_instance, 'raw_data') and isinstance(obj_instance.raw_data, dict):
                print(f"  raw_data keys: {list(obj_instance.raw_data.keys())}")
                for r_key, r_val in obj_instance.raw_data.items():
                    if isinstance(r_val, np.ndarray):
                        print(f"    raw_data['{r_key}'] shape: {r_val.shape}")
                    elif isinstance(r_val, list) and r_val and hasattr(r_val[0], 'shape'):
                         print(f"    raw_data['{r_key}'] (list) len: {len(r_val)}, component 0 shape: {r_val[0].shape}")
                    elif hasattr(r_val, '__len__'):
                        print(f"    raw_data['{r_key}'] len: {len(r_val)}")

    # You might need to adjust the key if mag_rtn_4sa was saved under a specific one
    # mag_data = loaded_data.get('mag_rtn_4sa') # Or whatever key it was saved under
    # if mag_data:
    #     # Inspect mag_data attributes as above
    #     pass

Successfully loaded: data_snapshots/full_mission_mag_rtn_4sa.pkl
Type of loaded data: <class 'dict'>
Keys in snapshot: ['mag_rtn_4sa']
--- Inspecting: mag_rtn_4sa (Type: <class 'plotbot.data_classes.psp_mag_classes.mag_rtn_4sa_class'>) ---
  datetime_array len: 54578256
  datetime_array first element type: <class 'numpy.datetime64'>
  time (TT2000) shape: (2373042,), size: 2373042
  time first element type: <class 'numpy.int64'>
  field shape: (2373042, 3)
  raw_data keys: ['bt', 'pmag', 'bmag', 'bn', 'br', 'all']
    raw_data['bt'] shape: (54578256,)
    raw_data['pmag'] shape: (54578256,)
    raw_data['bmag'] shape: (54578256,)
    raw_data['bn'] shape: (54578256,)
    raw_data['br'] shape: (54578256,)
    raw_data['all'] (list) len: 3, component 0 shape: (54578256,)


Data Type Check:

In [24]:
import plotbot
from plotbot.data_classes.psp_data_types import data_types as psp_data_types_config
from plotbot import print_manager

# Ensure debug prints are on to see the output from import_data_function
print_manager.show_debug = True
print_manager.show_variable_testing = True # If import_data_function uses this

# Define a short, common time range likely to have data for many types
# (Adjust if needed, ensure corresponding CDFs exist locally for types you want to test)
test_trange = ['2021-04-28 00:00:00', '2021-04-28 01:00:00'] 

# List of data types to test (can be all keys from psp_data_types_config or a subset)
# For now, let's focus on 'mag_RTN' as it was problematic, and maybe a couple of others.
types_to_test = {
    'mag_RTN': plotbot.mag_rtn,
    'mag_RTN_4sa': plotbot.mag_rtn_4sa,
    # Add other types and their corresponding global plotbot instances if you want to test more
    # 'mag_SC': plotbot.mag_sc,
    # 'spe_sf0_pad': plotbot.epad, # Assuming epad is the global instance for spe_sf0_pad
    # 'spi_sf00_l3_mom': plotbot.proton # Assuming proton is the global instance
}

print(f"--- Starting Data Load Test for Multiple Types ---")
for type_key, global_instance in types_to_test.items():
    print(f"--- Testing data_type: {type_key} ---")
    try:
        # We are primarily interested in what import_data_function returns.
        # The call to plotbot.get_data will trigger it.
        # The KeyError would happen inside calculate_variables if the key is missing.
        plotbot.get_data(test_trange, global_instance)
        print(f"  ✅ plotbot.get_data call completed for {type_key}")
        
        # Optional: Check the state of the global instance if needed,
        # but the main goal is to see the *** IMPORT_DATA_DEBUG *** prints
        # print(f"    {type_key}.datetime_array len: {len(global_instance.datetime_array) if global_instance.datetime_array is not None else 'None'}")

    except KeyError as ke:
        print(f"  🔴 KeyError for {type_key}: {ke}")
    except Exception as e:
        print(f"  🔴 Other Error for {type_key}: {e}")
    print(f"--- Finished testing data_type: {type_key} ---\n")

print(f"--- Data Load Test Complete ---")

--- Starting Data Load Test for Multiple Types ---
--- Testing data_type: mag_RTN ---
Getting data for time range: 2021-04-28 00:00:00 to 2021-04-28 01:00:00
Initial check for variable: <class 'plotbot.data_classes.psp_mag_classes.mag_rtn_class'>
Data types to process: {'mag_RTN'}
🛰️ mag_RTN - acquiring all variables
Processing Data Type: mag_RTN...
[CUBBY] 
=== Retrieving mag_rtn from data_cubby ===
[CUBBY] GRAB CALLER: /Users/robertalexander/GitHub/Plotbot/plotbot/get_data.py:254
[CUBBY] GRAB SUCCESS - Retrieved mag_rtn with type <class 'plotbot.data_classes.psp_mag_classes.mag_rtn_class'>
[CUBBY] GRAB OUTPUT - datetime_array type=ndarray, elem_type=datetime64, shape=(6561302,), range=2051-04-28T11:58:50.816528 to 2053-09-27T18:43:58.813071
[CUBBY] GRAB OUTPUT - raw_data keys=['bn', 'bmag', 'pmag', 'br', 'bt', 'all'] | bn: type=ndarray, shape=(6561302,) | bmag: type=ndarray, shape=(6561302,) | pmag: type=ndarray, shape=(6561302,) | br: type=ndarray, shape=(6561302,) | bt: type=ndarra