# merge_data

In [None]:
# -*- coding: UTF-8 -*-
"""
Run Data Concatenation and Verification (Jupyter Notebook Version)
==================================================================

This script, intended to be run in a Jupyter Notebook cell, orchestrates
the data assembly process for a given Hartmann number (Ha).

It performs the following steps:
1.  Sets configuration variables for the data directory and Ha number.
2.  Constructs the paths to the four required input pickle files
    (vxyz_jxyz_p_f, du, dv, dw).
3.  Calls the `load_and_concatenate_data` function to load, validate, and
    merge these datasets.
4.  Defines a path for the final output and saves the concatenated dataset
    as a new pickle file.
5.  Includes a verification step that loads the newly created file and
    prints a summary to confirm its integrity.
"""

from pathlib import Path
import numpy as np

# --- Import project-specific functions ---
from mhd_cae_koopman.utils import (
    save_object,
    load_object,
)
from mhd_cae_koopman.data_processing.utils import print_timeseries_info
from mhd_cae_koopman.data_processing.concatenation import load_and_concatenate_data

# --- 1. Configuration ---
# Edit these two values to match the local setup.
Ha = 300  # Hartmann number; selects the re1000_ha{Ha} data directory
data_root = Path("../../../../../data")  # Relative location of the data tree

# --- 2. Path construction ---
file_names = ["vxyz_jxyz_p_f", "du", "dv", "dw"]
base_data_dir = data_root / f"re1000_ha{Ha}/3d/pkl/"

if base_data_dir.is_dir():
    # One "<name>.pkl" input per entry of file_names, all inside base_data_dir.
    input_paths = {stem: base_data_dir / f"{stem}.pkl" for stem in file_names}

    # Loading, saving and verification share a single try block so that any
    # failure is reported by the handlers below instead of escaping the cell.
    try:
        # --- 3. Concatenation ---
        # Expands to path_vxyz_jxyz_p_f=..., path_du=..., path_dv=...,
        # path_dw=... (one keyword argument per entry of input_paths).
        concatenated_data = load_and_concatenate_data(
            **{f"path_{stem}": pkl_path for stem, pkl_path in input_paths.items()}
        )

        # --- 4. Store the merged object next to its inputs ---
        output_file_name = "vxyz_jxyz_p_f_du_dv_dw.pkl"
        output_path = base_data_dir / output_file_name

        print(f"\n--- Storing final concatenated data to: {output_path} ---")
        save_object(concatenated_data, output_path)

        # --- 5. Verification ---
        # Read the file back from disk and print a summary of what was stored.
        print("\n--- Running Verification ---")
        loaded_final_data = load_object(output_path)
        print_timeseries_info(
            loaded_final_data["timeseries"], loaded_final_data["labels"]
        )
        print("\nVerification successful. Process complete.")

    except (FileNotFoundError, ValueError) as err:
        print(f"\nAn error occurred during data processing: {err}")
    except Exception as err:
        print(f"An unexpected error occurred: {err}")
else:
    # Nothing can be loaded without the data directory; report and stop here.
    print(
        f"Error: Data directory not found at {base_data_dir.resolve()}",
        "Please ensure the 'data_root' path is correct.",
        sep="\n",
    )

