## Data loading script

This script loads the volumes issued by the Cassini CDA instrument. It assigns labels and handles missing values as specified by the .lbl files of the database. All volumes are combined into a compact .csv file for later processing.

In [None]:
import pandas as pd
import re
from pathlib import Path

# --- fixed-width layout according to the LBL-files ---
# One row per field: (column label, start offset, end offset).
# Offsets are 0-based and half-open [start, end), which is the form
# pandas.read_fwf accepts for its `colspecs` argument.
_field_layout = [
    ("EVENT_ID", 0, 10),
    ("EVENT_TIME", 11, 28),
    ("EVENT_JULIAN_DATE", 29, 43),
    ("QP_AMPLITUDE", 44, 52),
    ("QP_SIGNAL_FLAG", 53, 54),
    ("QI_AMPLITUDE", 55, 63),
    ("QI_SIGNAL_FLAG", 64, 65),
    ("QT_AMPLITUDE", 66, 74),
    ("QT_SIGNAL_FLAG", 75, 76),
    ("QC_AMPLITUDE", 77, 85),
    ("QC_SIGNAL_FLAG", 86, 87),
    ("QI_RISE_TIME", 88, 96),
    ("QT_RISE_TIME", 97, 105),
    ("QC_RISE_TIME", 106, 114),
    ("TARGET_FLAG", 115, 116),
    ("SPACECRAFT_RA", 117, 124),
    ("SPACECRAFT_DEC", 125, 132),
    ("SPACECRAFT_SUN_DISTANCE", 133, 139),
    ("SPACECRAFT_SIII_LONG", 140, 147),
    ("SPACECRAFT_SIII_LAT", 148, 155),
    ("SPACECRAFT_SAT_DIST", 156, 164),
    ("SC_X_VEL", 165, 171),
    ("SC_Y_VEL", 172, 178),
    ("SC_Z_VEL", 179, 185),
    ("DETECTOR_RA", 186, 193),
    ("DETECTOR_DEC", 194, 201),
    ("COUNTER_NUMBER", 202, 204),
    ("EVENT_QUALITY", 205, 208),
    ("PARTICLE_SPEED", 209, 214),
    ("PARTICLE_SPEED_FACTOR", 215, 219),
    ("PARTICLE_MASS", 220, 228),
    ("PARTICLE_MASS_FACTOR", 229, 233),
    ("PARTICLE_CHARGE", 234, 242),
    ("PARTICLE_CHARGE_ERROR", 243, 251),
    ("SPECTRUM_FLAG", 252, 253),
]

# Both lists are derived from the same table so that a label and its
# character span always stay at the same position.
colspecs = [(start, end) for _, start, end in _field_layout]
colnames = [label for label, _, _ in _field_layout]

# --- Missing value conventions ---
# Sentinels that are shared by several columns are named once and reused.
_MISSING_SCI = -9.9E-99       # amplitudes, rise times, particle mass and charge
_MISSING_ANGLE = -999.99      # RA / DEC / SIII longitude and latitude columns
_MISSING_VELOCITY = -99.99    # spacecraft velocity components
_MISSING_FACTOR = -9.9        # speed and mass factor columns
_MISSING_COUNT = -9           # counter number and event quality

# Maps a column label to the value that marks "no data" in that column.
# Columns that are not listed here (the *_FLAG columns except SPECTRUM_FLAG)
# have no sentinel defined.
missing_values = {
    # event identification
    "EVENT_ID": -999999999,
    "EVENT_TIME": "9999-999T99:99:99",
    "EVENT_JULIAN_DATE": -999999.999999,
    # signal amplitudes
    "QP_AMPLITUDE": _MISSING_SCI,
    "QI_AMPLITUDE": _MISSING_SCI,
    "QT_AMPLITUDE": _MISSING_SCI,
    "QC_AMPLITUDE": _MISSING_SCI,
    # signal rise times
    "QI_RISE_TIME": _MISSING_SCI,
    "QT_RISE_TIME": _MISSING_SCI,
    "QC_RISE_TIME": _MISSING_SCI,
    # spacecraft position and velocity
    "SPACECRAFT_RA": _MISSING_ANGLE,
    "SPACECRAFT_DEC": _MISSING_ANGLE,
    # NOTE(review): positive sentinel, unlike the other float columns -
    # confirm against the .lbl file.
    "SPACECRAFT_SUN_DISTANCE": 9.9999,
    "SPACECRAFT_SIII_LONG": _MISSING_ANGLE,
    "SPACECRAFT_SIII_LAT": _MISSING_ANGLE,
    "SPACECRAFT_SAT_DIST": -9999.99,
    "SC_X_VEL": _MISSING_VELOCITY,
    "SC_Y_VEL": _MISSING_VELOCITY,
    "SC_Z_VEL": _MISSING_VELOCITY,
    # detector pointing
    "DETECTOR_RA": _MISSING_ANGLE,
    "DETECTOR_DEC": _MISSING_ANGLE,
    # event classification
    "COUNTER_NUMBER": _MISSING_COUNT,
    "EVENT_QUALITY": _MISSING_COUNT,
    # derived particle properties
    "PARTICLE_SPEED": -99.9,
    "PARTICLE_SPEED_FACTOR": _MISSING_FACTOR,
    "PARTICLE_MASS": _MISSING_SCI,
    "PARTICLE_MASS_FACTOR": _MISSING_FACTOR,
    "PARTICLE_CHARGE": _MISSING_SCI,
    "PARTICLE_CHARGE_ERROR": _MISSING_SCI,
    # NOTE(review): positive sentinel - confirm against the .lbl file.
    "SPECTRUM_FLAG": 9,
}

# --- Locations ---
root = Path("../Dataset")
output_path = Path("CompiledDataSet/Cassini_CDA_Events.csv")

# Regex for volume recognition: the directory name must match it completely
vol_pattern = re.compile(r"COCDA_\d{4}")

# Path.iterdir() yields entries in arbitrary order. Sort the volumes so that
# the row order (and therefore the index written to the CSV) is the same on
# every run and on every filesystem.
volumes = sorted(
    d for d in root.iterdir() if d.is_dir() and vol_pattern.fullmatch(d.name)
)

all_dfs = []    # one DataFrame per event table
all_files = []  # every event table that was read

for vol in volumes:
    files = sorted(vol.glob("DATA/**/CDAEVENTS_*.TAB"))
    if not files:
        print(f"[WARNING] No event file found in {vol}/DATA")
    all_files.extend(files)

    for f in files:
        # Column labels come from colnames; the per-column sentinel values
        # listed in missing_values are parsed as NaN.
        df = pd.read_fwf(
            f, colspecs=colspecs, names=colnames,
            na_values=missing_values
        )

        # convert time (year-doy format -> datetime); values that do not
        # match the format become NaT instead of raising
        df["EVENT_TIME"] = pd.to_datetime(
            df["EVENT_TIME"], errors="coerce", format="%Y-%jT%H:%M:%S"
        )

        # Append source info so every row can be traced back to its table
        df["SOURCE_VOLUME"] = vol.name
        df["SOURCE_FILE"] = f.name
        all_dfs.append(df)

if not all_dfs:
    raise RuntimeError("No tables were loaded. Check paths or volume names.")

# Concatenate all DataFrames
final_df = pd.concat(all_dfs, ignore_index=True)

print(f"Loaded {len(final_df):,} events from {len(all_files)} files "
      f"across volumes {[v.name for v in volumes]}.")

# Save the DataFrame. to_csv does not create missing directories, so make
# sure the target folder exists before writing.
output_path.parent.mkdir(parents=True, exist_ok=True)
final_df.to_csv(output_path, index=True)

Loaded 367,995 events from 15 files across volumes ['COCDA_0007', 'COCDA_0012', 'COCDA_0045', 'COCDA_0050', 'COCDA_0052', 'COCDA_0053', 'COCDA_0059', 'COCDA_0061', 'COCDA_0068', 'COCDA_0069', 'COCDA_0070', 'COCDA_0074', 'COCDA_0075', 'COCDA_0076', 'COCDA_0094'].


In [2]:
# Preview the first rows to check whether the data looks correct
final_df.head()

Unnamed: 0,EVENT_ID,EVENT_TIME,EVENT_JULIAN_DATE,QP_AMPLITUDE,QP_SIGNAL_FLAG,QI_AMPLITUDE,QI_SIGNAL_FLAG,QT_AMPLITUDE,QT_SIGNAL_FLAG,QC_AMPLITUDE,...,EVENT_QUALITY,PARTICLE_SPEED,PARTICLE_SPEED_FACTOR,PARTICLE_MASS,PARTICLE_MASS_FACTOR,PARTICLE_CHARGE,PARTICLE_CHARGE_ERROR,SPECTRUM_FLAG,SOURCE_VOLUME,SOURCE_FILE
0,398120,2005-01-01 00:02:42,2453372.0,0.0,1,0.0,1,5.8e-15,1,0.0,...,,,,,,,,0,COCDA_0007,CDAEVENTS_05001_05090.TAB
1,398121,2005-01-01 00:04:55,2453372.0,0.0,1,0.0,1,4.7e-15,1,0.0,...,,,,,,,,0,COCDA_0007,CDAEVENTS_05001_05090.TAB
2,398122,2005-01-01 00:07:36,2453372.0,0.0,1,0.0,1,0.0,1,0.0,...,,,,,,,,0,COCDA_0007,CDAEVENTS_05001_05090.TAB
3,398123,2005-01-01 00:09:24,2453372.0,0.0,1,0.0,1,0.0,1,0.0,...,,,,,,,,0,COCDA_0007,CDAEVENTS_05001_05090.TAB
4,398124,2005-01-01 00:13:05,2453372.0,0.0,1,0.0,1,1.7e-14,1,0.0,...,,,,,,,,0,COCDA_0007,CDAEVENTS_05001_05090.TAB
