In [1]:
import cdflib
import numpy as np
import os

In [3]:
import spacepy.pycdf as pycdf
from spacepy.pycdf import CDFError

# Path to the data directory
data_dir = r"D:\mms\Data\mms\mms1\fpi\fast\l2\dis-dist\2017\11\labeled"
label_cdf = cdflib.CDF(r"D:\mms\OLSHEVSKY\mmslearning\labels_human\labels_fpi_fast_dis_dist_201711.cdf")

# Meaning of each integer label value; embedded in the VAR_NOTES attribute below
regions = {-1: 'Unknown', 0: 'Solar Wind', 1: 'Foreshock', 2: 'Magnetosheath', 3: 'Magnetosphere'}

# Track corrupted files (each time ID is recorded at most once)
corrupted_time_ids = []

# Get all variables from the label CDF
info = label_cdf.cdf_info()
variables = info.zVariables

# List the data directory once; the listing does not change while the loop runs
data_files = os.listdir(data_dir)


def _find_data_file(time_id):
    """Return the first name in ``data_files`` that contains ``time_id``, or None."""
    return next((name for name in data_files if time_id in name), None)


def _write_variable(time_id, target_name, values, cdf_type, notes):
    """Write ``values`` into the data CDF whose file name contains ``time_id``.

    The variable ``target_name`` is overwritten when it already exists and
    created with ``cdf_type`` otherwise. Its ``VAR_NOTES`` attribute is set to
    ``notes`` and the global ``filename`` attribute is refreshed.

    Nothing is raised for the two expected failure modes: a missing data file
    is reported and skipped, and a ``CDFError`` is reported and the time ID is
    appended to ``corrupted_time_ids``.
    """
    file = _find_data_file(time_id)
    if file is None:
        # Previously this case was silent, which hid labels that were never written.
        print(f"⚠️ No data file found for time ID {time_id}, skipping")
        return

    filepath = os.path.join(data_dir, file)
    print(f"Found file: {file}")

    try:
        # Try to open existing CDF in write mode
        with pycdf.CDF(filepath, readonly=False) as data_cdf:
            if target_name in data_cdf:
                print(f"Variable '{target_name}' already exists, overwriting...")
                data_cdf[target_name][...] = values
            else:
                print(f"Creating new variable '{target_name}'...")
                data_cdf.new(target_name, data=values, type=cdf_type)

            # Set attributes
            data_cdf[target_name].attrs["VAR_NOTES"] = notes
            # NOTE(review): the version suffix is hard-coded rather than taken
            # from the matched file name - confirm all data files are v3.4.0.
            data_cdf.attrs["filename"] = f"mms1_fpi_fast_l2_dis-dist_{time_id}_v3.4.0.cdf"

        print(f"✅ Written variable '{target_name}' to file '{file}'")

    except CDFError as e:
        print(f"❌ Skipping corrupted file: {file} ({e})")
        # Label and epoch variables share a time ID; avoid listing a file twice.
        if time_id not in corrupted_time_ids:
            corrupted_time_ids.append(time_id)


for var in variables:
    # Variable names look like "<kind>_mms1_fpi_fast_dis_dist_<time_id>";
    # match the full prefix instead of only the first character.
    if var.startswith("label_"):  # only process label variables
        print(f"\nProcessing variable: {var}")
        time_id = var.split('_')[-1]
        print(f"Time ID: {time_id}")

        # Extract label array
        label = np.array(label_cdf.varget(var), dtype=np.int8)
        print(f"Label shape: {label.shape}")

        _write_variable(
            time_id,
            "label",
            label,
            pycdf.const.CDF_INT1,
            "Predicted label " + str(regions),
        )
    elif var.startswith("epoch_"):  # process epoch variables
        print(f"\nProcessing epoch variable: {var}")
        time_id = var.split('_')[-1]
        print(f"Time ID: {time_id}")

        # Extract epoch array
        # NOTE(review): assumes the raw values returned by cdflib are valid
        # CDF_EPOCH values (not TT2000) - confirm against the label file.
        epoch = np.array(label_cdf.varget(var))
        print(f"Epoch shape: {epoch.shape}")

        _write_variable(
            time_id,
            "label_epoch",
            epoch,
            pycdf.const.CDF_EPOCH,
            "Epoch time variable",
        )


# Print summary of corrupted files
if corrupted_time_ids:
    print("\n⚠️ Corrupted files detected:")
    for tid in corrupted_time_ids:
        print(f" - Time ID: {tid}")
else:
    print("\n✅ No corrupted files detected.")



Processing variable: label_mms1_fpi_fast_dis_dist_20171109180000
Time ID: 20171109180000
Label shape: (1600,)
Found file: mms1_fpi_fast_l2_dis-dist_20171109180000_v3.4.0.cdf
Variable 'label' already exists, overwriting...
✅ Written variable 'label' to file 'mms1_fpi_fast_l2_dis-dist_20171109180000_v3.4.0.cdf'

Processing epoch variable: epoch_mms1_fpi_fast_dis_dist_20171109180000
Time ID: 20171109180000
Epoch shape: (1600,)
Found file: mms1_fpi_fast_l2_dis-dist_20171109180000_v3.4.0.cdf
Creating new variable 'label_epoch'...
Creating new variable 'label_epoch'...
✅ Written variable 'label_epoch' to file 'mms1_fpi_fast_l2_dis-dist_20171109180000_v3.4.0.cdf'

Processing variable: label_mms1_fpi_fast_dis_dist_20171102020000
Time ID: 20171102020000
Label shape: (1600,)
Found file: mms1_fpi_fast_l2_dis-dist_20171102020000_v3.4.0.cdf
Variable 'label' already exists, overwriting...
✅ Written variable 'label' to file 'mms1_fpi_fast_l2_dis-dist_20171102020000_v3.4.0.cdf'

Processing epoch vari