In [1]:
import os
import sys

# caution: path[0] is reserved for script path (or '' in REPL).
sys.path.insert(1, os.path.abspath("./../src"))


import datetime
import importlib

import astropy.time
import numpy as np
import pandas as pd
import tqdm
from cdflib.epochs_astropy import CDFAstropy as cdfepoch
from dateutil import rrule

import data_loader
import rbsp_chorus_tool

# Re-execute the two project modules so that edits made to their source files
# since they were first imported are picked up without restarting the kernel.
importlib.reload(data_loader)
importlib.reload(rbsp_chorus_tool)

# Root folder for this notebook's processed inputs and outputs. The relative path
# is resolved against the current working directory at the time this cell runs.
pdata_folder = os.path.abspath(r"./../processed_data/chorus_neural_network/")

# IPython magic selecting the Qt matplotlib backend.
# NOTE(review): no cell visible in this notebook plots anything — confirm it is still needed.
%matplotlib qt

In [2]:
%%time
# RBSP Chorus Preprocessing, Obtains clean chorus amplitudes


for year in range(2019, 2020):
    output_folder = os.path.join(pdata_folder, "observed_chorus")
    lower_band = True
    
    start = datetime.datetime(year=year, month=1, day=1, tzinfo=datetime.UTC)
    end = datetime.datetime(year=year+1, month=1, day=1, tzinfo=datetime.UTC)
    
    WNA_A = data_loader.load_raw_data_from_config(
        id=["RBSP", "EMFISIS", "L4", "WNA_SURVEY"],
        start=start,
        end=end,
        satellite="a",
        root_data_dir="/project/rbsp/data/",
        use_config_keys_in_subdir=False,
    )
    
    WNA_B = data_loader.load_raw_data_from_config(
        id=["RBSP", "EMFISIS", "L4", "WNA_SURVEY"],
        start=start,
        end=end,
        satellite="b",
        root_data_dir="/project/rbsp/data/",
        use_config_keys_in_subdir=False,
    )
    
    THRUSTER_EVENTS_DF_A = pd.read_csv(os.path.join(pdata_folder, "THRUSTER_EVENTS_RBSPA.csv"))
    THRUSTER_EVENTS_DF_B = pd.read_csv(os.path.join(pdata_folder, "THRUSTER_EVENTS_RBSPB.csv"))
    
    THRUSTER_START_TIMES_A = pd.to_datetime(THRUSTER_EVENTS_DF_A["Start Time"], utc=True, format="ISO8601").astype(np.int64) // 10**9
    THRUSTER_END_TIMES_A = pd.to_datetime(THRUSTER_EVENTS_DF_A["End Time"], utc=True, format="ISO8601").astype(np.int64) // 10**9
    
    THRUSTER_START_TIMES_B = pd.to_datetime(THRUSTER_EVENTS_DF_B["Start Time"], utc=True, format="ISO8601").astype(np.int64) // 10**9
    THRUSTER_END_TIMES_B = pd.to_datetime(THRUSTER_EVENTS_DF_B["End Time"], utc=True, format="ISO8601").astype(np.int64) // 10**9
    
    RBSP_A = {
    
        "WNA" : WNA_A,
        "THRUSTER_START_TIMES" : THRUSTER_START_TIMES_A,
        "THRUSTER_END_TIMES" : THRUSTER_END_TIMES_A,
        "SATID" : "A"
    }
    
    RBSP_B = {
    
        "WNA" : WNA_B,
        "THRUSTER_START_TIMES" : THRUSTER_START_TIMES_B,
        "THRUSTER_END_TIMES" : THRUSTER_END_TIMES_B,
        "SATID" : "B"
    }
    
    RBSP = [RBSP_A, RBSP_B]
    
    for PROBE in RBSP:
        
        MLT = PROBE["WNA"]["MLT"]
        MLAT = PROBE["WNA"]["MagLat"]
        L = PROBE["WNA"]["L"]
        EPOCH = PROBE["WNA"]["Epoch"]
        THRUSTER_START_TIMES = PROBE["THRUSTER_START_TIMES"]
        THRUSTER_END_TIMES = PROBE["THRUSTER_END_TIMES"]
        SATID = PROBE["SATID"]
        
        TIME = cdfepoch.unixtime(EPOCH)
        
        CHORUS = np.asarray(rbsp_chorus_tool.calculate_chorus_power(WNA_survey=PROBE["WNA"], lower=lower_band))
        
        within_epoch_range = (start.timestamp() < TIME) & (TIME < end.timestamp())
        all_valid_coordinates = (EPOCH > 0) & (0 <= MLT) & (MLT <= 24) & (0 < L) & (L < 10) & (-90 <= MLAT) & (MLAT <= 90)
        
        MLT[~(within_epoch_range & all_valid_coordinates)] = np.nan
        MLAT[~(within_epoch_range & all_valid_coordinates)] = np.nan
        L[~(within_epoch_range & all_valid_coordinates)] = np.nan
        CHORUS[~(within_epoch_range & all_valid_coordinates)] = np.nan
        
        TIME_INTERPOLATED = []
        L_INTERPOLATED = []
        MLT_INTERPOLATED = []
        MLAT_INTERPOLATED = []
        CHORUS_INTERPOLATED = []
        
        for p in tqdm.tqdm(range(len(TIME) - 1)):
        
            t1 = TIME[p]
            t2 = TIME[p + 1]
        
            if t2 - t1 < 60.0:
        
                t_points = np.arange(t1, t2 + 5, step=5, dtype=np.float64)
        
                TIME_INTERPOLATED.extend(t_points)
                L_INTERPOLATED.extend(
                    np.interp(
                        x=t_points,
                        xp=[t1, t2],
                        fp=[L[p], L[p + 1]],
                        left=np.nan,
                        right=np.nan,
                    )
                )
        
                x_int = np.interp(
                    t_points,
                    xp=[t1, t2],
                    fp=[np.cos(MLT[p] * 2 * np.pi / 24.0), np.cos(MLT[p+1] * 2 * np.pi / 24.0)],
                    left=np.nan,
                    right=np.nan,
                )
                
                y_int = np.interp(
                    t_points,
                    xp=[t1, t2],
                    fp=[np.sin(MLT[p] * 2 * np.pi / 24.0), np.sin(MLT[p+1] * 2 * np.pi / 24.0)],
                    left=np.nan,
                    right=np.nan,
                )
                
                angle = np.mod(np.arctan2(y_int, x_int) + 2 * np.pi, 2 * np.pi)
                MLT_INTERPOLATED.extend((angle * 24) / (2 * np.pi))
        
                MLAT_INTERPOLATED.extend(
                    np.interp(
                        x=t_points,
                        xp=[t1, t2],
                        fp=[MLAT[p], MLAT[p + 1]],
                        left=np.nan,
                        right=np.nan,
                    )
                )
        
                CHORUS_INTERPOLATED.extend(
                    np.interp(
                        x=t_points,
                        xp=[t1, t2],
                        fp=[CHORUS[p], CHORUS[p + 1]],
                        left=np.nan,
                        right=np.nan,
                    )
                )
    
        TIME = np.array(TIME_INTERPOLATED)
        L = np.array(L_INTERPOLATED)
        MLT = np.array(MLT_INTERPOLATED)
        MLAT = np.array(MLAT_INTERPOLATED)
        CHORUS = np.array(CHORUS_INTERPOLATED)
    
        print("\nShapes before cleaning thruster events and removing NaNs:")
        print(TIME.shape)
        print(L.shape)
        print(MLT.shape)
        print(MLAT.shape)
        print(CHORUS.shape)
    
        for START_TIME, END_TIME in zip(THRUSTER_START_TIMES, THRUSTER_END_TIMES):
            TIME[(START_TIME <= TIME) & (TIME <= END_TIME)] = np.nan
        
        NOT_NAN = (
            np.isfinite(TIME)
            & np.isfinite(L)
            & np.isfinite(MLT)
            & np.isfinite(MLAT)
            & np.isfinite(CHORUS)
        )
    
        print(f"Number of points in thruster events : {np.sum(np.isnan(TIME))}")
        print(f"Number of CHORUS that were NAN: {np.sum(np.isnan(CHORUS))}")
    
        
        TIME = TIME[NOT_NAN]
        L = L[NOT_NAN]
        MLT = MLT[NOT_NAN]
        MLAT = MLAT[NOT_NAN]
        CHORUS = CHORUS[NOT_NAN]
    
        print("\nShapes after cleaning thruster events and removing NaNs:\n")
        print(TIME.shape)
        print(L.shape)
        print(MLT.shape)
        print(MLAT.shape)
        print(CHORUS.shape)
        
        print("\n")
    
        if lower_band:
            np.savez(
                file=os.path.abspath(os.path.join(output_folder, f"observed_chorus_{year}_{SATID}_LOWER_BAND.npz")),
                UNIX_TIME=TIME,
                MLT=MLT,
                MLAT=MLAT,
                L=L,
                CHORUS=CHORUS,
            )
        else:
            np.savez(
                file=os.path.abspath(os.path.join(output_folder, f"observed_chorus_{year}_{SATID}_UPPER_BAND.npz")),
                UNIX_TIME=TIME,
                MLT=MLT,
                MLAT=MLAT,
                L=L,
                CHORUS=CHORUS,
            )


100%|████████████████████████████████████████████████████████████████████████████████████████| 4118381/4118381 [19:05<00:00, 3594.80it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████| 4118380/4118380 [01:39<00:00, 41214.69it/s]



Shapes before cleaning thruster events and removing NaNs:
(12355097,)
(12355097,)
(12355097,)
(12355097,)
(12355097,)
Number of points in thruster events : 97205
Number of CHORUS that were NAN: 4788436

Shapes after cleaning thruster events and removing NaNs:

(7501859,)
(7501859,)
(7501859,)
(7501859,)
(7501859,)




100%|████████████████████████████████████████████████████████████████████████████████████████| 2829324/2829324 [13:11<00:00, 3574.00it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████| 2829323/2829323 [01:08<00:00, 41541.48it/s]



Shapes before cleaning thruster events and removing NaNs:
(8487963,)
(8487963,)
(8487963,)
(8487963,)
(8487963,)
Number of points in thruster events : 76322
Number of CHORUS that were NAN: 3266300

Shapes after cleaning thruster events and removing NaNs:

(5170780,)
(5170780,)
(5170780,)
(5170780,)
(5170780,)


CPU times: user 35min 29s, sys: 50.7 s, total: 36min 20s
Wall time: 37min 23s


In [None]:
# Removing solar proton events!
#
# Configuration and inputs for the solar-proton-event cleaning: the conjunction table
# written for this VERSION / MODEL_TYPE and the list of solar proton events.

VERSION = "v1"
MODEL_TYPE = "LOWER_BAND"


pdata_folder = os.path.abspath(r"./../processed_data/chorus_neural_network/")
model_folder = os.path.join(pdata_folder, "models", VERSION)

# Use the archive as a context manager so the file handle is released even if the
# "CONJUNCTIONS" key is missing and the lookup raises.
with np.load(os.path.join(model_folder, rf"dataset_{VERSION}_{MODEL_TYPE}.npz")) as dataset:
    CONJUNCTIONS = dataset["CONJUNCTIONS"]

SOLAR_PROTON_EVENT_LIST = pd.read_csv(
    os.path.join(pdata_folder, r"SOLAR_PROTON_EVENT_LIST_1976_2024.csv")
)

In [None]:
"""CONJUNCTION = [
    CHUNK_TIME + (T_SIZE / 2.0),
    AVG_L_POES[x_bin, y_bin],
    AVG_MLT_POES[x_bin, y_bin],
    AVG_FLUX_0[x_bin, y_bin],
    AVG_FLUX_1[x_bin, y_bin],
    AVG_FLUX_2[x_bin, y_bin],
    AVG_FLUX_3[x_bin, y_bin],
    AVG_FLUX_4[x_bin, y_bin],
    AVG_FLUX_5[x_bin, y_bin],
    AVG_FLUX_6[x_bin, y_bin],
    AVG_FLUX_7[x_bin, y_bin],
    CHUNK_TIME + (T_SIZE / 2.0),
    AVG_L_RBSP[x_bin, y_bin, z_bin],  # LSTAR OF RBSP POINT CHOSEN
    AVG_MLT_RBSP[x_bin, y_bin, z_bin],  # DIFFERENCE IN MLT FOUND
    AVG_MLAT_RBSP[x_bin, y_bin, z_bin],
    AVG_CHORUS[x_bin, y_bin, z_bin],  # CHORUS OBSERVED
    AVG_DENSITY_RBSP[x_bin, y_bin, z_bin],
    SME_MEAN,
    SME_VARIATION,
    OMNI["AVG_B"],
    OMNI["FLOW_SPEED"],
    OMNI["PROTON_DENSITY"],
    OMNI["SYM_H"]]"""

def _sep_stamp_to_unix(stamp):
    """Convert one solar-proton-event timestamp string to unix seconds (UTC).

    The string is read by fixed position: year [0:4], month [5:7], day [8:10],
    hour [11:13] and minute [13:15]; seconds are taken as 0. Surrounding
    whitespace is stripped first.
    """
    stamp = stamp.strip()
    ymdhms = {
        "year": int(stamp[0:4]),
        "month": int(stamp[5:7]),
        "day": int(stamp[8:10]),
        "hour": int(stamp[11:13]),
        "minute": int(stamp[13:15]),
        "second": 0,
    }
    return astropy.time.Time(ymdhms, format="ymdhms", scale="utc").unix


def _write_summary(handle, heading, values, unit):
    """Write the mean / standard deviation / minimum / maximum of `values` under `heading`."""
    handle.write(f"\n{heading}: \n")
    handle.write(f"Mean: {np.mean(values)} [{unit}]\n")
    handle.write(f"Standard Deviation: {np.std(values)} [{unit}]\n")
    handle.write(f"Minimum: {np.min(values)} [{unit}]\n")
    handle.write(f"Maximum: {np.max(values)} [{unit}]\n")


order_to_sort_conjunctions = np.argsort(
    CONJUNCTIONS[:, 0]
)  # Sorted based on POES Conjunction time!
SORTED_CONJUNCTIONS = CONJUNCTIONS[order_to_sort_conjunctions, :]

print(f"Starting shape of conjunctions list: {SORTED_CONJUNCTIONS.shape}")

SORTED_POES_CONJUNCTION_TIMES = SORTED_CONJUNCTIONS[:, 0]

START_OF_SEP_EVENTS_UTC = SOLAR_PROTON_EVENT_LIST["START"]
END_OF_SEP_EVENTS_UTC = SOLAR_PROTON_EVENT_LIST["END"]
ZIPPED_EVENTS = list(zip(START_OF_SEP_EVENTS_UTC, END_OF_SEP_EVENTS_UTC))

print("Removing high energy solar proton events!")

# Rows are flagged in a mask over the ORIGINAL sorted array and removed once at the end.
# Previously the array was shrunk inside the loop while the searchsorted indices still
# referred to the unshrunk time column, so every event after the first non-empty removal
# deleted the wrong rows and left solar-proton-event rows in the dataset.
KEEP_ROW = np.ones(SORTED_CONJUNCTIONS.shape[0], dtype=bool)

for START, END in tqdm.tqdm(ZIPPED_EVENTS):

    START_UNIX = _sep_stamp_to_unix(START)
    END_UNIX = _sep_stamp_to_unix(END)

    # Half-open index range [first_in_event, first_after_event) of conjunctions with
    # START_UNIX <= time < END_UNIX (searchsorted defaults to side="left").
    first_in_event, first_after_event = np.searchsorted(
        a=SORTED_POES_CONJUNCTION_TIMES, v=[START_UNIX, END_UNIX]
    )
    KEEP_ROW[first_in_event:first_after_event] = False

SORTED_CONJUNCTIONS = SORTED_CONJUNCTIONS[KEEP_ROW, :]

print("Finished removing high energy solar proton events!")

print("Saving!")

CLEANED_CONJUNCTIONS = SORTED_CONJUNCTIONS  # Should be cleaned by now!

# FIELD_MODEL is not assigned anywhere in this notebook, so referencing it directly raised
# NameError on a fresh kernel. It is kept in the file name when the session defines it
# (same name as before); otherwise the tag falls back to VERSION and MODEL_TYPE only,
# matching the naming of the input dataset.
_field_model = globals().get("FIELD_MODEL")
dataset_tag = "_".join(str(part) for part in (VERSION, _field_model, MODEL_TYPE) if part is not None)

np.savez(
    file=os.path.join(model_folder, rf"spe_removed_dataset_{dataset_tag}.npz"),
    CONJUNCTIONS=CLEANED_CONJUNCTIONS,
)

# Column views into the cleaned table; negative indices count from the last column.
C_POES_TIME = CLEANED_CONJUNCTIONS[:, 0]
C_POES_LSTAR = CLEANED_CONJUNCTIONS[:, 1]
C_POES_MLT = CLEANED_CONJUNCTIONS[:, 2]
C_POES_FLUX = CLEANED_CONJUNCTIONS[:, 3:-12]
C_RBSP_TIME = CLEANED_CONJUNCTIONS[:, -12]
C_RBSP_LSTAR = CLEANED_CONJUNCTIONS[:, -11]
C_RBSP_MLT = CLEANED_CONJUNCTIONS[:, -10]
C_RBSP_MLAT = CLEANED_CONJUNCTIONS[:, -9]
C_RBSP_CHORUS = CLEANED_CONJUNCTIONS[:, -8]
C_AVG_SME = CLEANED_CONJUNCTIONS[:, -6]
C_VAR_SME = CLEANED_CONJUNCTIONS[:, -5]
C_AVG_AVG_B = CLEANED_CONJUNCTIONS[:, -4]
C_AVG_FLOW_SPEED = CLEANED_CONJUNCTIONS[:, -3]
C_AVG_PROTON_DENSITY = CLEANED_CONJUNCTIONS[:, -2]
C_AVG_SYM_H = CLEANED_CONJUNCTIONS[:, -1]

print("Creating documentation of dataset!")

lstar_difference = C_POES_LSTAR - C_RBSP_LSTAR
mlt_difference = C_POES_MLT - C_RBSP_MLT
time_difference = C_POES_TIME - C_RBSP_TIME
sme_std = np.sqrt(C_VAR_SME)

with open(
    os.path.join(model_folder, rf"spe_removed_dataset_{dataset_tag}.txt"),
    "w",
) as f:

    f.write("\nConjunctions:\n")
    f.write(f"Number of conjunctions: {CLEANED_CONJUNCTIONS.shape[0]} [#]\n")
    f.write(
        f"Number lost from cleaning solar proton events: {CONJUNCTIONS.shape[0] - CLEANED_CONJUNCTIONS.shape[0]} [#]\n"
    )
    f.write(f"Minimum RBSP Time: {np.min(C_RBSP_TIME)} [seconds since unix epoch]\n")
    f.write(f"Maximum RBSP Time: {np.max(C_RBSP_TIME)} [seconds since unix epoch]\n")
    f.write(f"Minimum POES Time: {np.min(C_POES_TIME)} [seconds since unix epoch]\n")
    f.write(f"Maximum POES Time: {np.max(C_POES_TIME)} [seconds since unix epoch]\n")

    f.write("\nL:\n")
    f.write(f"Mean Difference: {np.mean(lstar_difference)} [L]\n")
    f.write(f"Standard deviation of Difference {np.std(lstar_difference)} [L]\n")
    f.write(f"Minimum Absolute Difference : {np.min(np.abs(lstar_difference))} [L]\n")
    f.write(f"Maximum Absolute Difference : {np.max(np.abs(lstar_difference))} [L]\n")

    # NOTE(review): the first two MLT labels say "Absolute" but the values are the mean and
    # standard deviation of the SIGNED difference, and the difference is not wrapped on the
    # 24 h clock. Labels and values are left as they were — confirm which one is intended.
    f.write("\nMLT: \n")
    f.write(f"Mean Absolute Difference: {np.mean(mlt_difference)} [MLT]\n")
    f.write(f"Standard deviation of Absolute Difference {np.std(mlt_difference)} [MLT]\n")
    f.write(f"Minimum Absolute Difference : {np.min(np.abs(mlt_difference))} [MLT]\n")
    f.write(f"Maximum Absolute Difference : {np.max(np.abs(mlt_difference))} [MLT]\n")

    _write_summary(f, "MLAT", C_RBSP_MLAT, "degrees")

    f.write("\nTime: \n")
    f.write(f"Mean Difference: {np.mean(time_difference)} [s]\n")
    f.write(f"Standard deviation of Difference {np.std(time_difference)} [s]\n")
    f.write(f"Minimum Absolute Difference : {np.min(np.abs(time_difference))} [s]\n")
    f.write(f"Maximum Absolute Difference : {np.max(np.abs(time_difference))} [s]\n")

    _write_summary(f, f"{MODEL_TYPE} Chorus", C_RBSP_CHORUS, "pT")
    _write_summary(f, "SME", C_AVG_SME, "nT")
    _write_summary(f, "SME STD", sme_std, "nT")
    _write_summary(f, "AVG_B", C_AVG_AVG_B, "nT")
    _write_summary(f, "Flow Speed", C_AVG_FLOW_SPEED, "km/s")
    _write_summary(f, "Proton Density", C_AVG_PROTON_DENSITY, "n/cc")
    _write_summary(f, "SYM_H", C_AVG_SYM_H, "nT")

print("Finished!")
print(f"Ending shape of conjunctions : {CLEANED_CONJUNCTIONS.shape}")

In [None]:
# Load POES Data
#
# Only the "energy" entry of this one-day load is used below, to compute the channel
# widths for integrating the POES flux.

POES = data_loader.load_raw_data_from_config(
    id=["POES", "SEM", "MPE"],
    start=datetime.datetime(year=2000, month=1, day=1),
    end=datetime.datetime(year=2000, month=1, day=2),
    satellite="n15",
)

ENERGIES = POES["energy"][0]
DIFF_E = ENERGIES[2:8] - ENERGIES[1:7]
# -----------------------------------------------------------------------
# Create datasets used for training, testing, etc
#
# NOTE(review): this cell builds the model-ready dataset from CONJUNCTIONS (the table as
# loaded), not from CLEANED_CONJUNCTIONS produced by the solar-proton-event removal cell.
# Confirm that is intended.


def _column(index):
    """Return column `index` of CONJUNCTIONS as an (N, 1) array."""
    return np.expand_dims(CONJUNCTIONS[:, index], axis=1)


print(CONJUNCTIONS.shape)

mission_start_date = datetime.datetime(year=2012, month=8, day=30, tzinfo=datetime.UTC)
mission_end_date = datetime.datetime(year=2019, month=10, day=19, tzinfo=datetime.UTC)

# 1-D POES conjunction times, used for all the time-based masks below.
C_POES_TIME = CONJUNCTIONS[:, 0]

validation_start_date = datetime.datetime(year=2016, month=2, day=1, tzinfo=datetime.UTC)
validation_end_date = datetime.datetime(year=2016, month=3, day=1, tzinfo=datetime.UTC)
validation_times = (validation_start_date.timestamp() < C_POES_TIME) & (C_POES_TIME < validation_end_date.timestamp())

within_mission_time = (mission_start_date.timestamp() < C_POES_TIME) & (C_POES_TIME < mission_end_date.timestamp())

train_test_subset_selected = ~validation_times & within_mission_time
validation_subset_selected = validation_times & within_mission_time

# Day index (0 = mission start day) of every conjunction; conjunctions outside the
# mission keep the initial value 0 and are excluded by `within_mission_time` later.
DAY = np.zeros(shape=(C_POES_TIME.shape[0]))
print("Identifying Days of Data Points....")
for DAY_ID, dt in enumerate(rrule.rrule(rrule.DAILY, dtstart=mission_start_date, until=mission_end_date)):

    within_day = (dt.timestamp() <= C_POES_TIME) & (C_POES_TIME < (dt + datetime.timedelta(days=1)).timestamp())
    DAY[within_day] = DAY_ID

print(f"Min day: {np.min(DAY)}")
print(f"Max day: {np.max(DAY)}")

print(f"Number of conjunctions in validation set: {np.count_nonzero(validation_times)}")

# From here on every quantity is an (N, 1) column so it can be stacked with np.hstack.
C_POES_TIME = _column(0)
C_POES_LSTAR = _column(1)
C_POES_MLT = _column(2)

# Columns 3:-12 hold the POES flux channels; channels 1..6 are integrated over energy
# using the matching channel widths in DIFF_E.
C_POES_FLUX = CONJUNCTIONS[:, 3:-12][:, 1:7]
C_POES_FLUX_INTEGRATED = np.expand_dims(np.sum(C_POES_FLUX * DIFF_E, axis=1), axis=1)

C_RBSP_TIME = _column(-12)
C_RBSP_LSTAR = _column(-11)
C_RBSP_MLT = _column(-10)
C_RBSP_MLAT = _column(-9)
C_RBSP_CHORUS = _column(-8)
C_RBSP_DENSITY = _column(-7)
C_AVG_SME = _column(-6)
C_VAR_SME = _column(-5)
C_AVG_AVG_B = _column(-4)
C_AVG_FLOW_SPEED = _column(-3)
C_AVG_PROTON_DENSITY = _column(-2)
C_AVG_SYM_H = _column(-1)

for column in (
    C_RBSP_TIME,
    C_RBSP_LSTAR,
    C_RBSP_CHORUS,
    C_RBSP_DENSITY,
    C_RBSP_MLAT,
    C_POES_TIME,
    C_POES_LSTAR,
    C_POES_MLT,
    C_RBSP_MLT,
    C_POES_FLUX_INTEGRATED,
    C_AVG_SME,
    C_VAR_SME,
    C_AVG_AVG_B,
    C_AVG_FLOW_SPEED,
    C_AVG_PROTON_DENSITY,
    C_AVG_SYM_H,
):
    print(column.shape)

# Normalisation statistics. Means already ignored NaNs (np.nanmean) while most standard
# deviations used np.std, so a single NaN produced a finite mean next to a NaN std;
# np.nanstd is now used throughout so each pair is computed over the same samples.
mean_LSTAR = np.nanmean(C_POES_LSTAR)
std_LSTAR = np.nanstd(C_POES_LSTAR)

mean_MLAT = np.nanmean(C_RBSP_MLAT)
std_MLAT = np.nanstd(C_RBSP_MLAT)

# NOTE(review): for fluxes and SME these are log10 of the linear mean / std, not the
# mean / std of log10 values — unchanged, confirm it matches how the model consumes them.
mean_fluxes = np.log10(np.nanmean(C_POES_FLUX))
std_fluxes = np.log10(np.nanstd(C_POES_FLUX))

mean_density = np.nanmean(C_RBSP_DENSITY)
std_density = np.nanstd(C_RBSP_DENSITY)

mean_sme = np.log10(np.nanmean(C_AVG_SME))
std_sme = np.log10(np.nanstd(C_AVG_SME))

mean_avg_b = np.nanmean(C_AVG_AVG_B)
std_avg_b = np.nanstd(C_AVG_AVG_B)

mean_flow_speed = np.nanmean(C_AVG_FLOW_SPEED)
std_flow_speed = np.nanstd(C_AVG_FLOW_SPEED)

mean_avg_proton_density = np.nanmean(C_AVG_PROTON_DENSITY)
std_avg_proton_density = np.nanstd(C_AVG_PROTON_DENSITY)

mean_avg_sym_h = np.nanmean(C_AVG_SYM_H)
std_avg_sym_h = np.nanstd(C_AVG_SYM_H)


L_MAX = 9

FEATURES = np.hstack(
    (
        C_POES_LSTAR,
        C_POES_LSTAR * np.cos((C_POES_MLT * 2 * np.pi) / 24.0),
        C_POES_LSTAR * np.sin((C_POES_MLT * 2 * np.pi) / 24.0),
        C_RBSP_MLAT,
        C_RBSP_DENSITY,
        C_POES_FLUX_INTEGRATED,
        C_AVG_SME,
        C_VAR_SME,
        C_AVG_AVG_B,
    )
)

# Quality cuts. The flux mask used to be called `poes_greater_than_1000` although it has
# always compared against 100; the threshold is unchanged and now named explicitly.
MIN_CHORUS_AMPLITUDE = 1e-1
MIN_INTEGRATED_POES_FLUX = 100

chorus_greater_than_p1_pT = MIN_CHORUS_AMPLITUDE < C_RBSP_CHORUS.flatten()
poes_flux_above_minimum = MIN_INTEGRATED_POES_FLUX < C_POES_FLUX_INTEGRATED.flatten()
passes_quality_cuts = chorus_greater_than_p1_pT & poes_flux_above_minimum

training_rows = train_test_subset_selected & passes_quality_cuts
validation_rows = validation_subset_selected & passes_quality_cuts

FEATURES_T = FEATURES[training_rows, :]
FEATURES_V = FEATURES[validation_rows, :]

MODEL_LABELS = C_RBSP_CHORUS[training_rows]
MODEL_LABELS_V = C_RBSP_CHORUS[validation_rows]

DAY_T = DAY[training_rows]
DAY_V = DAY[validation_rows]

print(FEATURES_T.shape)
print(MODEL_LABELS.shape)
print(FEATURES_V.shape)
print(MODEL_LABELS_V.shape)

# FIELD_MODEL is not assigned anywhere in this notebook, so referencing it directly raised
# NameError on a fresh kernel. It is kept in the file name when the session defines it
# (same name as before); otherwise the tag falls back to VERSION and MODEL_TYPE only.
_field_model = globals().get("FIELD_MODEL")
dataset_tag = "_".join(str(part) for part in (VERSION, _field_model, MODEL_TYPE) if part is not None)

np.savez(
    file=os.path.join(model_folder, f"MODEL_READY_dataset_{dataset_tag}.npz"),
    FEATURES=FEATURES_T,
    LABELS=MODEL_LABELS,
    VALIDATION_FEATURES=FEATURES_V,
    VALIDATION_LABELS=MODEL_LABELS_V,
    TRAINING_DAY_IDS=DAY_T,
    VALIDATION_DAY_IDS=DAY_V,
    # NOTE(review): this is the MLT of ALL conjunctions, not only the rows in FEATURES —
    # its length differs from FEATURES/LABELS. Left unchanged; confirm what readers expect.
    TRAINING_MLT=C_POES_MLT,
    MEAN_L=mean_LSTAR,
    STD_L=std_LSTAR,
    MEAN_MLAT=mean_MLAT,
    STD_MLAT=std_MLAT,
    MEAN_DENSITY=mean_density,
    STD_DENSITY=std_density,
    MEAN_FLUXES=mean_fluxes,
    STD_FLUXES=std_fluxes,
    MEAN_SME=mean_sme,
    STD_SME=std_sme,
    MEAN_AVG_B=mean_avg_b,
    STD_AVG_B=std_avg_b,
    MEAN_FLOW_SPEED=mean_flow_speed,
    STD_FLOW_SPEED=std_flow_speed,
    MEAN_AVG_PROTON_DENSITY=mean_avg_proton_density,
    STD_AVG_PROTON_DENSITY=std_avg_proton_density,
    MEAN_AVG_SYM_H=mean_avg_sym_h,
    STD_AVG_SYM_H=std_avg_sym_h,
)