In [1]:
import sys
import os
# caution: path[0] is reserved for script path (or '' in REPL).
sys.path.insert(1, os.path.abspath('./../src'))


from cdflib.epochs_astropy import CDFAstropy as cdfepoch
import astropy.time
import data_loader
import datetime
import matplotlib.pyplot as plt
import multiprocessing as mp
import numpy as np
import pandas as pd
import tqdm

import IRBEM

import data_loader
import rbsp_chorus_tool
import chorus_machine_learning_helper

import importlib
# Re-import the local helper modules so edits made to them on disk are picked up
# without restarting the kernel.
# NOTE(review): chorus_machine_learning_helper is imported above but is not reloaded
# here - confirm whether that omission is intended.
importlib.reload(data_loader)
importlib.reload(rbsp_chorus_tool)

# Select the Qt matplotlib backend for the figures produced by the cells below.
%matplotlib qt

In [None]:
#STAGE 0 DATA VERIFICATION FOR POES LSTAR CALCULATIONS
# Plots the stored L* against the stored IGRF Lm for one POES satellite and year as a
# visual sanity check of the STAGE_0 archive.

year = 2012
SATID = "m02"

# The archive holds a single pickled object under "DATA" that is indexed by satellite id.
# The context manager closes the file even if the key lookup fails.
with np.load(fr"./../processed_data_chorus_neural_network/STAGE_0/MPE_DATA_PREPROCESSED_WITH_LSTAR/MPE_PREPROCESSED_DATA_T89_{year}.npz", allow_pickle=True) as refs:
    DATA = refs["DATA"].flatten()[0]

# Show only the satellite ids; printing DATA itself dumps every array into the output.
print(list(DATA))
SAT = DATA[SATID]
print(type(SAT))

# Convert with an explicit UTC timezone: datetime.fromtimestamp(t) without tz= returns the
# *local* wall-clock time of the machine running the notebook, which shifts the time axis.
dt_for_all = np.array([datetime.datetime.fromtimestamp(t, tz=datetime.timezone.utc) for t in SAT["UNIX_TIME"]])

plt.plot(dt_for_all, SAT["Lstar"], label="L*", color = "red", marker="*")
plt.plot(dt_for_all, SAT["L"], label = "IGRF Lm", color = "black", marker="*")
plt.ylabel("|L|")
plt.xlabel("Time (UTC)")
plt.title(f"Some Orbits for {SATID} in {year}")
plt.legend()

plt.show()

In [12]:
# Plot one year of RBSP L* above the Kp index interpolated onto the same time stamps.
year = 2013
sat = "a"

# Read both arrays once and close the archive; each refs[...] access re-reads the file.
with np.load(fr"./../processed_data/chorus_neural_network/STAGE_1/Lstar/RBSP_{sat.upper()}_T89_{year}.npz", allow_pickle=True) as refs:
    lstar_unix_time = refs["UNIX_TIME"]
    lstar_values = refs["Lstar"]

OMNI = data_loader.load_raw_data_from_config(id = ["OMNI", "ONE_HOUR_RESOLUTION"],
                                                 start = datetime.datetime(year = year, month = 1, day = 1),
                                                 end = datetime.datetime(year = year + 1, month = 1, day = 1), 
                                                 root_data_dir = "./../raw_data/")

OMNI_TIME = np.asarray(cdfepoch.unixtime(OMNI["Epoch"]))
KP = OMNI["KP"].astype(np.float64)

# Out-of-range Kp values become NaN; np.interp propagates NaN from fp, so L* samples
# next to a bad Kp record show up as gaps instead of invented values.
KP[(KP < 0) | (KP >= 99) | np.isnan(KP)] = np.nan

# Records whose *time* is invalid must be removed, not just blanked: np.interp needs
# finite, increasing xp and returns meaningless values otherwise.
usable_omni_times = np.isfinite(OMNI_TIME) & (OMNI_TIME >= 0)

KP_INTERPOLATED = np.interp(lstar_unix_time, OMNI_TIME[usable_omni_times], KP[usable_omni_times], left = np.nan, right = np.nan)


fig, axs = plt.subplots(2, 1, sharex=True)

# Explicit UTC conversion so the axis matches its "Time (UTC)" label; without tz= the
# datetimes would be in the local timezone of the machine running the notebook.
dates = np.array([datetime.datetime.fromtimestamp(t, tz=datetime.timezone.utc) for t in lstar_unix_time])


axs[0].plot(dates, lstar_values)
axs[1].plot(dates, KP_INTERPOLATED)
axs[1].set_xlabel("Time (UTC)")
axs[0].set_ylabel("L*")
axs[1].set_ylabel("KP-Index")

plt.show()

In [2]:
#Interface for stage 1, Designed to do a year at a time
# `year` is interpolated into the output filename by the cell that saves the stage 1 archive.

year = 2019

In [3]:
#Stage 1 RBSP Chorus Preprocessing, Obtains clean chorus amplitudes

# The window is derived from `year` so that the data loaded here always matches the year
# used in the stage 1 output filename. Previously the dates were hard-coded to 2019, so
# changing `year` alone silently saved 2019 data under another year's name.
# LAST_LOADED_TIMESTAMP is the end of the window used by the original 2019 run
# (presumably the end of the available EMFISIS data - TODO confirm); for year = 2019
# the resulting window is identical to the previous hard-coded one.
LAST_LOADED_TIMESTAMP = datetime.datetime(year = 2019, month = 10, day = 13, hour = 23, minute = 59, second = 59)
RBSP_ROOT_DATA_DIR = "/project/rbsp/data/"

start = datetime.datetime(year = year, month = 1, day = 1)
end = min(datetime.datetime(year = year + 1, month = 1, day = 1), LAST_LOADED_TIMESTAMP)


def _load_emfisis(product_id, satellite):
    """Load one EMFISIS product for one RBSP probe over the [start, end] window.

    product_id: list of config keys identifying the product.
    satellite:  "a" or "b".
    Returns whatever data_loader.load_raw_data_from_config returns for that product.
    """
    return data_loader.load_raw_data_from_config(id=list(product_id),
                                                 start=start,
                                                 end=end,
                                                 satellite=satellite,
                                                 root_data_dir=RBSP_ROOT_DATA_DIR,
                                                 use_config_keys_in_subdir=False)


WNA_SURVEY_ID = ("RBSP", "EMFISIS", "L4", "WNA_SURVEY")
WFR_DIAGONAL_ID = ("RBSP", "EMFISIS", "L2", "WFR_SPECTRAL_MATRIX_DIAGONAL")

WNA_survey_a = _load_emfisis(WNA_SURVEY_ID, "a")
WNA_survey_b = _load_emfisis(WNA_SURVEY_ID, "b")
WFR_spectral_matrix_a = _load_emfisis(WFR_DIAGONAL_ID, "a")
WFR_spectral_matrix_b = _load_emfisis(WFR_DIAGONAL_ID, "b")

num_wna_files_A = len(WNA_survey_a["timestamps_per_file"])
num_wna_files_B = len(WNA_survey_b["timestamps_per_file"])
num_wfr_files_A = WFR_spectral_matrix_a["WFR_bandwidth"].shape[0]
num_wfr_files_B = WFR_spectral_matrix_b["WFR_bandwidth"].shape[0]

print(f"Number of files loaded: {num_wna_files_A, num_wna_files_B, num_wfr_files_A, num_wfr_files_B}")

# The next cell pairs the WNA survey with the WFR spectral matrix of the same probe, so
# both products must cover the same number of files for each probe.
if num_wna_files_A != num_wfr_files_A:
    raise RuntimeError("The same number of days wasn't loaded for RBSP-A!")

if num_wna_files_B != num_wfr_files_B:
    raise RuntimeError("The same number of days wasn't loaded for RBSP-B!")

Number of files loaded: (286, 197, 286, 197)


In [4]:
# Compute chorus amplitudes for each probe and keep only the samples that fall inside
# the (start, end) window, have finite lower and upper band amplitudes, and have
# in-range coordinates.

# ---------------------------------- RBSP-A ----------------------------------
epoch_A, L_A, mlt_A = (WNA_survey_a[key] for key in ("Epoch", "L", "MLT"))

time_A = astropy.time.Time(cdfepoch.to_datetime(epoch_A), format="datetime").utc

chorus_A = rbsp_chorus_tool.iterate_through_days_and_calculate_chorus_amplitudes(
    WNA_survey = WNA_survey_a,
    WFR_spectral_matrix = WFR_spectral_matrix_a,
)
lower_band_chorus_A, upper_band_chorus_A = chorus_A["Lower_Band"], chorus_A["Upper_Band"]

within_epoch_range_A = (start < time_A) & (time_A < end)
finite_chorus_A = np.isfinite(lower_band_chorus_A) & np.isfinite(upper_band_chorus_A)
# Probably redundant, but a single NaN would poison training, so bad coordinates are dropped here too.
all_valid_coordinates_A = (epoch_A > 0) & (0 <= mlt_A) & (mlt_A <= 24) & (0 < L_A) & (L_A < 10)

# One combined mask, applied to every per-sample array of the probe.
keep_A = within_epoch_range_A & finite_chorus_A & all_valid_coordinates_A
epoch_A, L_A, mlt_A, lower_band_chorus_A, upper_band_chorus_A = (
    values[keep_A] for values in (epoch_A, L_A, mlt_A, lower_band_chorus_A, upper_band_chorus_A)
)

# ---------------------------------- RBSP-B ----------------------------------
epoch_B, L_B, mlt_B = (WNA_survey_b[key] for key in ("Epoch", "L", "MLT"))

time_B = astropy.time.Time(cdfepoch.to_datetime(epoch_B), format="datetime").utc

chorus_B = rbsp_chorus_tool.iterate_through_days_and_calculate_chorus_amplitudes(
    WNA_survey = WNA_survey_b,
    WFR_spectral_matrix = WFR_spectral_matrix_b,
)
lower_band_chorus_B, upper_band_chorus_B = chorus_B["Lower_Band"], chorus_B["Upper_Band"]

within_epoch_range_B = (start < time_B) & (time_B < end)
finite_chorus_B = np.isfinite(lower_band_chorus_B) & np.isfinite(upper_band_chorus_B)
all_valid_coordinates_B = (epoch_B > 0) & (0 <= mlt_B) & (mlt_B <= 24) & (0 < L_B) & (L_B < 10)

keep_B = within_epoch_range_B & finite_chorus_B & all_valid_coordinates_B
epoch_B, L_B, mlt_B, lower_band_chorus_B, upper_band_chorus_B = (
    values[keep_B] for values in (epoch_B, L_B, mlt_B, lower_band_chorus_B, upper_band_chorus_B)
)


# All five arrays of a probe must report the same shape.
for cleaned in (epoch_A, lower_band_chorus_A, upper_band_chorus_A, L_A, mlt_A):
    print(cleaned.shape)

for cleaned in (epoch_B, lower_band_chorus_B, upper_band_chorus_B, L_B, mlt_B):
    print(cleaned.shape)

(2946636,)
(2946636,)
(2946636,)
(2946636,)
(2946636,)
(2043735,)
(2043735,)
(2043735,)
(2043735,)
(2043735,)


In [None]:
#Save the RBSP stage 1 data, might honestly only need one stage
# The archive holds the cleaned per-sample arrays of both probes; `year` selects the file name.
stage_1_output_path = os.path.abspath(f"./../processed_data/chorus_neural_network/STAGE_1/CHORUS/RBSP_OBSERVED_CHORUS_{year}.npz")

stage_1_arrays = {
    "EPOCH_A": epoch_A,
    "MLT_A": mlt_A,
    "L_A": L_A,
    "LOWER_BAND_CHORUS_A": lower_band_chorus_A,
    "UPPER_BAND_CHORUS_A": upper_band_chorus_A,
    "EPOCH_B": epoch_B,
    "MLT_B": mlt_B,
    "L_B": L_B,
    "LOWER_BAND_CHORUS_B": lower_band_chorus_B,
    "UPPER_BAND_CHORUS_B": upper_band_chorus_B,
}

np.savez(file = stage_1_output_path, **stage_1_arrays)


In [2]:
#Stage 2, clean then combine RBSP, OMNI, and POES Data and find conjunctions between RBSP and POES
#
# Every saved row has this layout (N = number of POES flux channels):
#   [POES time, POES L*, POES MLT, flux_0 .. flux_{N-1},
#    mean RBSP time, mean RBSP L*, mean |MLT difference|, mean upper band, mean lower band,
#    SME, AVG_B, FLOW_SPEED, PROTON_DENSITY, SYM_H]
# NOTE(review): code that reads these archives by negative column index must agree with
# this layout (e.g. column -8 is the MLT difference and -7 / -6 are upper / lower band).

VERSION = "v1a"
FIELD_MODEL = "T89"

MAX_L_DIFF = 0.10
MAX_MLT_DIFF = 1.5
MAX_T_DIFF_SEC = 30

# Not referenced in this cell; kept because other cells may rely on them.
L_SCALE = (1.0 / MAX_L_DIFF)**2
MLT_SCALE = (1.0 / MAX_MLT_DIFF)**2
TIME_SCALE = (1.0 / MAX_T_DIFF_SEC)**2

# Consecutive POES samples further apart than this are not bridged by interpolation.
MAX_POES_GAP_SEC = 30.0
# Spacing of the interpolated POES time grid inside each bridged segment.
POES_RESAMPLE_STEP_SEC = 2


def _load_rbsp_probe(chorus_refs, probe, year):
    """Build the time-sorted record dict for one RBSP probe.

    chorus_refs: open stage 1 chorus archive for `year`.
    probe:       "A" or "B".
    Returns a dict with keys EPOCH, MLT, L, LOWER_BAND, UPPER_BAND, UNIX_TIME and LSTAR,
    all ordered by increasing UNIX_TIME. LSTAR is interpolated from the probe's L*
    archive and is NaN outside the time range that archive covers.
    """
    record = {
        "EPOCH": chorus_refs[f"EPOCH_{probe}"],
        "MLT": chorus_refs[f"MLT_{probe}"],
        "L": chorus_refs[f"L_{probe}"],
        "LOWER_BAND": chorus_refs[f"LOWER_BAND_CHORUS_{probe}"],
        "UPPER_BAND": chorus_refs[f"UPPER_BAND_CHORUS_{probe}"],
    }
    record["UNIX_TIME"] = cdfepoch.unixtime(record["EPOCH"])

    with np.load(fr"./../chorus_neural_network/STAGE_1/Lstar/RBSP_{probe}_{FIELD_MODEL}_{year}.npz", allow_pickle=True) as lstar_refs:
        magephem_time = lstar_refs["UNIX_TIME"]
        magephem_lstar = lstar_refs["Lstar"]

    record["LSTAR"] = np.interp(record["UNIX_TIME"], magephem_time, magephem_lstar, left = np.nan, right = np.nan)

    # The conjunction search uses np.searchsorted, which requires sorted times.
    order = np.argsort(record["UNIX_TIME"])
    return {key: values[order] for key, values in record.items()}


def _resample_poes_satellite(SAT):
    """Interpolate one POES satellite onto a POES_RESAMPLE_STEP_SEC grid.

    Only pairs of consecutive samples separated by a positive gap shorter than
    MAX_POES_GAP_SEC are bridged. L* and every flux channel are interpolated linearly;
    MLT is interpolated through its cos / sin components so it wraps correctly at 24.
    Returns a dict with keys UNIX_TIME, MLT, BLC_Flux (one column per channel) and LSTAR
    holding one record per unique time, or None when no pair of samples qualifies.
    """
    sample_time = np.asarray(SAT["UNIX_TIME"], dtype=np.float64)
    lstar = np.asarray(SAT["Lstar"])
    flux = np.asarray(SAT["BLC_Flux"])
    mlt_angle = np.asarray(SAT["MLT"]) * 2 * np.pi / 24.0
    mlt_x = np.cos(mlt_angle)
    mlt_y = np.sin(mlt_angle)
    num_channels = flux.shape[1]

    time_chunks, lstar_chunks, mlt_chunks, flux_chunks = [], [], [], []

    for p in range(len(sample_time) - 1):

        t1, t2 = sample_time[p], sample_time[p + 1]

        # Zero or negative gaps (repeated / out-of-order stamps) would hand np.interp a
        # non-increasing xp, so they are skipped together with the long gaps.
        if not (0 < t2 - t1 < MAX_POES_GAP_SEC):
            continue

        t_points = np.arange(t1, t2 + 1, step=POES_RESAMPLE_STEP_SEC, dtype=np.float64)
        t_points = t_points[t_points <= t2] # grid points past t2 could only ever be NaN
        bracket = [t1, t2]

        time_chunks.append(t_points)
        lstar_chunks.append(np.interp(t_points, bracket, [lstar[p], lstar[p + 1]], left=np.nan, right=np.nan))

        x_interpolated = np.interp(t_points, bracket, [mlt_x[p], mlt_x[p + 1]], left=np.nan, right=np.nan)
        y_interpolated = np.interp(t_points, bracket, [mlt_y[p], mlt_y[p + 1]], left=np.nan, right=np.nan)
        angle_in_radians = np.mod(np.arctan2(y_interpolated, x_interpolated) + 2 * np.pi, 2 * np.pi)
        mlt_chunks.append((angle_in_radians * 24.0) / (2 * np.pi))

        flux_chunks.append(np.column_stack([
            np.interp(t_points, bracket, [flux[p, channel], flux[p + 1, channel]], left=np.nan, right=np.nan)
            for channel in range(num_channels)
        ]))

    if not time_chunks:
        return None

    # Adjacent segments share an endpoint (the end of one is the start of the next), so
    # that time would otherwise appear twice and produce duplicated conjunction rows.
    unix_time, first_seen = np.unique(np.hstack(time_chunks), return_index=True)

    return {"UNIX_TIME" : unix_time,
            "MLT" : np.hstack(mlt_chunks)[first_seen],
            "BLC_Flux" : np.vstack(flux_chunks)[first_seen, :],
            "LSTAR" : np.hstack(lstar_chunks)[first_seen]}


def _circular_mlt_difference(mlt, other_mlt):
    """Shortest separation, in hours, between MLT values on the 24 h circle (NaN propagates)."""
    upper = np.maximum(mlt, other_mlt)
    lower = np.minimum(mlt, other_mlt)
    return np.minimum(upper - lower, (24 - upper) + lower)


def _window_nanmean(times, values, centre):
    """NaN-ignoring mean of `values` whose `times` lie within MAX_T_DIFF_SEC of `centre`.

    Returns NaN, without emitting a RuntimeWarning, when the window is empty or all-NaN.
    """
    first, stop = np.searchsorted(a = times, v = [(centre - MAX_T_DIFF_SEC), (centre + MAX_T_DIFF_SEC)])
    window = values[first:stop]
    if len(window) == 0 or np.all(np.isnan(window)):
        return np.nan
    return np.nanmean(window)


CONJUNCTIONS_TOTAL = []

for _year in range(2012, 2020, 1):
    
    print(f"Began processing year : {_year}")
    
    #LOAD THE OBSERVED CHORUS, INTERPOLATE LSTAR AND SORT BY TIME
    print(f"Began loading RBSP Data for year: {_year}")
    with np.load(fr"./../chorus_neural_network/STAGE_1/CHORUS/RBSP_OBSERVED_CHORUS_{_year}.npz", allow_pickle=True) as refs:
        RBSP_A = _load_rbsp_probe(refs, "A", _year)
        RBSP_B = _load_rbsp_probe(refs, "B", _year)
    
    RBSP = [RBSP_A, RBSP_B]
    print(f"RBSP Data loaded for year : {_year}")
    
    print(f"Began loading POES Data for year : {_year}")
    
    # The pickled object is fully in memory once read, so the archive is closed right
    # away, which also covers the skip-year path below.
    with np.load(fr"./../chorus_neural_network/STAGE_0/MPE_DATA_PREPROCESSED_WITH_LSTAR/MPE_PREPROCESSED_DATA_T89_{_year}.npz", allow_pickle=True) as refs:
        POES_DATA = refs["DATA"].flatten()[0]
    
    POES = {}
    
    for SATID in POES_DATA:
        
        RESAMPLED = _resample_poes_satellite(POES_DATA[SATID])
        
        if RESAMPLED is None:
            print(f"No usable POES records for satellite {SATID} in year : {_year}")
            continue
        
        POES[SATID] = RESAMPLED
    
    if not POES:
        print(f"No POES satellite coverage found for year : {_year}")
        print(f"SKIPPING YEAR : {_year}")
        continue
    
    print(f"Finished loading POES data for year : {_year}")
    
    OMNI = chorus_machine_learning_helper.load_OMNI_year(_year)
    SUPERMAG = chorus_machine_learning_helper.load_SUPERMAG_SME_year(_year)
    
    
    #FINALLY FIND THE CONJUNCTIONS
    
    print(f"Finding CONJUNCTIONS for year : {_year}")
    CONJUNCTIONS_YEAR = []
    for SATID in POES:
        
        POES_SAT = POES[SATID]
        NUMBER_OF_RECORDS = len(POES_SAT["UNIX_TIME"])
        CONJUNCTIONS = []
        
        print(f"Number of records: {NUMBER_OF_RECORDS} for POES SATELLITE: {SATID}")
        
        for T in tqdm.tqdm(range(NUMBER_OF_RECORDS)):
            
            UNIX_TIME = POES_SAT["UNIX_TIME"][T]
            LSTAR = POES_SAT["LSTAR"][T]
            MLT = POES_SAT["MLT"][T]
            
            # A non-finite L* or MLT can never satisfy the thresholds below.
            if not (np.isfinite(LSTAR) and np.isfinite(MLT)):
                continue
            
            FLUX_SPECTRUM = POES_SAT["BLC_Flux"][T, :]
            
            # Driver averages depend only on the POES time, so they are computed at most
            # once per record, and only when some probe actually has candidates.
            DRIVERS = None

            for RBSP_PROBE in RBSP:
                
                FIRST, STOP = np.searchsorted(a = RBSP_PROBE["UNIX_TIME"], v = [(UNIX_TIME - MAX_T_DIFF_SEC), (UNIX_TIME + MAX_T_DIFF_SEC)])
                
                if FIRST == STOP:
                    continue
                
                # All RBSP samples in the time window are tested at once; NaN L* / MLT
                # compare False and therefore never match.
                with np.errstate(invalid="ignore"):
                    DEL_LSTAR = LSTAR - RBSP_PROBE["LSTAR"][FIRST:STOP]
                    DEL_MLT = _circular_mlt_difference(MLT, RBSP_PROBE["MLT"][FIRST:STOP])
                    MATCHED = (DEL_LSTAR**2 < MAX_L_DIFF**2) & (DEL_MLT**2 < MAX_MLT_DIFF**2)
                
                if not MATCHED.any():
                    continue
                
                if DRIVERS is None:
                    DRIVERS = [_window_nanmean(SUPERMAG["UNIX_TIME"], SUPERMAG["SME"], UNIX_TIME),
                               _window_nanmean(OMNI["UNIX_TIME"], OMNI["AVG_B"], UNIX_TIME),
                               _window_nanmean(OMNI["UNIX_TIME"], OMNI["FLOW_SPEED"], UNIX_TIME),
                               _window_nanmean(OMNI["UNIX_TIME"], OMNI["PROTON_DENSITY"], UNIX_TIME),
                               _window_nanmean(OMNI["UNIX_TIME"], OMNI["SYM_H"], UNIX_TIME)]
                
                if not np.all(np.isfinite(DRIVERS)):
                    break # the drivers are shared by both probes, so the other probe would be rejected too
                
                CONJUNCTION =  [UNIX_TIME, 
                                LSTAR, 
                                MLT,
                                *FLUX_SPECTRUM,
                                np.mean(RBSP_PROBE["UNIX_TIME"][FIRST:STOP][MATCHED]), #MEAN TIME OF MATCHED RBSP POINTS
                                np.mean(RBSP_PROBE["LSTAR"][FIRST:STOP][MATCHED]), #MEAN LSTAR OF MATCHED RBSP POINTS
                                np.mean(DEL_MLT[MATCHED]), #MEAN DIFFERENCE IN MLT FOUND
                                np.mean(RBSP_PROBE["UPPER_BAND"][FIRST:STOP][MATCHED]), #UPPER BAND CHORUS OBSERVED
                                np.mean(RBSP_PROBE["LOWER_BAND"][FIRST:STOP][MATCHED]), #LOWER BAND CHORUS OBSERVED
                                *DRIVERS] #SME, AVG_B, FLOW_SPEED, PROTON_DENSITY, SYM_H
                
                CONJUNCTIONS.append(CONJUNCTION)
        

        print(f"Number of conjunctions: {len(CONJUNCTIONS)}")
        
        CONJUNCTIONS_YEAR.extend(CONJUNCTIONS)
    
    
    CONJUNCTIONS_TOTAL.extend(CONJUNCTIONS_YEAR)
    
    print(f"Total number of conjunctions so far: {len(CONJUNCTIONS_TOTAL)}")
    
if not CONJUNCTIONS_TOTAL:
    raise RuntimeError("No conjunctions were found for any year; nothing to save.")

CONJUNCTIONS_TO_BE_SAVED = np.vstack(CONJUNCTIONS_TOTAL)

print(f"Conjunctions to be saved: {CONJUNCTIONS_TO_BE_SAVED.shape}")

np.savez(f"./../chorus_neural_network/STAGE_2/{VERSION}/CONJUNCTIONS_{VERSION}_{FIELD_MODEL}.npz", CONJUNCTIONS = CONJUNCTIONS_TO_BE_SAVED)


Began processing year : 2012
Began loading RBSP Data for year: 2012
RBSP Data loaded for year : 2012
Began loading POES Data for year : 2012


KeyboardInterrupt: 

In [None]:
#Stage 3, Look at the data and make sure its good enough, then remove solar proton events
version = "v5a"

# Pull the conjunction table into memory; the archive is closed when the block exits.
with np.load(f"./../processed_data_chorus_neural_network/STAGE_2/{version}/CONJUNCTIONS_{version}.npz") as CONJUNCTIONS_REFS:
    CONJUNCTIONS_TESTING = CONJUNCTIONS_REFS["CONJUNCTIONS"]

In [None]:

'''PERCENTAGES_COVERED = [25.7899, 
                       25.7872,
                       16.9213,
                       16.8872,
                       23.2865,
                       23.2174,
                       21.2469,
                       21.1556,
                       17.7844,
                       17.9721,
                       22.3004,
                       22.5147,
                       23.2432,
                       23.2491,
                       20.0671,
                       19.0985] these were for v2a'''

'''CONJUNCTION =  [UNIX_TIME, 
                    L, 
                    MLT,
                    *FLUX_SPECTRUM,
                    candidate[0], #TIME
                    candidate[1], #L
                    candidate[2], #MLT
                    candidate[3], #del_MLT
                    candidate[4], #CHORUS
                    AVG_SME, 
                    AVG_AVG_B,
                    AVG_FLOW_SPEED, 
                    AVG_PROTON_DENSITY,
                    AVG_SYM_H]'''

# Split the table into named columns following the row layout quoted above: three POES
# columns first, the flux spectrum in the middle, and the last ten columns addressed
# from the end of the row.
(CONJUNCTIONS_POES_TIME,
 CONJUNCTIONS_POES_L,
 CONJUNCTIONS_POES_MLT) = (CONJUNCTIONS_TESTING[:, column] for column in (0, 1, 2))
CONJUNCTIONS_POES_FLUX = CONJUNCTIONS_TESTING[:, 3:-10]
(CONJUNCTIONS_RBSP_TIME,
 CONJUNCTIONS_RBSP_L,
 CONJUNCTIONS_RBSP_MLT,
 CONJUNCTIONS_RBSP_DEL_MLT,
 CONJUNCTIONS_RBSP_CHORUS,
 CONJUNCTIONS_AVG_SME,
 CONJUNCTIONS_AVG_AVG_B,
 CONJUNCTIONS_AVG_FLOW_SPEED,
 CONJUNCTIONS_AVG_PROTON_DENSITY,
 CONJUNCTIONS_AVG_SYM_H) = (CONJUNCTIONS_TESTING[:, column] for column in range(-10, 0))

# The report is assembled in memory first and written in one go.
report_lines = [
    "\nConjunctions:\n",
    f"Number of conjunctions: {CONJUNCTIONS_TESTING.shape[0]} [#]\n",
    f"Minimum RBSP Time: {np.min(CONJUNCTIONS_RBSP_TIME)} [seconds since unix epoch]\n",
    f"Maximum RBSP Time: {np.max(CONJUNCTIONS_RBSP_TIME)} [seconds since unix epoch]\n",
    f"Minimum POES Time: {np.min(CONJUNCTIONS_POES_TIME)} [seconds since unix epoch]\n",
    f"Maximum POES Time: {np.max(CONJUNCTIONS_POES_TIME)} [seconds since unix epoch]\n",
]

# POES-vs-RBSP agreement. The MLT column already stores an absolute difference, hence
# the extra "Absolute " qualifier in its first two lines.
for heading, qualifier, difference, unit in (
    ("\nL:\n", "", CONJUNCTIONS_POES_L - CONJUNCTIONS_RBSP_L, "L"),
    ("\nMLT: \n", "Absolute ", CONJUNCTIONS_RBSP_DEL_MLT, "MLT"),
    ("\nTime: \n", "", CONJUNCTIONS_POES_TIME - CONJUNCTIONS_RBSP_TIME, "s"),
):
    report_lines += [
        heading,
        f"Mean {qualifier}Difference: {np.mean(difference)} [{unit}]\n",
        f"Standard deviation of {qualifier}Difference {np.std(difference)} [{unit}]\n",
        f"Minimum Absolute Difference : {np.min(np.abs(difference))} [{unit}]\n",
        f"Maximum Absolute Difference : {np.max(np.abs(difference))} [{unit}]\n",
    ]

# Plain summary statistics of the target and of each driver column.
for heading, column_values, unit in (
    ("\nChorus: \n", CONJUNCTIONS_RBSP_CHORUS, "pT"),
    ("\nSME: \n", CONJUNCTIONS_AVG_SME, "nT"),
    ("\nAVG_B: \n", CONJUNCTIONS_AVG_AVG_B, "nT"),
    ("\nFlow Speed: \n", CONJUNCTIONS_AVG_FLOW_SPEED, "km/s"),
    ("\nProton Density: \n", CONJUNCTIONS_AVG_PROTON_DENSITY, "n/cc"),
    ("\nSYM_H: \n", CONJUNCTIONS_AVG_SYM_H, "nT"),
):
    report_lines += [
        heading,
        f"Mean: {np.mean(column_values)} [{unit}]\n",
        f"Standard Deviation: {np.std(column_values)} [{unit}]\n",
        f"Minimum: {np.min(column_values)} [{unit}]\n",
        f"Maximum: {np.max(column_values)} [{unit}]\n",
    ]

with open(f"./../processed_data_chorus_neural_network/STAGE_2/{version}/CONJUNCTIONS_{version}.txt", "w") as f:
    f.writelines(report_lines)


In [None]:
# Scatter of the POES L value against the RBSP L value of every conjunction, with
# reference lines drawn at L = 4 on both axes.
plt.hlines(y = 4, xmin=1, xmax=7, color="black")
plt.vlines(x = 4, ymin=1, ymax=7, color="black")
plt.scatter(CONJUNCTIONS_RBSP_L, CONJUNCTIONS_POES_L)

plt.title("RBSP - Closest POES L Shell Comparison")
plt.xlabel("RBSP L-Shell")
plt.ylabel("Closest POES L-Shell")

# Signed offset (POES minus RBSP) summarised below the figure.
l_shell_offset = CONJUNCTIONS_POES_L - CONJUNCTIONS_RBSP_L

for label, statistic in (("Mean difference: ", np.mean),
                         ("Standard deviation of difference ", np.std),
                         ("Maximum difference : ", np.max)):
    print(f"{label}{statistic(l_shell_offset)}")


In [None]:
# Scatter of the POES MLT against the RBSP MLT of every conjunction, with reference
# lines drawn at MLT = 12 on both axes.
plt.hlines(y = 12, xmin=0, xmax=25, color="black")
plt.vlines(x = 12, ymin=0, ymax=25, color="black")
plt.scatter(CONJUNCTIONS_RBSP_MLT, CONJUNCTIONS_POES_MLT)

plt.title("RBSP MLT - Closest POES MLT")
plt.xlabel("RBSP MLT")
plt.ylabel("Closest POES MLT")

# The difference column is summarised as stored.
for label, statistic in (("Mean difference: ", np.mean),
                         ("Standard deviation of difference ", np.std),
                         ("Maximum difference : ", np.max)):
    print(f"{label}{statistic(CONJUNCTIONS_RBSP_DEL_MLT)}")

In [None]:
#Stage 3 Continued, Removing solar proton events!

version = "v5a"

# Conjunction table produced by stage 2; the archive is closed when the block exits.
with np.load(f"./../processed_data_chorus_neural_network/STAGE_2/{version}/CONJUNCTIONS_{version}.npz") as CONJUNCTIONS_REFS:
    CONJUNCTIONS = CONJUNCTIONS_REFS["CONJUNCTIONS"]

# Event list; the removal cell reads its "START" and "END" columns.
SOLAR_PROTON_EVENT_LIST = pd.read_csv("./../processed_data_chorus_neural_network/SOLAR_PROTON_EVENT_LIST_1976_2024.csv")


In [None]:
'''CONJUNCTION =  [UNIX_TIME, 
                    L, 
                    MLT,
                    *FLUX_SPECTRUM,
                    candidate[0], #TIME
                    candidate[1], #L
                    candidate[2], #MLT
                    candidate[3], #del_MLT
                    candidate[4], #CHORUS
                    AVG_SME, 
                    AVG_AVG_B,
                    AVG_FLOW_SPEED, 
                    AVG_PROTON_DENSITY,
                    AVG_SYM_H]'''


def _sep_timestamp_to_unix(stamp):
    """Convert one START / END entry of the event list to Unix seconds (UTC).

    The entry is sliced at fixed positions: year [0:4], month [5:7], day [8:10],
    hour [11:13], minute [13:15]; seconds are taken as 0.
    NOTE(review): this presumes entries shaped like 'YYYY-MM-DD HHMM' - confirm against the CSV.
    """
    stamp = stamp.strip()
    fields = {'year': int(stamp[0:4]), 'month': int(stamp[5:7]), 'day': int(stamp[8:10]),
              'hour': int(stamp[11:13]), 'minute': int(stamp[13:15]), 'second': 0}
    return astropy.time.Time(fields, format="ymdhms", scale='utc').unix


order_to_sort_conjunctions = np.argsort(CONJUNCTIONS[:, 0]) #Sorted based on POES Conjunction time!
SORTED_CONJUNCTIONS = CONJUNCTIONS[order_to_sort_conjunctions, :]

print(f"Starting shape of conjunctions list: {SORTED_CONJUNCTIONS.shape}")

SORTED_POES_CONJUNCTION_TIMES = SORTED_CONJUNCTIONS[:, 0]

START_OF_SEP_EVENTS_UTC = SOLAR_PROTON_EVENT_LIST["START"]
END_OF_SEP_EVENTS_UTC = SOLAR_PROTON_EVENT_LIST["END"]
ZIPPED_EVENTS = list(zip(START_OF_SEP_EVENTS_UTC, END_OF_SEP_EVENTS_UTC))

print(f"Removing high energy solar proton events!")

# Rows are flagged on a mask aligned with the ORIGINAL sorted table and removed once at
# the end. Deleting rows inside the loop while still looking indices up in the original
# time column made every event after the first delete the wrong rows.
OUTSIDE_SEP_EVENTS = np.ones(SORTED_POES_CONJUNCTION_TIMES.shape[0], dtype=bool)

for START, END in tqdm.tqdm(ZIPPED_EVENTS):
    
    START_UNIX = _sep_timestamp_to_unix(START)
    END_UNIX = _sep_timestamp_to_unix(END)
    
    # Half-open window [START, END), the same bounds np.searchsorted(side="left") gives.
    OUTSIDE_SEP_EVENTS &= ~((START_UNIX <= SORTED_POES_CONJUNCTION_TIMES) & (SORTED_POES_CONJUNCTION_TIMES < END_UNIX))

SORTED_CONJUNCTIONS = SORTED_CONJUNCTIONS[OUTSIDE_SEP_EVENTS, :]

print(f"Finished removing high energy solar proton events!")

print(f"Saving!")

CLEANED_CONJUNCTIONS = SORTED_CONJUNCTIONS #Should be cleaned by now!

np.savez(f"./../processed_data_chorus_neural_network/STAGE_3/{version}/CLEANED_CONJUNCTIONS_{version}.npz",
        CONJUNCTIONS=CLEANED_CONJUNCTIONS)

# Named columns, following the row layout quoted at the top of this cell.
CONJUNCTIONS_POES_TIME = CLEANED_CONJUNCTIONS[:, 0]
CONJUNCTIONS_POES_L = CLEANED_CONJUNCTIONS[:, 1]
CONJUNCTIONS_POES_MLT = CLEANED_CONJUNCTIONS[:, 2]
CONJUNCTIONS_POES_FLUX = CLEANED_CONJUNCTIONS[:, 3:-10]
CONJUNCTIONS_RBSP_TIME = CLEANED_CONJUNCTIONS[:, -10]
CONJUNCTIONS_RBSP_L = CLEANED_CONJUNCTIONS[:, -9]
CONJUNCTIONS_RBSP_MLT = CLEANED_CONJUNCTIONS[:, -8]
CONJUNCTIONS_RBSP_DEL_MLT = CLEANED_CONJUNCTIONS[:, -7]
CONJUNCTIONS_RBSP_CHORUS = CLEANED_CONJUNCTIONS[:, -6]
CONJUNCTIONS_AVG_SME = CLEANED_CONJUNCTIONS[:, -5]
CONJUNCTIONS_AVG_AVG_B = CLEANED_CONJUNCTIONS[:, -4]
CONJUNCTIONS_AVG_FLOW_SPEED = CLEANED_CONJUNCTIONS[:, -3]
CONJUNCTIONS_AVG_PROTON_DENSITY = CLEANED_CONJUNCTIONS[:, -2]
CONJUNCTIONS_AVG_SYM_H = CLEANED_CONJUNCTIONS[:, -1]

print(f"Creating documentation of dataset!")

# The report text is assembled in memory and written in one go; wording and ordering
# of the lines are unchanged.
REPORT_LINES = [
    "\nConjunctions:\n",
    f"Number of conjunctions: {CLEANED_CONJUNCTIONS.shape[0]} [#]\n",
    f"Number lost from cleaning solar proton events: {CONJUNCTIONS.shape[0] - CLEANED_CONJUNCTIONS.shape[0]} [#]\n",
    f"Minimum RBSP Time: {np.min(CONJUNCTIONS_RBSP_TIME)} [seconds since unix epoch]\n",
    f"Maximum RBSP Time: {np.max(CONJUNCTIONS_RBSP_TIME)} [seconds since unix epoch]\n",
    f"Minimum POES Time: {np.min(CONJUNCTIONS_POES_TIME)} [seconds since unix epoch]\n",
    f"Maximum POES Time: {np.max(CONJUNCTIONS_POES_TIME)} [seconds since unix epoch]\n",
]

# POES-vs-RBSP agreement. The MLT column already stores an absolute difference, hence
# the extra "Absolute " qualifier in its first two lines.
for HEADING, QUALIFIER, DIFFERENCE, UNIT in (
    ("\nL:\n", "", CONJUNCTIONS_POES_L - CONJUNCTIONS_RBSP_L, "L"),
    ("\nMLT: \n", "Absolute ", CONJUNCTIONS_RBSP_DEL_MLT, "MLT"),
    ("\nTime: \n", "", CONJUNCTIONS_POES_TIME - CONJUNCTIONS_RBSP_TIME, "s"),
):
    REPORT_LINES += [
        HEADING,
        f"Mean {QUALIFIER}Difference: {np.mean(DIFFERENCE)} [{UNIT}]\n",
        f"Standard deviation of {QUALIFIER}Difference {np.std(DIFFERENCE)} [{UNIT}]\n",
        f"Minimum Absolute Difference : {np.min(np.abs(DIFFERENCE))} [{UNIT}]\n",
        f"Maximum Absolute Difference : {np.max(np.abs(DIFFERENCE))} [{UNIT}]\n",
    ]

# Plain summary statistics of the target and of each driver column.
for HEADING, COLUMN_VALUES, UNIT in (
    ("\nChorus: \n", CONJUNCTIONS_RBSP_CHORUS, "pT"),
    ("\nSME: \n", CONJUNCTIONS_AVG_SME, "nT"),
    ("\nAVG_B: \n", CONJUNCTIONS_AVG_AVG_B, "nT"),
    ("\nFlow Speed: \n", CONJUNCTIONS_AVG_FLOW_SPEED, "km/s"),
    ("\nProton Density: \n", CONJUNCTIONS_AVG_PROTON_DENSITY, "n/cc"),
    ("\nSYM_H: \n", CONJUNCTIONS_AVG_SYM_H, "nT"),
):
    REPORT_LINES += [
        HEADING,
        f"Mean: {np.mean(COLUMN_VALUES)} [{UNIT}]\n",
        f"Standard Deviation: {np.std(COLUMN_VALUES)} [{UNIT}]\n",
        f"Minimum: {np.min(COLUMN_VALUES)} [{UNIT}]\n",
        f"Maximum: {np.max(COLUMN_VALUES)} [{UNIT}]\n",
    ]

with open(f"./../processed_data_chorus_neural_network/STAGE_3/{version}/CLEANED_CONJUNCTIONS_{version}.txt", "w") as f:
    f.writelines(REPORT_LINES)

print(f"Finished!")
print(f"Ending shape of conjunctions : {CLEANED_CONJUNCTIONS.shape}")

In [None]:
#Stage 4, Create datasets used for training, testing, etc

version = "v5a"

# Load the cleaned conjunction table for this version from the STAGE_3 output directory.
# The context manager closes the .npz archive even if the "CONJUNCTIONS" key lookup
# raises; indexing the archive reads the array into memory, so it stays usable afterwards.
with np.load(f"./../processed_data_chorus_neural_network/STAGE_3/{version}/CLEANED_CONJUNCTIONS_{version}.npz") as CONJUNCTIONS_REFS:
    CONJUNCTIONS = CONJUNCTIONS_REFS["CONJUNCTIONS"]

In [None]:
print(CONJUNCTIONS.shape)

# Full-length columns (one entry per conjunction) used only to build the row masks.
# Columns -6 and -10 are the same ones unpacked later in this cell as
# CONJUNCTIONS_RBSP_CHORUS and CONJUNCTIONS_RBSP_TIME.
CONJUNCTIONS_CHORUS = CONJUNCTIONS[:, -6]
rbsp_time_all_rows = CONJUNCTIONS[:, -10]

# Only conjunctions with a strictly positive chorus value are used at all.
where_chorus_greater_zero = (0 < CONJUNCTIONS_CHORUS)

# Hold-out window for the validation set: 2016-01-01 00:00 UTC to 2016-04-01 00:00 UTC,
# expressed as Unix timestamps so it can be compared with the RBSP time column.
jan1_unix = astropy.time.Time({"year":2016, "month":1, "day":1, "hour":0, "minute":0, "second":0}, format="ymdhms", scale="utc").unix
apr1_unix = astropy.time.Time({"year":2016, "month":4, "day":1, "hour":0, "minute":0, "second":0}, format="ymdhms", scale="utc").unix

# Both bounds are exclusive.
where_between_jan1_apr1_2016 = (jan1_unix < rbsp_time_all_rows) & (rbsp_time_all_rows < apr1_unix)

# Legacy alias: the mask used to be called this although the window above is Jan-Apr 2016.
# Kept so any other cell that still refers to the old name keeps working.
where_between_feb1_apr1_2013 = where_between_jan1_apr1_2016

# Training/testing rows lie outside the hold-out window, validation rows inside it.
train_test_subset_selected = where_chorus_greater_zero & ~where_between_jan1_apr1_2016
validation_subset_selected = where_chorus_greater_zero & where_between_jan1_apr1_2016

print(f"Number of conjunctions between jan1 and apr1 2016: {np.count_nonzero(where_between_jan1_apr1_2016)}")
print(f"Number of conjunctions with non-zero chorus: {np.count_nonzero(where_chorus_greater_zero)}")

# Rows kept for training/testing; every column below is sliced from this one selection.
training_rows = CONJUNCTIONS[train_test_subset_selected]

# Single columns are taken with a one-element index list so they stay 2-D, shape (N, 1),
# and can be stacked side by side with np.hstack further down.
CONJUNCTIONS_POES_TIME = training_rows[:, [0]]
CONJUNCTIONS_POES_L = training_rows[:, [1]]
CONJUNCTIONS_POES_MLT = training_rows[:, [2]]
CONJUNCTIONS_POES_FLUX = training_rows[:, 3:9]  # six flux columns, shape (N, 6)
CONJUNCTIONS_RBSP_TIME = training_rows[:, [-10]]
CONJUNCTIONS_RBSP_L = training_rows[:, [-9]]
CONJUNCTIONS_RBSP_MLT = training_rows[:, [-8]]
CONJUNCTIONS_RBSP_DEL_MLT = training_rows[:, [-7]]
CONJUNCTIONS_RBSP_CHORUS = training_rows[:, [-6]]
CONJUNCTIONS_AVG_SME = training_rows[:, [-5]]
CONJUNCTIONS_AVG_AVG_B = training_rows[:, [-4]]
CONJUNCTIONS_AVG_FLOW_SPEED = training_rows[:, [-3]]
CONJUNCTIONS_AVG_PROTON_DENSITY = training_rows[:, [-2]]
CONJUNCTIONS_AVG_SYM_H = training_rows[:, [-1]]

# Sanity check: every column must have the same number of rows.
for column in (CONJUNCTIONS_RBSP_TIME,
               CONJUNCTIONS_RBSP_L,
               CONJUNCTIONS_RBSP_MLT,
               CONJUNCTIONS_RBSP_CHORUS,
               CONJUNCTIONS_POES_TIME,
               CONJUNCTIONS_POES_L,
               CONJUNCTIONS_POES_MLT,
               CONJUNCTIONS_RBSP_DEL_MLT,
               CONJUNCTIONS_POES_FLUX,
               CONJUNCTIONS_AVG_SME,
               CONJUNCTIONS_AVG_AVG_B,
               CONJUNCTIONS_AVG_FLOW_SPEED,
               CONJUNCTIONS_AVG_PROTON_DENSITY,
               CONJUNCTIONS_AVG_SYM_H):
    print(column.shape)

# Standardisation statistics are computed on the training/testing rows only.
# Mean and standard deviation both use the NaN-ignoring variants: pairing np.nanmean
# with plain np.std would give a NaN scale (and an all-NaN feature column) as soon as
# a single NaN is present. Without NaNs the results are identical to np.mean / np.std.
mean_L = np.nanmean(CONJUNCTIONS_POES_L)
std_L = np.nanstd(CONJUNCTIONS_POES_L)

print(f"Mean L: {mean_L}, STD L: {std_L}")

# Fluxes are standardised in natural-log space, per flux column; keepdims gives shape (1, 6)
# so the statistics broadcast against the (N, 6) flux array.
# NOTE(review): np.log gives -inf for a zero flux, which nanmean/nanstd do not skip —
# presumably such rows were removed upstream; confirm.
log_fluxes = np.log(CONJUNCTIONS_POES_FLUX)
mean_fluxes = np.nanmean(log_fluxes, axis = 0, keepdims = True)
std_fluxes = np.nanstd(log_fluxes, axis = 0, keepdims = True)

print(f"Mean fluxes : {mean_fluxes.shape}")

mean_sme = np.nanmean(CONJUNCTIONS_AVG_SME)
std_sme = np.nanstd(CONJUNCTIONS_AVG_SME)

mean_avg_b = np.nanmean(CONJUNCTIONS_AVG_AVG_B)
std_avg_b = np.nanstd(CONJUNCTIONS_AVG_AVG_B)

mean_flow_speed = np.nanmean(CONJUNCTIONS_AVG_FLOW_SPEED)
std_flow_speed = np.nanstd(CONJUNCTIONS_AVG_FLOW_SPEED)

mean_avg_proton_density = np.nanmean(CONJUNCTIONS_AVG_PROTON_DENSITY)
std_avg_proton_density = np.nanstd(CONJUNCTIONS_AVG_PROTON_DENSITY)

mean_avg_sym_h = np.nanmean(CONJUNCTIONS_AVG_SYM_H)
std_avg_sym_h = np.nanstd(CONJUNCTIONS_AVG_SYM_H)

# MLT is a 24-hour cyclic quantity, so it enters the model as a (sin, cos) pair.
mlt_phase = (CONJUNCTIONS_POES_MLT * 2 * np.pi) / 24.0
standardised_log_fluxes = (log_fluxes - mean_fluxes) / std_fluxes

# Feature columns, in order: standardised L, sin(MLT), cos(MLT), the first and last
# standardised log-flux columns, standardised flow speed.
# SME, AVG_B, proton density and SYM_H have statistics computed above but are currently
# not part of the feature matrix.
FEATURES = np.hstack(((CONJUNCTIONS_POES_L - mean_L) / std_L,
                      np.sin(mlt_phase),
                      np.cos(mlt_phase),
                      standardised_log_fluxes[:, [0, -1]],
                      (CONJUNCTIONS_AVG_FLOW_SPEED - mean_flow_speed) / std_flow_speed))
                      
#SMALL VALIDATION SET:
# Same column unpacking as for the training rows, applied to the hold-out rows.
validation_rows = CONJUNCTIONS[validation_subset_selected]

# One-element index lists keep each single column 2-D, shape (N, 1).
CONJUNCTIONS_POES_TIME_VALIDATION = validation_rows[:, [0]]
CONJUNCTIONS_POES_L_VALIDATION = validation_rows[:, [1]]
CONJUNCTIONS_POES_MLT_VALIDATION = validation_rows[:, [2]]
CONJUNCTIONS_POES_FLUX_VALIDATION = validation_rows[:, 3:9]
CONJUNCTIONS_RBSP_TIME_VALIDATION = validation_rows[:, [-10]]
CONJUNCTIONS_RBSP_L_VALIDATION = validation_rows[:, [-9]]
CONJUNCTIONS_RBSP_MLT_VALIDATION = validation_rows[:, [-8]]
CONJUNCTIONS_RBSP_DEL_MLT_VALIDATION = validation_rows[:, [-7]]
CONJUNCTIONS_RBSP_CHORUS_VALIDATION = validation_rows[:, [-6]]
CONJUNCTIONS_AVG_SME_VALIDATION = validation_rows[:, [-5]]
CONJUNCTIONS_AVG_AVG_B_VALIDATION = validation_rows[:, [-4]]
CONJUNCTIONS_AVG_FLOW_SPEED_VALIDATION = validation_rows[:, [-3]]
CONJUNCTIONS_AVG_PROTON_DENSITY_VALIDATION = validation_rows[:, [-2]]
CONJUNCTIONS_AVG_SYM_H_VALIDATION = validation_rows[:, [-1]]

# Sanity check: every validation column must have the same number of rows.
for validation_column in (CONJUNCTIONS_RBSP_TIME_VALIDATION,
                          CONJUNCTIONS_RBSP_L_VALIDATION,
                          CONJUNCTIONS_RBSP_MLT_VALIDATION,
                          CONJUNCTIONS_RBSP_CHORUS_VALIDATION,
                          CONJUNCTIONS_POES_TIME_VALIDATION,
                          CONJUNCTIONS_POES_L_VALIDATION,
                          CONJUNCTIONS_POES_MLT_VALIDATION,
                          CONJUNCTIONS_RBSP_DEL_MLT_VALIDATION,
                          CONJUNCTIONS_POES_FLUX_VALIDATION,
                          CONJUNCTIONS_AVG_SME_VALIDATION,
                          CONJUNCTIONS_AVG_AVG_B_VALIDATION,
                          CONJUNCTIONS_AVG_FLOW_SPEED_VALIDATION,
                          CONJUNCTIONS_AVG_PROTON_DENSITY_VALIDATION,
                          CONJUNCTIONS_AVG_SYM_H_VALIDATION):
    print(validation_column.shape)

# The validation features are scaled with the statistics of the training/testing rows
# (mean_L, std_L, mean_fluxes, ...), not with statistics of the validation rows themselves.
validation_mlt_phase = (CONJUNCTIONS_POES_MLT_VALIDATION * 2 * np.pi) / 24.0
validation_standardised_log_fluxes = (np.log(CONJUNCTIONS_POES_FLUX_VALIDATION) - mean_fluxes) / std_fluxes

# Column order matches FEATURES: standardised L, sin(MLT), cos(MLT), first and last
# standardised log-flux columns, standardised flow speed.
# SME, AVG_B, proton density and SYM_H are currently not part of the feature matrix.
VALIDATION_FEATURES = np.hstack(((CONJUNCTIONS_POES_L_VALIDATION - mean_L) / std_L,
                                 np.sin(validation_mlt_phase),
                                 np.cos(validation_mlt_phase),
                                 validation_standardised_log_fluxes[:, [0, -1]],
                                 (CONJUNCTIONS_AVG_FLOW_SPEED_VALIDATION - mean_flow_speed) / std_flow_speed))
        
# Write the model-ready arrays together with every statistic needed to apply the same
# scaling to new data.
model_ready_path = f"./../processed_data_chorus_neural_network/STAGE_4/{version}/MODEL_READY_DATA_{version}.npz"

# Create the per-version output folder if it is missing, so the save does not fail
# with FileNotFoundError after all the preprocessing has already run.
os.makedirs(os.path.dirname(model_ready_path), exist_ok=True)

np.savez(model_ready_path,
        FEATURES = FEATURES,
        LABELS = CONJUNCTIONS_RBSP_CHORUS,
        VALIDATION_FEATURES = VALIDATION_FEATURES,
        VALIDATION_LABELS = CONJUNCTIONS_RBSP_CHORUS_VALIDATION,
        TRAINING_MLT = CONJUNCTIONS_POES_MLT,
        # mean_L / std_L scale the first feature column; they were previously not stored,
        # so the L standardisation could not be reproduced from this file alone.
        MEAN_L = mean_L,
        STD_L = std_L,
        MEAN_FLUXES = mean_fluxes,
        STD_FLUXES = std_fluxes,
        MEAN_SME = mean_sme,
        STD_SME = std_sme,
        MEAN_AVG_B = mean_avg_b,
        STD_AVG_B = std_avg_b,
        MEAN_FLOW_SPEED = mean_flow_speed,
        STD_FLOW_SPEED = std_flow_speed,
        MEAN_AVG_PROTON_DENSITY = mean_avg_proton_density,
        STD_AVG_PROTON_DENSITY = std_avg_proton_density,
        MEAN_AVG_SYM_H = mean_avg_sym_h,
        STD_AVG_SYM_H = std_avg_sym_h)