In [1]:
import sys
import os
# caution: path[0] is reserved for script path (or '' in REPL).
sys.path.insert(1, os.path.abspath('./../src'))


from cdflib.epochs_astropy import CDFAstropy as cdfepoch
import astropy.time
import data_loader
import datetime
import matplotlib.pyplot as plt
import multiprocessing as mp
import numpy as np
import pandas as pd
import tqdm

import data_loader
import rbsp_chorus_tool
import chorus_machine_learning_helper

import importlib
importlib.reload(data_loader)
importlib.reload(rbsp_chorus_tool)

%matplotlib qt

In [None]:
#STAGE 0 DATA VERIFICATION FOR POES LSTAR CALCULATIONS

year = 2012
SATID = "m02"
refs = np.load(fr"./../processed_data_chorus_neural_network/STAGE_0/MPE_DATA_PREPROCESSED_WITH_LSTAR/MPE_PREPROCESSED_DATA_T89_{year}.npz", allow_pickle=True)

DATA = refs["DATA"].flatten()[0]
print(DATA)
SAT = DATA[SATID]
print(type(SAT))

dt_for_all = np.array([datetime.datetime.fromtimestamp(t) for t in SAT["UNIX_TIME"]])

plt.plot(dt_for_all, SAT["Lstar"], label="L*", color = "red", marker="*")
plt.plot(dt_for_all, SAT["L"], label = "IGRF Lm", color = "black", marker="*")
plt.ylabel("|L|")
plt.xlabel("Time")
plt.title(f"Some Orbits for {SATID} in {year}")
plt.legend()

plt.show()

In [None]:
year = 2013
sat = "a"
refs = np.load(fr"./../processed_data/chorus_neural_network/STAGE_1/Lstar/RBSP_{sat.upper()}_T89_{year}.npz", allow_pickle=True)

OMNI = data_loader.load_raw_data_from_config(id = ["OMNI", "ONE_HOUR_RESOLUTION"],
                                                 start = datetime.datetime(year = year, month = 1, day = 1),
                                                 end = datetime.datetime(year = year + 1, month = 1, day = 1), 
                                                 root_data_dir = "./../raw_data/")

OMNI_TIME = cdfepoch.unixtime(OMNI["Epoch"])
KP = OMNI["KP"].astype(np.float64)

invalid_omni_times = (OMNI_TIME < 0) | (KP < 0) | (KP >= 99) | np.isnan(KP) | np.isnan(OMNI_TIME)
KP[invalid_omni_times] = np.nan
    
KP_INTERPOLATED = np.interp(refs["UNIX_TIME"], OMNI_TIME, KP, left = np.nan, right = np.nan)    


fig, axs = plt.subplots(2, 1, sharex=True)

dates = np.array([datetime.datetime.fromtimestamp(t) for t in refs["UNIX_TIME"]])


axs[0].plot(dates, refs["Lstar"])
axs[1].plot(dates, KP_INTERPOLATED)
plt.xlabel("Time (UTC)")
axs[0].set_ylabel("L*")
axs[1].set_ylabel("KP-Index")

plt.show()

In [None]:
#Interface for stage 1, Designed to do a year at a time

year = 2019

In [None]:
#Stage 1 RBSP Chorus Preprocessing, Obtains clean chorus amplitudes

#start = datetime.datetime(year = year, month = 1, day = 1)
#end = datetime.datetime(year = year + 1, month = 1, day = 1)

start = datetime.datetime(year = 2019, month = 1, day = 1)
end = datetime.datetime(year = 2019, month = 10, day = 13, hour = 23, minute = 59, second = 59)

WNA_survey_a = data_loader.load_raw_data_from_config(id=["RBSP", "EMFISIS", "L4", "WNA_SURVEY"],
                                                     start=start,
                                                     end=end,
                                                     satellite="a",
                                                     root_data_dir="/project/rbsp/data/",
                                                     use_config_keys_in_subdir=False)

WNA_survey_b = data_loader.load_raw_data_from_config(id=["RBSP", "EMFISIS", "L4", "WNA_SURVEY"],
                                                     start=start,
                                                     end=end,
                                                     satellite="b",
                                                     root_data_dir="/project/rbsp/data/",
                                                     use_config_keys_in_subdir=False)

WFR_spectral_matrix_a = data_loader.load_raw_data_from_config(id=["RBSP", "EMFISIS", "L2", "WFR_SPECTRAL_MATRIX_DIAGONAL"],
                                                              start=start,
                                                              end=end,
                                                              satellite="a",
                                                              root_data_dir="/project/rbsp/data/",
                                                              use_config_keys_in_subdir=False)

WFR_spectral_matrix_b = data_loader.load_raw_data_from_config(id=["RBSP", "EMFISIS", "L2", "WFR_SPECTRAL_MATRIX_DIAGONAL"],
                                                              start=start,
                                                              end=end,
                                                              satellite="b",
                                                              root_data_dir="/project/rbsp/data/",
                                                              use_config_keys_in_subdir=False)

num_wna_files_A = len(WNA_survey_a["timestamps_per_file"])
num_wna_files_B = len(WNA_survey_b["timestamps_per_file"])
num_wfr_files_A = WFR_spectral_matrix_a["WFR_bandwidth"].shape[0]
num_wfr_files_B = WFR_spectral_matrix_b["WFR_bandwidth"].shape[0]

print(f"Number of files loaded: {num_wna_files_A, num_wna_files_B, num_wfr_files_A, num_wfr_files_B}")

if len({num_wna_files_A, num_wfr_files_A}) != 1:
    raise Exception("The same number of days wasn't loaded for RBSP-A!")

if len({num_wna_files_B, num_wfr_files_B}) != 1:
    raise Exception("The same number of days wasn't loaded for RBSP-B!")

In [None]:
mlt_A = WNA_survey_a["MLT"]
L_A = WNA_survey_a["L"]
epoch_A = WNA_survey_a["Epoch"]

time_A = astropy.time.Time(cdfepoch.to_datetime(epoch_A), format="datetime").utc

chorus_A = rbsp_chorus_tool.iterate_through_days_and_calculate_chorus_amplitudes(WNA_survey = WNA_survey_a,
                                                                                 WFR_spectral_matrix = WFR_spectral_matrix_a)

lower_band_chorus_A = chorus_A["Lower_Band"]
upper_band_chorus_A = chorus_A["Upper_Band"]

within_epoch_range_A = (start < time_A) & (time_A < end)
finite_chorus_A = np.isfinite(lower_band_chorus_A) & np.isfinite(upper_band_chorus_A)
#This line might not be necessary but we want to train on clean data, literally any np.nan will fuck it ALL up. Ill probably double check before training
all_valid_coordinates_A = (epoch_A > 0) & (0 <= mlt_A) & (mlt_A <= 24) & (0 < L_A) & (L_A < 10)

epoch_A = epoch_A[within_epoch_range_A & finite_chorus_A & all_valid_coordinates_A]
L_A = L_A[within_epoch_range_A & finite_chorus_A & all_valid_coordinates_A]
mlt_A = mlt_A[within_epoch_range_A & finite_chorus_A & all_valid_coordinates_A]
lower_band_chorus_A = lower_band_chorus_A[within_epoch_range_A & finite_chorus_A & all_valid_coordinates_A]
upper_band_chorus_A = upper_band_chorus_A[within_epoch_range_A & finite_chorus_A & all_valid_coordinates_A]

mlt_B = WNA_survey_b["MLT"]
L_B = WNA_survey_b["L"]
epoch_B = WNA_survey_b["Epoch"]
time_B = astropy.time.Time(cdfepoch.to_datetime(epoch_B), format="datetime").utc

chorus_B = rbsp_chorus_tool.iterate_through_days_and_calculate_chorus_amplitudes(WNA_survey = WNA_survey_b,
                                                                                 WFR_spectral_matrix = WFR_spectral_matrix_b)

lower_band_chorus_B = chorus_B["Lower_Band"]
upper_band_chorus_B = chorus_B["Upper_Band"]

within_epoch_range_B = (start < time_B) & (time_B < end)
finite_chorus_B = np.isfinite(lower_band_chorus_B) & np.isfinite(upper_band_chorus_B)
all_valid_coordinates_B = (epoch_B > 0) & (0 <= mlt_B) & (mlt_B <= 24) & (0 < L_B) & (L_B < 10)

epoch_B = epoch_B[within_epoch_range_B & finite_chorus_B & all_valid_coordinates_B]
L_B = L_B[within_epoch_range_B & finite_chorus_B & all_valid_coordinates_B]
mlt_B = mlt_B[within_epoch_range_B & finite_chorus_B & all_valid_coordinates_B]
lower_band_chorus_B = lower_band_chorus_B[within_epoch_range_B & finite_chorus_B & all_valid_coordinates_B]
upper_band_chorus_B = upper_band_chorus_B[within_epoch_range_B & finite_chorus_B & all_valid_coordinates_B]


print(epoch_A.shape)
print(lower_band_chorus_A.shape)
print(upper_band_chorus_A.shape)
print(L_A.shape)
print(mlt_A.shape)

print(epoch_B.shape)
print(lower_band_chorus_B.shape)
print(upper_band_chorus_B.shape)
print(L_B.shape)
print(mlt_B.shape)

In [None]:
#Save the RBSP stage 1 data, might honestly only need one stage
np.savez(file = os.path.abspath(f"./../processed_data/chorus_neural_network/STAGE_1/CHORUS/RBSP_OBSERVED_CHORUS_{year}.npz"), 
         EPOCH_A = epoch_A, 
         MLT_A = mlt_A, 
         L_A = L_A, 
         LOWER_BAND_CHORUS_A = lower_band_chorus_A,
         UPPER_BAND_CHORUS_A = upper_band_chorus_A,
         EPOCH_B = epoch_B, 
         MLT_B = mlt_B, 
         L_B = L_B, 
         LOWER_BAND_CHORUS_B = lower_band_chorus_B,
         UPPER_BAND_CHORUS_B = upper_band_chorus_B)


In [5]:
#Stage 2, clean then combine RBSP, OMNI, and POES Data and find conjunctions between RBSP and POES

VERSION = "v1a"
FIELD_MODEL = "T89"

MAX_L_DIFF = 0.10
MAX_MLT_DIFF = 2.0
MAX_T_DIFF_SEC = 60

L_SCALE = (1.0 / MAX_L_DIFF)**2
MLT_SCALE = (1.0 / MAX_MLT_DIFF)**2
TIME_SCALE = (1.0 / MAX_T_DIFF_SEC)**2

CONJUNCTIONS_TOTAL = []

for _year in range(2012, 2020, 1):
    
    print(f"Began processing year : {_year}")
    
    #LOAD THE OBSERVED CHORUS
    print(f"Began loading RBSP Data for year: {_year}")
    refs = np.load(fr"./../processed_data/chorus_neural_network/STAGE_1/CHORUS/RBSP_OBSERVED_CHORUS_{_year}.npz", allow_pickle=True)
    RBSP_A = {}
    RBSP_A["EPOCH"] = refs["EPOCH_A"]
    RBSP_A["MLT"] = refs["MLT_A"]
    RBSP_A["L"] = refs["L_A"]
    RBSP_A["LOWER_BAND"] = refs["LOWER_BAND_CHORUS_A"]
    RBSP_A["UPPER_BAND"] = refs["UPPER_BAND_CHORUS_A"]

    RBSP_B = {}
    RBSP_B["EPOCH"] = refs["EPOCH_B"]
    RBSP_B["MLT"] = refs["MLT_B"]
    RBSP_B["L"] = refs["L_B"]
    RBSP_B["LOWER_BAND"] = refs["LOWER_BAND_CHORUS_B"]
    RBSP_B["UPPER_BAND"] = refs["UPPER_BAND_CHORUS_B"]
    
    refs.close()
    
    RBSP_A["UNIX_TIME"] = cdfepoch.unixtime(RBSP_A["EPOCH"])
    RBSP_B["UNIX_TIME"] = cdfepoch.unixtime(RBSP_B["EPOCH"])
    
    #LOAD THE LSTAR AND INTERPOLATE
    refs_A = np.load(fr"./../processed_data/chorus_neural_network/STAGE_1/Lstar/RBSP_A_{FIELD_MODEL}_{_year}.npz", allow_pickle=True)
    
    MAGEPHEM_TIME_A = refs_A["UNIX_TIME"]
    MAGEPHEM_LSTAR_A = refs_A["Lstar"]
    MAGEPHEM_L_A = refs_A["L"]
    
    refs_A.close()
    
    refs_B = np.load(fr"./../processed_data/chorus_neural_network/STAGE_1/Lstar/RBSP_B_{FIELD_MODEL}_{_year}.npz", allow_pickle=True)
    
    MAGEPHEM_TIME_B = refs_B["UNIX_TIME"]
    MAGEPHEM_LSTAR_B = refs_B["Lstar"]
    MAGEPHEM_L_B = refs_B["L"]
    
    refs_B.close()
    
    #PREPROCESS DATA
    
    RBSP_A["LSTAR"] = np.interp(RBSP_A["UNIX_TIME"], MAGEPHEM_TIME_A, MAGEPHEM_LSTAR_A, left = np.nan, right = np.nan)
    RBSP_B["LSTAR"] = np.interp(RBSP_B["UNIX_TIME"], MAGEPHEM_TIME_B, MAGEPHEM_LSTAR_B, left = np.nan, right = np.nan)
    
    order_A = np.argsort(RBSP_A["UNIX_TIME"])
    order_B = np.argsort(RBSP_B["UNIX_TIME"])
    
    RBSP_A["UNIX_TIME"] = RBSP_A["UNIX_TIME"][order_A]
    RBSP_A["EPOCH"] = RBSP_A["EPOCH"][order_A]
    RBSP_A["MLT"] = RBSP_A["MLT"][order_A]
    RBSP_A["L"] = RBSP_A["L"][order_A]
    RBSP_A["LSTAR"] = RBSP_A["LSTAR"][order_A]
    RBSP_A["LOWER_BAND"] = RBSP_A["LOWER_BAND"][order_A]
    RBSP_A["UPPER_BAND"] = RBSP_A["UPPER_BAND"][order_A]

    RBSP_B["UNIX_TIME"] = RBSP_B["UNIX_TIME"][order_B]
    RBSP_B["EPOCH"] = RBSP_B["EPOCH"][order_B]
    RBSP_B["MLT"] = RBSP_B["MLT"][order_B]
    RBSP_B["L"] = RBSP_B["L"][order_B]
    RBSP_B["LSTAR"] = RBSP_B["LSTAR"][order_B]
    RBSP_B["LOWER_BAND"] = RBSP_B["LOWER_BAND"][order_B]
    RBSP_B["UPPER_BAND"] = RBSP_B["UPPER_BAND"][order_B]
    
    RBSP = [RBSP_A, RBSP_B]
    print(f"RBSP Data loaded for year : {_year}")
    
    print(f"Began loading POES Data for year : {_year}")
    
    POES = {}
    
    refs = np.load(fr"./../processed_data/chorus_neural_network/STAGE_0/MPE_DATA_PREPROCESSED_WITH_LSTAR/MPE_PREPROCESSED_DATA_T89_{_year}.npz", allow_pickle=True)
    POES_DATA = refs["DATA"].flatten()[0]
    
    for SATID in POES_DATA:
        
        SAT = POES_DATA[SATID]
        
        UNIX_TIME = []
        LSTAR = []
        MLT = []
        BLC_FLUX_0 = []
        BLC_FLUX_1 = []
        BLC_FLUX_2 = []
        BLC_FLUX_3 = []
        BLC_FLUX_4 = []
        BLC_FLUX_5 = []
        BLC_FLUX_6 = []
        BLC_FLUX_7 = []
        
        for p in range(len(SAT["UNIX_TIME"]) - 1):
            
            t1 = SAT["UNIX_TIME"][p]
            t2 = SAT["UNIX_TIME"][p + 1]
                        
            if t2 - t1 < 30.0:
                
                t_points = np.arange(t1, t2 + 1, step=2, dtype=np.float64)
                
                UNIX_TIME.append(t_points)
                LSTAR.append(np.interp(x = t_points, xp = [t1, t2], fp = [SAT["Lstar"][p], SAT["Lstar"][p+1]], left=np.nan, right = np.nan))
                
                X_INTERPOLATED = np.interp(t_points, xp = [t1, t2], fp = [np.cos(SAT["MLT"][p] * 2 * np.pi / 24.0), np.cos(SAT["MLT"][p+1] * 2 * np.pi / 24.0)], left=np.nan, right=np.nan)
                Y_INTERPOLATED = np.interp(t_points, xp = [t1, t2], fp = [np.sin(SAT["MLT"][p] * 2 * np.pi / 24.0), np.sin(SAT["MLT"][p+1] * 2 * np.pi / 24.0)], left=np.nan, right=np.nan)
                ANGLE_IN_RADIANS = np.mod(np.arctan2(Y_INTERPOLATED, X_INTERPOLATED) + 2 * np.pi, 2 * np.pi)
                
                MLT.append((ANGLE_IN_RADIANS * 24.0) / (2 * np.pi))
                                
                BLC_FLUX_0.append(np.interp(x = t_points, xp = [t1, t2], fp = [SAT["BLC_Flux"][p, 0], SAT["BLC_Flux"][p + 1, 0]], left=np.nan, right=np.nan))
                BLC_FLUX_1.append(np.interp(x = t_points, xp = [t1, t2], fp = [SAT["BLC_Flux"][p, 1], SAT["BLC_Flux"][p + 1, 1]], left=np.nan, right=np.nan))
                BLC_FLUX_2.append(np.interp(x = t_points, xp = [t1, t2], fp = [SAT["BLC_Flux"][p, 2], SAT["BLC_Flux"][p + 1, 2]], left=np.nan, right=np.nan))
                BLC_FLUX_3.append(np.interp(x = t_points, xp = [t1, t2], fp = [SAT["BLC_Flux"][p, 3], SAT["BLC_Flux"][p + 1, 3]], left=np.nan, right=np.nan))
                BLC_FLUX_4.append(np.interp(x = t_points, xp = [t1, t2], fp = [SAT["BLC_Flux"][p, 4], SAT["BLC_Flux"][p + 1, 4]], left=np.nan, right=np.nan))
                BLC_FLUX_5.append(np.interp(x = t_points, xp = [t1, t2], fp = [SAT["BLC_Flux"][p, 5], SAT["BLC_Flux"][p + 1, 5]], left=np.nan, right=np.nan))
                BLC_FLUX_6.append(np.interp(x = t_points, xp = [t1, t2], fp = [SAT["BLC_Flux"][p, 6], SAT["BLC_Flux"][p + 1, 6]], left=np.nan, right=np.nan))
                BLC_FLUX_7.append(np.interp(x = t_points, xp = [t1, t2], fp = [SAT["BLC_Flux"][p, 7], SAT["BLC_Flux"][p + 1, 7]], left=np.nan, right=np.nan))

        UNIX_TIME = np.hstack(UNIX_TIME)
        LSTAR = np.hstack(LSTAR)
        MLT = np.hstack(MLT)
        BLC_FLUX_0 = np.hstack(BLC_FLUX_0)
        BLC_FLUX_1 = np.hstack(BLC_FLUX_1)
        BLC_FLUX_2 = np.hstack(BLC_FLUX_2)
        BLC_FLUX_3 = np.hstack(BLC_FLUX_3)
        BLC_FLUX_4 = np.hstack(BLC_FLUX_4)
        BLC_FLUX_5 = np.hstack(BLC_FLUX_5)
        BLC_FLUX_6 = np.hstack(BLC_FLUX_6)
        BLC_FLUX_7 = np.hstack(BLC_FLUX_7)
        BLC_FLUX = np.hstack([np.expand_dims(BLC_FLUX_0, axis = 1),
                              np.expand_dims(BLC_FLUX_1, axis = 1),
                              np.expand_dims(BLC_FLUX_2, axis = 1), 
                              np.expand_dims(BLC_FLUX_3, axis = 1),
                              np.expand_dims(BLC_FLUX_4, axis = 1),
                              np.expand_dims(BLC_FLUX_5, axis = 1),
                              np.expand_dims(BLC_FLUX_6, axis = 1),
                              np.expand_dims(BLC_FLUX_7, axis = 1)])
        
        POES[SATID] = {"UNIX_TIME" : UNIX_TIME, 
                        "MLT" : MLT, 
                        "BLC_Flux" : BLC_FLUX, 
                        "LSTAR" : LSTAR}
    
    if not POES:
        print(f"No POES satellite coverage found for year : {_year}")
        print(f"SKIPPING YEAR : {_year}")
        continue
    
    refs.close()
    
    print(f"Finished loading POES data for year : {_year}")
    
    OMNI = chorus_machine_learning_helper.load_OMNI_year(_year)
    SUPERMAG = chorus_machine_learning_helper.load_SUPERMAG_SME_year(_year)
    
    
    #FINALLY FIND THE CONJUNCTIONS
    
    print(f"Finding CONJUNCTIONS for year : {_year}")
    CONJUNCTIONS_YEAR = []
    for SATID in POES:
                
        NUMBER_OF_RECORDS = len(POES[SATID]["UNIX_TIME"])
        CONJUNCTIONS = []
        
        print(f"Number of records: {NUMBER_OF_RECORDS} for POES SATELLITE: {SATID}")
                
        for T in tqdm.tqdm(range(NUMBER_OF_RECORDS)):
            
            UNIX_TIME = POES[SATID]["UNIX_TIME"][T]
            LSTAR = POES[SATID]["LSTAR"][T]
            MLT = POES[SATID]["MLT"][T]
            FLUX_SPECTRUM = POES[SATID]["BLC_Flux"][T, :]

            for RBSP_PROBE in RBSP:
                
                TIME_RANGE = np.searchsorted(a = RBSP_PROBE["UNIX_TIME"], v = [(UNIX_TIME - MAX_T_DIFF_SEC), (UNIX_TIME + MAX_T_DIFF_SEC)])

                CANDIDATE_TIMES = []
                CANDIDATE_LSTAR = []
                CANDIDATE_DEL_MLT = []
                CANDIDATE_UPPER_BAND = []
                CANDIDATE_LOWER_BAND = []
                
                for POINT in range(TIME_RANGE[0], TIME_RANGE[1], 1):
                    
                    DEL_LSTAR = (LSTAR - RBSP_PROBE["LSTAR"][POINT])
                    DEL_MLT = np.min( [(max(MLT, RBSP_PROBE["MLT"][POINT]) -  min(MLT, RBSP_PROBE["MLT"][POINT])),
                                      ((24 - max(MLT, RBSP_PROBE["MLT"][POINT])) + (min(MLT, RBSP_PROBE["MLT"][POINT]) - 0))])
                    
                    if (DEL_LSTAR**2 < MAX_L_DIFF**2) and (DEL_MLT**2 < MAX_MLT_DIFF**2):
                                                
                        CANDIDATE_TIMES.append(RBSP_PROBE["UNIX_TIME"][POINT])
                        CANDIDATE_LSTAR.append(RBSP_PROBE["LSTAR"][POINT])
                        CANDIDATE_DEL_MLT.append(DEL_MLT)
                        CANDIDATE_UPPER_BAND.append(RBSP_PROBE["UPPER_BAND"][POINT])
                        CANDIDATE_LOWER_BAND.append(RBSP_PROBE["LOWER_BAND"][POINT])
                        
                if len(CANDIDATE_TIMES) == 0:
                    continue
                
                TIME_RANGE = np.searchsorted(a = SUPERMAG["UNIX_TIME"], v = [(UNIX_TIME - MAX_T_DIFF_SEC), (UNIX_TIME + MAX_T_DIFF_SEC)])
                AVG_SME = np.nanmean(SUPERMAG["SME"][TIME_RANGE[0]:TIME_RANGE[1]])

                TIME_RANGE = np.searchsorted(a = OMNI["UNIX_TIME"], v = [(UNIX_TIME - MAX_T_DIFF_SEC), (UNIX_TIME + MAX_T_DIFF_SEC)])
                AVG_AVG_B = np.nanmean(OMNI["AVG_B"][TIME_RANGE[0]:TIME_RANGE[1]])
                AVG_FLOW_SPEED = np.nanmean(OMNI["FLOW_SPEED"][TIME_RANGE[0]:TIME_RANGE[1]])
                AVG_PROTON_DENSITY = np.nanmean(OMNI["PROTON_DENSITY"][TIME_RANGE[0]:TIME_RANGE[1]])
                AVG_SYM_H = np.nanmean(OMNI["SYM_H"][TIME_RANGE[0]:TIME_RANGE[1]])
                
                if np.isfinite(AVG_SME) & np.isfinite(AVG_AVG_B) & np.isfinite(AVG_FLOW_SPEED) & np.isfinite(AVG_PROTON_DENSITY) & np.isfinite(AVG_SYM_H):
                    
                    
                    CONJUNCTION =  [UNIX_TIME, 
                                    LSTAR, 
                                    MLT,
                                    *FLUX_SPECTRUM,
                                    np.nanmean(CANDIDATE_TIMES), #TIME OF RBSP POINT CHOSEN
                                    np.nanmean(CANDIDATE_LSTAR), #LSTAR OF RBSP POINT CHOSEN
                                    np.nanmean(CANDIDATE_DEL_MLT), #DIFFERENCE IN MLT FOUND
                                    np.nanmean(CANDIDATE_UPPER_BAND), #UPPER BAND CHORUS OBSERVED
                                    np.nanmean(CANDIDATE_LOWER_BAND), #LOWER BAND CHORUS OBSERVED
                                    AVG_SME, 
                                    AVG_AVG_B,
                                    AVG_FLOW_SPEED, 
                                    AVG_PROTON_DENSITY,
                                    AVG_SYM_H]
                                                
                    CONJUNCTIONS.append(CONJUNCTION)
        

        print(f"Number of conjunctions: {len(CONJUNCTIONS)}")
        
        CONJUNCTIONS_YEAR.extend(CONJUNCTIONS)
    
    
    CONJUNCTIONS_TOTAL.extend(CONJUNCTIONS_YEAR)
    
    print(f"Total number of conjunctions so far: {len(CONJUNCTIONS_TOTAL)}")
    
CONJUNCTIONS_TO_BE_SAVED = np.vstack(CONJUNCTIONS_TOTAL)

print(f"Conjunctions to be saved: {CONJUNCTIONS_TO_BE_SAVED.shape}")

np.savez(f"./../processed_data/chorus_neural_network/STAGE_2/{VERSION}/CONJUNCTIONS_{VERSION}_{FIELD_MODEL}.npz", CONJUNCTIONS = CONJUNCTIONS_TO_BE_SAVED)


Began processing year : 2012
Began loading RBSP Data for year: 2012
RBSP Data loaded for year : 2012
Began loading POES Data for year : 2012
Finished loading POES data for year : 2012
Began loading OMNI data for year : 2012
Finished loading OMNI data for year : 2012
Began loading SUPERMAG data for year : 2012
Finished loading SUPERMAG data for year : 2012
Finding CONJUNCTIONS for year : 2012
Number of records: 11216727 for POES SATELLITE: m02


  AVG_AVG_B = np.nanmean(OMNI["AVG_B"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_FLOW_SPEED = np.nanmean(OMNI["FLOW_SPEED"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_PROTON_DENSITY = np.nanmean(OMNI["PROTON_DENSITY"][TIME_RANGE[0]:TIME_RANGE[1]])
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 11216727/11216727 [11:08<00:00, 16777.30it/s]


Number of conjunctions: 8953
Number of records: 11451286 for POES SATELLITE: n15


100%|██████████| 11451286/11451286 [11:11<00:00, 17054.86it/s]


Number of conjunctions: 11836
Number of records: 11313190 for POES SATELLITE: n17


100%|██████████| 11313190/11313190 [11:01<00:00, 17112.23it/s]


Number of conjunctions: 12906
Number of records: 11786451 for POES SATELLITE: n18


100%|██████████| 11786451/11786451 [11:24<00:00, 17226.28it/s]


Number of conjunctions: 8650
Number of records: 11755449 for POES SATELLITE: n19


100%|██████████| 11755449/11755449 [11:31<00:00, 17010.28it/s]


Number of conjunctions: 5649
Total number of conjunctions so far: 47994
Began processing year : 2013
Began loading RBSP Data for year: 2013
RBSP Data loaded for year : 2013
Began loading POES Data for year : 2013
Finished loading POES data for year : 2013
Began loading OMNI data for year : 2013
Finished loading OMNI data for year : 2013
Began loading SUPERMAG data for year : 2013
Finished loading SUPERMAG data for year : 2013
Finding CONJUNCTIONS for year : 2013
Number of records: 10305126 for POES SATELLITE: m02


  AVG_AVG_B = np.nanmean(OMNI["AVG_B"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_FLOW_SPEED = np.nanmean(OMNI["FLOW_SPEED"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_PROTON_DENSITY = np.nanmean(OMNI["PROTON_DENSITY"][TIME_RANGE[0]:TIME_RANGE[1]])
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 10305126/10305126 [27:53<00:00, 6156.21it/s] 


Number of conjunctions: 29102
Number of records: 10272510 for POES SATELLITE: n15


100%|██████████| 10272510/10272510 [27:54<00:00, 6135.91it/s] 


Number of conjunctions: 30428
Number of records: 1722924 for POES SATELLITE: n17


100%|██████████| 1722924/1722924 [04:39<00:00, 6164.16it/s] 


Number of conjunctions: 1922
Number of records: 10555329 for POES SATELLITE: n18


100%|██████████| 10555329/10555329 [28:39<00:00, 6137.57it/s] 


Number of conjunctions: 30840
Number of records: 10527786 for POES SATELLITE: n19


100%|██████████| 10527786/10527786 [28:40<00:00, 6119.04it/s] 


Number of conjunctions: 29368
Total number of conjunctions so far: 169654
Began processing year : 2014
Began loading RBSP Data for year: 2014
RBSP Data loaded for year : 2014
Began loading POES Data for year : 2014
Finished loading POES data for year : 2014
Began loading OMNI data for year : 2014
Finished loading OMNI data for year : 2014
Began loading SUPERMAG data for year : 2014
Finished loading SUPERMAG data for year : 2014
Finding CONJUNCTIONS for year : 2014
Number of records: 10365324 for POES SATELLITE: m01


  AVG_AVG_B = np.nanmean(OMNI["AVG_B"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_FLOW_SPEED = np.nanmean(OMNI["FLOW_SPEED"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_PROTON_DENSITY = np.nanmean(OMNI["PROTON_DENSITY"][TIME_RANGE[0]:TIME_RANGE[1]])
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 10365324/10365324 [27:46<00:00, 6221.24it/s]


Number of conjunctions: 23487
Number of records: 10505463 for POES SATELLITE: m02


100%|██████████| 10505463/10505463 [28:22<00:00, 6170.78it/s]


Number of conjunctions: 23822
Number of records: 10538006 for POES SATELLITE: n15


100%|██████████| 10538006/10538006 [28:22<00:00, 6190.16it/s]


Number of conjunctions: 20230
Number of records: 4615328 for POES SATELLITE: n16


100%|██████████| 4615328/4615328 [12:28<00:00, 6162.81it/s] 


Number of conjunctions: 11952
Number of records: 10866624 for POES SATELLITE: n18


100%|██████████| 10866624/10866624 [29:17<00:00, 6182.50it/s] 


Number of conjunctions: 21543
Number of records: 10872481 for POES SATELLITE: n19


100%|██████████| 10872481/10872481 [29:25<00:00, 6156.83it/s] 


Number of conjunctions: 23471
Total number of conjunctions so far: 294159
Began processing year : 2015
Began loading RBSP Data for year: 2015
RBSP Data loaded for year : 2015
Began loading POES Data for year : 2015
Finished loading POES data for year : 2015
Began loading OMNI data for year : 2015
Finished loading OMNI data for year : 2015
Began loading SUPERMAG data for year : 2015
Finished loading SUPERMAG data for year : 2015
Finding CONJUNCTIONS for year : 2015
Number of records: 10129644 for POES SATELLITE: m01


  AVG_AVG_B = np.nanmean(OMNI["AVG_B"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_FLOW_SPEED = np.nanmean(OMNI["FLOW_SPEED"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_PROTON_DENSITY = np.nanmean(OMNI["PROTON_DENSITY"][TIME_RANGE[0]:TIME_RANGE[1]])
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 10129644/10129644 [27:28<00:00, 6143.58it/s] 


Number of conjunctions: 25927
Number of records: 10144838 for POES SATELLITE: m02


100%|██████████| 10144838/10144838 [27:26<00:00, 6159.67it/s] 


Number of conjunctions: 25441
Number of records: 10114409 for POES SATELLITE: n15


100%|██████████| 10114409/10114409 [27:25<00:00, 6146.62it/s] 


Number of conjunctions: 27117
Number of records: 10414593 for POES SATELLITE: n18


100%|██████████| 10414593/10414593 [28:16<00:00, 6137.40it/s] 


Number of conjunctions: 28430
Number of records: 10573495 for POES SATELLITE: n19


100%|██████████| 10573495/10573495 [28:51<00:00, 6105.46it/s] 


Number of conjunctions: 33336
Total number of conjunctions so far: 434410
Began processing year : 2016
Began loading RBSP Data for year: 2016
RBSP Data loaded for year : 2016
Began loading POES Data for year : 2016
Finished loading POES data for year : 2016
Began loading OMNI data for year : 2016
Finished loading OMNI data for year : 2016
Began loading SUPERMAG data for year : 2016
Finished loading SUPERMAG data for year : 2016
Finding CONJUNCTIONS for year : 2016
Number of records: 10379217 for POES SATELLITE: m02


  AVG_AVG_B = np.nanmean(OMNI["AVG_B"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_FLOW_SPEED = np.nanmean(OMNI["FLOW_SPEED"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_PROTON_DENSITY = np.nanmean(OMNI["PROTON_DENSITY"][TIME_RANGE[0]:TIME_RANGE[1]])
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 10379217/10379217 [27:52<00:00, 6206.12it/s] 


Number of conjunctions: 26355
Number of records: 10420462 for POES SATELLITE: n15


100%|██████████| 10420462/10420462 [28:01<00:00, 6196.42it/s] 


Number of conjunctions: 20647
Number of records: 10643241 for POES SATELLITE: n18


100%|██████████| 10643241/10643241 [28:35<00:00, 6203.08it/s] 


Number of conjunctions: 21942
Number of records: 10881780 for POES SATELLITE: n19


100%|██████████| 10881780/10881780 [29:19<00:00, 6185.65it/s] 


Number of conjunctions: 22643
Total number of conjunctions so far: 525997
Began processing year : 2017
Began loading RBSP Data for year: 2017
RBSP Data loaded for year : 2017
Began loading POES Data for year : 2017
Finished loading POES data for year : 2017
Began loading OMNI data for year : 2017
Finished loading OMNI data for year : 2017
Began loading SUPERMAG data for year : 2017
Finished loading SUPERMAG data for year : 2017
Finding CONJUNCTIONS for year : 2017
Number of records: 10404051 for POES SATELLITE: m01


  AVG_AVG_B = np.nanmean(OMNI["AVG_B"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_FLOW_SPEED = np.nanmean(OMNI["FLOW_SPEED"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_PROTON_DENSITY = np.nanmean(OMNI["PROTON_DENSITY"][TIME_RANGE[0]:TIME_RANGE[1]])
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 10404051/10404051 [28:11<00:00, 6149.45it/s] 


Number of conjunctions: 27830
Number of records: 10407416 for POES SATELLITE: m02


100%|██████████| 10407416/10407416 [28:13<00:00, 6147.02it/s] 


Number of conjunctions: 27835
Number of records: 10351902 for POES SATELLITE: n15


100%|██████████| 10351902/10351902 [28:11<00:00, 6118.32it/s] 


Number of conjunctions: 28609
Number of records: 10561097 for POES SATELLITE: n18


100%|██████████| 10561097/10561097 [28:48<00:00, 6111.00it/s] 


Number of conjunctions: 28829
Number of records: 10840301 for POES SATELLITE: n19


100%|██████████| 10840301/10840301 [29:39<00:00, 6090.56it/s] 


Number of conjunctions: 28872
Total number of conjunctions so far: 667972
Began processing year : 2018
Began loading RBSP Data for year: 2018
RBSP Data loaded for year : 2018
Began loading POES Data for year : 2018
Finished loading POES data for year : 2018
Began loading OMNI data for year : 2018
Finished loading OMNI data for year : 2018
Began loading SUPERMAG data for year : 2018
Finished loading SUPERMAG data for year : 2018
Finding CONJUNCTIONS for year : 2018
Number of records: 10201857 for POES SATELLITE: m01


  AVG_AVG_B = np.nanmean(OMNI["AVG_B"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_FLOW_SPEED = np.nanmean(OMNI["FLOW_SPEED"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_PROTON_DENSITY = np.nanmean(OMNI["PROTON_DENSITY"][TIME_RANGE[0]:TIME_RANGE[1]])
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 10201857/10201857 [27:45<00:00, 6123.73it/s] 


Number of conjunctions: 29803
Number of records: 10432410 for POES SATELLITE: m02


100%|██████████| 10432410/10432410 [28:20<00:00, 6134.74it/s] 


Number of conjunctions: 30300
Number of records: 10368403 for POES SATELLITE: n15


100%|██████████| 10368403/10368403 [28:14<00:00, 6118.77it/s] 


Number of conjunctions: 29064
Number of records: 10660286 for POES SATELLITE: n18


100%|██████████| 10660286/10660286 [28:59<00:00, 6129.71it/s] 


Number of conjunctions: 31664
Number of records: 10838917 for POES SATELLITE: n19


100%|██████████| 10838917/10838917 [29:29<00:00, 6126.01it/s] 


Number of conjunctions: 26640
Total number of conjunctions so far: 815443
Began processing year : 2019
Began loading RBSP Data for year: 2019
RBSP Data loaded for year : 2019
Began loading POES Data for year : 2019
Finished loading POES data for year : 2019
Began loading OMNI data for year : 2019
Finished loading OMNI data for year : 2019
Began loading SUPERMAG data for year : 2019
Finished loading SUPERMAG data for year : 2019
Finding CONJUNCTIONS for year : 2019
Number of records: 10017532 for POES SATELLITE: m02


  AVG_AVG_B = np.nanmean(OMNI["AVG_B"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_FLOW_SPEED = np.nanmean(OMNI["FLOW_SPEED"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_PROTON_DENSITY = np.nanmean(OMNI["PROTON_DENSITY"][TIME_RANGE[0]:TIME_RANGE[1]])
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 10017532/10017532 [18:14<00:00, 9149.49it/s]  


Number of conjunctions: 11182
Number of records: 9066605 for POES SATELLITE: m03


100%|██████████| 9066605/9066605 [17:53<00:00, 8447.34it/s]  


Number of conjunctions: 11160
Number of records: 10070766 for POES SATELLITE: n15


100%|██████████| 10070766/10070766 [18:28<00:00, 9085.56it/s]  


Number of conjunctions: 16017
Number of records: 10372732 for POES SATELLITE: n18


100%|██████████| 10372732/10372732 [18:47<00:00, 9197.38it/s]  


Number of conjunctions: 13626
Number of records: 10486100 for POES SATELLITE: n19


100%|██████████| 10486100/10486100 [19:03<00:00, 9169.72it/s]  


Number of conjunctions: 21436
Total number of conjunctions so far: 888864
Conjunctions to be saved: (888864, 21)


In [11]:
#Stage 3, Look at the data and make sure its good enough, then remove solar proton events
VERSION = "v1a"
FIELD_MODEL = "T89"

CONJUNCTIONS_REFS = np.load(f"./../processed_data/chorus_neural_network/STAGE_2/{VERSION}/CONJUNCTIONS_{VERSION}_{FIELD_MODEL}.npz")

CONJUNCTIONS_TESTING = CONJUNCTIONS_REFS["CONJUNCTIONS"]

CONJUNCTIONS_REFS.close()

In [9]:

'''CONJUNCTION =  [UNIX_TIME, 
                    LSTAR, 
                    MLT,
                    *FLUX_SPECTRUM,
                    TOTAL_TIME / NUM_CANDIDATES, #TIME OF RBSP POINT CHOSEN
                    TOTAL_LSTAR / NUM_CANDIDATES, #LSTAR OF RBSP POINT CHOSEN
                    TOTAL_DEL_MLT / NUM_CANDIDATES, #DIFFERENCE IN MLT FOUND
                    TOTAL_UPPER_BAND / NUM_CANDIDATES, #UPPER BAND CHORUS OBSERVED
                    TOTAL_LOWER_BAND / NUM_CANDIDATES, #LOWER BAND CHORUS OBSERVED
                    AVG_SME, 
                    AVG_AVG_B,
                    AVG_FLOW_SPEED, 
                    AVG_PROTON_DENSITY,
                    AVG_SYM_H]'''

C_POES_TIME = CONJUNCTIONS_TESTING[:, 0]
C_POES_LSTAR = CONJUNCTIONS_TESTING[:, 1]
C_POES_MLT = CONJUNCTIONS_TESTING[:, 2]
C_POES_FLUX = CONJUNCTIONS_TESTING[:, 3:-10]
C_RBSP_TIME = CONJUNCTIONS_TESTING[:, -10]
C_RBSP_LSTAR = CONJUNCTIONS_TESTING[:, -9]
C_RBSP_DEL_MLT = CONJUNCTIONS_TESTING[:, -8]
C_RBSP_UPPER_BAND = CONJUNCTIONS_TESTING[:, -7]
C_RBSP_LOWER_BAND = CONJUNCTIONS_TESTING[:, -6]
C_AVG_SME = CONJUNCTIONS_TESTING[:, -5]
C_AVG_AVG_B = CONJUNCTIONS_TESTING[:, -4]
C_AVG_FLOW_SPEED = CONJUNCTIONS_TESTING[:, -3]
C_AVG_PROTON_DENSITY = CONJUNCTIONS_TESTING[:, -2]
C_AVG_SYM_H = CONJUNCTIONS_TESTING[:, -1]

with open(f"./../processed_data/chorus_neural_network/STAGE_2/{VERSION}/CONJUNCTIONS_{VERSION}_FIELD_MODEL.txt", "w") as f:
    f.write("\nConjunctions:\n")
    f.write(f"Number of conjunctions: {CONJUNCTIONS_TESTING.shape[0]} [#]\n")
    f.write(f"Minimum RBSP Time: {np.min(C_RBSP_TIME)} [seconds since unix epoch]\n")
    f.write(f"Maximum RBSP Time: {np.max(C_RBSP_TIME)} [seconds since unix epoch]\n")
    f.write(f"Minimum POES Time: {np.min(C_POES_TIME)} [seconds since unix epoch]\n")
    f.write(f"Maximum POES Time: {np.max(C_POES_TIME)} [seconds since unix epoch]\n")
    
    f.write(f"\nL:\n")
    f.write(f"Mean Difference: {np.mean(C_POES_LSTAR - C_RBSP_LSTAR)} [L]\n")
    f.write(f"Standard deviation of Difference {np.std(C_POES_LSTAR - C_RBSP_LSTAR)} [L]\n")
    f.write(f"Minimum Absolute Difference : {np.min(np.abs(C_POES_LSTAR - C_RBSP_LSTAR))} [L]\n")
    f.write(f"Maximum Absolute Difference : {np.max(np.abs(C_POES_LSTAR - C_RBSP_LSTAR))} [L]\n")

    f.write("\nMLT: \n")
    f.write(f"Mean Absolute Difference: {np.mean(C_RBSP_DEL_MLT)} [MLT]\n")
    f.write(f"Standard deviation of Absolute Difference {np.std(C_RBSP_DEL_MLT)} [MLT]\n")
    f.write(f"Minimum Absolute Difference : {np.min(np.abs(C_RBSP_DEL_MLT))} [MLT]\n")
    f.write(f"Maximum Absolute Difference : {np.max(np.abs(C_RBSP_DEL_MLT))} [MLT]\n")

    f.write(f"\nTime: \n")
    f.write(f"Mean Difference: {np.mean(C_POES_TIME - C_RBSP_TIME)} [s]\n")
    f.write(f"Standard deviation of Difference {np.std(C_POES_TIME - C_RBSP_TIME)} [s]\n")
    f.write(f"Minimum Absolute Difference : {np.min(np.abs(C_POES_TIME - C_RBSP_TIME))} [s]\n")
    f.write(f"Maximum Absolute Difference : {np.max(np.abs(C_POES_TIME - C_RBSP_TIME))} [s]\n")

    f.write(f"\nUpper Band Chorus: \n")
    f.write(f"Mean: {np.mean(C_RBSP_UPPER_BAND)} [pT]\n")
    f.write(f"Standard Deviation: {np.std(C_RBSP_UPPER_BAND)} [pT]\n")
    f.write(f"Minimum: {np.min(C_RBSP_UPPER_BAND)} [pT]\n")
    f.write(f"Maximum: {np.max(C_RBSP_UPPER_BAND)} [pT]\n")
    
    f.write(f"\nLower Band Chorus: \n")
    f.write(f"Mean: {np.mean(C_RBSP_LOWER_BAND)} [pT]\n")
    f.write(f"Standard Deviation: {np.std(C_RBSP_LOWER_BAND)} [pT]\n")
    f.write(f"Minimum: {np.min(C_RBSP_LOWER_BAND)} [pT]\n")
    f.write(f"Maximum: {np.max(C_RBSP_LOWER_BAND)} [pT]\n")


    f.write(f"\nSME: \n")
    f.write(f"Mean: {np.mean(C_AVG_SME)} [nT]\n")
    f.write(f"Standard Deviation: {np.std(C_AVG_SME)} [nT]\n")
    f.write(f"Minimum: {np.min(C_AVG_SME)} [nT]\n")
    f.write(f"Maximum: {np.max(C_AVG_SME)} [nT]\n")

    f.write(f"\nAVG_B: \n")
    f.write(f"Mean: {np.mean(C_AVG_AVG_B)} [nT]\n")
    f.write(f"Standard Deviation: {np.std(C_AVG_AVG_B)} [nT]\n")
    f.write(f"Minimum: {np.min(C_AVG_AVG_B)} [nT]\n")
    f.write(f"Maximum: {np.max(C_AVG_AVG_B)} [nT]\n")

    f.write(f"\nFlow Speed: \n")
    f.write(f"Mean: {np.mean(C_AVG_FLOW_SPEED)} [km/s]\n")
    f.write(f"Standard Deviation: {np.std(C_AVG_FLOW_SPEED)} [km/s]\n")
    f.write(f"Minimum: {np.min(C_AVG_FLOW_SPEED)} [km/s]\n")
    f.write(f"Maximum: {np.max(C_AVG_FLOW_SPEED)} [km/s]\n")
    
    f.write(f"\nProton Density: \n")
    f.write(f"Mean: {np.mean(C_AVG_PROTON_DENSITY)} [n/cc]\n")
    f.write(f"Standard Deviation: {np.std(C_AVG_PROTON_DENSITY)} [n/cc]\n")
    f.write(f"Minimum: {np.min(C_AVG_PROTON_DENSITY)} [n/cc]\n")
    f.write(f"Maximum: {np.max(C_AVG_PROTON_DENSITY)} [n/cc]\n")
    
    f.write(f"\nSYM_H: \n")
    f.write(f"Mean: {np.mean(C_AVG_SYM_H)} [nT]\n")
    f.write(f"Standard Deviation: {np.std(C_AVG_SYM_H)} [nT]\n")
    f.write(f"Minimum: {np.min(C_AVG_SYM_H)} [nT]\n")
    f.write(f"Maximum: {np.max(C_AVG_SYM_H)} [nT]\n")


In [10]:
plt.title("RBSP - Closest POES L Shell Comparison")
plt.xlabel("RBSP L-Shell")
plt.ylabel("Closest POES L-Shell")
plt.hlines(y = 4, xmin=1, xmax=7, color="black")
plt.vlines(x = 4, ymin=1, ymax=7, color="black")

plt.scatter(C_RBSP_LSTAR, C_POES_LSTAR)

print(f"Mean difference: {np.mean(C_POES_LSTAR - C_RBSP_LSTAR)}")
print(f"Standard deviation of difference {np.std(C_POES_LSTAR - C_RBSP_LSTAR)}")
print(f"Maximum difference : {np.max(C_POES_LSTAR - C_RBSP_LSTAR)}")


Mean difference: -0.0012989820414318393
Standard deviation of difference 0.062155422303787004
Maximum difference : 0.09999985918294563


In [2]:
#Stage 3 Continued, Removing solar proton events!

VERSION = "v1a"
FIELD_MODEL = "T89"

CONJUNCTIONS_REFS = np.load(f"./../processed_data/chorus_neural_network/STAGE_2/{VERSION}/CONJUNCTIONS_{VERSION}_{FIELD_MODEL}.npz")

CONJUNCTIONS = CONJUNCTIONS_REFS["CONJUNCTIONS"]

CONJUNCTIONS_REFS.close()

SOLAR_PROTON_EVENT_LIST = pd.read_csv(f"./../processed_data/chorus_neural_network/SOLAR_PROTON_EVENT_LIST_1976_2024.csv")


In [3]:
'''CONJUNCTION =  [UNIX_TIME, 
                    L, 
                    MLT,
                    *FLUX_SPECTRUM,
                    candidate[0], #TIME
                    candidate[1], #L
                    candidate[2], #MLT
                    candidate[3], #del_MLT
                    candidate[4], #CHORUS
                    AVG_SME, 
                    AVG_AVG_B,
                    AVG_FLOW_SPEED, 
                    AVG_PROTON_DENSITY,
                    AVG_SYM_H]'''


order_to_sort_conjunctions = np.argsort(CONJUNCTIONS[:, 0]) #Sorted based on POES Conjunction time!
SORTED_CONJUNCTIONS = CONJUNCTIONS[order_to_sort_conjunctions, :]

print(f"Starting shape of conjunctions list: {SORTED_CONJUNCTIONS.shape}")

SORTED_POES_CONJUNCTION_TIMES = SORTED_CONJUNCTIONS[:, 0]

START_OF_SEP_EVENTS_UTC = SOLAR_PROTON_EVENT_LIST["START"]
END_OF_SEP_EVENTS_UTC = SOLAR_PROTON_EVENT_LIST["END"]
ZIPPED_EVENTS = list(zip(START_OF_SEP_EVENTS_UTC, END_OF_SEP_EVENTS_UTC))

print(f"Removing high energy solar proton events!")

for SEP_EVENT in tqdm.tqdm(range(len(ZIPPED_EVENTS))):
    
    START = ZIPPED_EVENTS[SEP_EVENT][0].strip()
    END = ZIPPED_EVENTS[SEP_EVENT][1].strip()
    
    START_YMDHMS = {'year': int(START[0:4]), 'month': int(START[5:7]), 'day': int(START[8:10]), 'hour': int(START[11:13]), 'minute': int(START[13:15]), 'second': 0}
    END_YMDHMS = {'year': int(END[0:4]), 'month': int(END[5:7]), 'day': int(END[8:10]), 'hour': int(END[11:13]), 'minute': int(END[13:15]), 'second': 0}
    
    START_UNIX = astropy.time.Time(START_YMDHMS, format="ymdhms", scale='utc').unix
    END_UNIX = astropy.time.Time(END_YMDHMS, format="ymdhms", scale='utc').unix

    RANGE_TO_REMOVE = np.searchsorted(a = SORTED_POES_CONJUNCTION_TIMES, v = [START_UNIX, END_UNIX])
    
    SORTED_CONJUNCTIONS = np.vstack((SORTED_CONJUNCTIONS[0:RANGE_TO_REMOVE[0], :], SORTED_CONJUNCTIONS[RANGE_TO_REMOVE[1]:, :]))

print(f"Finished removing high energy solar proton events!")

print(f"Saving!")

CLEANED_CONJUNCTIONS = SORTED_CONJUNCTIONS #Should be cleaned by now!

np.savez(f"./../processed_data/chorus_neural_network/STAGE_3/{VERSION}/CLEANED_CONJUNCTIONS_{VERSION}_{FIELD_MODEL}.npz",
        CONJUNCTIONS=CLEANED_CONJUNCTIONS)

C_POES_TIME = CLEANED_CONJUNCTIONS[:, 0]
C_POES_LSTAR = CLEANED_CONJUNCTIONS[:, 1]
C_POES_MLT = CLEANED_CONJUNCTIONS[:, 2]
C_POES_FLUX = CLEANED_CONJUNCTIONS[:, 3:-10]
C_RBSP_TIME = CLEANED_CONJUNCTIONS[:, -10]
C_RBSP_LSTAR = CLEANED_CONJUNCTIONS[:, -9]
C_RBSP_DEL_MLT = CLEANED_CONJUNCTIONS[:, -8]
C_RBSP_UPPER_BAND = CLEANED_CONJUNCTIONS[:, -7]
C_RBSP_LOWER_BAND = CLEANED_CONJUNCTIONS[:, -6]
C_AVG_SME = CLEANED_CONJUNCTIONS[:, -5]
C_AVG_AVG_B = CLEANED_CONJUNCTIONS[:, -4]
C_AVG_FLOW_SPEED = CLEANED_CONJUNCTIONS[:, -3]
C_AVG_PROTON_DENSITY = CLEANED_CONJUNCTIONS[:, -2]
C_AVG_SYM_H = CLEANED_CONJUNCTIONS[:, -1]

print(f"Creating documentation of dataset!")


with open(f"./../processed_data/chorus_neural_network/STAGE_3/{VERSION}/CLEANED_CONJUNCTIONS_{VERSION}_{FIELD_MODEL}.txt", "w") as f:
    f.write("\nConjunctions:\n")
    f.write(f"Number of conjunctions: {CLEANED_CONJUNCTIONS.shape[0]} [#]\n")
    f.write(f"Number lost from cleaning solar proton events: {CONJUNCTIONS.shape[0] - CLEANED_CONJUNCTIONS.shape[0]} [#]\n")
    f.write(f"Minimum RBSP Time: {np.min(C_RBSP_TIME)} [seconds since unix epoch]\n")
    f.write(f"Maximum RBSP Time: {np.max(C_RBSP_TIME)} [seconds since unix epoch]\n")
    f.write(f"Minimum POES Time: {np.min(C_POES_TIME)} [seconds since unix epoch]\n")
    f.write(f"Maximum POES Time: {np.max(C_POES_TIME)} [seconds since unix epoch]\n")
    
    f.write(f"\nL:\n")
    f.write(f"Mean Difference: {np.mean(C_POES_LSTAR - C_RBSP_LSTAR)} [L]\n")
    f.write(f"Standard deviation of Difference {np.std(C_POES_LSTAR - C_RBSP_LSTAR)} [L]\n")
    f.write(f"Minimum Absolute Difference : {np.min(np.abs(C_POES_LSTAR - C_RBSP_LSTAR))} [L]\n")
    f.write(f"Maximum Absolute Difference : {np.max(np.abs(C_POES_LSTAR - C_RBSP_LSTAR))} [L]\n")

    f.write("\nMLT: \n")
    f.write(f"Mean Absolute Difference: {np.mean(C_RBSP_DEL_MLT)} [MLT]\n")
    f.write(f"Standard deviation of Absolute Difference {np.std(C_RBSP_DEL_MLT)} [MLT]\n")
    f.write(f"Minimum Absolute Difference : {np.min(np.abs(C_RBSP_DEL_MLT))} [MLT]\n")
    f.write(f"Maximum Absolute Difference : {np.max(np.abs(C_RBSP_DEL_MLT))} [MLT]\n")

    f.write(f"\nTime: \n")
    f.write(f"Mean Difference: {np.mean(C_POES_TIME - C_RBSP_TIME)} [s]\n")
    f.write(f"Standard deviation of Difference {np.std(C_POES_TIME - C_RBSP_TIME)} [s]\n")
    f.write(f"Minimum Absolute Difference : {np.min(np.abs(C_POES_TIME - C_RBSP_TIME))} [s]\n")
    f.write(f"Maximum Absolute Difference : {np.max(np.abs(C_POES_TIME - C_RBSP_TIME))} [s]\n")

    f.write(f"\nUpper Band Chorus: \n")
    f.write(f"Mean: {np.mean(C_RBSP_UPPER_BAND)} [pT]\n")
    f.write(f"Standard Deviation: {np.std(C_RBSP_UPPER_BAND)} [pT]\n")
    f.write(f"Minimum: {np.min(C_RBSP_UPPER_BAND)} [pT]\n")
    f.write(f"Maximum: {np.max(C_RBSP_UPPER_BAND)} [pT]\n")
    
    f.write(f"\nLower Band Chorus: \n")
    f.write(f"Mean: {np.mean(C_RBSP_LOWER_BAND)} [pT]\n")
    f.write(f"Standard Deviation: {np.std(C_RBSP_LOWER_BAND)} [pT]\n")
    f.write(f"Minimum: {np.min(C_RBSP_LOWER_BAND)} [pT]\n")
    f.write(f"Maximum: {np.max(C_RBSP_LOWER_BAND)} [pT]\n")


    f.write(f"\nSME: \n")
    f.write(f"Mean: {np.mean(C_AVG_SME)} [nT]\n")
    f.write(f"Standard Deviation: {np.std(C_AVG_SME)} [nT]\n")
    f.write(f"Minimum: {np.min(C_AVG_SME)} [nT]\n")
    f.write(f"Maximum: {np.max(C_AVG_SME)} [nT]\n")

    f.write(f"\nAVG_B: \n")
    f.write(f"Mean: {np.mean(C_AVG_AVG_B)} [nT]\n")
    f.write(f"Standard Deviation: {np.std(C_AVG_AVG_B)} [nT]\n")
    f.write(f"Minimum: {np.min(C_AVG_AVG_B)} [nT]\n")
    f.write(f"Maximum: {np.max(C_AVG_AVG_B)} [nT]\n")

    f.write(f"\nFlow Speed: \n")
    f.write(f"Mean: {np.mean(C_AVG_FLOW_SPEED)} [km/s]\n")
    f.write(f"Standard Deviation: {np.std(C_AVG_FLOW_SPEED)} [km/s]\n")
    f.write(f"Minimum: {np.min(C_AVG_FLOW_SPEED)} [km/s]\n")
    f.write(f"Maximum: {np.max(C_AVG_FLOW_SPEED)} [km/s]\n")
    
    f.write(f"\nProton Density: \n")
    f.write(f"Mean: {np.mean(C_AVG_PROTON_DENSITY)} [n/cc]\n")
    f.write(f"Standard Deviation: {np.std(C_AVG_PROTON_DENSITY)} [n/cc]\n")
    f.write(f"Minimum: {np.min(C_AVG_PROTON_DENSITY)} [n/cc]\n")
    f.write(f"Maximum: {np.max(C_AVG_PROTON_DENSITY)} [n/cc]\n")
    
    f.write(f"\nSYM_H: \n")
    f.write(f"Mean: {np.mean(C_AVG_SYM_H)} [nT]\n")
    f.write(f"Standard Deviation: {np.std(C_AVG_SYM_H)} [nT]\n")
    f.write(f"Minimum: {np.min(C_AVG_SYM_H)} [nT]\n")
    f.write(f"Maximum: {np.max(C_AVG_SYM_H)} [nT]\n")

print(f"Finished!")
print(f"Ending shape of conjunctions : {CLEANED_CONJUNCTIONS.shape}")

Starting shape of conjunctions list: (888864, 21)
Removing high energy solar proton events!


  0%|          | 0/309 [00:00<?, ?it/s]

100%|██████████| 309/309 [00:05<00:00, 60.11it/s]


Finished removing high energy solar proton events!
Saving!
Creating documentation of dataset!
Finished!
Ending shape of conjunctions : (882645, 21)


In [4]:
#Stage 4, Create datasets used for training, testing, etc

VERSION = "v1a"
FIELD_MODEL = "T89"
MODEL_TYPE = "UPPER_BAND"


CONJUNCTIONS_REFS = np.load(f"./../processed_data/chorus_neural_network/STAGE_3/{VERSION}/CLEANED_CONJUNCTIONS_{VERSION}_{FIELD_MODEL}.npz")

CONJUNCTIONS = CONJUNCTIONS_REFS["CONJUNCTIONS"]

CONJUNCTIONS_REFS.close()

In [5]:
print(CONJUNCTIONS.shape)

C_RBSP_TIME = CONJUNCTIONS[:, -10]

jan1_unix = astropy.time.Time({"year":2016, "month":1, "day":1, "hour":0, "minute":0, "second":0}, format="ymdhms", scale="utc").unix
apr1_unix = astropy.time.Time({"year":2016, "month":4, "day":1, "hour":0, "minute":0, "second":0}, format="ymdhms", scale="utc").unix

where_between_feb1_apr1_2013 = (jan1_unix < C_RBSP_TIME) & (C_RBSP_TIME < apr1_unix)

train_test_subset_selected = ~where_between_feb1_apr1_2013
validation_subset_selected = where_between_feb1_apr1_2013

print(f"Number of conjunctions between feb1 and apr1 2013: {np.count_nonzero(where_between_feb1_apr1_2013)}")

C_POES_TIME = np.expand_dims(CONJUNCTIONS[train_test_subset_selected, 0], axis = 1)
C_POES_LSTAR = np.expand_dims(CONJUNCTIONS[train_test_subset_selected, 1], axis = 1)
C_POES_MLT = np.expand_dims(CONJUNCTIONS[train_test_subset_selected, 2], axis = 1)
C_POES_FLUX = CONJUNCTIONS[train_test_subset_selected, 3:-10]
C_RBSP_TIME = np.expand_dims(CONJUNCTIONS[train_test_subset_selected, -10], axis = 1)
C_RBSP_LSTAR = np.expand_dims(CONJUNCTIONS[train_test_subset_selected, -9], axis = 1)
C_RBSP_DEL_MLT = np.expand_dims(CONJUNCTIONS[train_test_subset_selected, -8], axis = 1)
C_RBSP_UPPER_BAND = np.expand_dims(CONJUNCTIONS[train_test_subset_selected, -7], axis = 1)
C_RBSP_LOWER_BAND = np.expand_dims(CONJUNCTIONS[train_test_subset_selected, -6], axis = 1)
C_AVG_SME = np.expand_dims(CONJUNCTIONS[train_test_subset_selected, -5], axis = 1)
C_AVG_AVG_B = np.expand_dims(CONJUNCTIONS[train_test_subset_selected, -4], axis = 1)
C_AVG_FLOW_SPEED = np.expand_dims(CONJUNCTIONS[train_test_subset_selected, -3], axis = 1)
C_AVG_PROTON_DENSITY = np.expand_dims(CONJUNCTIONS[train_test_subset_selected, -2], axis = 1)
C_AVG_SYM_H = np.expand_dims(CONJUNCTIONS[train_test_subset_selected, -1], axis = 1)

print(C_RBSP_TIME.shape)
print(C_RBSP_LSTAR.shape)
print(C_RBSP_UPPER_BAND.shape)
print(C_RBSP_LOWER_BAND.shape)
print(C_POES_TIME.shape)
print(C_POES_LSTAR.shape)
print(C_POES_MLT.shape)
print(C_RBSP_DEL_MLT.shape)
print(C_POES_FLUX.shape)
print(C_AVG_SME.shape)
print(C_AVG_AVG_B.shape)
print(C_AVG_FLOW_SPEED.shape)
print(C_AVG_PROTON_DENSITY.shape)
print(C_AVG_SYM_H.shape)

mean_LSTAR = np.nanmean(C_POES_LSTAR)
std_LSTAR = np.std(C_POES_LSTAR)

mean_fluxes = np.expand_dims(np.nanmean(np.log(C_POES_FLUX), axis = 0), axis=0)
std_fluxes = np.expand_dims(np.nanstd(np.log(C_POES_FLUX), axis = 0), axis = 0)

mean_sme = np.nanmean(C_AVG_SME)
std_sme = np.std(C_AVG_SME)

mean_avg_b = np.nanmean(C_AVG_AVG_B)
std_avg_b = np.std(C_AVG_AVG_B)

mean_flow_speed = np.nanmean(C_AVG_FLOW_SPEED)
std_flow_speed = np.std(C_AVG_FLOW_SPEED)

mean_avg_proton_density = np.nanmean(C_AVG_PROTON_DENSITY)
std_avg_proton_density = np.std(C_AVG_PROTON_DENSITY)

mean_avg_sym_h = np.nanmean(C_AVG_SYM_H)
std_avg_sym_h = np.std(C_AVG_SYM_H)

FEATURES = np.hstack(((C_POES_LSTAR - mean_LSTAR) / std_LSTAR, 
                      np.sin((C_POES_MLT * 2 * np.pi) / 24.0), 
                      np.cos((C_POES_MLT * 2 * np.pi) / 24.0),
                      ((np.log(C_POES_FLUX) - mean_fluxes) / std_fluxes)[:, [0, -1]],
                      (C_AVG_SME - mean_sme)  / std_sme,
                      (C_AVG_AVG_B - mean_avg_b) / std_avg_b,
                      (C_AVG_FLOW_SPEED - mean_flow_speed) / std_flow_speed,
                      (C_AVG_PROTON_DENSITY - mean_avg_proton_density) / std_avg_proton_density,
                      (C_AVG_SYM_H - mean_avg_sym_h) / std_avg_sym_h))


#SMALL VALIDATION SET:
C_POES_TIME_V = np.expand_dims(CONJUNCTIONS[validation_subset_selected, 0], axis = 1)
C_POES_L_V = np.expand_dims(CONJUNCTIONS[validation_subset_selected, 1], axis = 1)
C_POES_MLT_V = np.expand_dims(CONJUNCTIONS[validation_subset_selected, 2], axis = 1)
C_POES_FLUX_V = CONJUNCTIONS[validation_subset_selected, 3:-10]
C_RBSP_TIME_V = np.expand_dims(CONJUNCTIONS[validation_subset_selected, -10], axis = 1)
C_RBSP_L_V = np.expand_dims(CONJUNCTIONS[validation_subset_selected, -9], axis = 1)
C_RBSP_DEL_MLT_V = np.expand_dims(CONJUNCTIONS[validation_subset_selected, -8], axis = 1)
C_RBSP_UPPER_BAND_V = np.expand_dims(CONJUNCTIONS[validation_subset_selected, -7], axis = 1)
C_RBSP_LOWER_BAND_V = np.expand_dims(CONJUNCTIONS[validation_subset_selected, -6], axis = 1)
C_AVG_SME_V = np.expand_dims(CONJUNCTIONS[validation_subset_selected, -5], axis = 1)
C_AVG_AVG_B_V = np.expand_dims(CONJUNCTIONS[validation_subset_selected, -4], axis = 1)
C_AVG_FLOW_SPEED_V = np.expand_dims(CONJUNCTIONS[validation_subset_selected, -3], axis = 1)
C_AVG_PROTON_DENSITY_V = np.expand_dims(CONJUNCTIONS[validation_subset_selected, -2], axis = 1)
C_AVG_SYM_H_V = np.expand_dims(CONJUNCTIONS[validation_subset_selected, -1], axis = 1)

print(C_RBSP_TIME_V.shape)
print(C_RBSP_L_V.shape)
print(C_RBSP_UPPER_BAND_V.shape)
print(C_RBSP_LOWER_BAND_V.shape)
print(C_POES_TIME_V.shape)
print(C_POES_L_V.shape)
print(C_POES_MLT_V.shape)
print(C_RBSP_DEL_MLT_V.shape)
print(C_POES_FLUX_V.shape)
print(C_AVG_SME_V.shape)
print(C_AVG_AVG_B_V.shape)
print(C_AVG_FLOW_SPEED_V.shape)
print(C_AVG_PROTON_DENSITY_V.shape)
print(C_AVG_SYM_H_V.shape)


VALIDATION_FEATURES = np.hstack(((C_POES_L_V - mean_LSTAR) / std_LSTAR, 
                                np.sin((C_POES_MLT_V * 2 * np.pi) / 24.0), 
                                np.cos((C_POES_MLT_V * 2 * np.pi) / 24.0),
                                ((np.log(C_POES_FLUX_V) - mean_fluxes) / std_fluxes)[:, [0, -1]],
                                (C_AVG_SME_V - mean_sme) / std_sme,
                                (C_AVG_AVG_B_V - mean_avg_b) / std_avg_b,
                                (C_AVG_FLOW_SPEED_V - mean_flow_speed) / std_flow_speed,
                                (C_AVG_PROTON_DENSITY_V - mean_avg_proton_density) / std_avg_proton_density,
                                (C_AVG_SYM_H_V - mean_avg_sym_h) / std_avg_sym_h))
        
if MODEL_TYPE == "UPPER_BAND":
        MODEL_LABELS = C_RBSP_UPPER_BAND
        MODEL_LABELS_V = C_RBSP_UPPER_BAND_V
elif MODEL_TYPE == "LOWER_BAND":
        MODEL_LABELS = C_RBSP_LOWER_BAND
        MODEL_LABELS_V = C_RBSP_LOWER_BAND_V

np.savez(f"./../processed_data/chorus_neural_network/STAGE_4/{VERSION}/MODEL_READY_DATA_{VERSION}_{FIELD_MODEL}_{MODEL_TYPE}.npz",
        FEATURES = FEATURES,
        LABELS = MODEL_LABELS,
        VALIDATION_FEATURES = VALIDATION_FEATURES,
        VALIDATION_LABELS = MODEL_LABELS_V,
        TRAINING_MLT = C_POES_MLT,
        MEAN_FLUXES = mean_fluxes,
        STD_FLUXES = std_fluxes,
        MEAN_SME = mean_sme,
        STD_SME = std_sme,
        MEAN_AVG_B = mean_avg_b,
        STD_AVG_B = std_avg_b,
        MEAN_FLOW_SPEED = mean_flow_speed,
        STD_FLOW_SPEED = std_flow_speed,
        MEAN_AVG_PROTON_DENSITY = mean_avg_proton_density,
        STD_AVG_PROTON_DENSITY = std_avg_proton_density,
        MEAN_AVG_SYM_H = mean_avg_sym_h,
        STD_AVG_SYM_H = std_avg_sym_h)

(882645, 21)
Number of conjunctions between feb1 and apr1 2013: 16887
(865758, 1)
(865758, 1)
(865758, 1)
(865758, 1)
(865758, 1)
(865758, 1)
(865758, 1)
(865758, 1)
(865758, 8)
(865758, 1)
(865758, 1)
(865758, 1)
(865758, 1)
(865758, 1)
(16887, 1)
(16887, 1)
(16887, 1)
(16887, 1)
(16887, 1)
(16887, 1)
(16887, 1)
(16887, 1)
(16887, 8)
(16887, 1)
(16887, 1)
(16887, 1)
(16887, 1)
(16887, 1)
