In [4]:
import sys
import os
# caution: path[0] is reserved for script path (or '' in REPL).
sys.path.insert(1, os.path.abspath('./../src'))


import data_loader
import datetime
from cdflib.epochs_astropy import CDFAstropy as cdfepoch
import numpy as np
import matplotlib.pyplot as plt

import astropy.time
import pandas as pd
import tqdm
import rbsp_chorus_tool

import spacepy.irbempy
import spacepy.coordinates
import spacepy.time
import os_helper
import tqdm

import chorus_machine_learning_helper

import importlib

importlib.reload(data_loader)
importlib.reload(rbsp_chorus_tool)
%matplotlib qt

In [5]:
#Stage 0

def calculate_lstar_for_chunk(time, lat, lon, chunk : tuple):
    
    ticks = spacepy.time.Ticktock(time[chunk[0] : chunk[-1]], dtype="UNX")
    
    alt = np.array([807 for i in range(len(time[chunk[0] : chunk[-1]]))])
    
    coords_data = np.vstack((alt, lat[chunk[0] : chunk[-1]], lon[chunk[0] : chunk[-1]]))
    coords = spacepy.coordinates.Coords(data=coords_data.T, carsph="sph", dtype="GDZ", units=["km", "deg", "deg"])
    
    return spacepy.irbempy.get_Lstar(ticks = ticks, loci = coords, alpha=0, extMag="T89")["Lstar"].flatten()


year = 1998
start_of_year_UNIX_TIME = datetime.datetime(year = year, month = 1, day = 1).timestamp()
end_of_year_UNIX_TIME = datetime.datetime(year = year + 1, month = 1, day = 1).timestamp()
equally_spaced_minutes = np.arange(start = start_of_year_UNIX_TIME, stop = end_of_year_UNIX_TIME + 60, step = 60)

MPE = chorus_machine_learning_helper.load_MPE_year(year)

print("Loaded the following satellites:")
print([(s["SATID"], len(s["time"])) for s in MPE])

Finished loading POES data for year : 1998
Loaded the following satellites:
[('n15', 775836)]


In [6]:
data_processed = {}

for SAT in MPE:
    
    unix_times_of_averages = []
    avg_geog_lat = []
    avg_geog_lon = []
    
    for MINUTE in equally_spaced_minutes:
        
        TIME_RANGE = np.searchsorted(a = SAT["UNIX_TIME"], v = [(MINUTE - 30), (MINUTE + 30)])
        
        if (TIME_RANGE[0] != TIME_RANGE[1]):
            
            unix_times_of_averages.append(MINUTE)    
            avg_geog_lat.append(np.nanmean(SAT["geogLat"][TIME_RANGE[0] : TIME_RANGE[1]]))
            y_of_lon = np.nanmean(np.sin(np.deg2rad(SAT["geogLon"][TIME_RANGE[0] : TIME_RANGE[1]])))
            x_of_lon = np.nanmean(np.cos(np.deg2rad(SAT["geogLon"][TIME_RANGE[0] : TIME_RANGE[1]])))            
            avg_geog_lon.append(np.mod((np.rad2deg(np.arctan2(y_of_lon, x_of_lon)) + 360), 360))
        
    unix_times_of_averages = np.array(unix_times_of_averages)
    avg_geog_lat = np.array(avg_geog_lat)
    avg_geog_lon = np.array(avg_geog_lon)
    
    unix_times_of_averages = unix_times_of_averages[np.isfinite(unix_times_of_averages)]
    avg_geog_lat = avg_geog_lat[np.isfinite(avg_geog_lat)]
    avg_geog_lon = avg_geog_lon[np.isfinite(avg_geog_lon)]
    
    big_distances = np.nonzero(((unix_times_of_averages[1:] - unix_times_of_averages[:-1]) > 60))[0] + 1
        
    queued_work = []
    cut_size = 5000
    
    N = len(unix_times_of_averages)
    N_CUTS = N // cut_size
    
    if N % cut_size != 0:
        N_CUTS += 1
    
    for i in range(N_CUTS):
        
        if ((i+1) * cut_size) > N:
            queued_work.append((i * cut_size, N))
        else:
            queued_work.append((i * cut_size, (i+1) * cut_size))
            
        break
                        
    #--------------------------------------------------

    print(queued_work)
            
    lstar_in_chunks = []
    
    for work in tqdm.tqdm(queued_work):
        
        lstar_in_chunks.append(calculate_lstar_for_chunk(unix_times_of_averages, avg_geog_lat, avg_geog_lon, work))
            
    lstar_calculated = np.abs(np.hstack(lstar_in_chunks))
        
    lstar_interpolated = np.zeros_like(SAT["UNIX_TIME"])
    lstar_interpolated[:] = np.nan
    
    if len(big_distances) > 0:
        
        print("There were big distances to deal with, probably should check the data!")
                
        for m, d in enumerate(big_distances):
            
            if m == 0 :
                
                start_index = 0
                end_index = d
                
            else:
                
                start_index = big_distances[m - 1]
                end_index = d
                
            interpolated_between_big_distances = np.interp(SAT["UNIX_TIME"], unix_times_of_averages[start_index:end_index], lstar_calculated[start_index:end_index], left=np.nan, right=np.nan)
            non_nan_values = np.isfinite(interpolated_between_big_distances)
            lstar_interpolated[non_nan_values] = interpolated_between_big_distances[non_nan_values]
        
        #Get the last chunk
        
        start_index = big_distances[-1]
        
        interpolated_between_big_distances = np.interp(SAT["UNIX_TIME"], unix_times_of_averages[start_index:], lstar_calculated[start_index:], left=np.nan, right=np.nan)
        non_nan_values = np.isfinite(interpolated_between_big_distances)
        lstar_interpolated[non_nan_values] = interpolated_between_big_distances[non_nan_values]

    else:
        
        lstar_interpolated = np.interp(SAT["UNIX_TIME"], unix_times_of_averages, lstar_calculated, left=np.nan, right=np.nan)
    
    print(f"Finished processing data for : {SAT["SATID"]}")
    
    data_processed[SAT["SATID"]] = {"UNIX_TIME" : SAT["UNIX_TIME"],
                                    "BLC_Angle" : SAT["BLC_Angle"],
                                    "BLC_Flux" : SAT["BLC_Flux"],
                                    "MLT" : SAT["MLT"],
                                    "Lstar": lstar_interpolated,
                                    "L" : SAT["lValue"],
                                    "geogLat" : SAT["geogLat"],
                                    "geogLon" : SAT["geogLon"]}

output_dir = os.path.abspath(os.path.join("./../processed_data_chorus_neural_network/STAGE_0/MPE_DATA_PREPROCESSED_WITH_LSTAR"))
os_helper.verify_output_dir_exists(output_dir, force_creation = True, hint="Output directory for L*")

print(f"Saving data for : {year} to : {output_dir}")

np.savez(file = os.path.abspath(os.path.join(output_dir, f"MPE_PREPROCESSED_DATA_T89_{year}_Test.npz")), DATA = data_processed)

[(0, 5000)]


100%|██████████| 1/1 [00:09<00:00,  9.85s/it]

Time taken: 9.699092388153076
There were big distances to deal with, probably should check the data!





ValueError: fp and xp are not of the same length.

In [7]:
points = np.array([(lstar_calculated[i], avg_geog_lat[i], avg_geog_lon[i]) for i in range(len(lstar_calculated))])[0:25]

print(points)

[[         nan -18.23640013 295.16536712]
 [         nan -14.47854948 294.27519226]
 [         nan -11.81563314 293.65886471]
 [         nan  -7.89704967 292.76546478]
 [         nan  -4.1329748  291.91870117]
 [         nan  -1.30914995 291.28744507]
 [         nan   2.92729986 290.34155273]
 [         nan   6.22227478 289.60281372]
 [         nan   9.98727489 288.75057984]
 [         nan  13.28026613 287.99417114]
 [         nan  16.57139969 287.22281647]
 [         nan  20.32972527 286.31859589]
 [         nan  24.08417463 285.38284303]
 [         nan  27.36526616 284.53241984]
 [         nan  30.64169979 283.6426697 ]
 [         nan  34.37982464 282.57037356]
 [         nan  38.10979843 281.42317967]
 [         nan  41.36563238 280.34506227]
 [         nan  44.61224842 279.17397317]
 [         nan  48.30984783 277.70134749]
 [         nan  51.99032307 276.04101585]
 [         nan  55.19343313 274.39277149]
 [         nan  58.37449741 272.48529885]
 [         nan  75.27612495 139.41

In [26]:
dt_for_all = [datetime.datetime.fromtimestamp(t) for t in SAT["UNIX_TIME"]]

plt.plot(dt_for_all, lstar_interpolated, color="black")
plt.plot(dt_for_all, SAT["lValue"])

plt.show()

KeyError: 'lValue'

In [27]:
refs = np.load(fr"./../processed_data_chorus_neural_network/STAGE_0/MPE_DATA_PREPROCESSED_WITH_LSTAR/MPE_PREPROCESSED_DATA_T89_1998.npz", allow_pickle=True)

print(refs)

DATA = refs["DATA"].flatten()[0]
print(DATA)
SAT = DATA['n15']
print(type(SAT))

dt_for_all = [datetime.datetime.fromtimestamp(t) for t in SAT["UNIX_TIME"]]

plt.plot(dt_for_all, SAT["Lstar"], label="L*", color = "red")
plt.plot(dt_for_all, SAT["L"], label = "IGRF Lm", color = "black")
plt.legend()

plt.show()

NpzFile './../processed_data_chorus_neural_network/STAGE_0/MPE_DATA_PREPROCESSED_WITH_LSTAR/MPE_PREPROCESSED_DATA_T89_1998.npz' with keys: DATA
{'n15': {'UNIX_TIME': array([8.99337638e+08, 8.99337654e+08, 8.99337670e+08, ...,
       9.15062372e+08, 9.15062388e+08, 9.15062404e+08]), 'BLC_Angle': array([60.0606569 , 60.22211631, 60.40531253, ..., 49.47817342,
       49.62623049, 49.7966486 ]), 'BLC_Flux': array([[1.37677305e+04, 9.73845659e+03, 6.84373393e+03, ...,
        2.24349889e+03, 1.50539311e+03, 9.91196794e+02],
       [1.54514233e+04, 1.02519394e+04, 6.79301765e+03, ...,
        1.95346492e+03, 1.28230903e+03, 8.38451713e+02],
       [1.30353254e+04, 8.68605184e+03, 5.78025291e+03, ...,
        1.68366880e+03, 1.10983673e+03, 7.28665827e+02],
       ...,
       [1.18595952e+00, 1.25027509e+00, 1.31661439e+00, ...,
        1.51755838e+00, 1.58062968e+00, 1.63832745e+00],
       [1.06218402e+00, 1.12195677e+00, 1.18419677e+00, ...,
        1.37558442e+00, 1.43558087e+00, 1.489882

  el.exec() if hasattr(el, "exec") else el.exec_()


In [None]:
#Interface for stage 1, Designed to do a year at a time

year = 2019

In [None]:
#Stage 1 RBSP Chorus Preprocessing, Obtains clean chorus amplitudes

start = datetime.datetime(year = year, month = 1, day = 1)
end = datetime.datetime(year = year + 1, month = 1, day = 1)

WNA_survey_a = data_loader.load_raw_data_from_config(id=["RBSP", "EMFISIS", "L4", "WNA_SURVEY"],
                                                     start=start,
                                                     end=end,
                                                     satellite="a")

WNA_survey_b = data_loader.load_raw_data_from_config(id=["RBSP", "EMFISIS", "L4", "WNA_SURVEY"],
                                                     start=start,
                                                     end=end,
                                                     satellite="b")

WFR_spectral_matrix_a = data_loader.load_raw_data_from_config(id=["RBSP", "EMFISIS", "L2", "WFR_SPECTRAL_MATRIX_DIAGONAL"],
                                                              start=start,
                                                              end=end,
                                                              satellite="a")

WFR_spectral_matrix_b = data_loader.load_raw_data_from_config(id=["RBSP", "EMFISIS", "L2", "WFR_SPECTRAL_MATRIX_DIAGONAL"],
                                                              start=start,
                                                              end=end,
                                                              satellite="b")

In [None]:
mlt_A = WNA_survey_a["MLT"]
L_A = WNA_survey_a["L"]
epoch_A = WNA_survey_a["Epoch"]

time_A = astropy.time.Time(cdfepoch.to_datetime(epoch_A), format="datetime").utc

chorus_A = rbsp_chorus_tool.iterate_through_days_and_calculate_chorus_amplitudes(WNA_survey = WNA_survey_a,
                                                                                 WFR_spectral_matrix = WFR_spectral_matrix_a)


within_epoch_range_A = (start < time_A) & (time_A < end)
finite_chorus_A = np.isfinite(chorus_A)
#This line might not be necessary but we want to train on clean data, literally any np.nan will fuck it ALL up. Ill probably double check before training
all_positive_coordinates_A = (epoch_A > 0) & (mlt_A > 0) & (L_A > 0)

epoch_A = epoch_A[within_epoch_range_A & finite_chorus_A & all_positive_coordinates_A]
L_A = L_A[within_epoch_range_A & finite_chorus_A & all_positive_coordinates_A]
mlt_A = mlt_A[within_epoch_range_A & finite_chorus_A & all_positive_coordinates_A]
chorus_A = chorus_A[within_epoch_range_A & finite_chorus_A & all_positive_coordinates_A]


mlt_B = WNA_survey_b["MLT"]
L_B = WNA_survey_b["L"]
epoch_B = WNA_survey_b["Epoch"]
time_B = astropy.time.Time(cdfepoch.to_datetime(epoch_B), format="datetime").utc

chorus_B = rbsp_chorus_tool.iterate_through_days_and_calculate_chorus_amplitudes(WNA_survey = WNA_survey_b,
                                                                                 WFR_spectral_matrix = WFR_spectral_matrix_b)

within_epoch_range_B = (start < time_B) & (time_B < end)
finite_chorus_B = np.isfinite(chorus_B)
all_positive_coordinates_B = (epoch_B > 0) & (mlt_B > 0) & (L_B > 0)

epoch_B = epoch_B[within_epoch_range_B & finite_chorus_B & all_positive_coordinates_B]
L_B = L_B[within_epoch_range_B & finite_chorus_B & all_positive_coordinates_B]
mlt_B = mlt_B[within_epoch_range_B & finite_chorus_B & all_positive_coordinates_B]
chorus_B = chorus_B[within_epoch_range_B & finite_chorus_B & all_positive_coordinates_B]

print(epoch_A.shape)
print(chorus_A.shape)
print(L_A.shape)

print(epoch_B.shape)
print(chorus_B.shape)
print(L_B.shape)

In [None]:
#Save the RBSP stage 1 data, might honestly only need one stage
np.savez(file = os.path.abspath(f"./../processed_data_chorus_neural_network/STAGE_1/RBSP_OBSERVED_CHORUS_{year}.npz"), 
         EPOCH_A = epoch_A, 
         MLT_A = mlt_A, 
         L_A = L_A, 
         CHORUS_A = chorus_A,
         EPOCH_B = epoch_B, 
         MLT_B = mlt_B, 
         L_B = L_B, 
         CHORUS_B = chorus_B)


In [7]:
#Stage 2, clean then combine RBSP, OMNI, and POES Data and find conjunctions between RBSP and POES

MAX_L_DIFF = 0.10
MAX_MLT_DIFF = 1.5
MAX_T_DIFF_SEC = 60

L_SCALE = (1.0 / MAX_L_DIFF)**2
MLT_SCALE = (1.0 / MAX_MLT_DIFF)**2
TIME_SCALE = (1.0 / MAX_T_DIFF_SEC)**2

CONJUNCTIONS_TOTAL = []

for _year in range(2012, 2020, 1):
    
    print(f"Began processing year : {_year}")
    
    print(f"Began loading RBSP Data for year: {_year}")
    refs = np.load(f"./../processed_data_chorus_neural_network/STAGE_1/RBSP_OBSERVED_CHORUS_{_year}.npz")
    RBSP_A = {}
    RBSP_A["EPOCH"] = refs["EPOCH_A"]
    RBSP_A["MLT"] = refs["MLT_A"]
    RBSP_A["L"] = refs["L_A"]
    RBSP_A["CHORUS"] = refs["CHORUS_A"]
    
    RBSP_B = {}
    RBSP_B["EPOCH"] = refs["EPOCH_B"]
    RBSP_B["MLT"] = refs["MLT_B"]
    RBSP_B["L"] = refs["L_B"]
    RBSP_B["CHORUS"] = refs["CHORUS_B"]
    
    refs.close()
    
    RBSP_A["UNIX_TIME"] = cdfepoch.unixtime(RBSP_A["EPOCH"])
    RBSP_B["UNIX_TIME"] = cdfepoch.unixtime(RBSP_B["EPOCH"])
    
    order_A = np.argsort(RBSP_A["UNIX_TIME"])
    order_B = np.argsort(RBSP_B["UNIX_TIME"])
    
    RBSP_A["UNIX_TIME"] = RBSP_A["UNIX_TIME"][order_A]
    RBSP_A["EPOCH"] = RBSP_A["EPOCH"][order_A]
    RBSP_A["MLT"] = RBSP_A["MLT"][order_A]
    RBSP_A["L"] = RBSP_A["L"][order_A]
    RBSP_A["CHORUS"] = RBSP_A["CHORUS"][order_A]

    RBSP_B["UNIX_TIME"] = RBSP_B["UNIX_TIME"][order_B]
    RBSP_B["EPOCH"] = RBSP_B["EPOCH"][order_B]
    RBSP_B["MLT"] = RBSP_B["MLT"][order_B]
    RBSP_B["L"] = RBSP_B["L"][order_B]
    RBSP_B["CHORUS"] = RBSP_B["CHORUS"][order_B]
    
    RBSP = [RBSP_A, RBSP_B]
    print(f"RBSP Data loaded for year : {_year}")
    
    print(f"Began loading POES Data for year : {_year}")
    POES = []
    
    for SAT in ["m01", "m02", "m03", "n15", "n16", "n17", "n18", "n19"]:
        
        POES_sat_refs = data_loader.load_raw_data_from_config(id=["POES", "SEM", "MPE"],
                                                              satellite=SAT,
                                                              start=datetime.datetime(year=_year, month=1, day=1),
                                                              end=datetime.datetime(year=_year, month=12, day=31, hour=23, minute=59, second=59))
        
        if POES_sat_refs:
            
            
            #This was done cause I wanted to scale the MLT before cleaning but Im lazy
            if _year < 2014:
                POES_sat_refs["MLT"] = (POES_sat_refs["MLT"] / 360.0) * 24.0
            
            valid_times = np.isfinite(POES_sat_refs["time"]) & (0 < POES_sat_refs["time"])
            valid_BLC_Angle = np.isfinite(POES_sat_refs["BLC_Angle"]) & (0 < POES_sat_refs["BLC_Angle"])
            valid_BLC_Flux = np.all(np.isfinite(POES_sat_refs["BLC_Flux"][:, :8]), axis=1) & np.all((0 < POES_sat_refs["BLC_Flux"][:, :8]), axis=1)
            valid_MLT = np.isfinite(POES_sat_refs["MLT"]) & (0 < POES_sat_refs["MLT"]) & (POES_sat_refs["MLT"] < 24)
            valid_L = np.isfinite(POES_sat_refs["lValue"]) & (0 < POES_sat_refs["lValue"]) & (POES_sat_refs["lValue"] < 10)
            valid_points = valid_times & valid_BLC_Angle & valid_BLC_Flux & valid_MLT & valid_L
            
            if np.any(valid_points):
                
                POES_sat_refs["time"] = POES_sat_refs["time"][valid_points]
                POES_sat_refs["BLC_Angle"] = POES_sat_refs["BLC_Angle"][valid_points]
                POES_sat_refs["BLC_Flux"] = POES_sat_refs["BLC_Flux"][valid_points, :8]
                POES_sat_refs["MLT"] = POES_sat_refs["MLT"][valid_points]
                POES_sat_refs["lValue"] = POES_sat_refs["lValue"][valid_points]
            
                if _year < 2014:
                                    
                    POES_sat_refs["UNIX_TIME"] = cdfepoch.unixtime(POES_sat_refs["time"])
                else:
                    POES_sat_refs["UNIX_TIME"] = (POES_sat_refs["time"] / 1000)
                    
                #Sort them so assumptions for binary search are satisfied:
                order = np.argsort(POES_sat_refs["UNIX_TIME"])
                POES_sat_refs["time"] = POES_sat_refs["time"][order]
                POES_sat_refs["UNIX_TIME"] = POES_sat_refs["UNIX_TIME"][order]
                POES_sat_refs["BLC_Angle"] = POES_sat_refs["BLC_Angle"][order]
                POES_sat_refs["BLC_Flux"] = POES_sat_refs["BLC_Flux"][order, :]
                POES_sat_refs["MLT"] = POES_sat_refs["MLT"][order]
                POES_sat_refs["lValue"] = POES_sat_refs["lValue"][order]
                
                POES.append(POES_sat_refs)
    
    if not POES:
        print(f"No POES satellite coverage found for year : {_year}")
        print(f"SKIPPING YEAR : {_year}")
        continue
    
    print(f"Finished loading POES data for year : {_year}")
    
    print(f"Began loading SUPERMAG data for year : {_year}")
    SUPERMAG_df = pd.read_csv(f"./../processed_data_chorus_neural_network/SUPERMAG_SME/sme_{_year}.csv")
    SUPERMAG = {}
    
    valid_SME = np.isfinite(SUPERMAG_df["SME"]) & (0 < SUPERMAG_df["SME"])
    
    if not np.any(valid_SME):
        print(f"No valid SME for year : {_year}")
        print(f"SKIPPING YEAR : {_year}")
        continue
    
    SUPERMAG["SME"] = np.array(SUPERMAG_df["SME"][valid_SME])
    SUPERMAG["Date_UTC"] = np.array(SUPERMAG_df["Date_UTC"][valid_SME])
    SUPERMAG["UNIX_TIME"] = astropy.time.Time(SUPERMAG["Date_UTC"].astype(str), scale="utc", in_subfmt='date_hms').unix
    
    order = np.argsort(SUPERMAG["UNIX_TIME"])
    SUPERMAG["SME"] = SUPERMAG["SME"][order]
    SUPERMAG["Date_UTC"] = SUPERMAG["Date_UTC"][order]
    SUPERMAG["UNIX_TIME"] = SUPERMAG["UNIX_TIME"][order]
    
    print(f"Finished loading SUPERMAG data for year : {_year}")
    
    print(f"Began loading OMNI data for year : {_year}")
    OMNI_refs = data_loader.load_raw_data_from_config(id = ["OMNI", "ONE_MIN_RESOLUTION"], 
                                                 start = datetime.datetime(year=_year, month=1, day=1),
                                                 end = datetime.datetime(year=_year, month=12, day=31, hour=23, minute=59, second=59))
    OMNI = {}
    
    valid_times = np.isfinite(OMNI_refs["Epoch"]) & (0 < OMNI_refs["Epoch"])
    valid_AVG_B = np.isfinite(OMNI_refs["F"]) & (0 <= OMNI_refs["F"]) & (OMNI_refs["F"] < 9990)
    valid_FLOW_SPEED = np.isfinite(OMNI_refs["flow_speed"]) & (0 <= OMNI_refs["flow_speed"]) & (OMNI_refs["flow_speed"] < 99900)
    valid_PROTON_DENSITY = np.isfinite(OMNI_refs["proton_density"]) & (-900 <= OMNI_refs["proton_density"]) & (OMNI_refs["proton_density"] < 900)
    valid_SYM_H = np.isfinite(OMNI_refs["SYM_H"]) & (-99000 <= OMNI_refs["SYM_H"]) & (OMNI_refs["SYM_H"] < 99900)
    valid_points = valid_times & valid_AVG_B & valid_FLOW_SPEED & valid_PROTON_DENSITY & valid_SYM_H
    
    if(not np.any(valid_points)):
        print(f"No valid OMNI DATA for year : {_year}")
        print(f"SKIPPING YEAR : {_year}")
        continue
    
    OMNI["EPOCH"] = OMNI_refs["Epoch"][valid_points]
    OMNI["UNIX_TIME"] = cdfepoch.unixtime(OMNI_refs["Epoch"][valid_points])
    OMNI["AVG_B"] = OMNI_refs["F"][valid_points]
    OMNI["FLOW_SPEED"] = OMNI_refs["flow_speed"][valid_points]
    OMNI["PROTON_DENSITY"] = OMNI_refs["proton_density"][valid_points]
    OMNI["SYM_H"] = OMNI_refs["SYM_H"][valid_points]
    
    order = np.argsort(OMNI["UNIX_TIME"])
    OMNI["EPOCH"] = OMNI["EPOCH"][order]
    OMNI["UNIX_TIME"] = OMNI["UNIX_TIME"][order]
    OMNI["AVG_B"] = OMNI["AVG_B"][order]
    OMNI["FLOW_SPEED"] = OMNI["FLOW_SPEED"][order]
    OMNI["PROTON_DENSITY"] = OMNI["PROTON_DENSITY"][order]
    OMNI["SYM_H"] = OMNI["SYM_H"][order]
    
    print(f"Finished loading OMNI data for year : {_year}")
    
    print(f"Finding CONJUNCTIONS for year : {_year}")
    CONJUNCTIONS_YEAR = []
    for POES_SAT in POES:
        
        NUMBER_OF_RECORDS = len(POES_SAT["UNIX_TIME"])
        CONJUNCTIONS = []
        
        print(f"Number of records: {NUMBER_OF_RECORDS}")
                
        for T in tqdm.tqdm(range(NUMBER_OF_RECORDS)):
            
            UNIX_TIME = POES_SAT["UNIX_TIME"][T]
            L = POES_SAT["lValue"][T]
            MLT = POES_SAT["MLT"][T]
            FLUX_SPECTRUM = POES_SAT["BLC_Flux"][T, :]
                        
            for RBSP_PROBE in RBSP:
                
                TIME_RANGE = np.searchsorted(a = RBSP_PROBE["UNIX_TIME"], v = [(UNIX_TIME - MAX_T_DIFF_SEC), (UNIX_TIME + MAX_T_DIFF_SEC)])

                NUM_CANDIDATES = 0
                TOTAL_TIME = 0
                TOTAL_L = 0
                TOTAL_DEL_MLT = 0
                TOTAL_CHORUS = 0
                
                for POINT in range(TIME_RANGE[0], TIME_RANGE[1], 1):
                    
                    del_L = (L - RBSP_PROBE["L"][POINT])
                    del_MLT = np.min( [(max(MLT, RBSP_PROBE["MLT"][POINT]) -  min(MLT, RBSP_PROBE["MLT"][POINT])),
                                      ((24 - max(MLT, RBSP_PROBE["MLT"][POINT])) + (min(MLT, RBSP_PROBE["MLT"][POINT]) - 0))])
                    
                    if (del_L**2 < MAX_L_DIFF**2) and (del_MLT**2 < MAX_MLT_DIFF**2):
                                                
                        NUM_CANDIDATES += 1
                        TOTAL_TIME += RBSP_PROBE["UNIX_TIME"][POINT]
                        TOTAL_L += RBSP_PROBE["L"][POINT]
                        TOTAL_DEL_MLT += del_MLT
                        TOTAL_CHORUS += RBSP_PROBE["CHORUS"][POINT]
                        
                if NUM_CANDIDATES == 0:
                    continue
                
                TIME_RANGE = np.searchsorted(a = SUPERMAG["UNIX_TIME"], v = [(UNIX_TIME - MAX_T_DIFF_SEC), (UNIX_TIME + MAX_T_DIFF_SEC)])
                AVG_SME = np.nanmean(SUPERMAG["SME"][TIME_RANGE[0]:TIME_RANGE[1]])

                TIME_RANGE = np.searchsorted(a = OMNI["UNIX_TIME"], v = [(UNIX_TIME - MAX_T_DIFF_SEC), (UNIX_TIME + MAX_T_DIFF_SEC)])
                AVG_AVG_B = np.nanmean(OMNI["AVG_B"][TIME_RANGE[0]:TIME_RANGE[1]])
                AVG_FLOW_SPEED = np.nanmean(OMNI["FLOW_SPEED"][TIME_RANGE[0]:TIME_RANGE[1]])
                AVG_PROTON_DENSITY = np.nanmean(OMNI["PROTON_DENSITY"][TIME_RANGE[0]:TIME_RANGE[1]])
                AVG_SYM_H = np.nanmean(OMNI["SYM_H"][TIME_RANGE[0]:TIME_RANGE[1]])
                
                if np.isfinite(AVG_SME) & np.isfinite(AVG_AVG_B) & np.isfinite(AVG_FLOW_SPEED) & np.isfinite(AVG_PROTON_DENSITY) & np.isfinite(AVG_SYM_H):
                    
                    
                    CONJUNCTION =  [UNIX_TIME, 
                                    L, 
                                    MLT,
                                    *FLUX_SPECTRUM,
                                    TOTAL_TIME / NUM_CANDIDATES, #TIME
                                    TOTAL_L / NUM_CANDIDATES, #L
                                    0,
                                    TOTAL_DEL_MLT / NUM_CANDIDATES, #del_MLT
                                    TOTAL_CHORUS / NUM_CANDIDATES, #CHORUS
                                    AVG_SME, 
                                    AVG_AVG_B,
                                    AVG_FLOW_SPEED, 
                                    AVG_PROTON_DENSITY,
                                    AVG_SYM_H]
                                                
                    CONJUNCTIONS.append(CONJUNCTION)
        

        print(f"Number of conjunctions: {len(CONJUNCTIONS)}")
        
        CONJUNCTIONS_YEAR.extend(CONJUNCTIONS)
    
    
    CONJUNCTIONS_TOTAL.extend(CONJUNCTIONS_YEAR)
    
    print(f"Total number of conjunctions so far: {len(CONJUNCTIONS_TOTAL)}")

CONJUNCTIONS_TO_BE_SAVED = np.vstack(CONJUNCTIONS_TOTAL)

print(f"Conjunctions to be saved: {CONJUNCTIONS_TO_BE_SAVED.shape}")

np.savez(f"./../processed_data_chorus_neural_network/STAGE_2/v5a/CONJUNCTIONS_v5a.npz",
        CONJUNCTIONS=CONJUNCTIONS_TO_BE_SAVED)


Began processing year : 2012
Began loading RBSP Data for year: 2012
RBSP Data loaded for year : 2012
Began loading POES Data for year : 2012




Finished loading POES data for year : 2012
Began loading SUPERMAG data for year : 2012
Finished loading SUPERMAG data for year : 2012
Began loading OMNI data for year : 2012
Finished loading OMNI data for year : 2012
Finding CONJUNCTIONS for year : 2012
Number of records: 1542503


  AVG_AVG_B = np.nanmean(OMNI["AVG_B"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_FLOW_SPEED = np.nanmean(OMNI["FLOW_SPEED"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_PROTON_DENSITY = np.nanmean(OMNI["PROTON_DENSITY"][TIME_RANGE[0]:TIME_RANGE[1]])
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 1542503/1542503 [01:46<00:00, 14538.39it/s]


Number of conjunctions: 1658
Number of records: 1564594


100%|██████████| 1564594/1564594 [01:45<00:00, 14777.09it/s] 


Number of conjunctions: 1493
Number of records: 1564390


100%|██████████| 1564390/1564390 [01:44<00:00, 14910.46it/s] 


Number of conjunctions: 1470
Number of records: 1578310


100%|██████████| 1578310/1578310 [01:44<00:00, 15070.82it/s] 


Number of conjunctions: 1483
Number of records: 1571588


100%|██████████| 1571588/1571588 [01:46<00:00, 14787.11it/s] 


Number of conjunctions: 1454
Total number of conjunctions so far: 7558
Began processing year : 2013
Began loading RBSP Data for year: 2013
RBSP Data loaded for year : 2013
Began loading POES Data for year : 2013
Finished loading POES data for year : 2013
Began loading SUPERMAG data for year : 2013
Finished loading SUPERMAG data for year : 2013
Began loading OMNI data for year : 2013
Finished loading OMNI data for year : 2013
Finding CONJUNCTIONS for year : 2013
Number of records: 1409288


  AVG_AVG_B = np.nanmean(OMNI["AVG_B"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_FLOW_SPEED = np.nanmean(OMNI["FLOW_SPEED"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_PROTON_DENSITY = np.nanmean(OMNI["PROTON_DENSITY"][TIME_RANGE[0]:TIME_RANGE[1]])
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 1409288/1409288 [04:26<00:00, 5287.22it/s]


Number of conjunctions: 3560
Number of records: 1397910


100%|██████████| 1397910/1397910 [04:23<00:00, 5300.86it/s]


Number of conjunctions: 3854
Number of records: 229970


100%|██████████| 229970/229970 [00:43<00:00, 5262.83it/s]


Number of conjunctions: 610
Number of records: 1409589


100%|██████████| 1409589/1409589 [04:25<00:00, 5315.18it/s]


Number of conjunctions: 3986
Number of records: 1403042


100%|██████████| 1403042/1403042 [04:23<00:00, 5323.81it/s]


Number of conjunctions: 3936
Total number of conjunctions so far: 23504
Began processing year : 2014
Began loading RBSP Data for year: 2014
RBSP Data loaded for year : 2014
Began loading POES Data for year : 2014
Finished loading POES data for year : 2014
Began loading SUPERMAG data for year : 2014
Finished loading SUPERMAG data for year : 2014
Began loading OMNI data for year : 2014
Finished loading OMNI data for year : 2014
Finding CONJUNCTIONS for year : 2014
Number of records: 1465525


  AVG_AVG_B = np.nanmean(OMNI["AVG_B"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_FLOW_SPEED = np.nanmean(OMNI["FLOW_SPEED"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_PROTON_DENSITY = np.nanmean(OMNI["PROTON_DENSITY"][TIME_RANGE[0]:TIME_RANGE[1]])
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 1465525/1465525 [04:37<00:00, 5283.07it/s]


Number of conjunctions: 3863
Number of records: 1484433


100%|██████████| 1484433/1484433 [04:41<00:00, 5279.88it/s]


Number of conjunctions: 3976
Number of records: 1479400


100%|██████████| 1479400/1479400 [04:39<00:00, 5292.52it/s]


Number of conjunctions: 3614
Number of records: 642159


100%|██████████| 642159/642159 [02:00<00:00, 5316.61it/s]


Number of conjunctions: 1696
Number of records: 1496242


100%|██████████| 1496242/1496242 [04:42<00:00, 5290.83it/s]


Number of conjunctions: 3593
Number of records: 1485999


100%|██████████| 1485999/1485999 [04:41<00:00, 5278.48it/s]


Number of conjunctions: 3634
Total number of conjunctions so far: 43880
Began processing year : 2015
Began loading RBSP Data for year: 2015
RBSP Data loaded for year : 2015
Began loading POES Data for year : 2015
Finished loading POES data for year : 2015
Began loading SUPERMAG data for year : 2015
Finished loading SUPERMAG data for year : 2015
Began loading OMNI data for year : 2015
Finished loading OMNI data for year : 2015
Finding CONJUNCTIONS for year : 2015
Number of records: 1446924


  AVG_AVG_B = np.nanmean(OMNI["AVG_B"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_FLOW_SPEED = np.nanmean(OMNI["FLOW_SPEED"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_PROTON_DENSITY = np.nanmean(OMNI["PROTON_DENSITY"][TIME_RANGE[0]:TIME_RANGE[1]])
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 1446924/1446924 [04:35<00:00, 5257.24it/s]


Number of conjunctions: 3624
Number of records: 1447074


100%|██████████| 1447074/1447074 [04:34<00:00, 5272.62it/s]


Number of conjunctions: 3548
Number of records: 1437649


100%|██████████| 1437649/1437649 [04:32<00:00, 5274.02it/s]


Number of conjunctions: 3803
Number of records: 1455435


100%|██████████| 1455435/1455435 [04:36<00:00, 5262.30it/s]


Number of conjunctions: 3902
Number of records: 1454915


100%|██████████| 1454915/1454915 [04:35<00:00, 5279.23it/s]


Number of conjunctions: 4116
Total number of conjunctions so far: 62873
Began processing year : 2016
Began loading RBSP Data for year: 2016
RBSP Data loaded for year : 2016
Began loading POES Data for year : 2016
Finished loading POES data for year : 2016
Began loading SUPERMAG data for year : 2016
Finished loading SUPERMAG data for year : 2016
Began loading OMNI data for year : 2016
Finished loading OMNI data for year : 2016
Finding CONJUNCTIONS for year : 2016
Number of records: 1477986


  AVG_AVG_B = np.nanmean(OMNI["AVG_B"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_FLOW_SPEED = np.nanmean(OMNI["FLOW_SPEED"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_PROTON_DENSITY = np.nanmean(OMNI["PROTON_DENSITY"][TIME_RANGE[0]:TIME_RANGE[1]])
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 1477986/1477986 [04:40<00:00, 5269.81it/s]


Number of conjunctions: 4029
Number of records: 1479857


100%|██████████| 1479857/1479857 [04:40<00:00, 5269.45it/s]


Number of conjunctions: 3738
Number of records: 1495294


100%|██████████| 1495294/1495294 [04:44<00:00, 5254.57it/s]


Number of conjunctions: 4162
Number of records: 1498334


100%|██████████| 1498334/1498334 [04:45<00:00, 5252.09it/s]


Number of conjunctions: 3820
Total number of conjunctions so far: 78622
Began processing year : 2017
Began loading RBSP Data for year: 2017
RBSP Data loaded for year : 2017
Began loading POES Data for year : 2017
Finished loading POES data for year : 2017
Began loading SUPERMAG data for year : 2017
Finished loading SUPERMAG data for year : 2017
Began loading OMNI data for year : 2017
Finished loading OMNI data for year : 2017
Finding CONJUNCTIONS for year : 2017
Number of records: 1482805


  AVG_AVG_B = np.nanmean(OMNI["AVG_B"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_FLOW_SPEED = np.nanmean(OMNI["FLOW_SPEED"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_PROTON_DENSITY = np.nanmean(OMNI["PROTON_DENSITY"][TIME_RANGE[0]:TIME_RANGE[1]])
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 1482805/1482805 [04:41<00:00, 5264.15it/s]


Number of conjunctions: 3836
Number of records: 1481869


100%|██████████| 1481869/1481869 [04:41<00:00, 5258.57it/s]


Number of conjunctions: 3878
Number of records: 1475238


100%|██████████| 1475238/1475238 [04:41<00:00, 5239.66it/s]


Number of conjunctions: 4130
Number of records: 1489607


100%|██████████| 1489607/1489607 [04:43<00:00, 5255.37it/s]


Number of conjunctions: 3974
Number of records: 1496300


100%|██████████| 1496300/1496300 [04:48<00:00, 5186.63it/s]


Number of conjunctions: 4139
Total number of conjunctions so far: 98579
Began processing year : 2018
Began loading RBSP Data for year: 2018
RBSP Data loaded for year : 2018
Began loading POES Data for year : 2018
Finished loading POES data for year : 2018
Began loading SUPERMAG data for year : 2018
Finished loading SUPERMAG data for year : 2018
Began loading OMNI data for year : 2018
Finished loading OMNI data for year : 2018
Finding CONJUNCTIONS for year : 2018
Number of records: 1443579


  AVG_AVG_B = np.nanmean(OMNI["AVG_B"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_FLOW_SPEED = np.nanmean(OMNI["FLOW_SPEED"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_PROTON_DENSITY = np.nanmean(OMNI["PROTON_DENSITY"][TIME_RANGE[0]:TIME_RANGE[1]])
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 1443579/1443579 [04:37<00:00, 5209.19it/s]


Number of conjunctions: 4197
Number of records: 1472708


100%|██████████| 1472708/1472708 [04:42<00:00, 5218.73it/s]


Number of conjunctions: 4284
Number of records: 1466108


100%|██████████| 1466108/1466108 [04:42<00:00, 5182.82it/s]


Number of conjunctions: 3968
Number of records: 1491747


100%|██████████| 1491747/1491747 [04:45<00:00, 5224.64it/s]


Number of conjunctions: 4334
Number of records: 1490983


100%|██████████| 1490983/1490983 [04:45<00:00, 5214.34it/s]


Number of conjunctions: 4450
Total number of conjunctions so far: 119812
Began processing year : 2019
Began loading RBSP Data for year: 2019
RBSP Data loaded for year : 2019
Began loading POES Data for year : 2019
Finished loading POES data for year : 2019
Began loading SUPERMAG data for year : 2019
Finished loading SUPERMAG data for year : 2019
Began loading OMNI data for year : 2019
Finished loading OMNI data for year : 2019
Finding CONJUNCTIONS for year : 2019
Number of records: 1412994


  AVG_AVG_B = np.nanmean(OMNI["AVG_B"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_FLOW_SPEED = np.nanmean(OMNI["FLOW_SPEED"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_PROTON_DENSITY = np.nanmean(OMNI["PROTON_DENSITY"][TIME_RANGE[0]:TIME_RANGE[1]])
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 1412994/1412994 [02:59<00:00, 7853.86it/s]  


Number of conjunctions: 2372
Number of records: 1273453


100%|██████████| 1273453/1273453 [02:55<00:00, 7246.98it/s]  


Number of conjunctions: 2295
Number of records: 1423905


100%|██████████| 1423905/1423905 [03:01<00:00, 7830.54it/s]  


Number of conjunctions: 2330
Number of records: 1450116


100%|██████████| 1450116/1450116 [03:04<00:00, 7852.97it/s]  


Number of conjunctions: 2428
Number of records: 1449804


100%|██████████| 1449804/1449804 [03:04<00:00, 7871.10it/s]  


Number of conjunctions: 2484
Total number of conjunctions so far: 131721
Conjunctions to be saved: (131721, 21)


In [8]:
#Stage 3, Look at the data and make sure its good enough, then remove solar proton events
version = "v5a"

CONJUNCTIONS_REFS = np.load(f"./../processed_data_chorus_neural_network/STAGE_2/{version}/CONJUNCTIONS_{version}.npz")

CONJUNCTIONS_TESTING = CONJUNCTIONS_REFS["CONJUNCTIONS"]

CONJUNCTIONS_REFS.close()

In [9]:

'''PERCENTAGES_COVERED = [25.7899, 
                       25.7872,
                       16.9213,
                       16.8872,
                       23.2865,
                       23.2174,
                       21.2469,
                       21.1556,
                       17.7844,
                       17.9721,
                       22.3004,
                       22.5147,
                       23.2432,
                       23.2491,
                       20.0671,
                       19.0985] these were for v2a'''

'''CONJUNCTION =  [UNIX_TIME, 
                    L, 
                    MLT,
                    *FLUX_SPECTRUM,
                    candidate[0], #TIME
                    candidate[1], #L
                    candidate[2], #MLT
                    candidate[3], #del_MLT
                    candidate[4], #CHORUS
                    AVG_SME, 
                    AVG_AVG_B,
                    AVG_FLOW_SPEED, 
                    AVG_PROTON_DENSITY,
                    AVG_SYM_H]'''

CONJUNCTIONS_POES_TIME = CONJUNCTIONS_TESTING[:, 0]
CONJUNCTIONS_POES_L = CONJUNCTIONS_TESTING[:, 1]
CONJUNCTIONS_POES_MLT = CONJUNCTIONS_TESTING[:, 2]
CONJUNCTIONS_POES_FLUX = CONJUNCTIONS_TESTING[:, 3:-10]
CONJUNCTIONS_RBSP_TIME = CONJUNCTIONS_TESTING[:, -10]
CONJUNCTIONS_RBSP_L = CONJUNCTIONS_TESTING[:, -9]
CONJUNCTIONS_RBSP_MLT = CONJUNCTIONS_TESTING[:, -8]
CONJUNCTIONS_RBSP_DEL_MLT = CONJUNCTIONS_TESTING[:, -7]
CONJUNCTIONS_RBSP_CHORUS = CONJUNCTIONS_TESTING[:, -6]
CONJUNCTIONS_AVG_SME = CONJUNCTIONS_TESTING[:, -5]
CONJUNCTIONS_AVG_AVG_B = CONJUNCTIONS_TESTING[:, -4]
CONJUNCTIONS_AVG_FLOW_SPEED = CONJUNCTIONS_TESTING[:, -3]
CONJUNCTIONS_AVG_PROTON_DENSITY = CONJUNCTIONS_TESTING[:, -2]
CONJUNCTIONS_AVG_SYM_H = CONJUNCTIONS_TESTING[:, -1]

with open(f"./../processed_data_chorus_neural_network/STAGE_2/{version}/CONJUNCTIONS_{version}.txt", "w") as f:
    f.write("\nConjunctions:\n")
    f.write(f"Number of conjunctions: {CONJUNCTIONS_TESTING.shape[0]} [#]\n")
    f.write(f"Minimum RBSP Time: {np.min(CONJUNCTIONS_RBSP_TIME)} [seconds since unix epoch]\n")
    f.write(f"Maximum RBSP Time: {np.max(CONJUNCTIONS_RBSP_TIME)} [seconds since unix epoch]\n")
    f.write(f"Minimum POES Time: {np.min(CONJUNCTIONS_POES_TIME)} [seconds since unix epoch]\n")
    f.write(f"Maximum POES Time: {np.max(CONJUNCTIONS_POES_TIME)} [seconds since unix epoch]\n")
    
    f.write(f"\nL:\n")
    f.write(f"Mean Difference: {np.mean(CONJUNCTIONS_POES_L - CONJUNCTIONS_RBSP_L)} [L]\n")
    f.write(f"Standard deviation of Difference {np.std(CONJUNCTIONS_POES_L - CONJUNCTIONS_RBSP_L)} [L]\n")
    f.write(f"Minimum Absolute Difference : {np.min(np.abs(CONJUNCTIONS_POES_L - CONJUNCTIONS_RBSP_L))} [L]\n")
    f.write(f"Maximum Absolute Difference : {np.max(np.abs(CONJUNCTIONS_POES_L - CONJUNCTIONS_RBSP_L))} [L]\n")

    f.write("\nMLT: \n")
    f.write(f"Mean Absolute Difference: {np.mean(CONJUNCTIONS_RBSP_DEL_MLT)} [MLT]\n")
    f.write(f"Standard deviation of Absolute Difference {np.std(CONJUNCTIONS_RBSP_DEL_MLT)} [MLT]\n")
    f.write(f"Minimum Absolute Difference : {np.min(np.abs(CONJUNCTIONS_RBSP_DEL_MLT))} [MLT]\n")
    f.write(f"Maximum Absolute Difference : {np.max(np.abs(CONJUNCTIONS_RBSP_DEL_MLT))} [MLT]\n")

    f.write(f"\nTime: \n")
    f.write(f"Mean Difference: {np.mean(CONJUNCTIONS_POES_TIME - CONJUNCTIONS_RBSP_TIME)} [s]\n")
    f.write(f"Standard deviation of Difference {np.std(CONJUNCTIONS_POES_TIME - CONJUNCTIONS_RBSP_TIME)} [s]\n")
    f.write(f"Minimum Absolute Difference : {np.min(np.abs(CONJUNCTIONS_POES_TIME - CONJUNCTIONS_RBSP_TIME))} [s]\n")
    f.write(f"Maximum Absolute Difference : {np.max(np.abs(CONJUNCTIONS_POES_TIME - CONJUNCTIONS_RBSP_TIME))} [s]\n")

    f.write(f"\nChorus: \n")
    f.write(f"Mean: {np.mean(CONJUNCTIONS_RBSP_CHORUS)} [pT]\n")
    f.write(f"Standard Deviation: {np.std(CONJUNCTIONS_RBSP_CHORUS)} [pT]\n")
    f.write(f"Minimum: {np.min(CONJUNCTIONS_RBSP_CHORUS)} [pT]\n")
    f.write(f"Maximum: {np.max(CONJUNCTIONS_RBSP_CHORUS)} [pT]\n")


    f.write(f"\nSME: \n")
    f.write(f"Mean: {np.mean(CONJUNCTIONS_AVG_SME)} [nT]\n")
    f.write(f"Standard Deviation: {np.std(CONJUNCTIONS_AVG_SME)} [nT]\n")
    f.write(f"Minimum: {np.min(CONJUNCTIONS_AVG_SME)} [nT]\n")
    f.write(f"Maximum: {np.max(CONJUNCTIONS_AVG_SME)} [nT]\n")

    f.write(f"\nAVG_B: \n")
    f.write(f"Mean: {np.mean(CONJUNCTIONS_AVG_AVG_B)} [nT]\n")
    f.write(f"Standard Deviation: {np.std(CONJUNCTIONS_AVG_AVG_B)} [nT]\n")
    f.write(f"Minimum: {np.min(CONJUNCTIONS_AVG_AVG_B)} [nT]\n")
    f.write(f"Maximum: {np.max(CONJUNCTIONS_AVG_AVG_B)} [nT]\n")

    f.write(f"\nFlow Speed: \n")
    f.write(f"Mean: {np.mean(CONJUNCTIONS_AVG_FLOW_SPEED)} [km/s]\n")
    f.write(f"Standard Deviation: {np.std(CONJUNCTIONS_AVG_FLOW_SPEED)} [km/s]\n")
    f.write(f"Minimum: {np.min(CONJUNCTIONS_AVG_FLOW_SPEED)} [km/s]\n")
    f.write(f"Maximum: {np.max(CONJUNCTIONS_AVG_FLOW_SPEED)} [km/s]\n")
    
    f.write(f"\nProton Density: \n")
    f.write(f"Mean: {np.mean(CONJUNCTIONS_AVG_PROTON_DENSITY)} [n/cc]\n")
    f.write(f"Standard Deviation: {np.std(CONJUNCTIONS_AVG_PROTON_DENSITY)} [n/cc]\n")
    f.write(f"Minimum: {np.min(CONJUNCTIONS_AVG_PROTON_DENSITY)} [n/cc]\n")
    f.write(f"Maximum: {np.max(CONJUNCTIONS_AVG_PROTON_DENSITY)} [n/cc]\n")
    
    f.write(f"\nSYM_H: \n")
    f.write(f"Mean: {np.mean(CONJUNCTIONS_AVG_SYM_H)} [nT]\n")
    f.write(f"Standard Deviation: {np.std(CONJUNCTIONS_AVG_SYM_H)} [nT]\n")
    f.write(f"Minimum: {np.min(CONJUNCTIONS_AVG_SYM_H)} [nT]\n")
    f.write(f"Maximum: {np.max(CONJUNCTIONS_AVG_SYM_H)} [nT]\n")


In [None]:
plt.title("RBSP - Closest POES L Shell Comparison")
plt.xlabel("RBSP L-Shell")
plt.ylabel("Closest POES L-Shell")
plt.hlines(y = 4, xmin=1, xmax=7, color="black")
plt.vlines(x = 4, ymin=1, ymax=7, color="black")

plt.scatter(CONJUNCTIONS_RBSP_L, CONJUNCTIONS_POES_L)

print(f"Mean difference: {np.mean(CONJUNCTIONS_POES_L - CONJUNCTIONS_RBSP_L)}")
print(f"Standard deviation of difference {np.std(CONJUNCTIONS_POES_L - CONJUNCTIONS_RBSP_L)}")
print(f"Maximum difference : {np.max(CONJUNCTIONS_POES_L - CONJUNCTIONS_RBSP_L)}")


In [None]:
plt.title("RBSP MLT - Closest POES MLT")
plt.xlabel("RBSP MLT")
plt.ylabel("Closest POES MLT")
plt.hlines(y = 12, xmin=0, xmax=25, color="black")
plt.vlines(x = 12, ymin=0, ymax=25, color="black")

plt.scatter(CONJUNCTIONS_RBSP_MLT, CONJUNCTIONS_POES_MLT)

print(f"Mean difference: {np.mean(CONJUNCTIONS_RBSP_DEL_MLT)}")
print(f"Standard deviation of difference {np.std(CONJUNCTIONS_RBSP_DEL_MLT)}")
print(f"Maximum difference : {np.max(CONJUNCTIONS_RBSP_DEL_MLT)}")

In [12]:
#Stage 3 Continued, Removing solar proton events!

version = "v5a"

CONJUNCTIONS_REFS = np.load(f"./../processed_data_chorus_neural_network/STAGE_2/{version}/CONJUNCTIONS_{version}.npz")

CONJUNCTIONS = CONJUNCTIONS_REFS["CONJUNCTIONS"]

CONJUNCTIONS_REFS.close()

SOLAR_PROTON_EVENT_LIST = pd.read_csv(f"./../processed_data_chorus_neural_network/SOLAR_PROTON_EVENT_LIST_1976_2024.csv")


In [13]:
'''CONJUNCTION =  [UNIX_TIME, 
                    L, 
                    MLT,
                    *FLUX_SPECTRUM,
                    candidate[0], #TIME
                    candidate[1], #L
                    candidate[2], #MLT
                    candidate[3], #del_MLT
                    candidate[4], #CHORUS
                    AVG_SME, 
                    AVG_AVG_B,
                    AVG_FLOW_SPEED, 
                    AVG_PROTON_DENSITY,
                    AVG_SYM_H]'''


order_to_sort_conjunctions = np.argsort(CONJUNCTIONS[:, 0]) #Sorted based on POES Conjunction time!
SORTED_CONJUNCTIONS = CONJUNCTIONS[order_to_sort_conjunctions, :]

print(f"Starting shape of conjunctions list: {SORTED_CONJUNCTIONS.shape}")

SORTED_POES_CONJUNCTION_TIMES = SORTED_CONJUNCTIONS[:, 0]

START_OF_SEP_EVENTS_UTC = SOLAR_PROTON_EVENT_LIST["START"]
END_OF_SEP_EVENTS_UTC = SOLAR_PROTON_EVENT_LIST["END"]
ZIPPED_EVENTS = list(zip(START_OF_SEP_EVENTS_UTC, END_OF_SEP_EVENTS_UTC))

print(f"Removing high energy solar proton events!")

for SEP_EVENT in tqdm.tqdm(range(len(ZIPPED_EVENTS))):
    
    START = ZIPPED_EVENTS[SEP_EVENT][0].strip()
    END = ZIPPED_EVENTS[SEP_EVENT][1].strip()
    
    START_YMDHMS = {'year': int(START[0:4]), 'month': int(START[5:7]), 'day': int(START[8:10]), 'hour': int(START[11:13]), 'minute': int(START[13:15]), 'second': 0}
    END_YMDHMS = {'year': int(END[0:4]), 'month': int(END[5:7]), 'day': int(END[8:10]), 'hour': int(END[11:13]), 'minute': int(END[13:15]), 'second': 0}
    
    START_UNIX = astropy.time.Time(START_YMDHMS, format="ymdhms", scale='utc').unix
    END_UNIX = astropy.time.Time(END_YMDHMS, format="ymdhms", scale='utc').unix

    RANGE_TO_REMOVE = np.searchsorted(a = SORTED_POES_CONJUNCTION_TIMES, v = [START_UNIX, END_UNIX])
    
    SORTED_CONJUNCTIONS = np.vstack((SORTED_CONJUNCTIONS[0:RANGE_TO_REMOVE[0], :], SORTED_CONJUNCTIONS[RANGE_TO_REMOVE[1]:, :]))

print(f"Finished removing high energy solar proton events!")

print(f"Saving!")

CLEANED_CONJUNCTIONS = SORTED_CONJUNCTIONS #Should be cleaned by now!

np.savez(f"./../processed_data_chorus_neural_network/STAGE_3/{version}/CLEANED_CONJUNCTIONS_{version}.npz",
        CONJUNCTIONS=CLEANED_CONJUNCTIONS)

CONJUNCTIONS_POES_TIME = CLEANED_CONJUNCTIONS[:, 0]
CONJUNCTIONS_POES_L = CLEANED_CONJUNCTIONS[:, 1]
CONJUNCTIONS_POES_MLT = CLEANED_CONJUNCTIONS[:, 2]
CONJUNCTIONS_POES_FLUX = CLEANED_CONJUNCTIONS[:, 3:-10]
CONJUNCTIONS_RBSP_TIME = CLEANED_CONJUNCTIONS[:, -10]
CONJUNCTIONS_RBSP_L = CLEANED_CONJUNCTIONS[:, -9]
CONJUNCTIONS_RBSP_MLT = CLEANED_CONJUNCTIONS[:, -8]
CONJUNCTIONS_RBSP_DEL_MLT = CLEANED_CONJUNCTIONS[:, -7]
CONJUNCTIONS_RBSP_CHORUS = CLEANED_CONJUNCTIONS[:, -6]
CONJUNCTIONS_AVG_SME = CLEANED_CONJUNCTIONS[:, -5]
CONJUNCTIONS_AVG_AVG_B = CLEANED_CONJUNCTIONS[:, -4]
CONJUNCTIONS_AVG_FLOW_SPEED = CLEANED_CONJUNCTIONS[:, -3]
CONJUNCTIONS_AVG_PROTON_DENSITY = CLEANED_CONJUNCTIONS[:, -2]
CONJUNCTIONS_AVG_SYM_H = CLEANED_CONJUNCTIONS[:, -1]

print(f"Creating documentation of dataset!")


with open(f"./../processed_data_chorus_neural_network/STAGE_3/{version}/CLEANED_CONJUNCTIONS_{version}.txt", "w") as f:
    f.write("\nConjunctions:\n")
    f.write(f"Number of conjunctions: {CLEANED_CONJUNCTIONS.shape[0]} [#]\n")
    f.write(f"Number lost from cleaning solar proton events: {CONJUNCTIONS.shape[0] - CLEANED_CONJUNCTIONS.shape[0]} [#]\n")
    f.write(f"Minimum RBSP Time: {np.min(CONJUNCTIONS_RBSP_TIME)} [seconds since unix epoch]\n")
    f.write(f"Maximum RBSP Time: {np.max(CONJUNCTIONS_RBSP_TIME)} [seconds since unix epoch]\n")
    f.write(f"Minimum POES Time: {np.min(CONJUNCTIONS_POES_TIME)} [seconds since unix epoch]\n")
    f.write(f"Maximum POES Time: {np.max(CONJUNCTIONS_POES_TIME)} [seconds since unix epoch]\n")
    
    f.write(f"\nL:\n")
    f.write(f"Mean Difference: {np.mean(CONJUNCTIONS_POES_L - CONJUNCTIONS_RBSP_L)} [L]\n")
    f.write(f"Standard deviation of Difference {np.std(CONJUNCTIONS_POES_L - CONJUNCTIONS_RBSP_L)} [L]\n")
    f.write(f"Minimum Absolute Difference : {np.min(np.abs(CONJUNCTIONS_POES_L - CONJUNCTIONS_RBSP_L))} [L]\n")
    f.write(f"Maximum Absolute Difference : {np.max(np.abs(CONJUNCTIONS_POES_L - CONJUNCTIONS_RBSP_L))} [L]\n")

    f.write("\nMLT: \n")
    f.write(f"Mean Absolute Difference: {np.mean(CONJUNCTIONS_RBSP_DEL_MLT)} [MLT]\n")
    f.write(f"Standard deviation of Absolute Difference {np.std(CONJUNCTIONS_RBSP_DEL_MLT)} [MLT]\n")
    f.write(f"Minimum Absolute Difference : {np.min(np.abs(CONJUNCTIONS_RBSP_DEL_MLT))} [MLT]\n")
    f.write(f"Maximum Absolute Difference : {np.max(np.abs(CONJUNCTIONS_RBSP_DEL_MLT))} [MLT]\n")

    f.write(f"\nTime: \n")
    f.write(f"Mean Difference: {np.mean(CONJUNCTIONS_POES_TIME - CONJUNCTIONS_RBSP_TIME)} [s]\n")
    f.write(f"Standard deviation of Difference {np.std(CONJUNCTIONS_POES_TIME - CONJUNCTIONS_RBSP_TIME)} [s]\n")
    f.write(f"Minimum Absolute Difference : {np.min(np.abs(CONJUNCTIONS_POES_TIME - CONJUNCTIONS_RBSP_TIME))} [s]\n")
    f.write(f"Maximum Absolute Difference : {np.max(np.abs(CONJUNCTIONS_POES_TIME - CONJUNCTIONS_RBSP_TIME))} [s]\n")

    f.write(f"\nChorus: \n")
    f.write(f"Mean: {np.mean(CONJUNCTIONS_RBSP_CHORUS)} [pT]\n")
    f.write(f"Standard Deviation: {np.std(CONJUNCTIONS_RBSP_CHORUS)} [pT]\n")
    f.write(f"Minimum: {np.min(CONJUNCTIONS_RBSP_CHORUS)} [pT]\n")
    f.write(f"Maximum: {np.max(CONJUNCTIONS_RBSP_CHORUS)} [pT]\n")


    f.write(f"\nSME: \n")
    f.write(f"Mean: {np.mean(CONJUNCTIONS_AVG_SME)} [nT]\n")
    f.write(f"Standard Deviation: {np.std(CONJUNCTIONS_AVG_SME)} [nT]\n")
    f.write(f"Minimum: {np.min(CONJUNCTIONS_AVG_SME)} [nT]\n")
    f.write(f"Maximum: {np.max(CONJUNCTIONS_AVG_SME)} [nT]\n")

    f.write(f"\nAVG_B: \n")
    f.write(f"Mean: {np.mean(CONJUNCTIONS_AVG_AVG_B)} [nT]\n")
    f.write(f"Standard Deviation: {np.std(CONJUNCTIONS_AVG_AVG_B)} [nT]\n")
    f.write(f"Minimum: {np.min(CONJUNCTIONS_AVG_AVG_B)} [nT]\n")
    f.write(f"Maximum: {np.max(CONJUNCTIONS_AVG_AVG_B)} [nT]\n")

    f.write(f"\nFlow Speed: \n")
    f.write(f"Mean: {np.mean(CONJUNCTIONS_AVG_FLOW_SPEED)} [km/s]\n")
    f.write(f"Standard Deviation: {np.std(CONJUNCTIONS_AVG_FLOW_SPEED)} [km/s]\n")
    f.write(f"Minimum: {np.min(CONJUNCTIONS_AVG_FLOW_SPEED)} [km/s]\n")
    f.write(f"Maximum: {np.max(CONJUNCTIONS_AVG_FLOW_SPEED)} [km/s]\n")
    
    f.write(f"\nProton Density: \n")
    f.write(f"Mean: {np.mean(CONJUNCTIONS_AVG_PROTON_DENSITY)} [n/cc]\n")
    f.write(f"Standard Deviation: {np.std(CONJUNCTIONS_AVG_PROTON_DENSITY)} [n/cc]\n")
    f.write(f"Minimum: {np.min(CONJUNCTIONS_AVG_PROTON_DENSITY)} [n/cc]\n")
    f.write(f"Maximum: {np.max(CONJUNCTIONS_AVG_PROTON_DENSITY)} [n/cc]\n")
    
    f.write(f"\nSYM_H: \n")
    f.write(f"Mean: {np.mean(CONJUNCTIONS_AVG_SYM_H)} [nT]\n")
    f.write(f"Standard Deviation: {np.std(CONJUNCTIONS_AVG_SYM_H)} [nT]\n")
    f.write(f"Minimum: {np.min(CONJUNCTIONS_AVG_SYM_H)} [nT]\n")
    f.write(f"Maximum: {np.max(CONJUNCTIONS_AVG_SYM_H)} [nT]\n")

print(f"Finished!")
print(f"Ending shape of conjunctions : {CLEANED_CONJUNCTIONS.shape}")

Starting shape of conjunctions list: (131721, 21)
Removing high energy solar proton events!


100%|██████████| 309/309 [00:01<00:00, 207.64it/s]

Finished removing high energy solar proton events!
Saving!
Creating documentation of dataset!
Finished!
Ending shape of conjunctions : (130741, 21)





In [19]:
#Stage 4, Create datasets used for training, testing, etc

version = "v5a"

CONJUNCTIONS_REFS = np.load(f"./../processed_data_chorus_neural_network/STAGE_3/{version}/CLEANED_CONJUNCTIONS_{version}.npz")

CONJUNCTIONS = CONJUNCTIONS_REFS["CONJUNCTIONS"]

CONJUNCTIONS_REFS.close()

In [21]:
print(CONJUNCTIONS.shape)

CONJUNCTIONS_CHORUS = CONJUNCTIONS[:, -6]

CONJUNCTIONS_RBSP_TIME = CONJUNCTIONS[:, -10]

where_chorus_greater_zero = (0 < CONJUNCTIONS_CHORUS)

jan1_unix = astropy.time.Time({"year":2016, "month":1, "day":1, "hour":0, "minute":0, "second":0}, format="ymdhms", scale="utc").unix
apr1_unix = astropy.time.Time({"year":2016, "month":4, "day":1, "hour":0, "minute":0, "second":0}, format="ymdhms", scale="utc").unix

where_between_feb1_apr1_2013 = (jan1_unix < CONJUNCTIONS_RBSP_TIME) & (CONJUNCTIONS_RBSP_TIME < apr1_unix)

train_test_subset_selected = where_chorus_greater_zero & ~where_between_feb1_apr1_2013
validation_subset_selected = where_chorus_greater_zero & where_between_feb1_apr1_2013

print(f"Number of conjunctions between feb1 and apr1 2013: {np.count_nonzero(where_between_feb1_apr1_2013)}")
print(f"Number of conjunctions with non-zero chorus: {np.count_nonzero(where_chorus_greater_zero)}")

CONJUNCTIONS_POES_TIME = np.expand_dims(CONJUNCTIONS[train_test_subset_selected, 0], axis = 1)
CONJUNCTIONS_POES_L = np.expand_dims(CONJUNCTIONS[train_test_subset_selected, 1], axis = 1)
CONJUNCTIONS_POES_MLT = np.expand_dims(CONJUNCTIONS[train_test_subset_selected, 2], axis = 1)
CONJUNCTIONS_POES_FLUX = CONJUNCTIONS[train_test_subset_selected, 3:9]
CONJUNCTIONS_RBSP_TIME = np.expand_dims(CONJUNCTIONS[train_test_subset_selected, -10], axis = 1)
CONJUNCTIONS_RBSP_L = np.expand_dims(CONJUNCTIONS[train_test_subset_selected, -9], axis = 1)
CONJUNCTIONS_RBSP_MLT = np.expand_dims(CONJUNCTIONS[train_test_subset_selected, -8], axis = 1)
CONJUNCTIONS_RBSP_DEL_MLT = np.expand_dims(CONJUNCTIONS[train_test_subset_selected, -7], axis = 1)
CONJUNCTIONS_RBSP_CHORUS = np.expand_dims(CONJUNCTIONS[train_test_subset_selected, -6], axis = 1)
CONJUNCTIONS_AVG_SME = np.expand_dims(CONJUNCTIONS[train_test_subset_selected, -5], axis = 1)
CONJUNCTIONS_AVG_AVG_B = np.expand_dims(CONJUNCTIONS[train_test_subset_selected, -4], axis = 1)
CONJUNCTIONS_AVG_FLOW_SPEED = np.expand_dims(CONJUNCTIONS[train_test_subset_selected, -3], axis = 1)
CONJUNCTIONS_AVG_PROTON_DENSITY = np.expand_dims(CONJUNCTIONS[train_test_subset_selected, -2], axis = 1)
CONJUNCTIONS_AVG_SYM_H = np.expand_dims(CONJUNCTIONS[train_test_subset_selected, -1], axis = 1)

print(CONJUNCTIONS_RBSP_TIME.shape)
print(CONJUNCTIONS_RBSP_L.shape)
print(CONJUNCTIONS_RBSP_MLT.shape)
print(CONJUNCTIONS_RBSP_CHORUS.shape)
print(CONJUNCTIONS_POES_TIME.shape)
print(CONJUNCTIONS_POES_L.shape)
print(CONJUNCTIONS_POES_MLT.shape)
print(CONJUNCTIONS_RBSP_DEL_MLT.shape)
print(CONJUNCTIONS_POES_FLUX.shape)
print(CONJUNCTIONS_AVG_SME.shape)
print(CONJUNCTIONS_AVG_AVG_B.shape)
print(CONJUNCTIONS_AVG_FLOW_SPEED.shape)
print(CONJUNCTIONS_AVG_PROTON_DENSITY.shape)
print(CONJUNCTIONS_AVG_SYM_H.shape)

mean_L = np.nanmean(CONJUNCTIONS_POES_L)
std_L = np.std(CONJUNCTIONS_POES_L)

print(f"Mean L: {mean_L}, STD L: {std_L}")

mean_fluxes = np.expand_dims(np.nanmean(np.log(CONJUNCTIONS_POES_FLUX), axis = 0), axis=0)
std_fluxes = np.expand_dims(np.nanstd(np.log(CONJUNCTIONS_POES_FLUX), axis = 0), axis = 0)

print(f"Mean fluxes : {mean_fluxes.shape}")

mean_sme = np.nanmean(CONJUNCTIONS_AVG_SME)
std_sme = np.std(CONJUNCTIONS_AVG_SME)

mean_avg_b = np.nanmean(CONJUNCTIONS_AVG_AVG_B)
std_avg_b = np.std(CONJUNCTIONS_AVG_AVG_B)

mean_flow_speed = np.nanmean(CONJUNCTIONS_AVG_FLOW_SPEED)
std_flow_speed = np.std(CONJUNCTIONS_AVG_FLOW_SPEED)

mean_avg_proton_density = np.nanmean(CONJUNCTIONS_AVG_PROTON_DENSITY)
std_avg_proton_density = np.std(CONJUNCTIONS_AVG_PROTON_DENSITY)

mean_avg_sym_h = np.nanmean(CONJUNCTIONS_AVG_SYM_H)
std_avg_sym_h = np.std(CONJUNCTIONS_AVG_SYM_H)

FEATURES = np.hstack(((CONJUNCTIONS_POES_L - mean_L) / std_L, 
                      np.sin((CONJUNCTIONS_POES_MLT * 2 * np.pi) / 24.0), 
                      np.cos((CONJUNCTIONS_POES_MLT * 2 * np.pi) / 24.0),
                      ((np.log(CONJUNCTIONS_POES_FLUX) - mean_fluxes) / std_fluxes)[:, [0, -1]],
                      #(CONJUNCTIONS_AVG_SME - mean_sme)  / std_sme))
                      #(CONJUNCTIONS_AVG_AVG_B - mean_avg_b) / std_avg_b,
                      (CONJUNCTIONS_AVG_FLOW_SPEED - mean_flow_speed) / std_flow_speed))
                      #(CONJUNCTIONS_AVG_PROTON_DENSITY - mean_avg_proton_density) / std_avg_proton_density,
                      #(CONJUNCTIONS_AVG_SYM_H - mean_avg_sym_h) / std_avg_sym_h))
                      
#SMALL VALIDATION SET:
CONJUNCTIONS_POES_TIME_VALIDATION = np.expand_dims(CONJUNCTIONS[validation_subset_selected, 0], axis = 1)
CONJUNCTIONS_POES_L_VALIDATION = np.expand_dims(CONJUNCTIONS[validation_subset_selected, 1], axis = 1)
CONJUNCTIONS_POES_MLT_VALIDATION = np.expand_dims(CONJUNCTIONS[validation_subset_selected, 2], axis = 1)
CONJUNCTIONS_POES_FLUX_VALIDATION = CONJUNCTIONS[validation_subset_selected, 3:9]
CONJUNCTIONS_RBSP_TIME_VALIDATION = np.expand_dims(CONJUNCTIONS[validation_subset_selected, -10], axis = 1)
CONJUNCTIONS_RBSP_L_VALIDATION = np.expand_dims(CONJUNCTIONS[validation_subset_selected, -9], axis = 1)
CONJUNCTIONS_RBSP_MLT_VALIDATION = np.expand_dims(CONJUNCTIONS[validation_subset_selected, -8], axis = 1)
CONJUNCTIONS_RBSP_DEL_MLT_VALIDATION = np.expand_dims(CONJUNCTIONS[validation_subset_selected, -7], axis = 1)
CONJUNCTIONS_RBSP_CHORUS_VALIDATION = np.expand_dims(CONJUNCTIONS[validation_subset_selected, -6], axis = 1)
CONJUNCTIONS_AVG_SME_VALIDATION = np.expand_dims(CONJUNCTIONS[validation_subset_selected, -5], axis = 1)
CONJUNCTIONS_AVG_AVG_B_VALIDATION = np.expand_dims(CONJUNCTIONS[validation_subset_selected, -4], axis = 1)
CONJUNCTIONS_AVG_FLOW_SPEED_VALIDATION = np.expand_dims(CONJUNCTIONS[validation_subset_selected, -3], axis = 1)
CONJUNCTIONS_AVG_PROTON_DENSITY_VALIDATION = np.expand_dims(CONJUNCTIONS[validation_subset_selected, -2], axis = 1)
CONJUNCTIONS_AVG_SYM_H_VALIDATION = np.expand_dims(CONJUNCTIONS[validation_subset_selected, -1], axis = 1)

print(CONJUNCTIONS_RBSP_TIME_VALIDATION.shape)
print(CONJUNCTIONS_RBSP_L_VALIDATION.shape)
print(CONJUNCTIONS_RBSP_MLT_VALIDATION.shape)
print(CONJUNCTIONS_RBSP_CHORUS_VALIDATION.shape)
print(CONJUNCTIONS_POES_TIME_VALIDATION.shape)
print(CONJUNCTIONS_POES_L_VALIDATION.shape)
print(CONJUNCTIONS_POES_MLT_VALIDATION.shape)
print(CONJUNCTIONS_RBSP_DEL_MLT_VALIDATION.shape)
print(CONJUNCTIONS_POES_FLUX_VALIDATION.shape)
print(CONJUNCTIONS_AVG_SME_VALIDATION.shape)
print(CONJUNCTIONS_AVG_AVG_B_VALIDATION.shape)
print(CONJUNCTIONS_AVG_FLOW_SPEED_VALIDATION.shape)
print(CONJUNCTIONS_AVG_PROTON_DENSITY_VALIDATION.shape)
print(CONJUNCTIONS_AVG_SYM_H_VALIDATION.shape)


VALIDATION_FEATURES = np.hstack(((CONJUNCTIONS_POES_L_VALIDATION - mean_L) / std_L, 
                                np.sin((CONJUNCTIONS_POES_MLT_VALIDATION * 2 * np.pi) / 24.0), 
                                np.cos((CONJUNCTIONS_POES_MLT_VALIDATION * 2 * np.pi) / 24.0),
                                ((np.log(CONJUNCTIONS_POES_FLUX_VALIDATION) - mean_fluxes) / std_fluxes)[:, [0, -1]],
                                #(CONJUNCTIONS_AVG_SME_VALIDATION - mean_sme) / std_sme))
                                #(CONJUNCTIONS_AVG_AVG_B_VALIDATION - mean_avg_b) / std_avg_b,
                                (CONJUNCTIONS_AVG_FLOW_SPEED_VALIDATION - mean_flow_speed) / std_flow_speed))
                                #(CONJUNCTIONS_AVG_PROTON_DENSITY_VALIDATION - mean_avg_proton_density) / std_avg_proton_density,
                                #(CONJUNCTIONS_AVG_SYM_H_VALIDATION - mean_avg_sym_h) / std_avg_sym_h))
        
np.savez(f"./../processed_data_chorus_neural_network/STAGE_4/{version}/MODEL_READY_DATA_{version}.npz",
        FEATURES = FEATURES,
        LABELS = CONJUNCTIONS_RBSP_CHORUS,
        VALIDATION_FEATURES = VALIDATION_FEATURES,
        VALIDATION_LABELS = CONJUNCTIONS_RBSP_CHORUS_VALIDATION,
        TRAINING_MLT = CONJUNCTIONS_POES_MLT,
        MEAN_FLUXES = mean_fluxes,
        STD_FLUXES = std_fluxes,
        MEAN_SME = mean_sme,
        STD_SME = std_sme,
        MEAN_AVG_B = mean_avg_b,
        STD_AVG_B = std_avg_b,
        MEAN_FLOW_SPEED = mean_flow_speed,
        STD_FLOW_SPEED = std_flow_speed,
        MEAN_AVG_PROTON_DENSITY = mean_avg_proton_density,
        STD_AVG_PROTON_DENSITY = std_avg_proton_density,
        MEAN_AVG_SYM_H = mean_avg_sym_h,
        STD_AVG_SYM_H = std_avg_sym_h)

(130741, 21)
Number of conjunctions between feb1 and apr1 2013: 3284
Number of conjunctions with non-zero chorus: 130740
(127456, 1)
(127456, 1)
(127456, 1)
(127456, 1)
(127456, 1)
(127456, 1)
(127456, 1)
(127456, 1)
(127456, 6)
(127456, 1)
(127456, 1)
(127456, 1)
(127456, 1)
(127456, 1)
Mean L: 4.0540999945598255, STD L: 1.316954157213637
Mean fluxes : (1, 6)
(3284, 1)
(3284, 1)
(3284, 1)
(3284, 1)
(3284, 1)
(3284, 1)
(3284, 1)
(3284, 1)
(3284, 6)
(3284, 1)
(3284, 1)
(3284, 1)
(3284, 1)
(3284, 1)
