In [1]:
import os
import sys

# caution: path[0] is reserved for script path (or '' in REPL).
sys.path.insert(1, os.path.abspath("./../src"))


import datetime
import importlib

import astropy.time
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tqdm
from cdflib.epochs_astropy import CDFAstropy as cdfepoch
from dateutil import rrule
import seaborn as sns

import data_loader
import rbsp_chorus_tool

# Re-import the project-local modules so that edits made on disk are picked up
# without restarting the kernel.
for _project_module in (data_loader, rbsp_chorus_tool):
    importlib.reload(_project_module)

# Root folder that holds the processed data read and written by the cells below.
pdata_folder = os.path.abspath(r"./../processed_data/chorus_neural_network/")

%matplotlib qt

In [None]:
# STAGE 0 DATA VERIFICATION FOR POES LSTAR CALCULATIONS
#
# Plots the "Lstar" and "L" series stored for one POES satellite and year so
# the two can be compared by eye.

mpe_folder = os.path.join(pdata_folder, "STAGE_0", "MPE_DATA_PREPROCESSED_WITH_LSTAR")

year = 2012
SATID = "m02"

# The archive stores a pickled dict inside a 0-d/1-element object array; it is
# fully materialised by the indexing below, so the file can be closed right away.
with np.load(
    file=os.path.join(
        mpe_folder,
        rf"MPE_PREPROCESSED_DATA_T89_{year}.npz",
    ),
    allow_pickle=True,
) as refs:
    DATA = refs["DATA"].flatten()[0]

SAT = DATA[SATID]

# Convert with an explicit UTC timezone: a bare fromtimestamp() would shift the
# axis by the local UTC offset of whatever machine runs the notebook.
dt_for_all = np.array(
    [datetime.datetime.fromtimestamp(t, tz=datetime.UTC) for t in SAT["UNIX_TIME"]]
)

plt.plot(dt_for_all, SAT["Lstar"], label="L*", color="red", marker="*")
plt.plot(dt_for_all, SAT["L"], label="IGRF Lm", color="black", marker="*")
plt.ylabel("|L|")
plt.xlabel("Time")
plt.title(f"Some Orbits for {SATID} in {year}")
plt.legend()

plt.show()

In [None]:
# DATA VERIFICATION RBSP L-STAR DATA CALCULATIONS
#
# Plots the stored MLAT series of one RBSP probe against time as a sanity check
# of the Stage 1 L* files.

lstar_folder = os.path.join(pdata_folder, "STAGE_1", "Lstar")

year = 2012
sat = "a"
refs = np.load(
    file=os.path.join(lstar_folder, rf"RBSP_{sat.upper()}_T89_{year}.npz"),
    allow_pickle=True,
)

try:
    # The x-axis is labelled UTC, so the conversion must be done in UTC as well;
    # a bare fromtimestamp() would silently use the local timezone instead.
    dates = np.array(
        [datetime.datetime.fromtimestamp(t, tz=datetime.UTC) for t in refs["UNIX_TIME"]]
    )

    # Alternative view (L* at the equator vs. local L*), kept for reference:
    # plt.plot(dates, refs["Lstar"], label="L* at EQ")
    # plt.plot(dates, refs["LSTAR_LOCAL"], label="L* Local")
    # plt.xlabel("Time (UTC)")
    # plt.ylabel("L*")
    # plt.legend()

    plt.plot(dates, refs["MLAT"], label="MLAT")
    plt.xlabel("Time (UTC)")
    plt.ylabel("MLAT")
    plt.legend()
finally:
    # Arrays are read lazily from the archive, so only close once plotting is set up.
    refs.close()

plt.show()

In [None]:
# Interpolate the Stage 0 POES data onto a 1-second grid (this step was skipped
# when the Stage 0 files were first produced, NaNs included).
#
# For every pair of consecutive samples that are less than 30 s apart, L, MLT
# and the eight BLC flux channels are linearly interpolated at 1 s steps.  MLT
# is interpolated through its cosine/sine components so that the 24 h -> 0 h
# wrap does not produce spurious intermediate values.
#
# NOTE(review): each segment includes both of its end points, so a sample that
# ends one segment and starts the next appears twice in the output — confirm
# that downstream stages expect this.

POES_N_FLUX_CHANNELS = 8  # columns 0..7 of "BLC_Flux" are carried through
POES_MAX_GAP_SECONDS = 30.0  # larger gaps are not bridged


def _interp_between(t_points, t1, t2, v1, v2):
    """Linearly interpolate between (t1, v1) and (t2, v2) at ``t_points``.

    Points outside [t1, t2] are returned as NaN.
    """
    return np.interp(x=t_points, xp=[t1, t2], fp=[v1, v2], left=np.nan, right=np.nan)


for _year in range(2012, 2021):

    POES = {}

    # The context manager guarantees the archive is closed on every path,
    # including the early `continue` for years without any usable satellite.
    with np.load(
        rf"./../processed_data/chorus_neural_network/STAGE_0/MPE_DATA_PREPROCESSED_WITH_LSTAR/MPE_PREPROCESSED_DATA_T89_{_year}.npz",
        allow_pickle=True,
    ) as refs:
        POES_DATA = refs["DATA"].flatten()[0]

    for SATID in POES_DATA:

        SAT = POES_DATA[SATID]

        # One array per accepted segment; joined once after the loop.
        UNIX_TIME = []
        L = []
        MLT = []
        BLC_FLUX_CHANNELS = [[] for _ in range(POES_N_FLUX_CHANNELS)]

        for p in tqdm.tqdm(range(len(SAT["UNIX_TIME"]) - 1)):

            t1 = SAT["UNIX_TIME"][p]
            t2 = SAT["UNIX_TIME"][p + 1]

            # Written as a negated "<" so that NaN timestamps are skipped too.
            if not (t2 - t1 < POES_MAX_GAP_SECONDS):
                continue

            t_points = np.arange(t1, t2 + 1, step=1, dtype=np.float64)

            UNIX_TIME.append(t_points)
            L.append(_interp_between(t_points, t1, t2, SAT["L"][p], SAT["L"][p + 1]))

            # Interpolate MLT on the unit circle, then convert the angle back to hours.
            X_INTERPOLATED = _interp_between(
                t_points,
                t1,
                t2,
                np.cos(SAT["MLT"][p] * 2 * np.pi / 24.0),
                np.cos(SAT["MLT"][p + 1] * 2 * np.pi / 24.0),
            )
            Y_INTERPOLATED = _interp_between(
                t_points,
                t1,
                t2,
                np.sin(SAT["MLT"][p] * 2 * np.pi / 24.0),
                np.sin(SAT["MLT"][p + 1] * 2 * np.pi / 24.0),
            )
            ANGLE_IN_RADIANS = np.mod(
                np.arctan2(Y_INTERPOLATED, X_INTERPOLATED) + 2 * np.pi, 2 * np.pi
            )

            MLT.append((ANGLE_IN_RADIANS * 24.0) / (2 * np.pi))

            for channel, channel_segments in enumerate(BLC_FLUX_CHANNELS):
                channel_segments.append(
                    _interp_between(
                        t_points,
                        t1,
                        t2,
                        SAT["BLC_Flux"][p, channel],
                        SAT["BLC_Flux"][p + 1, channel],
                    )
                )

        if not UNIX_TIME:
            # np.hstack([]) raises; a satellite without any short-enough gap
            # simply contributes nothing for this year.
            print(f"No interpolable POES segments for {SATID} in year : {_year}")
            continue

        UNIX_TIME = np.hstack(UNIX_TIME)
        L = np.hstack(L)
        MLT = np.hstack(MLT)
        # Shape (n_points, POES_N_FLUX_CHANNELS), one column per flux channel.
        BLC_FLUX = np.hstack(
            [
                np.expand_dims(np.hstack(channel_segments), axis=1)
                for channel_segments in BLC_FLUX_CHANNELS
            ]
        )

        POES[SATID] = {
            "UNIX_TIME": UNIX_TIME,
            "MLT": MLT,
            "BLC_Flux": BLC_FLUX,
            "L": L,
        }

    if not POES:
        print(f"No POES satellite coverage found for year : {_year}")
        print(f"SKIPPING YEAR : {_year}")
        continue

    np.savez(
        file=os.path.abspath(
            f"./../processed_data/chorus_neural_network/STAGE_0/MPE_DATA_PREPROCESSED_WITH_LSTAR/MPE_PREPROCESSED_DATA_T89_{_year}_interpolated.npz"
        ),
        DATA=POES,
    )

In [4]:
%%time
# Stage 1 RBSP Chorus Preprocessing, Obtains clean chorus amplitudes
#
# This cell only gathers the inputs (WNA survey data, observed density and
# thruster-event windows) for both probes; the processing happens in the next cell.

year = 2019

pdata_folder = os.path.abspath("./../processed_data/chorus_neural_network/")
rbsp_density_folder = os.path.join(pdata_folder, "STAGE_0", "ELECTRON_DENSITY_DATA_PREPROCESSED")
output_folder = os.path.join(pdata_folder, "STAGE_1", "DENSITY_AND_CHORUS")

# Processing window: the whole calendar year, in UTC.
start = datetime.datetime(year, 1, 1, tzinfo=datetime.UTC)
end = datetime.datetime(year + 1, 1, 1, tzinfo=datetime.UTC)

evenly_spaced_seconds = np.arange(start.timestamp(), end.timestamp() + 1, step=1)


def _load_wna_survey(satellite):
    """Load the EMFISIS L4 WNA survey data of one probe for [start, end]."""
    return data_loader.load_raw_data_from_config(
        id=["RBSP", "EMFISIS", "L4", "WNA_SURVEY"],
        start=start,
        end=end,
        satellite=satellite,
        root_data_dir="/project/rbsp/data/",
        use_config_keys_in_subdir=False,
    )


def _load_observed_density(probe):
    """Return (closed archive handle, UNIX_TIME, DENSITY) for probe "A" or "B"."""
    archive = np.load(
        file=os.path.join(rbsp_density_folder, rf"RBSP_{probe}_OBSERVED_DENSITY_{year}.npz"),
        allow_pickle=True,
    )
    unix_time = archive["UNIX_TIME"]
    density = archive["DENSITY"]
    archive.close()
    return archive, unix_time, density


def _thruster_unix_seconds(events, column):
    """Convert an ISO8601 timestamp column to integer seconds since the Unix epoch."""
    return pd.to_datetime(events[column], utc=True, format="ISO8601").astype(np.int64) // 10**9


WNA_A = _load_wna_survey("a")
WNA_B = _load_wna_survey("b")

density_refs_A, DENSITY_TIME_A, DENSITY_A = _load_observed_density("A")
density_refs_B, DENSITY_TIME_B, DENSITY_B = _load_observed_density("B")

THRUSTER_EVENTS_DF_A = pd.read_csv(os.path.join(pdata_folder, "THRUSTER_EVENTS_RBSPA.csv"))
THRUSTER_EVENTS_DF_B = pd.read_csv(os.path.join(pdata_folder, "THRUSTER_EVENTS_RBSPB.csv"))

THRUSTER_START_TIMES_A = _thruster_unix_seconds(THRUSTER_EVENTS_DF_A, "Start Time")
THRUSTER_END_TIMES_A = _thruster_unix_seconds(THRUSTER_EVENTS_DF_A, "End Time")

THRUSTER_START_TIMES_B = _thruster_unix_seconds(THRUSTER_EVENTS_DF_B, "Start Time")
THRUSTER_END_TIMES_B = _thruster_unix_seconds(THRUSTER_EVENTS_DF_B, "End Time")

# One record per probe, consumed by the processing loop in the next cell.
RBSP_A = dict(
    WNA=WNA_A,
    DENSITY_TIME=DENSITY_TIME_A,
    DENSITY=DENSITY_A,
    THRUSTER_START_TIMES=THRUSTER_START_TIMES_A,
    THRUSTER_END_TIMES=THRUSTER_END_TIMES_A,
    SATID="A",
)

RBSP_B = dict(
    WNA=WNA_B,
    DENSITY_TIME=DENSITY_TIME_B,
    DENSITY=DENSITY_B,
    THRUSTER_START_TIMES=THRUSTER_START_TIMES_B,
    THRUSTER_END_TIMES=THRUSTER_END_TIMES_B,
    SATID="B",
)

RBSP = [RBSP_A, RBSP_B]

CPU times: user 25.7 s, sys: 38.6 s, total: 1min 4s
Wall time: 2min 22s


In [5]:
%%time

# For each probe: compute lower/upper band chorus, mask invalid coordinates,
# interpolate everything onto a 1-second grid across gaps shorter than 60 s,
# blank out thruster-event intervals and save the finite samples per band.


def _interp_segment(t_points, t1, t2, v1, v2):
    """Linearly interpolate between (t1, v1) and (t2, v2) at ``t_points``.

    Points outside [t1, t2] are returned as NaN.
    """
    return np.interp(x=t_points, xp=[t1, t2], fp=[v1, v2], left=np.nan, right=np.nan)


def _join_segments(segments):
    """Concatenate per-segment arrays; no segments gives an empty float array."""
    return np.concatenate(segments) if segments else np.array([])


for PROBE in RBSP:

    # Work on copies: the NaN masking below must not modify PROBE["WNA"], or
    # re-running this cell would start from already-masked inputs.
    MLT = np.array(PROBE["WNA"]["MLT"])
    MLAT = np.array(PROBE["WNA"]["MagLat"])
    L = np.array(PROBE["WNA"]["L"])
    EPOCH = PROBE["WNA"]["Epoch"]
    PLANARITY = PROBE["WNA"]["plansvd"]
    ELLIPTICITY = PROBE["WNA"]["ellsvd"]
    THRUSTER_START_TIMES = PROBE["THRUSTER_START_TIMES"]
    THRUSTER_END_TIMES = PROBE["THRUSTER_END_TIMES"]
    SATID = PROBE["SATID"]
    # Density is currently not merged in (PROBE["DENSITY_TIME"] / PROBE["DENSITY"]
    # are unused).  To re-enable, interpolate it onto TIME after the resampling
    # below, e.g. np.interp(x=TIME, xp=DENSITY_TIME, fp=DENSITY), and add it to
    # the finite-value masks and to both np.savez calls.

    TIME = cdfepoch.unixtime(EPOCH)

    LOWER_CHORUS = np.asarray(rbsp_chorus_tool.calculate_chorus_power(
        WNA_survey=PROBE["WNA"], Magnetic_Planarity=PLANARITY, Magnetic_Ellipticity=ELLIPTICITY, lower=True
    ))

    UPPER_CHORUS = np.asarray(rbsp_chorus_tool.calculate_chorus_power(
        WNA_survey=PROBE["WNA"], Magnetic_Planarity=PLANARITY, Magnetic_Ellipticity=ELLIPTICITY, lower=False
    ))

    within_epoch_range = (start.timestamp() < TIME) & (TIME < end.timestamp())
    all_valid_coordinates = (EPOCH > 0) & (0 <= MLT) & (MLT <= 24) & (0 < L) & (L < 10) & (-90 <= MLAT) & (MLAT <= 90)

    # Samples outside the requested year or with out-of-range coordinates are
    # turned into NaN so they are dropped by the finite-value filter further down.
    invalid_sample = ~(within_epoch_range & all_valid_coordinates)
    MLT[invalid_sample] = np.nan
    MLAT[invalid_sample] = np.nan
    L[invalid_sample] = np.nan
    LOWER_CHORUS[invalid_sample] = np.nan
    UPPER_CHORUS[invalid_sample] = np.nan

    # One numpy array per accepted segment.  Extending Python lists element by
    # element would create tens of millions of float objects.
    TIME_INTERPOLATED = []
    L_INTERPOLATED = []
    MLT_INTERPOLATED = []
    MLAT_INTERPOLATED = []
    LOWER_CHORUS_INTERPOLATED = []
    UPPER_CHORUS_INTERPOLATED = []

    for p in tqdm.tqdm(range(len(TIME) - 1)):

        t1 = TIME[p]
        t2 = TIME[p + 1]

        # Written as a negated "<" so that NaN timestamps are skipped too.
        if not (t2 - t1 < 60.0):
            continue

        t_points = np.arange(t1, t2 + 1, step=1, dtype=np.float64)

        TIME_INTERPOLATED.append(t_points)
        L_INTERPOLATED.append(_interp_segment(t_points, t1, t2, L[p], L[p + 1]))

        # MLT is interpolated on the unit circle so the 24 h -> 0 h wrap is handled.
        x_int = _interp_segment(
            t_points, t1, t2, np.cos(MLT[p] * 2 * np.pi / 24.0), np.cos(MLT[p + 1] * 2 * np.pi / 24.0)
        )
        y_int = _interp_segment(
            t_points, t1, t2, np.sin(MLT[p] * 2 * np.pi / 24.0), np.sin(MLT[p + 1] * 2 * np.pi / 24.0)
        )
        angle = np.mod(np.arctan2(y_int, x_int) + 2 * np.pi, 2 * np.pi)
        MLT_INTERPOLATED.append((angle * 24) / (2 * np.pi))

        MLAT_INTERPOLATED.append(_interp_segment(t_points, t1, t2, MLAT[p], MLAT[p + 1]))
        LOWER_CHORUS_INTERPOLATED.append(
            _interp_segment(t_points, t1, t2, LOWER_CHORUS[p], LOWER_CHORUS[p + 1])
        )
        UPPER_CHORUS_INTERPOLATED.append(
            _interp_segment(t_points, t1, t2, UPPER_CHORUS[p], UPPER_CHORUS[p + 1])
        )

    TIME = _join_segments(TIME_INTERPOLATED)
    L = _join_segments(L_INTERPOLATED)
    MLT = _join_segments(MLT_INTERPOLATED)
    MLAT = _join_segments(MLAT_INTERPOLATED)
    LOWER_CHORUS = _join_segments(LOWER_CHORUS_INTERPOLATED)
    UPPER_CHORUS = _join_segments(UPPER_CHORUS_INTERPOLATED)

    print("\nShapes before cleaning thruster events and removing NaNs:")
    for resampled in (TIME, L, MLT, MLAT, LOWER_CHORUS, UPPER_CHORUS):
        print(resampled.shape)

    # Blank the timestamp of every sample that falls inside a thruster firing;
    # the finite-value masks below then drop those samples from both bands.
    NUM_IN_THRUSTER_EVENTS = 0
    for START_TIME, END_TIME in zip(THRUSTER_START_TIMES, THRUSTER_END_TIMES):
        in_thruster_event = (START_TIME <= TIME) & (TIME <= END_TIME)
        NUM_IN_THRUSTER_EVENTS += np.sum(in_thruster_event)
        TIME[in_thruster_event] = np.nan

    finite_position = np.isfinite(TIME) & np.isfinite(L) & np.isfinite(MLT) & np.isfinite(MLAT)
    NOT_NAN_LOWER = finite_position & np.isfinite(LOWER_CHORUS)
    NOT_NAN_UPPER = finite_position & np.isfinite(UPPER_CHORUS)

    print(f"Number of points in thruster events : {NUM_IN_THRUSTER_EVENTS}")
    print(f"Number of LOWER BAND CHORUS that were NAN: {np.sum(np.isnan(LOWER_CHORUS))}")
    print(f"Number of UPPER BAND CHORUS that were NAN: {np.sum(np.isnan(UPPER_CHORUS))}")

    TIME_LOWER = TIME[NOT_NAN_LOWER]
    L_LOWER = L[NOT_NAN_LOWER]
    MLT_LOWER = MLT[NOT_NAN_LOWER]
    MLAT_LOWER = MLAT[NOT_NAN_LOWER]
    CHORUS_LOWER = LOWER_CHORUS[NOT_NAN_LOWER]

    TIME_UPPER = TIME[NOT_NAN_UPPER]
    L_UPPER = L[NOT_NAN_UPPER]
    MLT_UPPER = MLT[NOT_NAN_UPPER]
    MLAT_UPPER = MLAT[NOT_NAN_UPPER]
    CHORUS_UPPER = UPPER_CHORUS[NOT_NAN_UPPER]

    print("\nShapes after cleaning thruster events and removing NaNs:\n")
    print("Lower:")
    for cleaned in (TIME_LOWER, L_LOWER, MLT_LOWER, MLAT_LOWER, CHORUS_LOWER):
        print(cleaned.shape)

    print("\nUpper:")

    for cleaned in (TIME_UPPER, L_UPPER, MLT_UPPER, MLAT_UPPER, CHORUS_UPPER):
        print(cleaned.shape)

    print("\n")

    np.savez(
        file=os.path.abspath(os.path.join(output_folder, f"RBSP_EMFISIS_CHORUS_AND_DENSITY_{year}_{SATID}_LOWER_BAND.npz")),
        UNIX_TIME=TIME_LOWER,
        MLT=MLT_LOWER,
        MLAT=MLAT_LOWER,
        L=L_LOWER,
        CHORUS=CHORUS_LOWER,
    )

    np.savez(
        file=os.path.abspath(os.path.join(output_folder, f"RBSP_EMFISIS_CHORUS_AND_DENSITY_{year}_{SATID}_UPPER_BAND.npz")),
        UNIX_TIME=TIME_UPPER,
        MLT=MLT_UPPER,
        MLAT=MLAT_UPPER,
        L=L_UPPER,
        CHORUS=CHORUS_UPPER,
    )


0         2.1
1         4.3
2         6.4
3         8.5
4        10.7
       ...   
60     7079.5
61     7943.3
62     8912.6
63    10000.3
64    11257.4
Name: f_l, Length: 65, dtype: float64
0        2.1
1        2.1
2        2.1
3        2.1
4        2.1
       ...  
60     816.0
61     914.3
62    1027.5
63    1151.4
64    1371.5
Name: del_l, Length: 65, dtype: float64
0         2.1
1         4.3
2         6.4
3         8.5
4        10.7
       ...   
60     7079.5
61     7943.3
62     8912.6
63    10000.3
64    11257.4
Name: f_l, Length: 65, dtype: float64
0        2.1
1        2.1
2        2.1
3        2.1
4        2.1
       ...  
60     816.0
61     914.3
62    1027.5
63    1151.4
64    1371.5
Name: del_l, Length: 65, dtype: float64


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4118380/4118380 [01:58<00:00, 34751.92it/s]



Shapes before cleaning thruster events and removing NaNs:
(32152351,)
(32152351,)
(32152351,)
(32152351,)
(32152351,)
(32152351,)
Number of points in thruster events : 253219
Number of LOWER BAND CHORUS that were NAN: 6492256
Number of UPPER BAND CHORUS that were NAN: 12110426

Shapes after cleaning thruster events and removing NaNs:

Lower:
(25439279,)
(25439279,)
(25439279,)
(25439279,)
(25439279,)

Upper:
(19837479,)
(19837479,)
(19837479,)
(19837479,)
(19837479,)


0         2.1
1         4.3
2         6.4
3         8.5
4        10.7
       ...   
60     7079.5
61     7943.3
62     8912.6
63    10000.3
64    11257.4
Name: f_l, Length: 65, dtype: float64
0        2.1
1        2.1
2        2.1
3        2.1
4        2.1
       ...  
60     816.0
61     914.3
62    1027.5
63    1151.4
64    1371.5
Name: del_l, Length: 65, dtype: float64
0         2.1
1         4.3
2         6.4
3         8.5
4        10.7
       ...   
60     7079.5
61     7943.3
62     8912.6
63    10000.3
64    1125

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2829323/2829323 [01:21<00:00, 34886.73it/s]



Shapes before cleaning thruster events and removing NaNs:
(20571149,)
(20571149,)
(20571149,)
(20571149,)
(20571149,)
(20571149,)
Number of points in thruster events : 185041
Number of LOWER BAND CHORUS that were NAN: 4259556
Number of UPPER BAND CHORUS that were NAN: 7771477

Shapes after cleaning thruster events and removing NaNs:

Lower:
(16151999,)
(16151999,)
(16151999,)
(16151999,)
(16151999,)

Upper:
(12651899,)
(12651899,)
(12651899,)
(12651899,)
(12651899,)


CPU times: user 52min 14s, sys: 23 s, total: 52min 37s
Wall time: 52min 36s


In [None]:
# Count the lower-band samples (from the last probe processed above) whose
# chorus value is exactly 0.01.
print((CHORUS_LOWER == 0.01).sum())

In [None]:
# Stage 2: clean and combine the RBSP, OMNI, and POES data, then find conjunctions between RBSP and POES. See bin_before_finding_conjunctions.py.

In [2]:
# Stage 3 Continued, Removing solar proton events!
#
# Loads the Stage 2 conjunction table and the solar proton event list that the
# next cell uses to filter it.

VERSION = "v4"
FIELD_MODEL = "T89"
MODEL_TYPE = "LOWER_BAND"


pdata_folder = os.path.abspath(r"./../processed_data/chorus_neural_network/")

STAGE_2_folder, STAGE_3_folder = (
    os.path.join(pdata_folder, stage_name, VERSION) for stage_name in ("STAGE_2", "STAGE_3")
)

# Read the table into memory, then release the archive handle.
with np.load(
    os.path.join(STAGE_2_folder, rf"CONJUNCTIONS_{VERSION}_{FIELD_MODEL}_{MODEL_TYPE}.npz")
) as CONJUNCTIONS_REFS:
    CONJUNCTIONS = CONJUNCTIONS_REFS["CONJUNCTIONS"]

SOLAR_PROTON_EVENT_LIST = pd.read_csv(
    os.path.join(pdata_folder, r"SOLAR_PROTON_EVENT_LIST_1976_2024.csv")
)

In [3]:
# Column layout of one CONJUNCTIONS row, as written by the Stage 2 script:
#
#   0      CHUNK_TIME + (T_SIZE / 2.0)          (POES time)
#   1      AVG_L_POES[x_bin, y_bin]
#   2      AVG_MLT_POES[x_bin, y_bin]
#   3-10   AVG_FLUX_0 ... AVG_FLUX_7
#   11     CHUNK_TIME + (T_SIZE / 2.0)          (RBSP time)
#   12     AVG_L_RBSP[x_bin, y_bin, z_bin]
#   13     AVG_MLT_RBSP[x_bin, y_bin, z_bin]
#   14     AVG_MLAT_RBSP[x_bin, y_bin, z_bin]
#   15     AVG_CHORUS[x_bin, y_bin, z_bin]
#   16     AVG_DENSITY_RBSP[x_bin, y_bin, z_bin]
#   17     SME_MEAN
#   18     SME_VARIATION
#   19     OMNI["AVG_B"]
#   20     OMNI["FLOW_SPEED"]
#   21     OMNI["PROTON_DENSITY"]
#   22     OMNI["SYM_H"]


def _sep_stamp_to_unix(stamp):
    """Convert one event-list timestamp to Unix seconds (UTC).

    The year, month, day, hour and minute are read from the fixed character
    positions 0-4, 5-7, 8-10, 11-13 and 13-15 of the stripped string; seconds
    are taken as 0.
    """
    stamp = stamp.strip()
    ymdhms = {
        "year": int(stamp[0:4]),
        "month": int(stamp[5:7]),
        "day": int(stamp[8:10]),
        "hour": int(stamp[11:13]),
        "minute": int(stamp[13:15]),
        "second": 0,
    }
    return astropy.time.Time(ymdhms, format="ymdhms", scale="utc").unix


def _write_difference(handle, header, difference, unit):
    """Write mean/std of a signed difference and min/max of its magnitude."""
    handle.write(header)
    handle.write(f"Mean Difference: {np.mean(difference)} [{unit}]\n")
    handle.write(f"Standard deviation of Difference {np.std(difference)} [{unit}]\n")
    handle.write(f"Minimum Absolute Difference : {np.min(np.abs(difference))} [{unit}]\n")
    handle.write(f"Maximum Absolute Difference : {np.max(np.abs(difference))} [{unit}]\n")


def _write_summary(handle, title, values, unit):
    """Write mean, standard deviation, minimum and maximum of ``values``."""
    handle.write(f"\n{title}: \n")
    handle.write(f"Mean: {np.mean(values)} [{unit}]\n")
    handle.write(f"Standard Deviation: {np.std(values)} [{unit}]\n")
    handle.write(f"Minimum: {np.min(values)} [{unit}]\n")
    handle.write(f"Maximum: {np.max(values)} [{unit}]\n")


order_to_sort_conjunctions = np.argsort(
    CONJUNCTIONS[:, 0]
)  # Sorted based on POES Conjunction time!
SORTED_CONJUNCTIONS = CONJUNCTIONS[order_to_sort_conjunctions, :]

print(f"Starting shape of conjunctions list: {SORTED_CONJUNCTIONS.shape}")

SORTED_POES_CONJUNCTION_TIMES = SORTED_CONJUNCTIONS[:, 0]

START_OF_SEP_EVENTS_UTC = SOLAR_PROTON_EVENT_LIST["START"]
END_OF_SEP_EVENTS_UTC = SOLAR_PROTON_EVENT_LIST["END"]
ZIPPED_EVENTS = list(zip(START_OF_SEP_EVENTS_UTC, END_OF_SEP_EVENTS_UTC))

print("Removing high energy solar proton events!")

# Mark rows on a mask that is indexed like SORTED_POES_CONJUNCTION_TIMES and
# apply it once at the end.  Deleting rows inside the loop would shift the
# table relative to the time column the search indices refer to, so every
# event after the first would remove the wrong rows.
KEEP_CONJUNCTION = np.ones(SORTED_POES_CONJUNCTION_TIMES.shape[0], dtype=bool)

for START, END in tqdm.tqdm(ZIPPED_EVENTS):

    START_UNIX = _sep_stamp_to_unix(START)
    END_UNIX = _sep_stamp_to_unix(END)

    RANGE_TO_REMOVE = np.searchsorted(a=SORTED_POES_CONJUNCTION_TIMES, v=[START_UNIX, END_UNIX])

    KEEP_CONJUNCTION[RANGE_TO_REMOVE[0] : RANGE_TO_REMOVE[1]] = False

SORTED_CONJUNCTIONS = SORTED_CONJUNCTIONS[KEEP_CONJUNCTION, :]

print("Finished removing high energy solar proton events!")

print("Saving!")

CLEANED_CONJUNCTIONS = SORTED_CONJUNCTIONS  # Should be cleaned by now!

np.savez(
    file=os.path.join(STAGE_3_folder, rf"CLEANED_CONJUNCTIONS_{VERSION}_{FIELD_MODEL}_{MODEL_TYPE}.npz"),
    CONJUNCTIONS=CLEANED_CONJUNCTIONS,
)

C_POES_TIME = CLEANED_CONJUNCTIONS[:, 0]
C_POES_LSTAR = CLEANED_CONJUNCTIONS[:, 1]
C_POES_MLT = CLEANED_CONJUNCTIONS[:, 2]
C_POES_FLUX = CLEANED_CONJUNCTIONS[:, 3:-12]
C_RBSP_TIME = CLEANED_CONJUNCTIONS[:, -12]
C_RBSP_LSTAR = CLEANED_CONJUNCTIONS[:, -11]
C_RBSP_MLT = CLEANED_CONJUNCTIONS[:, -10]
C_RBSP_MLAT = CLEANED_CONJUNCTIONS[:, -9]
C_RBSP_CHORUS = CLEANED_CONJUNCTIONS[:, -8]
C_RBSP_DENSITY = CLEANED_CONJUNCTIONS[:, -7]
C_AVG_SME = CLEANED_CONJUNCTIONS[:, -6]
C_VAR_SME = CLEANED_CONJUNCTIONS[:, -5]
C_AVG_AVG_B = CLEANED_CONJUNCTIONS[:, -4]
C_AVG_FLOW_SPEED = CLEANED_CONJUNCTIONS[:, -3]
C_AVG_PROTON_DENSITY = CLEANED_CONJUNCTIONS[:, -2]
C_AVG_SYM_H = CLEANED_CONJUNCTIONS[:, -1]

# Magnitude of the POES-RBSP MLT separation, wrapped so that e.g. 23.5 h vs
# 0.5 h counts as 1 h rather than 23 h.
# NOTE(review): assumes both MLT columns are in hours on a 24 h clock — confirm.
C_ABS_MLT_DIFFERENCE = np.abs(np.mod(C_POES_MLT - C_RBSP_MLT + 12.0, 24.0) - 12.0)

# The SME variation column is stored as a variance; report it as a standard deviation.
C_STD_SME = np.sqrt(C_VAR_SME)

print("Creating documentation of dataset!")


with open(
    os.path.join(STAGE_3_folder, rf"CLEANED_CONJUNCTIONS_{VERSION}_{FIELD_MODEL}_{MODEL_TYPE}.txt"),
    "w",
) as f:

    f.write("\nConjunctions:\n")
    f.write(f"Number of conjunctions: {CLEANED_CONJUNCTIONS.shape[0]} [#]\n")
    f.write(
        f"Number lost from cleaning solar proton events: {CONJUNCTIONS.shape[0] - CLEANED_CONJUNCTIONS.shape[0]} [#]\n"
    )
    f.write(f"Minimum RBSP Time: {np.min(C_RBSP_TIME)} [seconds since unix epoch]\n")
    f.write(f"Maximum RBSP Time: {np.max(C_RBSP_TIME)} [seconds since unix epoch]\n")
    f.write(f"Minimum POES Time: {np.min(C_POES_TIME)} [seconds since unix epoch]\n")
    f.write(f"Maximum POES Time: {np.max(C_POES_TIME)} [seconds since unix epoch]\n")

    _write_difference(f, "\nL:\n", C_POES_LSTAR - C_RBSP_LSTAR, "L")

    # The labels in this section say "Absolute Difference", so all four
    # statistics are taken over the wrapped magnitude.
    f.write("\nMLT: \n")
    f.write(f"Mean Absolute Difference: {np.mean(C_ABS_MLT_DIFFERENCE)} [MLT]\n")
    f.write(f"Standard deviation of Absolute Difference {np.std(C_ABS_MLT_DIFFERENCE)} [MLT]\n")
    f.write(f"Minimum Absolute Difference : {np.min(C_ABS_MLT_DIFFERENCE)} [MLT]\n")
    f.write(f"Maximum Absolute Difference : {np.max(C_ABS_MLT_DIFFERENCE)} [MLT]\n")

    _write_summary(f, "MLAT", C_RBSP_MLAT, "degrees")

    _write_difference(f, "\nTime: \n", C_POES_TIME - C_RBSP_TIME, "s")

    _write_summary(f, f"{MODEL_TYPE} Chorus", C_RBSP_CHORUS, "pT")
    _write_summary(f, "Density", C_RBSP_DENSITY, "cm^-3")
    _write_summary(f, "SME", C_AVG_SME, "nT")
    _write_summary(f, "SME STD", C_STD_SME, "nT")
    _write_summary(f, "AVG_B", C_AVG_AVG_B, "nT")
    _write_summary(f, "Flow Speed", C_AVG_FLOW_SPEED, "km/s")
    _write_summary(f, "Proton Density", C_AVG_PROTON_DENSITY, "n/cc")
    _write_summary(f, "SYM_H", C_AVG_SYM_H, "nT")

print("Finished!")
print(f"Ending shape of conjunctions : {CLEANED_CONJUNCTIONS.shape}")

Starting shape of conjunctions list: (160847, 23)
Removing high energy solar proton events!


100%|████████████████████████████████████████████████████████████████████████████████| 309/309 [00:10<00:00, 28.45it/s]


Finished removing high energy solar proton events!
Saving!
Creating documentation of dataset!
Finished!
Ending shape of conjunctions : (159520, 23)


In [4]:
# Stage 4, Create datasets used for training, testing, etc
#
# Reloads the cleaned Stage 3 conjunction table for the selected version,
# field model and chorus band.

VERSION = "v4"
FIELD_MODEL = "T89"
MODEL_TYPE = "LOWER_BAND"

pdata_folder = os.path.abspath(r"./../processed_data/chorus_neural_network/")
STAGE_3_folder, STAGE_4_folder = (
    os.path.join(pdata_folder, stage_name, VERSION) for stage_name in ("STAGE_3", "STAGE_4")
)

# Pull the table into memory and release the archive handle.
with np.load(
    file=os.path.join(STAGE_3_folder, rf"CLEANED_CONJUNCTIONS_{VERSION}_{FIELD_MODEL}_{MODEL_TYPE}.npz")
) as CONJUNCTIONS_REFS:
    CONJUNCTIONS = CONJUNCTIONS_REFS["CONJUNCTIONS"]

In [5]:
# Load a single day of POES SEM MPE data; only its "energy" entry is used here.
POES = data_loader.load_raw_data_from_config(
    id=["POES", "SEM", "MPE"],
    start=datetime.datetime(2000, 1, 1),
    end=datetime.datetime(2000, 1, 2),
    satellite="n15",
)

ENERGIES = POES["energy"][0]

# Differences between successive entries 1..7 of the energy array.
ENERGIES_NEXT = ENERGIES[2:8]
ENERGIES_CURRENT = ENERGIES[1:7]
print(ENERGIES_NEXT)
print(ENERGIES_CURRENT)

DIFF_E = ENERGIES_NEXT - ENERGIES_CURRENT
print(DIFF_E)
print(len(DIFF_E))

In [6]:
print(CONJUNCTIONS.shape)

mission_start_date = datetime.datetime(year=2012, month=8, day=30, tzinfo=datetime.UTC)
mission_end_date = datetime.datetime(year=2019, month=10, day=19, tzinfo=datetime.UTC)

C_POES_TIME = CONJUNCTIONS[:, 0]

validation_start_date = datetime.datetime(year=2016, month=2, day=1, tzinfo=datetime.UTC)
validation_end_date = datetime.datetime(year=2016, month=3, day=1, tzinfo=datetime.UTC)
validation_times = (validation_start_date.timestamp() < C_POES_TIME) & (C_POES_TIME < validation_end_date.timestamp())

within_mission_time = (mission_start_date.timestamp() < C_POES_TIME) & (C_POES_TIME < mission_end_date.timestamp())

train_test_subset_selected = ~validation_times & within_mission_time
validation_subset_selected = validation_times & within_mission_time

DAY = np.zeros(shape=(C_POES_TIME.shape[0]))
print("Identifying Days of Data Points....")
for DAY_ID, dt in enumerate(rrule.rrule(rrule.DAILY, dtstart=mission_start_date, until=mission_end_date)):

    within_day = (dt.timestamp() <= C_POES_TIME) & (C_POES_TIME < (dt + datetime.timedelta(days=1)).timestamp())
    DAY[within_day] = DAY_ID

print(f"Min day: {np.min(DAY)}")
print(f"Max day: {np.max(DAY)}")

print(f"Number of conjunctions in validation set: {np.count_nonzero(validation_times)}")

# Split CONJUNCTIONS into named (N, 1) column vectors.
# Layout used here: columns 0-2 are POES time / L* / MLT, columns 3 .. -13 are
# POES flux channels, and the last 12 columns are RBSP and averaged driver
# quantities (indexed from the end).
C_POES_TIME = CONJUNCTIONS[:, 0, np.newaxis]
C_POES_LSTAR = CONJUNCTIONS[:, 1, np.newaxis]
C_POES_MLT = CONJUNCTIONS[:, 2, np.newaxis]

# Keep flux channels 1..6 and integrate them with the matching DIFF_E weights.
poes_flux_all_channels = CONJUNCTIONS[:, 3:-12]
C_POES_FLUX = poes_flux_all_channels[:, 1:7]
C_POES_FLUX_INTEGRATED = np.sum(C_POES_FLUX * DIFF_E, axis=1)[:, np.newaxis]

C_RBSP_TIME = CONJUNCTIONS[:, -12, np.newaxis]
C_RBSP_LSTAR = CONJUNCTIONS[:, -11, np.newaxis]
C_RBSP_MLT = CONJUNCTIONS[:, -10, np.newaxis]
C_RBSP_MLAT = CONJUNCTIONS[:, -9, np.newaxis]
C_RBSP_CHORUS = CONJUNCTIONS[:, -8, np.newaxis]
C_RBSP_DENSITY = CONJUNCTIONS[:, -7, np.newaxis]
C_AVG_SME = CONJUNCTIONS[:, -6, np.newaxis]
C_VAR_SME = CONJUNCTIONS[:, -5, np.newaxis]
C_AVG_AVG_B = CONJUNCTIONS[:, -4, np.newaxis]
C_AVG_FLOW_SPEED = CONJUNCTIONS[:, -3, np.newaxis]
C_AVG_PROTON_DENSITY = CONJUNCTIONS[:, -2, np.newaxis]
C_AVG_SYM_H = CONJUNCTIONS[:, -1, np.newaxis]

# Sanity check: print the shape of every column vector, one per line.
for column_vector in (
    C_RBSP_TIME,
    C_RBSP_LSTAR,
    C_RBSP_CHORUS,
    C_RBSP_DENSITY,
    C_RBSP_MLAT,
    C_POES_TIME,
    C_POES_LSTAR,
    C_POES_MLT,
    C_RBSP_MLT,
    C_POES_FLUX_INTEGRATED,
    C_AVG_SME,
    C_VAR_SME,
    C_AVG_AVG_B,
    C_AVG_FLOW_SPEED,
    C_AVG_PROTON_DENSITY,
    C_AVG_SYM_H,
):
    print(column_vector.shape)

# Normalisation statistics, computed over ALL conjunctions (no train/validation
# or threshold filtering is applied here).
#
# Every mean already ignores NaNs via np.nanmean; the standard deviations now
# use np.nanstd as well. With plain np.std a single NaN in a column made the
# saved STD_* value NaN while the matching MEAN_* value stayed finite.
mean_LSTAR = np.nanmean(C_POES_LSTAR)
std_LSTAR = np.nanstd(C_POES_LSTAR)

mean_MLAT = np.nanmean(C_RBSP_MLAT)
std_MLAT = np.nanstd(C_RBSP_MLAT)

# NOTE(review): these are log10 of the linear-space mean/std of the per-channel
# fluxes (not the mean/std of log10 values, and not of the integrated flux) --
# confirm this is what the consumer of MEAN_FLUXES / STD_FLUXES expects.
mean_fluxes = np.log10(np.nanmean(C_POES_FLUX))
std_fluxes = np.log10(np.nanstd(C_POES_FLUX))

mean_density = np.nanmean(C_RBSP_DENSITY)
std_density = np.nanstd(C_RBSP_DENSITY)

# Same log10-of-linear-statistic convention as the fluxes above.
mean_sme = np.log10(np.nanmean(C_AVG_SME))
std_sme = np.log10(np.nanstd(C_AVG_SME))

mean_avg_b = np.nanmean(C_AVG_AVG_B)
std_avg_b = np.nanstd(C_AVG_AVG_B)

mean_flow_speed = np.nanmean(C_AVG_FLOW_SPEED)
std_flow_speed = np.nanstd(C_AVG_FLOW_SPEED)

mean_avg_proton_density = np.nanmean(C_AVG_PROTON_DENSITY)
std_avg_proton_density = np.nanstd(C_AVG_PROTON_DENSITY)

mean_avg_sym_h = np.nanmean(C_AVG_SYM_H)
std_avg_sym_h = np.nanstd(C_AVG_SYM_H)


L_MAX = 9

# MLT (hours, 24 per revolution) converted to an angle in radians so it can be
# encoded as a point on a circle scaled by L*.
mlt_angle = (C_POES_MLT * 2 * np.pi) / 24.0

# Feature matrix, one row per conjunction. Column order:
#   0 L*, 1 L*·cos(MLT), 2 L*·sin(MLT), 3 RBSP MLAT, 4 RBSP density,
#   5 integrated POES flux, 6 avg SME, 7 var SME, 8 avg B
feature_columns = [
    C_POES_LSTAR,
    C_POES_LSTAR * np.cos(mlt_angle),
    C_POES_LSTAR * np.sin(mlt_angle),
    C_RBSP_MLAT,
    C_RBSP_DENSITY,
    C_POES_FLUX_INTEGRATED,
    C_AVG_SME,
    C_VAR_SME,
    C_AVG_AVG_B,
]
FEATURES = np.concatenate(feature_columns, axis=1)

# Quality cuts applied to both subsets.
chorus_greater_than_p1_pT = (1e-1 < C_RBSP_CHORUS.flatten())
# NOTE(review): the variable name says 1000 but the threshold applied is 100 --
# decide which one is intended and make the name and value agree.
poes_greater_than_1000 = (100 < C_POES_FLUX_INTEGRATED.flatten())

# Build each combined row mask once and reuse it for features, labels and day IDs.
passes_quality_cuts = chorus_greater_than_p1_pT & poes_greater_than_1000
training_rows = train_test_subset_selected & passes_quality_cuts
validation_rows = validation_subset_selected & passes_quality_cuts

FEATURES_T = FEATURES[training_rows, :]
FEATURES_V = FEATURES[validation_rows, :]

MODEL_LABELS = C_RBSP_CHORUS[training_rows]
MODEL_LABELS_V = C_RBSP_CHORUS[validation_rows]

DAY_T = DAY[training_rows]
DAY_V = DAY[validation_rows]

for selected_array in (FEATURES_T, MODEL_LABELS, FEATURES_V, MODEL_LABELS_V):
    print(selected_array.shape)

# Persist the model-ready arrays together with the normalisation statistics.
#
# TRAINING_MLT used to be written as the full, unfiltered C_POES_MLT column, so
# it had one row per conjunction and could not be matched row-for-row with
# FEATURES / LABELS / TRAINING_DAY_IDS. It is now filtered with the same row
# mask that produces the training features.
training_rows_for_mlt = train_test_subset_selected & chorus_greater_than_p1_pT & poes_greater_than_1000

np.savez(
    file=os.path.join(STAGE_4_folder, f"MODEL_READY_DATA_{VERSION}_{FIELD_MODEL}_{MODEL_TYPE}.npz"),
    # Training / validation samples (row-aligned within each group).
    FEATURES=FEATURES_T,
    LABELS=MODEL_LABELS,
    VALIDATION_FEATURES=FEATURES_V,
    VALIDATION_LABELS=MODEL_LABELS_V,
    TRAINING_DAY_IDS=DAY_T,
    VALIDATION_DAY_IDS=DAY_V,
    TRAINING_MLT=C_POES_MLT[training_rows_for_mlt],
    # Normalisation statistics.
    MEAN_L=mean_LSTAR,
    STD_L=std_LSTAR,
    MEAN_MLAT=mean_MLAT,
    STD_MLAT=std_MLAT,
    MEAN_DENSITY=mean_density,
    STD_DENSITY=std_density,
    MEAN_FLUXES=mean_fluxes,
    STD_FLUXES=std_fluxes,
    MEAN_SME=mean_sme,
    STD_SME=std_sme,
    MEAN_AVG_B=mean_avg_b,
    STD_AVG_B=std_avg_b,
    MEAN_FLOW_SPEED=mean_flow_speed,
    STD_FLOW_SPEED=std_flow_speed,
    MEAN_AVG_PROTON_DENSITY=mean_avg_proton_density,
    STD_AVG_PROTON_DENSITY=std_avg_proton_density,
    MEAN_AVG_SYM_H=mean_avg_sym_h,
    STD_AVG_SYM_H=std_avg_sym_h,
)

In [9]:


# Training-set scatter of the chorus label against FEATURES_T[:, -4], coloured
# by FEATURES_T[:, -3]. With the nine-column feature layout built when FEATURES
# is assembled, those are the integrated POES flux and the average SME.
# A dedicated figure is created so the points are not drawn on top of whatever
# figure happens to be current, and the axes / colour bar are labelled so the
# plot can be read on its own.
fig, ax = plt.subplots()
points = ax.scatter(
    x=FEATURES_T[:, -4],
    y=MODEL_LABELS,
    c=FEATURES_T[:, -3],
    s=1.0,
    norm=matplotlib.colors.LogNorm(vmin=10, vmax=1000),
)
ax.set_xscale("log")
ax.set_yscale("log")
ax.set_xlabel("Integrated POES flux")
ax.set_ylabel("RBSP chorus")
ax.set_title("Training set: chorus vs. integrated POES flux")
fig.colorbar(points, ax=ax, label="Average SME")

In [None]:
# 2-D histogram of chorus against the SME variance on log-log axes.
# Note that the "SME" column holds C_VAR_SME (not the average SME); "DENSITY"
# is included in the table but is not mapped to either axis.
histogram_table = {
    "SME": C_VAR_SME.flatten(),
    "CHORUS": C_RBSP_CHORUS.flatten(),
    "DENSITY" : C_RBSP_DENSITY.flatten(),
}
sns.histplot(histogram_table, x="SME", y="CHORUS", bins=50, log_scale=(True, True))

In [10]:

# All conjunctions: chorus against the square root of the SME variance,
# coloured by the RBSP density column. A dedicated figure is created so the
# points are not drawn on top of whatever figure happens to be current, and the
# axes / colour bar are labelled so the plot can be read on its own.
fig, ax = plt.subplots()
points = ax.scatter(
    x=np.sqrt(C_VAR_SME),
    y=C_RBSP_CHORUS,
    c=C_RBSP_DENSITY,
    s=0.5,
    norm=matplotlib.colors.LogNorm(vmin=1, vmax=100),
)
ax.set_xscale("log")
ax.set_yscale("log")
ax.set_xlabel("sqrt(SME variance)")
ax.set_ylabel("RBSP chorus")
ax.set_title("Chorus vs. SME variability")
fig.colorbar(points, ax=ax, label="RBSP density")