In [7]:
import os
import sys

# caution: path[0] is reserved for script path (or '' in REPL).
sys.path.insert(1, os.path.abspath("./../src"))

import datetime
import importlib

import astropy
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tqdm

from sklearnex import patch_sklearn
patch_sklearn()
import sklearn

import chorus_machine_learning_helper
import data_loader
import dynamic_chorus_model
import plot_tools

# Re-import the project-local modules so that edits made to their source files
# since the first import are picked up without restarting the kernel.
importlib.reload(data_loader)
importlib.reload(dynamic_chorus_model)
importlib.reload(chorus_machine_learning_helper)
importlib.reload(plot_tools)

%matplotlib qt

Intel(R) Extension for Scikit-learn* enabled (https://github.com/uxlfoundation/scikit-learn-intelex)


SyntaxError: invalid syntax (dynamic_chorus_model.py, line 71)

In [None]:
# Dataset tag and chorus band; both are embedded in the file names used below.
VERSION, MODEL_TYPE = "v1", "LOWER_BAND"

# Directory layout, resolved relative to the current working directory.
pdata_folder = os.path.abspath(
    os.path.join(os.pardir, "processed_data", "chorus_neural_network")
)
rbsp_chorus_folder = os.path.join(pdata_folder, "observed_chorus")
output_folder = os.path.join(pdata_folder, "models", VERSION)

# Length of one time window in seconds (used as the step over unix timestamps).
T_SIZE = 31860 / 2
# Number of random query points drawn per time window.
SAMPLING_SIZE = 100_000

# Calendar year processed by the single-year cells.
year = 2012

In [6]:
# Load one day of POES/SEM/MPE data from satellite n15 only to read off the
# energy grid; the date itself is not used anywhere else in this cell.
energy_probe_start = datetime.datetime(year=2000, month=1, day=1)
energy_probe_end = datetime.datetime(year=2000, month=1, day=2)

POES_temp = data_loader.load_raw_data_from_config(
    id=["POES", "SEM", "MPE"],
    start=energy_probe_start,
    end=energy_probe_end,
    satellite="n15",
)

# First record of the "energy" entry, and the spacing between consecutive entries.
ENERGIES = POES_temp["energy"][0]
upper_edges, lower_edges = ENERGIES[1:], ENERGIES[:-1]
DIFF_E = upper_edges - lower_edges


In [4]:
%%time

RBSP = []

for SATID in ["A", "B"]:

    # LOAD THE OBSERVED CHORUS
    print(f"Began loading RBSP Data for year: {year}")
    refs = np.load(
        file=os.path.join(rbsp_chorus_folder, rf"observed_chorus_{year}_{SATID}_{MODEL_TYPE}.npz"),
        allow_pickle=True,
    )
    PROBE = {}
    PROBE["UNIX_TIME"] = refs["UNIX_TIME"]
    PROBE["MLT"] = refs["MLT"]
    PROBE["MLAT"] = refs["MLAT"]
    PROBE["L"] = refs["L"]
    PROBE["CHORUS"] = refs["CHORUS"]

    refs.close()

    print(f"\nRBSP-{SATID} SHAPES BEFORE PREPROCESSING:")
    print(PROBE["UNIX_TIME"].shape)
    print(PROBE["MLT"].shape)
    print(PROBE["L"].shape)
    print(PROBE["MLAT"].shape)
    print(PROBE["CHORUS"].shape)

    order = np.argsort(PROBE["UNIX_TIME"])
    PROBE["UNIX_TIME"] = PROBE["UNIX_TIME"][order]
    PROBE["MLT"] = PROBE["MLT"][order]
    PROBE["L"] = PROBE["L"][order]
    PROBE["MLAT"] = PROBE["MLAT"][order]
    PROBE["CHORUS"] = PROBE["CHORUS"][order]

    #t_Lpp, Lpp = chorus_machine_learning_helper.load_plasmapause_filter(year=year)
    #Lpp_i = np.interp(x=PROBE["UNIX_TIME"], xp=t_Lpp, fp=Lpp, left=None, right=None)
    #inside_plasmasphere = PROBE["L"] < Lpp_i
    #PROBE["CHORUS"][inside_plasmasphere] = 0

    print(f"\nRBSP-{SATID} SHAPES AFTER REMOVING POINTS OUTSIDE BINS and INSIDE PLASMASPHERE:")
    print(PROBE["UNIX_TIME"].shape)
    print(PROBE["MLT"].shape)
    print(PROBE["L"].shape)
    print(PROBE["MLAT"].shape)
    print(PROBE["CHORUS"].shape)

    RBSP.append(PROBE)

print(f"RBSP Data loaded for year : {year}")
print(f"Began loading POES Data for year : {year}")

POES = chorus_machine_learning_helper.load_MPE_year(year=year)

print(f"Finished loading POES data for year : {year}")

SUPERMAG = chorus_machine_learning_helper.load_SUPERMAG_SME_year(year)

print(f"Loading HP data for year : {year}")

HP_UNIX_TIME, HP = chorus_machine_learning_helper.load_hp30(os.path.join(pdata_folder, "Hp30_Ap30_1996_2025.txt"))

tstart = datetime.datetime(year=year, month=1, day=1, tzinfo=datetime.UTC).timestamp()
tend = datetime.datetime(year=year + 1, month=1, day=1, tzinfo=datetime.UTC).timestamp()

order = np.argsort(HP_UNIX_TIME)
HP_UNIX_TIME = HP_UNIX_TIME[order]
HP = HP[order]

time_mask_hp = np.searchsorted(a=HP_UNIX_TIME, v=[tstart, tend])
HP_UNIX_TIME = HP_UNIX_TIME[time_mask_hp[0]:time_mask_hp[-1]]
HP = HP[time_mask_hp[0]:time_mask_hp[-1]]

print(f"Finished Loading HP data for year : {year}")


Began loading RBSP Data for year: 2012

RBSP-A SHAPES BEFORE PREPROCESSING:
(3220864,)
(3220864,)
(3220864,)
(3220864,)
(3220864,)





RBSP-A SHAPES AFTER REMOVING POINTS OUTSIDE BINS and INSIDE PLASMASPHERE:
(3090772,)
(3090772,)
(3090772,)
(3090772,)
(3090772,)
Began loading RBSP Data for year: 2012

RBSP-B SHAPES BEFORE PREPROCESSING:
(3179153,)
(3179153,)
(3179153,)
(3179153,)
(3179153,)

RBSP-B SHAPES AFTER REMOVING POINTS OUTSIDE BINS and INSIDE PLASMASPHERE:
(3052023,)
(3052023,)
(3052023,)
(3052023,)
(3052023,)
RBSP Data loaded for year : 2012
Began loading POES Data for year : 2012
Finished loading POES data for year : 2012
Finished loading POES data for year : 2012
Began loading SUPERMAG data for year : 2012
Finished loading SUPERMAG data for year : 2012
Loading HP data for year : 2012


  hp = pd.read_csv(os.path.abspath(path), delim_whitespace=True)


Finished Loading HP data for year : 2012
CPU times: total: 46.8 s
Wall time: 1min 31s


In [5]:
%%time

X_conditional_total = []
X_convolutional_total = []
y_total = []

i = 0

for tcurr in np.arange(tstart, tend, T_SIZE):

    POES_L = []
    POES_MLT = []
    POES_FLUX = []

    for SAT in POES:
        TIME_RANGE = np.searchsorted(SAT["UNIX_TIME"], v=[tcurr, tcurr + T_SIZE])
        POES_L.append(SAT["L"][TIME_RANGE[0] : TIME_RANGE[-1]])
        POES_MLT.append(SAT["MLT"][TIME_RANGE[0] : TIME_RANGE[-1]])
        POES_FLUX.append(SAT["BLC_Flux"][TIME_RANGE[0] : TIME_RANGE[-1], :])

    POES_L = np.hstack(POES_L)
    POES_MLT = np.hstack(POES_MLT)
    POES_FLUX = np.vstack(POES_FLUX)

    if POES_L.shape[0] == 0:
        continue

    RBSP_L = []
    RBSP_MLT = []
    RBSP_CHORUS = []
    RBSP_MLAT = []

    for PROBE in RBSP:

        TIME_RANGE = np.searchsorted(
            a=PROBE["UNIX_TIME"],
            v=[tcurr, tcurr + T_SIZE],
        )

        RBSP_L.append(PROBE["L"][TIME_RANGE[0] : TIME_RANGE[1]])
        RBSP_MLT.append(PROBE["MLT"][TIME_RANGE[0] : TIME_RANGE[1]])
        RBSP_CHORUS.append(PROBE["CHORUS"][TIME_RANGE[0] : TIME_RANGE[1]])
        RBSP_MLAT.append(PROBE["MLAT"][TIME_RANGE[0] : TIME_RANGE[1]])

    RBSP_L = np.hstack(RBSP_L)
    RBSP_MLT = np.hstack(RBSP_MLT)
    RBSP_CHORUS = np.hstack(RBSP_CHORUS)
    RBSP_MLAT = np.hstack(RBSP_MLAT)

    if RBSP_L.shape[0] == 0:
        continue

    #--------------------------------------------------------------------------------------------------------#

    POES_theta = (POES_MLT / 24) * (2 * np.pi)

    UNDER_130_KeV_FLUX = np.nansum((POES_FLUX[:, :7] * DIFF_E[:7]), axis=1)
    TOTAL_FLUX = np.nansum((POES_FLUX[:, :-1] * DIFF_E), axis=1)
    FLUX_RATIO = UNDER_130_KeV_FLUX / TOTAL_FLUX

    # Query points
    r_query = np.abs(np.random.normal(loc=0, scale=5, size=SAMPLING_SIZE))
    theta_query = np.abs(np.mod(np.random.normal(loc=0, scale=np.pi, size=SAMPLING_SIZE), 2*np.pi))

    ratio_interp = (
        dynamic_chorus_model.interpolate_around_points(
            POES_L,
            POES_theta,
            FLUX_RATIO,
            r_query,
            theta_query,
            k=5
        )
    )


    RBSP_theta = (RBSP_MLT / 24) * (2 * np.pi)

    chorus_interp = (
        dynamic_chorus_model.interpolate_around_points(
            RBSP_L,
            RBSP_theta,
            RBSP_CHORUS,
            r_query,
            theta_query,
            k=5
        )
    )

    mlat_interp = (
        dynamic_chorus_model.interpolate_around_points(
            RBSP_L,
            RBSP_theta,
            RBSP_MLAT,
            r_query,
            theta_query,
            k=5
        )
    )


    labeled = np.isfinite(chorus_interp) & np.isfinite(ratio_interp)

    if not np.any(labeled):
        continue

    L_sampled = r_query[labeled]
    MLT_sampled = (theta_query[labeled] * 24) / (2 * np.pi)
    MLAT_sampled = mlat_interp[labeled]
    flux_sampled = ratio_interp[labeled]
    chorus_sampled = chorus_interp[labeled]

    time_mask_hp_2 = np.searchsorted(a=HP_UNIX_TIME, v=[tcurr, tcurr + T_SIZE])
    max_hp = np.nanmax(HP[time_mask_hp_2[0] : time_mask_hp_2[-1]])
    hp_sampled = np.array([max_hp for l in range(len(L_sampled))])

    t_data = np.array([tcurr for l in range(len(L_sampled))])

    X_conditional = np.hstack([np.expand_dims(L_sampled, axis=1),
                               np.expand_dims(MLT_sampled, axis=1),
                               np.expand_dims(MLAT_sampled, axis=1),
                               np.expand_dims(flux_sampled, axis=1),
                               np.expand_dims(hp_sampled, axis=1),
                               np.expand_dims(t_data, axis=1)])

    y = chorus_sampled

    time_mask_sme = np.searchsorted(a=SUPERMAG["UNIX_TIME"], v=[tcurr - T_SIZE])
    X_convolutional = SUPERMAG["SME"][time_mask_sme[0]:time_mask_sme[0] + 512]

    if len(X_convolutional) < 512:
        continue

    X_convolutional = np.vstack([np.expand_dims(X_convolutional, axis=0) for i in range(len(L_sampled))])

    X_conditional_total.append(X_conditional)
    X_convolutional_total.append(X_convolutional)
    y_total.append(y)

    if i % 10 == 0:
        print(f"Total number of conjunctions: {len(y_total)}")
        print(datetime.datetime.fromtimestamp(tcurr))

    i += 1

Total number of conjunctions: 1
2012-08-30 14:43:30
Total number of conjunctions: 11
2012-09-01 10:58:30
Total number of conjunctions: 21
2012-09-03 07:13:30
Total number of conjunctions: 31
2012-09-05 03:28:30
Total number of conjunctions: 41
2012-09-06 23:43:30
Total number of conjunctions: 51
2012-09-08 19:58:30
Total number of conjunctions: 61
2012-09-10 20:39:00
Total number of conjunctions: 71
2012-09-12 16:54:00
Total number of conjunctions: 81
2012-09-14 13:09:00
Total number of conjunctions: 91
2012-09-16 09:24:00
Total number of conjunctions: 101
2012-09-18 05:39:00
Total number of conjunctions: 111
2012-09-20 01:54:00
Total number of conjunctions: 121
2012-09-21 22:09:00
Total number of conjunctions: 131
2012-09-23 18:24:00
Total number of conjunctions: 141
2012-09-25 14:39:00
Total number of conjunctions: 151
2012-09-27 10:54:00
Total number of conjunctions: 161
2012-09-29 07:09:00
Total number of conjunctions: 171
2012-10-01 03:24:00
Total number of conjunctions: 181
2012-

In [6]:
# Concatenate the per-window blocks into single arrays and store the raw
# (uncleaned) dataset for this year.
X_conditional_total = np.vstack(X_conditional_total)
X_convolutional_total = np.vstack(X_convolutional_total)
y_total = np.hstack(y_total)

print(X_conditional_total.shape)
print(X_convolutional_total.shape)
print(y_total.shape)

# np.savez does not create missing directories; make sure the target exists
# so a long computation is not lost on a fresh checkout.
os.makedirs(output_folder, exist_ok=True)

np.savez(
    file=os.path.join(output_folder, rf"raw_dataset_{VERSION}_{MODEL_TYPE}_{year}.npz"),
    X_conditional_total=X_conditional_total,
    X_convolutional_total=X_convolutional_total,
    y_total=y_total
)


(530081, 6)
(530081, 512)
(530081,)


In [7]:
# Clean the raw dataset of one year: drop rows with non-finite or negative
# values, drop every row whose window time falls inside a (padded) solar proton
# event, then save the result.


def _sep_stamp_to_unix(stamp):
    """Convert one START/END entry of the solar proton event list to unix seconds.

    The entry is read by fixed character positions: year [0:4], month [5:7],
    day [8:10], hour [11:13], minute [13:15]; seconds are set to 0.
    """
    stamp = stamp.strip()
    fields = {
        "year": int(stamp[0:4]),
        "month": int(stamp[5:7]),
        "day": int(stamp[8:10]),
        "hour": int(stamp[11:13]),
        "minute": int(stamp[13:15]),
        "second": 0,
    }
    return astropy.time.Time(fields, format="ymdhms", scale="utc").unix


# Read the arrays back from disk instead of relying on whatever the kernel still
# holds in memory; this makes the cell re-runnable on its own.
with np.load(
    file=os.path.join(output_folder, rf"raw_dataset_{VERSION}_{MODEL_TYPE}_{year}.npz")
) as dataset:
    X_conditional_total = dataset["X_conditional_total"]
    X_convolutional_total = dataset["X_convolutional_total"]
    y_total = dataset["y_total"]

print("Reading Solar Proton Event List")

SOLAR_PROTON_EVENT_LIST = pd.read_csv(
    os.path.join(pdata_folder, r"SOLAR_PROTON_EVENT_LIST_1976_2024.csv")
)

print("Finished Reading Solar Proton Event List")

print("\nBefore removing non-valid values")
print(f"Conditional Shape: {X_conditional_total.shape}")
print(f"Convolutional Shape: {X_convolutional_total.shape}")
print(f"Labels Shape: {y_total.shape}")

order_to_sort_conjunctions = np.argsort(
    X_conditional_total[:, -1]
)  # Sorted based on POES Conjunction time!
X_conditional_total = X_conditional_total[order_to_sort_conjunctions, :]
X_convolutional_total = X_convolutional_total[order_to_sort_conjunctions, :]
y_total = y_total[order_to_sort_conjunctions]

all_valid_conditional = np.all(np.isfinite(X_conditional_total), axis=1)
all_valid_convolutional = np.all(np.isfinite(X_convolutional_total), axis=1)
valid_y = np.isfinite(y_total) & (y_total >= 0)

all_valid = all_valid_conditional & all_valid_convolutional & valid_y

X_conditional_total = X_conditional_total[all_valid, :]
X_convolutional_total = X_convolutional_total[all_valid, :]
y_total = y_total[all_valid]

print("\nAfter removing non-valid values")
print(f"Conditional Shape: {X_conditional_total.shape}")
print(f"Convolutional Shape: {X_convolutional_total.shape}")
print(f"Labels Shape: {y_total.shape}")

times_sorted = X_conditional_total[:, -1]

start_of_sep_events_utc = SOLAR_PROTON_EVENT_LIST["START"]
end_of_sep_events_utc = SOLAR_PROTON_EVENT_LIST["END"]
zipped_events = list(zip(start_of_sep_events_utc, end_of_sep_events_utc))

print("Removing high energy solar proton events!")

# Mark rows to drop in a boolean mask and apply it once after the loop.
# Slicing rows out of the arrays inside the loop would shift their indices
# away from `times_sorted`, so later events would remove the wrong rows; it
# would also copy the full arrays once per event.
keep = np.ones(times_sorted.shape[0], dtype=bool)

for S, E in tqdm.tqdm(zipped_events):

    S_UNIX = _sep_stamp_to_unix(S)
    E_UNIX = _sep_stamp_to_unix(E)

    # Pad each event by one window length on both sides.
    RANGE_TO_REMOVE = np.searchsorted(a=times_sorted, v=[S_UNIX - T_SIZE, E_UNIX + T_SIZE])
    keep[RANGE_TO_REMOVE[0] : RANGE_TO_REMOVE[1]] = False

X_conditional_total = X_conditional_total[keep, :]
X_convolutional_total = X_convolutional_total[keep, :]
y_total = y_total[keep]

print("Finished removing high energy solar proton events!")

print("\nAfter removing solar proton events")
print(f"Conditional Shape: {X_conditional_total.shape}")
print(f"Convolutional Shape: {X_convolutional_total.shape}")
print(f"Labels Shape: {y_total.shape}")

print("Saving!")

np.savez(
    file=os.path.join(output_folder, rf"spe_cleaned_dataset_{VERSION}_{MODEL_TYPE}_{year}.npz"),
    X_conditional_total=X_conditional_total,
    X_convolutional_total=X_convolutional_total,
    y_total=y_total
)

Reading Solar Proton Event List
Finished Reading Solar Proton Event List

Before removing non-valid values
Conditional Shape: (530081, 6)
Convolutional Shape: (530081, 512)
Labels Shape: (530081,)

After removing non-valid values
Conditional Shape: (530081, 6)
Convolutional Shape: (530081, 512)
Labels Shape: (530081,)
Removing high energy solar proton events!


100%|████████████████████████████████████████████████████████████████████████████████| 309/309 [02:11<00:00,  2.34it/s]


Finished removing high energy solar proton events!

After removing solar proton events
Conditional Shape: (523929, 6)
Convolutional Shape: (523929, 512)
Labels Shape: (523929,)
Saving!


In [None]:
# Range of the labels for the current year: power as stored, and amplitude
# as the square root of power.
print(f"Min chorus power: {np.min(y_total)}")
print(f"Max chorus power: {np.max(y_total)}")

# These two lines print sqrt(power), so they are labelled as amplitude.
print(f"Min chorus amplitude: {np.min(np.sqrt(y_total))}")
print(f"Max chorus amplitude: {np.max(np.sqrt(y_total))}")

In [8]:
# Merge the SEP-cleaned datasets of all years into one dataset and replace the
# MLT column by its (cos, sin) encoding.
conditional_parts = []
convolutional_parts = []
label_parts = []

# The loop variable is deliberately not called `year`: that would overwrite the
# configured year and silently change the file names used by other cells.
for dataset_year in range(2012, 2020):

    with np.load(
        file=os.path.join(output_folder, rf"spe_cleaned_dataset_{VERSION}_{MODEL_TYPE}_{dataset_year}.npz")
    ) as yearly:
        conditional_parts.append(yearly["X_conditional_total"])
        convolutional_parts.append(yearly["X_convolutional_total"])
        label_parts.append(yearly["y_total"])

X_conditional = np.vstack(conditional_parts)
X_convolutional = np.vstack(convolutional_parts)
y = np.hstack(label_parts)

print(X_conditional.shape)
print(X_convolutional.shape)
print(y.shape)

# Column 1 holds MLT in hours; encode it as a point on the unit circle so that
# 0 h and 24 h map to the same value.
mlt_angle = (X_conditional[:, 1:2] / 24) * (2 * np.pi)
transformed_mlt = np.hstack([np.cos(mlt_angle), np.sin(mlt_angle)])

# After this line column 1 is cos(MLT), column 2 is sin(MLT) and every later
# column has moved one position to the right.
X_conditional = np.hstack([X_conditional[:, 0:1], transformed_mlt, X_conditional[:, 2:]])

print(X_conditional.shape)
print(X_convolutional.shape)
print(y.shape)

np.savez(
    file=os.path.join(output_folder, rf"total_dataset_{VERSION}_{MODEL_TYPE}.npz"),
    X_conditional=X_conditional,
    X_convolutional=X_convolutional,
    y=y
)

(9852471, 6)
(9852471, 512)
(9852471,)
(9852471, 7)
(9852471, 512)
(9852471,)


In [145]:
# Report the label range twice: as stored (power) and as its square root (amplitude).
chorus_amplitude = np.sqrt(y)

print(f"Min chorus power: {np.min(y)}")
print(f"Max chorus power: {np.max(y)}")

print(f"Min chorus amplitude: {np.min(chorus_amplitude)}")
print(f"Max chorus amplitude: {np.max(chorus_amplitude)}")

Min chorus power: 0.0
Max chorus power: 683759.2197013681
Min chorus power: 0.0
Max chorus power: 826.897345322482


In [158]:
# Chorus amplitude binned by MLT (x) and L (y).
# The merged X_conditional stores MLT as cos(MLT) in column 1 and sin(MLT) in
# column 2, so the MLT in hours has to be rebuilt before it can be plotted on a
# 0-24 axis. Plotting column 1 directly would put cos(MLT) on that axis.
mlt_angle = np.mod(np.arctan2(X_conditional[:, 2], X_conditional[:, 1]), 2 * np.pi)
mlt_hours = (mlt_angle * 24) / (2 * np.pi)

plot_tools.plot_2d_heatmap(mlt_hours,
                           X_conditional[:, 0],
                           np.sqrt(y),
                           bins=20,
                           xlim=(0.0, 24.0),
                           ylim=(2, 7),
                           xtitle="MLT",
                           ytitle="L",
                           ztitle="Average Chorus Amplitude (pT)",
                           title="Average Chorus Amplitude vs L vs MLT",
                           norm="symlog",
                           plot_density = True)

In [None]:
# Chorus power binned by flux ratio (x, third column from the end) and by the
# geomagnetic index column (y, second column from the end).
# That index column is built from the Hp30 data in this notebook, so the labels
# say Hp30 rather than Kp, matching the other Hp plots.
plot_tools.plot_2d_heatmap(X_conditional[:, -3],
                           X_conditional[:, -2],
                           y,
                           bins=20,
                           xlim=(0.0, 1.0),
                           ylim=(0, 8),
                           xtitle="Ratio of flux from (30 KeV to 130 KeV) / Total",
                           ytitle="Hp30 (max in 4h)",
                           ztitle="Average Chorus Power (pT^2)",
                           title="Average Chorus Power vs Hp vs Flux Ratio",
                           norm="symlog")

In [None]:
# Chorus power binned by flux ratio (x) and L-shell (y), on a finer 50-bin grid.
flux_ratio_column = X_conditional[:, -3]
l_shell_column = X_conditional[:, 0]

plot_tools.plot_2d_heatmap(
    flux_ratio_column,
    l_shell_column,
    y,
    bins=50,
    xlim=(0.0, 1.0),
    ylim=(2, 7),
    xtitle="Ratio of flux from (30 KeV to 130 KeV) / Total",
    ytitle="L-Shell",
    ztitle="Average Chorus Power (pT^2)",
    title="Average Chorus Power vs L vs Flux Ratio",
    norm="symlog",
)

In [None]:
# Store the current X_conditional / X_convolutional / y under the "model_ready" name.
# NOTE(review): the file name uses whatever value `year` holds when this cell
# runs - confirm that a year suffix is intended for this dataset.
model_ready_path = os.path.join(
    output_folder, rf"model_ready_dataset_{VERSION}_{MODEL_TYPE}_{year}.npz"
)

np.savez(
    file=model_ready_path,
    X_conditional=X_conditional,
    X_convolutional=X_convolutional,
    y=y,
)

In [None]:
# Chorus power binned by the Hp column (x) and L-shell (y).
hp_column = X_conditional[:, -2]
l_shell_column = X_conditional[:, 0]

plot_tools.plot_2d_heatmap(
    hp_column,
    l_shell_column,
    y,
    bins=20,
    xlim=(0.0, 8.0),
    ylim=(2, 7),
    xtitle="Hp30 (max in 4h)",
    ytitle="L-Shell",
    ztitle="Average Chorus Power (pT^2)",
    title="Average Chorus Power vs L vs Hp",
    norm="symlog",
)

In [None]:
# Chorus amplitude (sqrt of power) binned by the Hp column (x) and L-shell (y),
# using an explicit symmetric-log colour scale instead of the "symlog" shortcut.
hp_column = X_conditional[:, -2]
l_shell_column = X_conditional[:, 0]
chorus_amplitude = np.sqrt(y)
amplitude_norm = matplotlib.colors.SymLogNorm(vmin=0, vmax=1000, linthresh=10)

plot_tools.plot_2d_heatmap(
    hp_column,
    l_shell_column,
    chorus_amplitude,
    bins=20,
    xlim=(0.0, 8.0),
    ylim=(2, 7),
    xtitle="Hp30 (max in 4h)",
    ytitle="L-Shell",
    ztitle="Average Chorus Amplitude (pT)",
    title="Average Chorus Amplitude vs L vs Hp",
    norm=amplitude_norm,
)

In [None]:
# Debug view of the last processed window: interpolated flux ratio at the query
# points ("+") over the POES observations it was interpolated from ("x").
# The observations are coloured by FLUX_RATIO, the quantity ratio_interp is
# built from; the previously referenced FLUX_INTEGRATED is not defined anywhere
# in this notebook. Both layers share one linear 0-1 colour scale.
# NOTE(review): assumes the ratio lies in [0, 1], as the heatmap limits do - confirm.
flux_ratio_norm = matplotlib.colors.Normalize(vmin=0.0, vmax=1.0)

plt.scatter(x=r_query * np.cos(theta_query), y=r_query * np.sin(theta_query), c=ratio_interp, marker="+", s=20, norm=flux_ratio_norm)
plt.scatter(x=POES_L * np.cos(POES_theta), y=POES_L * np.sin(POES_theta), c=FLUX_RATIO, marker="x", s=30, norm=flux_ratio_norm)
plt.colorbar()
plt.xlim(-8, 8)
plt.ylim(-8, 8)
plt.show()

In [None]:
# Debug view of the last processed window: interpolated chorus at the query
# points ("+") over the RBSP observations ("x"), both on a log colour scale.
query_x = r_query * np.cos(theta_query)
query_y = r_query * np.sin(theta_query)
rbsp_x = RBSP_L * np.cos(RBSP_theta)
rbsp_y = RBSP_L * np.sin(RBSP_theta)

plt.scatter(
    x=query_x,
    y=query_y,
    c=chorus_interp,
    marker="+",
    s=20,
    norm=matplotlib.colors.LogNorm(vmin=1e-1, vmax=1e2),
)
plt.scatter(
    x=rbsp_x,
    y=rbsp_y,
    c=RBSP_CHORUS,
    marker="x",
    s=30,
    norm=matplotlib.colors.LogNorm(vmin=1e-1, vmax=1e2),
)
plt.colorbar()
plt.xlim(-8, 8)
plt.ylim(-8, 8)
plt.show()

In [None]:
# Debug view of the last processed window: only the query points that received
# both a finite chorus value and a finite flux ratio.
labeled = np.isfinite(chorus_interp) & np.isfinite(ratio_interp)

labeled_x = r_query[labeled] * np.cos(theta_query[labeled])
labeled_y = r_query[labeled] * np.sin(theta_query[labeled])

plt.scatter(
    x=labeled_x,
    y=labeled_y,
    c=chorus_interp[labeled],
    marker="+",
    s=20,
    norm=matplotlib.colors.LogNorm(vmin=1e-1, vmax=1e2),
)

plt.xlim(-8, 8)
plt.ylim(-8, 8)
plt.show()