In [1]:
import sys
import os
# caution: path[0] is reserved for script path (or '' in REPL).
sys.path.insert(1, os.path.abspath('./../src'))

import numpy as np
import data_loader
import rbsp_chorus_tool
import plot_tools
import chorus_machine_learning_helper
import datetime
from cdflib.epochs_astropy import CDFAstropy as cdfepoch
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import pandas as pd
import astropy.time
import tqdm

import importlib
importlib.reload(data_loader)
importlib.reload(plot_tools)
importlib.reload(rbsp_chorus_tool)
importlib.reload(chorus_machine_learning_helper)

import xgboost as xgb


SMALL_SIZE = 12
MEDIUM_SIZE = 16
BIGGER_SIZE = 20

plt.rc('font', size=SMALL_SIZE)          # controls default text sizes
plt.rc('axes', titlesize=MEDIUM_SIZE)     # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE)    # fontsize of the x and y labels
plt.rc('xtick', labelsize=MEDIUM_SIZE)    # fontsize of the tick labels
plt.rc('ytick', labelsize=MEDIUM_SIZE)    # fontsize of the tick labels
plt.rc('legend', fontsize=8)    # legend fontsize
plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title


%matplotlib qt

In [2]:
start = datetime.datetime(year = 2016, month = 1, day = 1, hour = 0, minute = 0, second = 0)
end = datetime.datetime(year = 2016, month = 4, day = 1, hour = 0, minute = 0, second = 0)
epoch_bin_size = datetime.timedelta(hours = 6)

#L
L_bin_size = 0.25
L_start = 3.5
L_end = 7.0

#MLT

MLT_min_for_L_v_Time_Plots = 3
MLT_max_for_L_v_Time_Plots = 15

In [3]:
print(f"Began loading RBSP Data for year: {start.year}")
refs = np.load(f"./../processed_data_chorus_neural_network/STAGE_1/RBSP_OBSERVED_CHORUS_{start.year}.npz")
RBSP_A = {}
RBSP_A["EPOCH"] = refs["EPOCH_A"]
RBSP_A["MLT"] = refs["MLT_A"]
RBSP_A["L"] = refs["L_A"]
RBSP_A["CHORUS"] = refs["CHORUS_A"]

RBSP_B = {}
RBSP_B["EPOCH"] = refs["EPOCH_B"]
RBSP_B["MLT"] = refs["MLT_B"]
RBSP_B["L"] = refs["L_B"]
RBSP_B["CHORUS"] = refs["CHORUS_B"]

refs.close()

RBSP_A["UNIX_TIME"] = cdfepoch.unixtime(RBSP_A["EPOCH"])
RBSP_B["UNIX_TIME"] = cdfepoch.unixtime(RBSP_B["EPOCH"])

order_A = np.argsort(RBSP_A["UNIX_TIME"])
order_B = np.argsort(RBSP_B["UNIX_TIME"])

RBSP_A["UNIX_TIME"] = RBSP_A["UNIX_TIME"][order_A]
RBSP_A["EPOCH"] = RBSP_A["EPOCH"][order_A]
RBSP_A["MLT"] = RBSP_A["MLT"][order_A]
RBSP_A["L"] = RBSP_A["L"][order_A]
RBSP_A["CHORUS"] = RBSP_A["CHORUS"][order_A]

RBSP_B["UNIX_TIME"] = RBSP_B["UNIX_TIME"][order_B]
RBSP_B["EPOCH"] = RBSP_B["EPOCH"][order_B]
RBSP_B["MLT"] = RBSP_B["MLT"][order_B]
RBSP_B["L"] = RBSP_B["L"][order_B]
RBSP_B["CHORUS"] = RBSP_B["CHORUS"][order_B]

within_time_range_of_storm_A = (start.timestamp() < RBSP_A["UNIX_TIME"]) & (RBSP_A["UNIX_TIME"] < end.timestamp())
within_time_range_of_storm_B = (start.timestamp() < RBSP_B["UNIX_TIME"]) & (RBSP_B["UNIX_TIME"] < end.timestamp())

RBSP_A["UNIX_TIME"] = RBSP_A["UNIX_TIME"][within_time_range_of_storm_A]
RBSP_A["EPOCH"] = RBSP_A["EPOCH"][within_time_range_of_storm_A]
RBSP_A["MLT"] = RBSP_A["MLT"][within_time_range_of_storm_A]
RBSP_A["L"] = RBSP_A["L"][within_time_range_of_storm_A]
RBSP_A["CHORUS"] = RBSP_A["CHORUS"][within_time_range_of_storm_A]

RBSP_B["UNIX_TIME"] = RBSP_B["UNIX_TIME"][within_time_range_of_storm_B]
RBSP_B["EPOCH"] = RBSP_B["EPOCH"][within_time_range_of_storm_B]
RBSP_B["MLT"] = RBSP_B["MLT"][within_time_range_of_storm_B]
RBSP_B["L"] = RBSP_B["L"][within_time_range_of_storm_B]
RBSP_B["CHORUS"] = RBSP_B["CHORUS"][within_time_range_of_storm_B]

RBSP_WITHIN_MLT_RANGE_A = (MLT_min_for_L_v_Time_Plots < RBSP_A["MLT"]) & (RBSP_A["MLT"] < MLT_max_for_L_v_Time_Plots)
RBSP_WITHIN_MLT_RANGE_B = (MLT_min_for_L_v_Time_Plots < RBSP_B["MLT"]) & (RBSP_B["MLT"] < MLT_max_for_L_v_Time_Plots)

Began loading RBSP Data for year: 2016


In [4]:
cumulative_chorus_A, num_points_in_each_epoch_L_bin_A = plot_tools.bin_3D_data(xdata = RBSP_A["UNIX_TIME"][RBSP_WITHIN_MLT_RANGE_A],
                                                                               ydata = RBSP_A["L"][RBSP_WITHIN_MLT_RANGE_A],
                                                                               zdata = RBSP_A["CHORUS"][RBSP_WITHIN_MLT_RANGE_A],
                                                                               xstart = start.timestamp(),
                                                                               xend = end.timestamp(),
                                                                               xstep = epoch_bin_size.seconds,
                                                                               ystart = L_start,
                                                                               yend = L_end,
                                                                               ystep = L_bin_size)


cumulative_chorus_B, num_points_in_each_epoch_L_bin_B = plot_tools.bin_3D_data(xdata = RBSP_B["UNIX_TIME"][RBSP_WITHIN_MLT_RANGE_B],
                                                                               ydata = RBSP_B["L"][RBSP_WITHIN_MLT_RANGE_B], 
                                                                               zdata = RBSP_B["CHORUS"][RBSP_WITHIN_MLT_RANGE_B],
                                                                               xstart = start.timestamp(),
                                                                               xend = end.timestamp(),
                                                                               xstep = epoch_bin_size.seconds,
                                                                               ystart = L_start,
                                                                               yend = L_end,
                                                                               ystep = L_bin_size)


average_chorus_RBSP = np.nansum([cumulative_chorus_A, cumulative_chorus_B], axis=0) / (num_points_in_each_epoch_L_bin_A + num_points_in_each_epoch_L_bin_B)

  average_chorus_RBSP = np.nansum([cumulative_chorus_A, cumulative_chorus_B], axis=0) / (num_points_in_each_epoch_L_bin_A + num_points_in_each_epoch_L_bin_B)


In [6]:
fig, ax = plt.subplots(1, 1, figsize=(16, 9), sharex=True)

image = ax.imshow(average_chorus_RBSP.T, 
                  origin = "lower",
                  extent=[start, end, L_start, L_end],
                  norm=colors.LogNorm(vmin=1, vmax=100),
                  aspect="auto",
                  interpolation="none")

image.cmap.set_under("black")

cbar = plt.colorbar(image, ax=ax, pad=0.01)

cbar.set_label("Chorus Bw (pT)\n", loc="center", labelpad=15, rotation=270)

ax.set_title(f"RBSP Observed Chorus ({MLT_min_for_L_v_Time_Plots} < MLT < {MLT_max_for_L_v_Time_Plots})")
ax.set_ylabel("L")
ax.set_xlabel("Time")

plt.tight_layout()

In [7]:
POES = chorus_machine_learning_helper.load_MPE_year(start.year)

SUPERMAG = chorus_machine_learning_helper.load_SUPERMAG_SME_year(start.year)

OMNI = chorus_machine_learning_helper.load_OMNI_year(start.year)

Finished loading POES data for year : 2016
Began loading SUPERMAG data for year : 2016
Finished loading SUPERMAG data for year : 2016
Began loading OMNI data for year : 2016
Finished loading OMNI data for year : 2016




In [8]:
print(f"Number of POES satellites loaded: {len(POES)}")

FEATURE_REFS = chorus_machine_learning_helper.find_average_SUPERMAG_and_OMNI_values_for_each_POES_data_point(POES, SUPERMAG, OMNI)


Number of POES satellites loaded: 4


  AVG_AVG_B = np.nanmean(OMNI["AVG_B"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_FLOW_SPEED = np.nanmean(OMNI["FLOW_SPEED"][TIME_RANGE[0]:TIME_RANGE[1]])
  AVG_PROTON_DENSITY = np.nanmean(OMNI["PROTON_DENSITY"][TIME_RANGE[0]:TIME_RANGE[1]])
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
5951471it [06:50, 14485.98it/s]


In [15]:
POES_TIMES_OF_FEATURES = FEATURE_REFS["POES_TIMES_OF_FEATURES"].flatten()
MLT_FEATURES_PREPROCESSING = FEATURE_REFS["MLT_FEATURES"].flatten()
L_FEATURES_PREPROCESSING = FEATURE_REFS["L_FEATURES"].flatten()

FEATURES_POST_PROCESSING = chorus_machine_learning_helper.normalize_features(FEATURE_REFS, version = "v4a")

MLT_FEATURES_POST_PROCESSING_1 = FEATURES_POST_PROCESSING[:, 1].flatten()
MLT_FEATURES_POST_PROCESSING_2 = FEATURES_POST_PROCESSING[:, 2].flatten()

In [16]:
model = xgb.Booster({'nthread': 8, "device" : "cuda"})  # init model
model.load_model(f"./../processed_data_chorus_neural_network/TRAINED_MODELS/Weighted_L2/XG_BOOSTED_REGRESSION_MSE_WEIGHTED_ON_L_AND_AMPLITUDE.model")  # load model data

CHORUS_PREDICTED = model.predict(xgb.DMatrix(FEATURES_POST_PROCESSING))
CHORUS_WITHIN_MLT = ((MLT_min_for_L_v_Time_Plots < MLT_FEATURES_PREPROCESSING) & (MLT_FEATURES_PREPROCESSING < MLT_max_for_L_v_Time_Plots))


In [17]:
cumulative_chorus, num_points_in_each_epoch_L_bin = plot_tools.bin_3D_data(xdata = POES_TIMES_OF_FEATURES[CHORUS_WITHIN_MLT],
                                                                            ydata = L_FEATURES_PREPROCESSING[CHORUS_WITHIN_MLT], 
                                                                            zdata = CHORUS_PREDICTED[CHORUS_WITHIN_MLT],
                                                                            xstart = start.timestamp(),
                                                                            xend = end.timestamp(),
                                                                            xstep = epoch_bin_size.seconds,
                                                                            ystart = L_start,
                                                                            yend = L_end,
                                                                            ystep = L_bin_size)


averaged_model_predictions = cumulative_chorus / num_points_in_each_epoch_L_bin

  averaged_model_predictions = cumulative_chorus / num_points_in_each_epoch_L_bin


In [18]:
fig, ax = plt.subplots(1, 1, figsize=(16, 9), sharex=True)

image = ax.imshow(averaged_model_predictions.T, 
                  origin = "lower",
                  extent=[start, end, L_start, L_end],
                  norm=colors.LogNorm(vmin=1, vmax=100),
                  aspect="auto",
                  interpolation="none")

image.cmap.set_under("black")

cbar = plt.colorbar(image, ax=ax, pad=0.01)

cbar.set_label("Chorus Bw (pT)\n", loc="center", labelpad=15, rotation=270)

ax.set_title(f"Model Predicted Chorus ({MLT_min_for_L_v_Time_Plots} < MLT < {MLT_max_for_L_v_Time_Plots})")
ax.set_ylabel("L")
ax.set_xlabel("Time")

plt.tight_layout()

In [19]:
fig, axs = plt.subplots(3, 1)

image_pred = axs[0].imshow(averaged_model_predictions.T, 
                  origin = "lower",
                  extent=[start, end, L_start, L_end],
                  norm=colors.LogNorm(vmin=1, vmax=100),
                  aspect="auto",
                  interpolation="none")

image_pred.cmap.set_under("black")

cbar = plt.colorbar(image_pred, ax=axs[0], pad=0.02)

cbar.set_label("Chorus Bw (pT)\n\n\n", loc="center", labelpad=15, rotation=270)

axs[0].set_title(f"Model Predicted Chorus ({MLT_min_for_L_v_Time_Plots} < MLT < {MLT_max_for_L_v_Time_Plots})")
axs[0].set_ylabel("L")


image_observed = axs[1].imshow(average_chorus_RBSP.T, 
                  origin = "lower",
                  extent=[start, end, L_start, L_end],
                  norm=colors.LogNorm(vmin=1, vmax=100),
                  aspect="auto",
                  interpolation="none")

image_observed.cmap.set_under("black")

cbar = plt.colorbar(image_observed, ax=axs[1], pad=0.02)

cbar.set_label("Chorus Bw (pT)\n\n\n", loc="center", labelpad=15, rotation=270)

axs[1].set_title(f"RBSP Observed Chorus ({MLT_min_for_L_v_Time_Plots} < MLT < {MLT_max_for_L_v_Time_Plots})")
axs[1].set_ylabel("L")

average_chorus_RBSP[average_chorus_RBSP <= 0] = -np.infty
absolute_error = (np.abs(average_chorus_RBSP - averaged_model_predictions) / average_chorus_RBSP) * 100

image_error = axs[2].imshow(absolute_error.T, 
                  origin = "lower",
                  extent=[start, end, L_start, L_end],
                  norm=colors.LogNorm(vmin=10, vmax=1000),
                  aspect="auto",
                  interpolation="none")

image_error.cmap.set_under("black")

cbar = plt.colorbar(image_error, ax=axs[2], pad=0.02)

cbar.set_label("Percent Error (%)\n\n\n", loc="center", labelpad=15, rotation=270)

axs[2].set_title("Error")
axs[2].set_ylabel("L")
axs[2].set_xlabel("Time (UTC)")


Text(0.5, 0, 'Time (UTC)')

In [20]:
fig, ax = plt.subplots(1, 1, subplot_kw=dict(projection="polar"))

ax.set_xlim(xmin = 0, xmax = 2 * np.pi)
ax.set_ylim(ymin = 0, ymax=7)

#ax[1].set_xlim(xmin = 0, xmax = 2 * np.pi)
#ax[1].set_ylim(ymin=0, ymax=7)


start_of_storm = datetime.datetime(year = 2016, month = 2, day = 15)
end_of_storm = datetime.datetime(year = 2016, month = 2, day = 21)

within_time_range_of_storm_A = (start_of_storm.timestamp() < RBSP_A["UNIX_TIME"]) & (RBSP_A["UNIX_TIME"] < end_of_storm.timestamp())
within_time_range_of_storm_B = (start_of_storm.timestamp() < RBSP_B["UNIX_TIME"]) & (RBSP_B["UNIX_TIME"] < end_of_storm.timestamp())


RBSP_A["UNIX_TIME"] = RBSP_A["UNIX_TIME"][within_time_range_of_storm_A]
RBSP_A["EPOCH"] = RBSP_A["EPOCH"][within_time_range_of_storm_A]
RBSP_A["MLT"] = RBSP_A["MLT"][within_time_range_of_storm_A]
RBSP_A["L"] = RBSP_A["L"][within_time_range_of_storm_A]
RBSP_A["CHORUS"] = RBSP_A["CHORUS"][within_time_range_of_storm_A]

RBSP_B["UNIX_TIME"] = RBSP_B["UNIX_TIME"][within_time_range_of_storm_B]
RBSP_B["EPOCH"] = RBSP_B["EPOCH"][within_time_range_of_storm_B]
RBSP_B["MLT"] = RBSP_B["MLT"][within_time_range_of_storm_B]
RBSP_B["L"] = RBSP_B["L"][within_time_range_of_storm_B]
RBSP_B["CHORUS"] = RBSP_B["CHORUS"][within_time_range_of_storm_B]

radius_of_points_rbsp_A = RBSP_A["L"]
x_rbsp_A = np.cos((RBSP_A["MLT"] * 2 * np.pi) / 24.0)
y_rbsp_A = np.sin((RBSP_A["MLT"] * 2 * np.pi) / 24.0)

radius_of_points_rbsp_B = RBSP_B["L"]
x_rbsp_B = np.cos((RBSP_B["MLT"] * 2 * np.pi) / 24.0)
y_rbsp_B = np.sin((RBSP_B["MLT"] * 2 * np.pi) / 24.0)

angles_of_points_rbsp_A = np.arctan2(y_rbsp_A, x_rbsp_A)
angles_of_points_rbsp_B = np.arctan2(y_rbsp_B, x_rbsp_B)

#LOOK AT PREPROCESSING CODE TO FIND CORRECT WAY TO DO ARCTAN HERE

within_time_range_of_storm_poes = (start_of_storm.timestamp() < np.array(POES_TIMES_OF_FEATURES)) & (np.array(POES_TIMES_OF_FEATURES) < end_of_storm.timestamp())

radius_of_points_poes = L_FEATURES_PREPROCESSING[within_time_range_of_storm_poes]
angles_of_points_poes = np.arctan2(MLT_FEATURES_POST_PROCESSING_1[within_time_range_of_storm_poes], MLT_FEATURES_POST_PROCESSING_2[within_time_range_of_storm_poes])
chorus_within_time_range = CHORUS_PREDICTED[within_time_range_of_storm_poes]

rbins = np.linspace(0, 8, 30)
abins = np.linspace(-np.pi, np.pi, 120)

A, R = np.meshgrid(abins, rbins)

average_chorus_rbsp_A = np.zeros_like(A)
average_chorus_rbsp_B = np.zeros_like(A)
average_chorus_model_predicted = np.zeros_like(A)


for r in range(len(rbins) - 1):
    for a in range(len(abins) - 1):
        
        average_chorus_rbsp_A[r, a] += np.nanmean(RBSP_A["CHORUS"][(rbins[r] <= radius_of_points_rbsp_A) & (radius_of_points_rbsp_A < rbins[r+1]) & (abins[a] < angles_of_points_rbsp_A) & (angles_of_points_rbsp_A < abins[a+1])])
        average_chorus_rbsp_B[r, a] += np.nanmean(RBSP_B["CHORUS"][(rbins[r] <= radius_of_points_rbsp_B) & (radius_of_points_rbsp_B < rbins[r+1]) & (abins[a] < angles_of_points_rbsp_B) & (angles_of_points_rbsp_B < abins[a+1])])
        average_chorus_model_predicted[r, a] += np.nanmean(chorus_within_time_range[(rbins[r] <= radius_of_points_poes) & (radius_of_points_poes < rbins[r+1]) & (abins[a] < angles_of_points_poes) & (angles_of_points_poes < abins[a+1])])

average_chorus_rbsp = np.nanmean([average_chorus_rbsp_A, average_chorus_rbsp_B], axis=0)

#pc = ax.pcolormesh(A, R, average_chorus_rbsp, norm=colors.LogNorm(vmin = 1, vmax = 100))
#cbar = plt.colorbar(pc)
#cbar.set_label("Chorus Bw (pT)\n\n", loc="center", labelpad=15, rotation=270)

pc = ax.pcolormesh(A, R, average_chorus_model_predicted, norm=colors.LogNorm(vmin = 1, vmax = 100))
cbar = plt.colorbar(pc)
cbar.set_label("Chorus Bw (pT)\n\n", loc="center", labelpad=15, rotation=270)

ax.set_xticklabels(['      MLT 0', "", 'MLT 6', "", 'MLT 12        ', "", 'MLT 18'])
ax.set_rlim(0)
#ax[1].set_xticklabels(['      MLT 0', "", 'MLT 6', "", 'MLT 12        ', "", 'MLT 18'])

#ax[0].set_title("Observed by RBSP\n")
#ax[1].set_title("Predicted by model")

plt.show()

  average_chorus_rbsp_A[r, a] += np.nanmean(RBSP_A["CHORUS"][(rbins[r] <= radius_of_points_rbsp_A) & (radius_of_points_rbsp_A < rbins[r+1]) & (abins[a] < angles_of_points_rbsp_A) & (angles_of_points_rbsp_A < abins[a+1])])
  average_chorus_rbsp_B[r, a] += np.nanmean(RBSP_B["CHORUS"][(rbins[r] <= radius_of_points_rbsp_B) & (radius_of_points_rbsp_B < rbins[r+1]) & (abins[a] < angles_of_points_rbsp_B) & (angles_of_points_rbsp_B < abins[a+1])])
  average_chorus_model_predicted[r, a] += np.nanmean(chorus_within_time_range[(rbins[r] <= radius_of_points_poes) & (radius_of_points_poes < rbins[r+1]) & (abins[a] < angles_of_points_poes) & (angles_of_points_poes < abins[a+1])])
  average_chorus_rbsp = np.nanmean([average_chorus_rbsp_A, average_chorus_rbsp_B], axis=0)
  ax.set_xticklabels(['      MLT 0', "", 'MLT 6', "", 'MLT 12        ', "", 'MLT 18'])
