In [1]:
from sklearnex import patch_sklearn

patch_sklearn()

import os
import sys
import warnings

# caution: path[0] is reserved for script path (or '' in REPL).
sys.path.insert(1, os.path.abspath("./../src"))

import datetime

import matplotlib.colors
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats
import seaborn as sns
import sklearn
import tqdm
from skopt import BayesSearchCV
from skopt.space import Categorical, Integer, Real

import chorus_machine_learning_helper

plt.rcdefaults()
warnings.filterwarnings("ignore")

%matplotlib qt

Intel(R) Extension for Scikit-learn* enabled (https://github.com/uxlfoundation/scikit-learn-intelex)


In [2]:
%%time

pdata_folder = os.path.abspath(r"./../processed_data/chorus_neural_network/")
input_folder = os.path.join(pdata_folder, "STAGE_1", "DENSITY_AND_CHORUS")

DENSITY = []
L = []
MLT = []
MLAT = []
TIME = []

OMNI_TIME = []
AVG_B = []
PROTON_DENSITY = []
FLOW_SPEED = []
SYM_H = []

SUPERMAG_TIME = []
SME = []


for _year in range(2012, 2020):

    print(f"Loading year : {_year}")

    print(f"Loading RBSP DATA : {_year}")
    
    refs = np.load(
        file=os.path.join(input_folder, f"RBSP_EMFISIS_CHORUS_AND_DENSITY_{_year}.npz")
    )

    RBSP_A = {}
    RBSP_A["UNIX_TIME"] = refs["UNIX_TIME_A"]
    RBSP_A["MLT"] = refs["MLT_A"]
    RBSP_A["MLAT"] = refs["MAGLAT_A"]
    RBSP_A["L"] = refs["L_A"]
    RBSP_A["DENSITY"] = refs["DENSITY_A"]

    RBSP_B = {}
    RBSP_B["UNIX_TIME"] = refs["UNIX_TIME_B"]
    RBSP_B["MLT"] = refs["MLT_B"]
    RBSP_B["MLAT"] = refs["MAGLAT_B"]
    RBSP_B["L"] = refs["L_B"]
    RBSP_B["DENSITY"] = refs["DENSITY_B"]

    RBSP = [RBSP_A, RBSP_B]

    refs.close()

    print(f"Removing NANS in RBSP for Year :{_year}")

    for PROBE in RBSP:

        not_nan = np.isfinite(PROBE["UNIX_TIME"]) & np.isfinite(PROBE["MLT"]) & np.isfinite(PROBE["MLAT"]) & np.isfinite(PROBE["L"]) & np.isfinite(PROBE["DENSITY"])
        PROBE["UNIX_TIME"] = PROBE["UNIX_TIME"][not_nan]
        PROBE["MLT"] = PROBE["MLT"][not_nan]
        PROBE["MLAT"] = PROBE["MLAT"][not_nan]
        PROBE["L"] = PROBE["L"][not_nan]
        PROBE["DENSITY"] = PROBE["DENSITY"][not_nan]
        
    
    start_averaging = datetime.datetime(year=_year, month=1, day=1, tzinfo=datetime.UTC).timestamp()
    end_averaging = datetime.datetime(year=_year + 1, month=1, day=1, tzinfo=datetime.UTC).timestamp()
    evenly_spaced_minutes = np.arange(start_averaging, end_averaging, step=60)

    for PROBE in RBSP:

        T_MAPPING = np.digitize(PROBE["UNIX_TIME"], evenly_spaced_minutes, right=False) - 1
        
        CUM_DENSITY = np.zeros_like(evenly_spaced_minutes)
        CUM_L = np.zeros_like(evenly_spaced_minutes)
        CUM_MLT_X = np.zeros_like(evenly_spaced_minutes)
        CUM_MLT_Y = np.zeros_like(evenly_spaced_minutes)
        CUM_MLAT = np.zeros_like(evenly_spaced_minutes)
        NUM_POINTS = np.zeros_like(evenly_spaced_minutes)

        for T in tqdm.tqdm(range(len(PROBE["UNIX_TIME"]))):
    
            x_bin = T_MAPPING[T]
            
            CUM_DENSITY[x_bin] += PROBE["DENSITY"][T]
            
            CUM_L[x_bin] += PROBE["L"][T]
    
            X_A = np.cos((PROBE["MLT"][T] * 2 * np.pi) / 24)
            Y_A = np.sin((PROBE["MLT"][T] * 2 * np.pi) / 24)
            
            CUM_MLT_X[x_bin] += X_A
            CUM_MLT_Y[x_bin] += Y_A
                        
            CUM_MLAT[x_bin] = PROBE["MLAT"][T]

            NUM_POINTS += 1

        DENSITY.append(CUM_DENSITY / NUM_POINTS)
        L.append(CUM_L / NUM_POINTS)

        AVG_MLT_X = CUM_MLT_X / NUM_POINTS
        AVG_MLT_Y = CUM_MLT_Y / NUM_POINTS
        
        ANGLE_IN_RADIANS = np.mod(np.arctan2(AVG_MLT_X, AVG_MLT_Y) + 2 * np.pi, 2 * np.pi)
        
        MLT.append((ANGLE_IN_RADIANS * 24.0) / (2 * np.pi))
        MLAT.append(CUM_MLAT / NUM_POINTS)
        TIME.append(evenly_spaced_minutes)

    OMNI_REFS = chorus_machine_learning_helper.load_OMNI_year(_year)
    SUPERMAG_REFS = chorus_machine_learning_helper.load_SUPERMAG_SME_year(_year)

    OMNI_TIME.append(OMNI_REFS["UNIX_TIME"])
    AVG_B.append(OMNI_REFS["AVG_B"])
    PROTON_DENSITY.append(OMNI_REFS["PROTON_DENSITY"])
    FLOW_SPEED.append(OMNI_REFS["FLOW_SPEED"])
    SYM_H.append(OMNI_REFS["SYM_H"])

    SUPERMAG_TIME.append(SUPERMAG_REFS["UNIX_TIME"])
    SME.append(SUPERMAG_REFS["SME"])

DENSITY = np.hstack(DENSITY)
L = np.hstack(L)
MLT = np.hstack(MLT)
MLAT = np.hstack(MLAT)
TIME = np.hstack(TIME)

OMNI_TIME = np.hstack(OMNI_TIME)
AVG_B = np.hstack(AVG_B)
PROTON_DENSITY = np.hstack(PROTON_DENSITY)
FLOW_SPEED = np.hstack(FLOW_SPEED)
SYM_H = np.hstack(SYM_H)

SUPERMAG_TIME = np.hstack(SUPERMAG_TIME)
SME = np.hstack(SME)

print(DENSITY.shape)
print(L.shape)
print(MLT.shape)
print(MLAT.shape)
print(TIME.shape)

print(OMNI_TIME.shape)
print(AVG_B.shape)
print(PROTON_DENSITY.shape)
print(FLOW_SPEED.shape)
print(SYM_H.shape)

print(SUPERMAG_TIME.shape)
print(SME.shape)

Loading year : 2012
Loading RBSP DATA : 2012
Removing NANS in RBSP for Year :2012


  5%|███▊                                                                    | 90563/1736637 [01:09<21:05, 1301.18it/s]


KeyboardInterrupt: 

In [3]:
order = np.argsort(TIME)
DENSITY = DENSITY[order]
L = L[order]
MLT = MLT[order]
MLAT = MLAT[order]
TIME = TIME[order]

AVG_B_INTERP = np.interp(x=TIME, xp=OMNI_TIME, fp=AVG_B, left=np.nan, right=np.nan)

PROTON_DENSITY_INTERP = np.interp(x=TIME, xp=OMNI_TIME, fp=PROTON_DENSITY, left=np.nan, right=np.nan)

FLOW_SPEED_INTERP = np.interp(x=TIME, xp=OMNI_TIME, fp=FLOW_SPEED, left=np.nan, right=np.nan)

SYM_H_INTERP = np.interp(x=TIME, xp=OMNI_TIME, fp=SYM_H, left=np.nan, right=np.nan)

SME_INTERP = np.interp(x=TIME, xp=SUPERMAG_TIME, fp=SME, left=np.nan, right=np.nan)

print(AVG_B_INTERP.shape)
print(PROTON_DENSITY_INTERP.shape)
print(FLOW_SPEED_INTERP.shape)
print(SYM_H_INTERP.shape)
print(SME_INTERP.shape)

TypeError: only integer scalar arrays can be converted to a scalar index

In [None]:
%% time
FEATURES = np.vstack([

    np.expand_dims(L, axis=1),
    np.expand_dims(MLT, axis=1),
    np.expand_dims(MLAT, axis=1),
    np.expand_dims(AVG_B_INTERP, axis=1),
    np.expand_dims(PROTON_DENSITY_INTERP, axis=1),
    np.expand_dims(FLOW_SPEED_INTERP, axis=1),
    np.expand_dims(SYM_H_INTERP, axis=1),
    np.expand_dims(SME_INTERP, axis=1),
])

LABELS = DENSITY

shuffled_indices = [i for i in range(len(DENSITY))]
np.random.shuffle(shuffled_indices)

FEATURES = FEATURES[shuffled_indices, :]
LABEL = LABELS[shuffled_indices]

In [None]:
print(FEATURES.shape, LABELS.shape)

In [None]:
ax0 = sns.displot(LABELS, log_scale=True)
ax0.set(ylabel='N', xlabel='DENSITY (cm^-3)', title='Training Set')
plt.tight_layout()

In [None]:
regr = sklearn.ensemble.RandomForestRegressor(max_depth=5,
                                              n_estimators=10)

regr.fit(FEATURES, LABELS)