In [72]:
import os

import matplotlib.pyplot as plt
import numpy as np
import sklearn
import pynndescent
import seaborn as sns
import numba

%matplotlib qt

In [73]:
# --- Run configuration -------------------------------------------------------
# These three tags select which model-ready archive is loaded below; they are
# interpolated into the archive file name.
VERSION = "v3"
FIELD_MODEL = "T89"
MODEL_TYPE = "LOWER_BAND"
pdata_folder = os.path.abspath(r"./../processed_data/chorus_neural_network/")
STAGE_4_folder = os.path.join(pdata_folder, "STAGE_4", VERSION)

# --- Load the model-ready arrays ---------------------------------------------
# The archive is opened in a `with` block so the underlying file handle is
# released even if one of the expected keys is missing. Every array is read
# inside the block; after it exits, CONJUNCTIONS_REFS stays bound to the
# (closed) archive object, exactly as it was after the explicit close().
with np.load(
    file=os.path.join(STAGE_4_folder, f"MODEL_READY_DATA_{VERSION}_{FIELD_MODEL}_{MODEL_TYPE}.npz")
) as CONJUNCTIONS_REFS:
    # Training split: feature matrix, flattened labels and flattened day ids.
    X = CONJUNCTIONS_REFS["FEATURES"]
    y = CONJUNCTIONS_REFS["LABELS"].flatten()
    day = CONJUNCTIONS_REFS["TRAINING_DAY_IDS"].flatten()

    # Auxiliary arrays stored alongside the training data. They are loaded here
    # but not referenced by the cells visible in this notebook excerpt.
    MLT_train = CONJUNCTIONS_REFS["TRAINING_MLT"]
    MEAN_L = CONJUNCTIONS_REFS["MEAN_L"]
    STD_L = CONJUNCTIONS_REFS["STD_L"]

    # Validation split: feature matrix and flattened labels.
    X_valid = CONJUNCTIONS_REFS["VALIDATION_FEATURES"]
    y_valid = CONJUNCTIONS_REFS["VALIDATION_LABELS"].flatten()

# Sanity check: features and labels of each split must agree on the sample count.
print(f"Training set shape: {X.shape, y.shape}")
print(f"Validation set shape: {X_valid.shape, y_valid.shape}")

Training set shape: ((100405, 7), (100405,))
Validation set shape: ((1238, 7), (1238,))


In [74]:
def _plot_label_distribution(values, title):
    """Draw a log-scaled seaborn distribution plot of `values` and label it.

    Returns whatever `sns.displot` returns, after setting the axis labels and
    the given title on it and applying a tight layout to the current figure.
    """
    grid = sns.displot(values, log_scale=True)
    grid.set(ylabel='N', xlabel='Chorus (pT)', title=title)
    plt.tight_layout()
    return grid


# Repeat the split sizes so this cell's output can be read on its own.
print(f"Training set shape: {X.shape, y.shape}")
print(f"Validation set shape: {X_valid.shape, y_valid.shape}")

# One distribution plot per split, training first and validation second.
ax0 = _plot_label_distribution(y, 'Training Set')
ax1 = _plot_label_distribution(y_valid, 'Validation Set')

Training set shape: ((100405, 7), (100405,))
Validation set shape: ((1238, 7), (1238,))


In [75]:
# Column-wise statistics are taken from the training features only and then
# applied to both splits, so the validation features are scaled with the
# training mean and standard deviation rather than their own.
train_feature_mean = np.mean(X, axis=0)
train_feature_std = np.std(X, axis=0)

standardized_X = (X - train_feature_mean) / train_feature_std
standardized_X_valid = (X_valid - train_feature_mean) / train_feature_std

In [76]:
%%time

# Build the nearest-neighbour index over the standardized training features.
# random_state is pinned so that the index construction starts from the same
# random initialisation on every run; without it the neighbour sets (and hence
# the downstream predictions) can change between kernel restarts.
# NOTE(review): confirm that results are fully repeatable with n_jobs > 1.
knn = pynndescent.NNDescent(standardized_X, n_jobs=10, random_state=42)

# Call prepare() here so any query-time setup is included in this cell's
# timing rather than in the first query.
knn.prepare()

CPU times: total: 23.6 s
Wall time: 6.61 s


In [42]:
%%time

# Query the index built on the standardized training features for k=10
# neighbours of every standardized validation sample. The result is consumed
# downstream as validation_prediction[0] (per-sample neighbour indices, used to
# index the training labels `y`) and validation_prediction[1] (the matching
# distances).
validation_prediction = knn.query(standardized_X_valid, k=10)

CPU times: total: 31.2 ms
Wall time: 24.6 ms


In [43]:
def sigmoid_relevance(yp, k, c):
    """Logistic relevance weight ``1 / (1 + exp(-k * (yp - c)))``.

    Parameters
    ----------
    yp : float or array-like supporting arithmetic with scalars
        Value(s) at which the sigmoid is evaluated.
    k : float
        Steepness of the transition.
    c : float
        Midpoint: the function returns 0.5 where ``yp == c``.

    Returns
    -------
    Same shape as ``yp``; values lie in [0, 1].

    Notes
    -----
    The direct form overflows inside ``exp`` (emitting a RuntimeWarning) when
    ``k * (yp - c)`` is strongly negative. Writing it as
    ``exp(-log(1 + exp(-z)))`` with ``logaddexp`` gives the same value without
    the overflow.
    """
    z = k * (yp - c)
    # logaddexp(0, -z) == log(1 + exp(-z)), computed without overflowing.
    return np.exp(-np.logaddexp(0.0, -z))

In [70]:
# Parameters of the sigmoid relevance weighting, applied to log10 of the
# neighbours' label values: steepness and midpoint of the transition.
RELEVANCE_STEEPNESS = 10
RELEVANCE_MIDPOINT = 1.0

# validation_prediction[0] holds, for every validation sample, the indices of
# its neighbours in the training set. The distances in validation_prediction[1]
# are not used by this weighting scheme.
neighbor_indices = validation_prediction[0]

# Gather the training labels of all neighbours in one indexing operation
# (one row per validation sample, assuming the index array is 2-D).
values_of_neighbors = y[neighbor_indices]

# Weight each neighbour by the sigmoid of its log10 label value.
w = sigmoid_relevance(
    np.log10(values_of_neighbors),
    k=RELEVANCE_STEEPNESS,
    c=RELEVANCE_MIDPOINT,
)

# Relevance-weighted mean of the neighbour labels for every validation sample.
# Converted to a list so `predictions` keeps the container type (a list of
# NumPy floats) that the row-by-row loop used to produce.
predictions = list(np.average(values_of_neighbors, weights=w, axis=1))


In [71]:
# Draw on a dedicated figure. With an interactive backend the "current" figure
# may still be one created by an earlier cell, and pyplot-level calls would
# then overlay this scatter on that figure.
fig_valid, ax_valid = plt.subplots()

# Observed (x) versus predicted (y) value for every validation sample.
ax_valid.scatter(y_valid, predictions, s=2.0)

# 1:1 reference line: points on it are predicted exactly.
one_to_one = np.logspace(-2, 4)
ax_valid.plot(one_to_one, one_to_one, color="black")

ax_valid.grid()
ax_valid.set_xlabel("RBSP OBSERVED CHORUS")
ax_valid.set_ylabel("MODEL PREDICTED CHORUS")
ax_valid.set_title("VALIDATION SET")
ax_valid.set_xlim(1e-1, 1e3)
ax_valid.set_ylim(1e-1, 1e3)
ax_valid.set_xscale("log")
ax_valid.set_yscale("log")