In [1]:

import matplotlib.colors
import matplotlib.pyplot as plt
import numpy as np
import xgboost as xgb

%matplotlib qt

In [None]:
# Dataset selection: these three values pick the archive that is loaded below.
VERSION = "v1b"
FIELD_MODEL = "T89"
MODEL_TYPE = "LOWER_BAND"

# NOTE(review): the original lookup was CONJUNCTIONS_REFS[""], an empty key that
# raises KeyError before the archive is closed. The intended array name is not
# visible in this notebook; "MEAN_L" is a guess -- confirm against the archive.
MEAN_L_KEY = "MEAN_L"

# The context manager closes the archive even when one of the lookups fails.
with np.load(
    f"./../chorus_neural_network/STAGE_4/{VERSION}/MODEL_READY_DATA_{VERSION}_{FIELD_MODEL}_{MODEL_TYPE}.npz"
) as CONJUNCTIONS_REFS:
    TRAINING_FEATURES = CONJUNCTIONS_REFS["FEATURES"]
    TRAINING_LABELS = CONJUNCTIONS_REFS["LABELS"].flatten()

    if MEAN_L_KEY in CONJUNCTIONS_REFS.files:
        MEAN_L = CONJUNCTIONS_REFS[MEAN_L_KEY]
    else:
        # Keep the notebook runnable, but make the missing array obvious.
        MEAN_L = None
        print(f"WARNING: '{MEAN_L_KEY}' not found in archive; available arrays: {CONJUNCTIONS_REFS.files}")

    TRAINING_MLT = CONJUNCTIONS_REFS["TRAINING_MLT"]

    VALIDATION_FEATURES = CONJUNCTIONS_REFS["VALIDATION_FEATURES"]
    VALIDATION_LABELS = CONJUNCTIONS_REFS["VALIDATION_LABELS"].flatten()

    # BINS = CONJUNCTIONS_REFS["BINS"]

print(f"Training set shape: {TRAINING_FEATURES.shape, TRAINING_LABELS.shape}")
print(f"Validation set shape: {VALIDATION_FEATURES.shape, VALIDATION_LABELS.shape}")

Training set shape: ((865758, 10), (865758,))
Validation set shape: ((16887, 10), (16887,))


In [3]:
# Per-sample weights: 1 + |z-score| of the label, capped at 3.

MEAN_CHORUS = np.nanmean(TRAINING_LABELS)
STD_CHORUS = np.nanstd(TRAINING_LABELS)

SAMPLE_WEIGHTS_FROM_CHORUS_DISTRIBUTION = np.minimum(
    np.abs((TRAINING_LABELS - MEAN_CHORUS) / STD_CHORUS) + 1,
    3,
)

# Sort by label value so the weight curve is drawn left to right.
order_of_labels = np.argsort(np.ravel(TRAINING_LABELS))
sorted_chorus_labels = TRAINING_LABELS[order_of_labels]
sorted_chorus_weights = np.ravel(SAMPLE_WEIGHTS_FROM_CHORUS_DISTRIBUTION[order_of_labels])

plt.plot(sorted_chorus_labels, sorted_chorus_weights, label="SAMPLE_WEIGHTS")
plt.xlabel("Chorus Amplitude (pT)")
plt.ylabel("Weights of dataset")

plt.legend()
plt.show()

In [4]:
# Wrap both splits for XGBoost.
# NOTE(review): no sample weights are attached here, although
# SAMPLE_WEIGHTS_FROM_CHORUS_DISTRIBUTION is computed in this notebook -- confirm
# whether an unweighted fit is intended.
training_set = xgb.DMatrix(data=TRAINING_FEATURES, label=TRAINING_LABELS)
validation_set = xgb.DMatrix(data=VALIDATION_FEATURES, label=VALIDATION_LABELS)

In [5]:
# Define custom learning rate schedule
def custom_learning_rate(current_iter, base_learning_rate=1.0, decay_factor=0.5, decay_every=50):
    """Step-decay learning rate for a given boosting round.

    The rate starts at ``base_learning_rate`` and is multiplied by
    ``decay_factor`` once for every ``decay_every`` completed rounds.
    The defaults reproduce the original schedule (1.0, halved every 50 rounds),
    so the function can still be called with the round index alone.

    Args:
        current_iter: Zero-based index of the boosting round.
        base_learning_rate: Rate used for the first ``decay_every`` rounds.
        decay_factor: Multiplier applied at each decay step.
        decay_every: Number of rounds between decay steps; must be positive.

    Returns:
        The learning rate for ``current_iter`` as a builtin ``float``.

    Raises:
        ValueError: If ``decay_every`` is not positive.
    """
    if decay_every <= 0:
        raise ValueError("decay_every must be a positive number of rounds")

    completed_steps = np.floor(current_iter / decay_every)
    # Convert to a builtin float so callers never receive a NumPy scalar.
    return float(base_learning_rate * np.power(decay_factor, completed_steps))

In [10]:
# Booster hyper-parameters, grouped by role.
params = {
    # objective and backend
    "objective": "reg:squarederror",
    "tree_method": "hist",
    "device": "cuda",
    "nthread": 10,
    # tree shape and step size
    "max_depth": 15,
    "learning_rate": 0.5,
    # row / column sub-sampling
    "subsample": 0.5,
    "colsample_bytree": 0.8,
    "colsample_bynode": 0.8,
    # regularisation
    "gamma": 0.5,
    "lambda": 1000.0,
}

# Both splits are scored every round; the names appear in the training log.
evals = [(training_set, "train"), (validation_set, "validation")]

n = 1000  # number of boosting rounds

# NOTE(review): presumably the scheduler overrides params["learning_rate"] on
# every round -- confirm against the xgboost callback documentation.
lr_scheduler = xgb.callback.LearningRateScheduler(custom_learning_rate)

# Disabled cross-validation variant, kept for reference:
# results = xgb.cv(
#    params = params,
#    nfold = 5,
#    dtrain = training_set,
#    num_boost_round = n,
#    verbose_eval=1,
#    seed = random.randint(0, int(1e5)),
#    early_stopping_rounds=100,
#    shuffle = True,
#    metrics = ["mape", "rmse"],
#    callbacks=[lr_scheduler])

# NOTE(review): the recorded log for this cell shows validation-rmse rising from
# about 11.36 to about 11.995 while train-rmse falls to about 4.46, and no early
# stopping is configured here.
model = xgb.train(
    params,
    training_set,
    num_boost_round=n,
    evals=evals,
    verbose_eval=100,
    callbacks=[lr_scheduler],
)


# model.save_model(f"./../processed_data_chorus_neural_network/TRAINED_MODELS/Weighted_L2/XG_BOOSTED_REGRESSION_MSE_WEIGHTED_LINEAR_WEIGHTING.model")

[0]	train-rmse:9.80898	validation-rmse:11.35851
[100]	train-rmse:5.06888	validation-rmse:11.97424
[200]	train-rmse:4.59592	validation-rmse:11.98886
[300]	train-rmse:4.49362	validation-rmse:11.99425
[400]	train-rmse:4.46865	validation-rmse:11.99486
[500]	train-rmse:4.46249	validation-rmse:11.99507
[600]	train-rmse:4.46092	validation-rmse:11.99509
[700]	train-rmse:4.46055	validation-rmse:11.99510
[800]	train-rmse:4.46045	validation-rmse:11.99510
[900]	train-rmse:4.46043	validation-rmse:11.99511
[999]	train-rmse:4.46042	validation-rmse:11.99511


In [13]:
# Persist the trained booster. The path has no placeholders, so a plain string
# literal is used instead of an f-string.
TRAINED_MODEL_PATH = "./../chorus_neural_network/TRAINED_MODELS/WEIGHTED_CHORUS_WITH_MEDIAN_INSTEAD_OF_MEAN.model"
model.save_model(TRAINED_MODEL_PATH)



In [14]:
# To evaluate a previously saved booster instead, uncomment:
# model = xgb.Booster({'nthread': 4})  # init model
# model.load_model(f"./../processed_data_chorus_neural_network/TRAINED_MODELS/XG_BOOSTED_REGRESSION.model")  # load model data

# Predictions on both splits, reused by the diagnostic plots.
training_pred, validation_pred = (
    model.predict(split) for split in (training_set, validation_set)
)

In [18]:
# Mean and spread of feature column 0, used to rescale the colour values.
# Both statistics are NaN-aware: pairing nanmean with a plain std would make
# every colour NaN as soon as one feature value is NaN.
# NOTE(review): the recorded output of this cell prints a mean of about -1.8e-16,
# so the column looks already standardised and this rescaling is presumably close
# to a no-op. If physical units are wanted, the dataset's original mean/std are
# needed -- confirm.
mean_L = np.nanmean(TRAINING_FEATURES[:, 0])
std_L = np.nanstd(TRAINING_FEATURES[:, 0])

print(mean_L)

# Observed vs. predicted on the training split, coloured by rescaled feature 0.
plt.scatter(TRAINING_LABELS, training_pred, s=0.8, c=(TRAINING_FEATURES[:, 0] * std_L) + mean_L)
# Black y = x line: points on it are perfect predictions.
plt.plot(np.logspace(-2, 4), np.logspace(-2, 4), color="black")
plt.colorbar()
plt.grid()

plt.xlim(1e-1, 1e3)
plt.ylim(1e-1, 1e3)
plt.xlabel("RBSP OBSERVED CHORUS")
plt.ylabel("MODEL PREDICTED CHORUS")
plt.title("TRAINING SET")
plt.xscale("log")
plt.yscale("log")

-1.8305282975604355e-16


In [None]:
# Range of feature column 0 on the training split.
training_feature_0 = TRAINING_FEATURES[:, 0]
print(np.min(training_feature_0))
print(np.max(training_feature_0))

# Residuals are computed once and reused for the MSE and the percentage error.
training_observed = TRAINING_LABELS.flatten()
training_residual = training_pred.flatten() - training_observed

print(np.nanmean(training_residual**2))

# Absolute percentage error against feature 0, coloured by the observed label.
training_percent_error = np.abs(training_residual / training_observed) * 100
plt.scatter(
    training_feature_0,
    training_percent_error,
    c=TRAINING_LABELS,
    s=0.8,
    norm=matplotlib.colors.LogNorm(),
)
plt.colorbar()

plt.grid()
plt.yscale("log")

In [60]:
# Observed vs. predicted on the validation split, coloured by feature column 0.
identity_line = np.logspace(-2, 4)
axis_limits = (1e-1, 1e3)

plt.scatter(VALIDATION_LABELS, validation_pred, c=VALIDATION_FEATURES[:, 0], s=0.8)
plt.plot(identity_line, identity_line, color="black")  # y = x reference
plt.grid()
plt.colorbar()

# Same limits and log scaling on both axes.
plt.xlim(*axis_limits)
plt.ylim(*axis_limits)
plt.xscale("log")
plt.yscale("log")

In [61]:
# Print the per-feature importance dict for each importance type, in order.
for importance_kind in ("gain", "weight"):
    print(model.get_score(importance_type=importance_kind))

{'f0': 139.73208618164062, 'f1': 205.9730682373047, 'f2': 209.689453125, 'f3': 176.5742950439453, 'f4': 194.00253295898438, 'f5': 235.54627990722656, 'f6': 241.77963256835938, 'f7': 208.5730743408203, 'f8': 240.8218994140625, 'f9': 230.89112854003906}
{'f0': 101372.0, 'f1': 66591.0, 'f2': 60473.0, 'f3': 61993.0, 'f4': 64411.0, 'f5': 56883.0, 'f6': 55257.0, 'f7': 55568.0, 'f8': 54463.0, 'f9': 45974.0}


In [None]:
def _binned_polar_mean(radius, angle, values, r_edges, a_edges):
    """Average ``values`` inside each (radius, angle) bin.

    Bins are half-open on both axes: a point belongs to bin ``i`` when
    ``edges[i] <= x < edges[i + 1]``. Points outside the edges, and points
    whose value is NaN, are ignored.

    Args:
        radius: Radial coordinate of every point.
        angle: Angular coordinate of every point, in the units of ``a_edges``.
        values: Quantity to average, one entry per point.
        r_edges: Increasing radial bin edges.
        a_edges: Increasing angular bin edges.

    Returns:
        Array of shape ``(len(r_edges) - 1, len(a_edges) - 1)`` holding the mean
        per bin, with NaN where a bin received no points.
    """
    radius = np.ravel(radius)
    angle = np.ravel(angle)
    values = np.ravel(values)

    n_r = len(r_edges) - 1
    n_a = len(a_edges) - 1

    # digitize returns i with edges[i - 1] <= x < edges[i]; shift to 0-based bins.
    r_idx = np.digitize(radius, r_edges) - 1
    a_idx = np.digitize(angle, a_edges) - 1

    keep = (r_idx >= 0) & (r_idx < n_r) & (a_idx >= 0) & (a_idx < n_a) & ~np.isnan(values)
    flat_bin = r_idx[keep] * n_a + a_idx[keep]

    sums = np.bincount(flat_bin, weights=values[keep], minlength=n_r * n_a)
    counts = np.bincount(flat_bin, minlength=n_r * n_a)

    # 0 / 0 for empty bins is the wanted NaN; silence the warning it triggers.
    with np.errstate(invalid="ignore", divide="ignore"):
        means = sums / counts
    return means.reshape(n_r, n_a)


# Constants used to rescale feature column 0 into the plotted radius.
# NOTE(review): presumably the std and mean used to standardise that feature;
# they are hard-coded here -- confirm, or load them from the dataset.
RADIUS_SCALE = 1.345
RADIUS_OFFSET = 4.1

radius_of_points_training = TRAINING_FEATURES[:, 0] * RADIUS_SCALE + RADIUS_OFFSET
angles_of_points_training = np.arctan2(TRAINING_FEATURES[:, 1], TRAINING_FEATURES[:, 2])

radius_of_points_validation = VALIDATION_FEATURES[:, 0] * RADIUS_SCALE + RADIUS_OFFSET
angles_of_points_validation = np.arctan2(VALIDATION_FEATURES[:, 1], VALIDATION_FEATURES[:, 2])

# Bin edges: 29 radial x 59 angular bins.
rbins = np.linspace(0, 8, 30)
abins = np.linspace(-1 * np.pi, np.pi, 60)

# Edge grids for pcolormesh. The binned arrays below are one smaller in each
# dimension, so A / R are read as cell edges.
A, R = np.meshgrid(abins, rbins)

average_chorus_pred_training = _binned_polar_mean(
    radius_of_points_training, angles_of_points_training, training_pred, rbins, abins
)
average_chorus_real_training = _binned_polar_mean(
    radius_of_points_training, angles_of_points_training, TRAINING_LABELS, rbins, abins
)
average_chorus_pred_validation = _binned_polar_mean(
    radius_of_points_validation, angles_of_points_validation, validation_pred, rbins, abins
)
average_chorus_real_validation = _binned_polar_mean(
    radius_of_points_validation, angles_of_points_validation, VALIDATION_LABELS, rbins, abins
)

fig, ax = plt.subplots(2, 2, subplot_kw=dict(projection="polar"))

# Eight fixed tick positions so the label list matches the tick count.
mlt_tick_angles = np.arange(8) * np.pi / 4
mlt_tick_labels = ["MLT 0", "", "MLT 6", "", "MLT 12", "", "MLT 18", ""]

polar_panels = (
    (ax[0][0], average_chorus_pred_training, "Predicted on Training Set"),
    (ax[0][1], average_chorus_pred_validation, "Predicted on Validation Set"),
    (ax[1][0], average_chorus_real_training, "Labels of Training Set"),
    (ax[1][1], average_chorus_real_validation, "Labels of Validation Set"),
)

for panel, binned_mean, panel_title in polar_panels:
    panel.set_xlim(0, 2 * np.pi)
    panel.set_ylim(0, 7)

    pc = panel.pcolormesh(
        A, R, binned_mean, norm=matplotlib.colors.LogNorm(vmin=1, vmax=100)
    )
    # Attach each colourbar to its own panel rather than the current axes.
    fig.colorbar(pc, ax=panel)

    panel.set_xticks(mlt_tick_angles)
    panel.set_xticklabels(mlt_tick_labels)
    panel.set_title(panel_title)

plt.tight_layout()
plt.show()

In [None]:
fig, axs = plt.subplots(2, 1)