## Original Publication

See the original publication of this work in the following link: https://www.swsc-journal.org/articles/swsc/abs/2021/01/swsc210024/swsc210024.html

Aminalragia-Giamini, Sigiava, et al. "Solar Energetic Particle Event occurrence prediction using Solar Flare Soft X-ray measurements and Machine Learning." Journal of Space Weather and Space Climate 11 (2021): 59.


# Loading Data 

## Importing libraries and data files

In [1]:
import os
import tensorflow as tf
import datetime
import sys
from os import path
import numpy as np
import pandas as pd
from tensorflow import keras 
import xgboost as xgb

x_train = pd.read_pickle("x_train.pkl")  
x_test = pd.read_pickle("x_test.pkl")  
y_train = pd.read_pickle("y_train.pkl")  
y_test = pd.read_pickle("y_test.pkl")  

# number of different classes
n_SEPS =  226
n_flares = 17875
n_sample = n_SEPS +n_flares
columns_to_use = ['0','1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','17','20','21','22','28','39','40',
                       '41','42','43','44','45','46','47','48']
number_of_classes = 2 

In [2]:
x_total = x_train.append(x_test)
y_total = y_train.append(y_test)

  x_total = x_train.append(x_test)
  y_total = y_train.append(y_test)


## Prepare validation

Establish validation method below

In [3]:
from sklearn.model_selection import LeaveOneOut
from sklearn.utils.class_weight import compute_sample_weight
from xgboost import cv

sample_weight = compute_sample_weight(class_weight='balanced',y=y_total)

In [4]:
d_train = xgb.DMatrix(x_total, label=y_total)

params={"objective" : "binary:logistic", 'learning_rate' :0.01, 'max_depth':20}

xgb_cv = cv(dtrain=d_train, params =params, nfold=50, metrics ="auc", num_boost_round=50,as_pandas =True)

In [5]:
xgb_cv

Unnamed: 0,train-auc-mean,train-auc-std,test-auc-mean,test-auc-std
0,0.893975,0.008823,0.865043,0.10824
1,0.895883,0.008165,0.870884,0.110656
2,0.897707,0.008441,0.870993,0.110608
3,0.899231,0.008645,0.870957,0.110688
4,0.900011,0.01021,0.870782,0.110735
5,0.901944,0.010644,0.874984,0.105207
6,0.90364,0.010396,0.877366,0.103711
7,0.905018,0.011112,0.878862,0.104663
8,0.908456,0.013688,0.884619,0.105666
9,0.91194,0.015478,0.884255,0.106524


## Example of Training proccedure


Let's now train our model

In [52]:
 bst=xgb.XGBClassifier(max_depth=10,learning_rate=0.01, n_estimators=50, objective='binary:logistic')
# fit model
model_used=bst.fit(x_total, y_total)

<bound method XGBModel.evals_result of XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
              early_stopping_rounds=None, enable_categorical=False,
              eval_metric=None, feature_types=None, gamma=0, gpu_id=-1,
              grow_policy='depthwise', importance_type=None,
              interaction_constraints='', learning_rate=0.01, max_bin=256,
              max_cat_threshold=64, max_cat_to_onehot=4, max_delta_step=0,
              max_depth=10, max_leaves=0, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=50, n_jobs=0,
              num_parallel_tree=1, predictor='auto', random_state=0, ...)>

# Visualziating results (simple metrics)

Let's see the results

In [54]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

predictions = model_used.predict(x_test)
predictions_binary = np.argmax(predictions, axis = 1)
y_test_binary = np.argmax(y_test, axis=1)
    
cr = classification_report(y_test_binary, predictions_binary)
cm = confusion_matrix(y_test_binary, predictions_binary)

print(cm)
print("Class 1 accuracy")
print(cm[0,0]/(cm[0,0]+cm[0,1]))

print("Class 2 accuracy")
print(cm[1,1]/(cm[1,0]+cm[1,1]))

#print(model_used.summary())
print(cr)

AxisError: axis 1 is out of bounds for array of dimension 1

# Visualziating results (Plots)

Let's see the results

In [30]:
bst.feature_importances_

array([0.010448  , 0.02207899, 0.01320809, 0.01584152, 0.03114783,
       0.43417573, 0.        , 0.        , 0.        , 0.        ,
       0.01752777, 0.02267887, 0.1015267 , 0.04768894, 0.02140178,
       0.01614504, 0.01399788, 0.        , 0.        , 0.01146915,
       0.01567686, 0.02016117, 0.03098541, 0.02174044, 0.0136462 ,
       0.03390979, 0.01562648, 0.01158983, 0.01026439, 0.02290859,
       0.02415452], dtype=float32)

In [25]:
import matplotlib.pyplot as plt

val_f1 = model_used.history['val_f1_m']
val_loss= model_used.history['val_loss']
val_acc= model_used.history['val_acc']
val_prec= model_used.history['val_precision_m']

train_f1 = model_used.history['f1_m']
train_loss = model_used.history['loss']
train_acc = model_used.history['acc']
train_prec = model_used.history['precision_m']

plt.figure(figsize=(11.69,8.27))
plt.subplot(2, 2, 1)
plt.plot(train_f1)
plt.plot(val_f1)
plt.ylabel('f1 score',fontsize=22)
plt.legend(['train', 'test'], loc='lower right',fontsize=16)

plt.subplot(2, 2, 2)
plt.plot(train_loss)
plt.plot(val_loss)
plt.ylabel('loss',fontsize=22)

plt.subplot(2, 2, 3)
plt.plot(train_acc)
plt.plot(val_acc)
plt.xlabel('epochs',fontsize=22)
plt.ylabel('acc',fontsize=22)

plt.subplot(2, 2, 4)
plt.plot(train_prec)
plt.plot(val_prec)
plt.xlabel('epochs',fontsize=22)
plt.ylabel('precision',fontsize=22)

plt.show()



AttributeError: 'XGBClassifier' object has no attribute 'history'