We use solar flare [data from the RHESSI Mission](https://www.kaggle.com/datasets/khsamaha/solar-flares-rhessi/data) to predict the energy of a flare given observed characteristics

### Preprocessing

In [1]:
import pandas as pd
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
import keras_tuner
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

Using TensorFlow backend


In [2]:
df = pd.read_csv('./solarflares_rhessi.csv')
df

Unnamed: 0,flare,start.date,start.time,peak,end,duration.s,peak.c/s,total.counts,energy.kev,x.pos.asec,y.pos.asec,radial,active.region.ar,flag.1,flag.2,flag.3,flag.4,flag.5
0,2021213,2002-02-12,21:29:56,21:33:38,21:41:48,712,136,167304.0,12-25,592,-358,692,0,A1,P1,,,
1,2021228,2002-02-12,21:44:08,21:45:06,21:48:56,288,7,9504.0,6-12,604,-341,694,9811,A1,P1,PE,Q1,
2,2021332,2002-02-13,00:53:24,00:54:54,00:57:00,216,15,11448.0,6-12,-310,375,487,9825,A1,P1,,,
3,2021308,2002-02-13,04:22:52,04:23:50,04:26:56,244,20,17400.0,12-25,-277,378,469,9822,A1,P1,,,
4,2021310,2002-02-13,07:03:52,07:05:14,07:07:48,236,336,313392.0,25-50,-272,390,476,9825,A1,GS,P1,PE,Q2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
116138,18020903,2018-02-09,16:41:28,16:42:54,16:43:32,124,18,2888.0,6-12,-345,-38,347,2699,A0,DF,P1,PE,Q2
116139,18020904,2018-02-09,18:15:56,18:17:26,18:17:40,104,16,1656.0,6-12,-268,-38,271,2699,A0,DF,P1,PE,Q2
116140,18021001,2018-02-10,13:04:36,13:06:46,13:07:04,148,15,2224.0,6-12,-115,-38,121,2699,A0,DF,P1,PE,Q2
116141,18022601,2018-02-26,15:49:56,15:51:18,15:53:52,236,16,3312.0,6-12,115,192,223,2700,A0,DF,P1,PE,Q2


In [3]:
df.columns

Index(['flare', 'start.date', 'start.time', 'peak', 'end', 'duration.s',
       'peak.c/s', 'total.counts', 'energy.kev', 'x.pos.asec', 'y.pos.asec',
       'radial', 'active.region.ar', 'flag.1', 'flag.2', 'flag.3', 'flag.4',
       'flag.5'],
      dtype='object')

In [4]:
df = df[[
    'duration.s',
    'peak.c/s',
    'total.counts',
    'energy.kev', 
    'x.pos.asec', 'y.pos.asec',
    'radial',
    'active.region.ar'
]]
NUM_FEATURES = len(df.columns) - 1

In [5]:
X = df.drop('energy.kev', axis=1).values

In [6]:
encoder = LabelEncoder()
encoder.fit(df['energy.kev'])
y = encoder.transform(df['energy.kev'])
y = to_categorical(y)
NUM_CLASSES = len(encoder.classes_)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

## Train and optimise the DNN

In [8]:
def build_model(hp):
    model = keras.Sequential()
    model.add(layers.Input(shape=(NUM_FEATURES,)))
    model.add(
        layers.Dense(
            units=hp.Int("units", min_value=32, max_value=512, step=32),
            activation=hp.Choice("activation", ["relu", "tanh"]),
        )
    )
    if hp.Boolean("dropout"):
        model.add(layers.Dropout(rate=0.25))
    model.add(layers.Dense(NUM_CLASSES, activation="softmax"))

    learning_rate = hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log")
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

tuner = keras_tuner.Hyperband(
    hypermodel=build_model,
    objective="val_accuracy",
    factor=3,
    directory="tuning",
    project_name="solar_flare",
    max_epochs=10
)
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', 
    patience=10,
    restore_best_weights=True,
    start_from_epoch=5
)
tuner.search(X_train, y_train, epochs=50, validation_split=0.2, callbacks=[early_stopping])

best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(f"""Optimal number of units in the first densely-connected layer:{best_hps.get('units')}\n
      Optimal learning rate for the optimizer: {best_hps.get('lr')}.""")

Reloading Tuner from tuning/solar_flare/tuner0.json
Optimal number of units in the first densely-connected layer:320

      Optimal learning rate for the optimizer: 0.0001809681061508976.


In [9]:
model = tuner.hypermodel.build(best_hps)
history = model.fit(X_train, y_train, epochs=100, validation_split=0.2, callbacks=[early_stopping])



Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100


In [10]:
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test loss: {test_loss}')
print(f'Test accuracy: {test_accuracy}')

Test loss: 476.5716857910156
Test accuracy: 0.8096345067024231
