# Solar flares 

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima_model import ARIMA
from pmdarima.arima import auto_arima
from sklearn.metrics import mean_squared_error, mean_absolute_error
import math
import tensorflow as tf
from tensorflow import keras
import seaborn as sns
from pylab import rcParams
from matplotlib import rc
from sklearn.model_selection import cross_val_score

In [4]:
def load_data(filepath_train,filepath_valid,filepath_test):
    
    train = pd.read_csv(filepath_train)
    valid = pd.read_csv(filepath_valid)
    test = pd.read_csv(filepath_test)
    #setting timestamp as index
    train = train.set_index('timestamp')
    valid = valid.set_index('timestamp')
    test = test.set_index('timestamp')
    #replacing 'negative' & 'positive' label values with 0 & 1
    train = train.replace({'label' : { 'Negative' : 0, 'Positive' : 1}})
    valid = valid.replace({'label' : { 'Negative' : 0, 'Positive' : 1}})
    test = test.replace({'label' : { 'Negative' : 0, 'Positive' : 1}})
    #converting label type to int
    train['label'] = train['label'].astype('int')
    valid['label'] = valid['label'].astype('int')
    test['label'] = test['label'].astype('int')
    
    return (train,valid,test)

In [5]:
#class M flares
filepath_train ='~/Downloads/M/normalized_training.csv'
filepath_valid = '~/Downloads/M/normalized_validation.csv'
filepath_test= '~/Downloads/M/normalized_testing.csv'
M_train, M_valid, M_test = load_data(filepath_train,filepath_valid,filepath_test)

In [6]:
#class M5 flares
filepath_train ='~/Downloads/M5/normalized_training.csv'
filepath_valid = '~/Downloads/M5/normalized_validation.csv'
filepath_test= '~/Downloads/M5/normalized_testing.csv'
M5_train, M5_valid, M5_test = load_data(filepath_train,filepath_valid,filepath_test)

In [7]:
#C class flares
filepath_train ='~/Downloads/C/normalized_training.csv'
filepath_valid = '~/Downloads/C/normalized_validation.csv'
filepath_test= '~/Downloads/C/normalized_testing.csv'
C_train, C_valid, C_test = load_data(filepath_train,filepath_valid,filepath_test)

## ARIMA

In [40]:
def optimal_pqd(y_train):
    model_autoARIMA = auto_arima(y_train, start_p=0, start_q=0,      
                      test='adf'       ,# using adftest to find optimal 'd'
                      max_p=5, max_q=5,# maximum p and q
                      m=1,              # frequency of series
                      d=None,           # let model determine 'd'
                      seasonal=False,   # No Seasonality
                      start_P=0, 
                      D=1, 
                      trace=True,
                      error_action='ignore',  
                      suppress_warnings=True, 
                      stepwise=True)

    return model_autoARIMA

In [41]:
#result_m = optimal_pqd(M_train['label'])
#result_m5 = optimal_pqd(M5_train['label'])
result_c = optimal_pqd(C_train['label'])

Performing stepwise search to minimize aic
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=110396.646, Time=2.37 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=-131682.357, Time=1.38 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=20353.221, Time=5.67 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AIC=-131680.499, Time=2.98 sec
 ARIMA(1,0,1)(0,0,0)[0]             : AIC=-131680.500, Time=2.86 sec
 ARIMA(2,0,1)(0,0,0)[0]             : AIC=-131678.390, Time=17.28 sec
 ARIMA(1,0,0)(0,0,0)[0] intercept   : AIC=-132024.302, Time=4.73 sec
 ARIMA(0,0,0)(0,0,0)[0] intercept   : AIC=89820.507, Time=10.77 sec
 ARIMA(2,0,0)(0,0,0)[0] intercept   : AIC=-132024.738, Time=8.42 sec
 ARIMA(3,0,0)(0,0,0)[0] intercept   : AIC=-132022.858, Time=11.97 sec
 ARIMA(2,0,1)(0,0,0)[0] intercept   : AIC=-132029.763, Time=79.58 sec
 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=-132024.729, Time=31.88 sec
 ARIMA(3,0,1)(0,0,0)[0] intercept   : AIC=-132021.713, Time=103.45 sec
 ARIMA(2,0,2)(0,0,0)[0] intercept   : AIC=-132021.213, Tim

In [74]:
# Defining ARIMA model
def arima(x_train, y_train,train_set,valid_set,test_set,x_test,x_valid,order): 
    pred=[]
    prediction_1=[]
    model1=ARIMA(endog=y_train,exog=x_train,order=order)
    results=model1.fit()
    #prediction for validation set
    predictions = results.predict(start=len(train_set), end=len(train_set)+len(valid_set)-1 ,exog = x_valid)
    #prediction for test
    test_pred_1= results.predict(start=len(train_set), end=len(train_set)+len(test_set)-1 ,exog = x_test)
    
    #validation data
    for i in predictions:
        if i<=0:
            pred.append(0)
        else:
            pred.append(1)
    #test data
    for i in test_pred_1:
        if i<=0:
            prediction_1.append(0)
        else:
            prediction_1.append(1)
    
    return (pred, prediction_1)
   

In [6]:
# M class
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score

pred, prediction_1 = arima(M_train.loc[:,'TOTUSJH':'MEANGBT'], M_train['label'],M_train,M_valid,
                          M_test,M_test.loc[:,'TOTUSJH':'MEANGBT'],M_valid.loc[:,'TOTUSJH':'MEANGBT'],[3,0,0]) 
#validation set
print ('Accuracy:', accuracy_score(M_valid['label'], pred))
print ('F1 score:', f1_score(M_valid['label'], pred))
print ('Recall:', recall_score(M_valid['label'], pred))
print ('Precision:', precision_score(M_valid['label'], pred))
print('-------------------------------\n')
#test set
print ('Test Accuracy:', accuracy_score(M_test['label'], prediction_1))
print ('Test F1 score:', f1_score(M_test['label'], prediction_1))
print ('Test Recall:', recall_score(M_test['label'], prediction_1))
print ('Test Precision:', precision_score(M_test['label'], prediction_1))



Accuracy: 0.43704906886261474
F1 score: 0.13420089467263116
Recall: 0.8574610244988864
Precision: 0.07279717635194756
-------------------------------

Test Accuracy: 0.5201951263174383
Test F1 score: 0.0938979039891819
Test Recall: 0.8693270735524257
Test Precision: 0.049629232556061824


In [11]:
# M5 class
pred, prediction_1 = arima(M5_train.loc[:,'TOTUSJH':'MEANGBT'], M5_train['label'],M5_train,M5_valid,
                          M5_test,M5_test.loc[:,'TOTUSJH':'MEANGBT'],M5_valid.loc[:,'TOTUSJH':'MEANGBT'],[1,0,0]) 
#validation set
print ('Accuracy:', accuracy_score(M5_valid['label'], pred))
print ('F1 score:', f1_score(M5_valid['label'], pred))
print ('Recall:', recall_score(M5_valid['label'], pred))
print ('Precision:', precision_score(M5_valid['label'], pred))
print('-------------------------------\n')
#test set
print ('Test Accuracy:', accuracy_score(M5_test['label'], prediction_1))
print ('Test F1 score:', f1_score(M5_test['label'], prediction_1))
print ('Test Recall:', recall_score(M5_test['label'], prediction_1))
print ('Test Precision:', precision_score(M5_test['label'], prediction_1))



Accuracy: 0.5645374532542591
F1 score: 0.04316069057104914
Recall: 0.8904109589041096
Precision: 0.022116366110922082
-------------------------------

Test Accuracy: 0.6907740159770861
Test F1 score: 0.019859564508121142
Test Recall: 0.7777777777777778
Test Precision: 0.010058193835764064


In [None]:
# C class
pred, prediction_1 = arima(C_train.loc[:,'TOTUSJH':'MEANGBT'], C_train['label'],C_train,C_valid,
                          C_test,C_test.loc[:,'TOTUSJH':'MEANGBT'],C_valid.loc[:,'TOTUSJH':'MEANGBT'],[2,0,2]) 
#validation set
print ('Accuracy:', accuracy_score(C_valid['label'], pred))
print ('F1 score:', f1_score(C_valid['label'], pred))
print ('Recall:', recall_score(C_valid['label'], pred))
print ('Precision:', precision_score(C_valid['label'], pred))
print('-------------------------------\n')
#test set
print ('Test Accuracy:', accuracy_score(C_test['label'], prediction_1))
print ('Test F1 score:', f1_score(C_test['label'], prediction_1))
print ('Test Recall:', recall_score(C_test['label'], prediction_1))
print ('Test Precision:', precision_score(C_test['label'], prediction_1))



## AR model

In [20]:
from math import sqrt
from statsmodels.tsa.ar_model import AutoReg

def auto_reg(y_train,x_train,train_set,valid_set,test_set,x_test,x_valid):
    pred_2=[]
    prediction_2=[]
   # train autoregression
    model2 = AutoReg(endog=y_train,exog=x_train,lags=1).fit()
    # make validation predictions
    prediction = model2.predict(start=len(train_set),end=len(train_set)+len(valid_set)-1,
                            exog_oos= x_valid,dynamic=False)
    #test predictions
    test_pred_2= model2.predict(start=len(train_set),end=len(train_set)+len(test_set)-1,
                                exog_oos= x_test,dynamic=False)
    
   #validation
    for i in prediction:
        if i <= 0.05:
            pred_2.append(0)
        else:
            pred_2.append(1)
    #test
    for i in test_pred_2:
        if i <= 0.05:
            prediction_2.append(0)
        else:
            prediction_2.append(1)
    return (pred_2, prediction_2)

In [22]:
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score
# M class
pred_2,prediction_2 = auto_reg( M_train['label'],M_train.loc[:,'TOTUSJH':'MEANGBT'],M_train,M_valid,
                          M_test,M_test.loc[:,'TOTUSJH':'MEANGBT'],M_valid.loc[:,'TOTUSJH':'MEANGBT'])
print ('Accuracy:', accuracy_score(M_valid['label'], pred_2))
print ('F1 score:', f1_score(M_valid['label'], pred_2))
print ('Recall:', recall_score(M_valid['label'], pred_2))
print ('Precision:', precision_score(M_valid['label'], pred_2))
print('-------------------------------\n')
#test set
print ('Test Accuracy:', accuracy_score(M_test['label'], prediction_2))
print ('Test F1 score:', f1_score(M_test['label'], prediction_2))
print ('Test Recall:', recall_score(M_test['label'], prediction_2))
print ('Test Precision:', precision_score(M_test['label'], prediction_2))



Accuracy: 0.6826955766252408
F1 score: 0.19786096256684488
Recall: 0.7691165553080921
Precision: 0.11353424657534246
-------------------------------

Test Accuracy: 0.7920964890688984
Test F1 score: 0.1708165997322624
Test Recall: 0.7488262910798122
Test Precision: 0.09640374735569658


In [21]:
# M5 class
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score

pred_2,prediction_2 = auto_reg( M5_train['label'],M5_train.loc[:,'TOTUSJH':'MEANGBT'],M5_train,M_valid,
                          M5_test,M5_test.loc[:,'TOTUSJH':'MEANGBT'],M5_valid.loc[:,'TOTUSJH':'MEANGBT'])
print ('Accuracy:', accuracy_score(M5_valid['label'], pred_2))
print ('F1 score:', f1_score(M5_valid['label'], pred_2))
print ('Recall:', recall_score(M5_valid['label'], pred_2))
print ('Precision:', precision_score(M5_valid['label'], pred_2))
print('-------------------------------\n')
#test set
print ('Test Accuracy:', accuracy_score(M5_test['label'], prediction_2))
print ('Test F1 score:', f1_score(M5_test['label'], prediction_2))
print ('Test Recall:', recall_score(M5_test['label'], prediction_2))
print ('Test Precision:', precision_score(M5_test['label'], prediction_2))



Accuracy: 0.8857326332489707
F1 score: 0.12344248044045204
Recall: 0.7294520547945206
Precision: 0.06742640075973409
-------------------------------

Test Accuracy: 0.9328470093311553
Test F1 score: 0.0671433012123096
Test Recall: 0.6
Test Precision: 0.035561409285479094


In [23]:
# C class
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score
# M class
pred_2,prediction_2 = auto_reg(C_train['label'],C_train.loc[:,'TOTUSJH':'MEANGBT'],C_train,C_valid,
                          C_test,C_test.loc[:,'TOTUSJH':'MEANGBT'],C_valid.loc[:,'TOTUSJH':'MEANGBT'])

print ('Accuracy:', accuracy_score(C_valid['label'], pred_2))
print ('F1 score:', f1_score(C_valid['label'], pred_2))
print ('Recall:', recall_score(C_valid['label'], pred_2))
print ('Precision:', precision_score(C_valid['label'], pred_2))
print('-------------------------------\n')
#test set
print ('Test Accuracy:', accuracy_score(C_test['label'], prediction_2))
print ('Test F1 score:', f1_score(C_test['label'], prediction_2))
print ('Test Recall:', recall_score(C_test['label'], prediction_2))
print ('Test Precision:', precision_score(C_test['label'], prediction_2))



Accuracy: 0.36429569750311636
F1 score: 0.43728892901327443
Recall: 0.9268603827072998
Precision: 0.28614563276737265
-------------------------------

Test Accuracy: 0.3132090671082369
Test F1 score: 0.3410340089316386
Test Recall: 0.9095281722400367
Test Precision: 0.20986153683542966


## VAR

In [14]:
from statsmodels.tsa.api import VAR

def order(train):
    model3 = VAR(train[list(train.loc[:,'TOTUSJH':'MEANGBT'])+['label']])
    x = model3.select_order(maxlags=35)
    return x.summary()

In [15]:
x1 = order(M_train)
print('For M-class solar flares\n',x1)
x2 = order(M5_train)
print('For M5-class\n',x2)
x3 = order(C_train)
print('For C-class\n',x3)



For M-class solar flares
  VAR Order Selection (* highlights the minimums)  
       AIC         BIC         FPE         HQIC   
--------------------------------------------------
0       -93.80      -93.79   1.840e-41      -93.80
1       -155.7      -155.6   2.461e-68      -155.7
2       -156.1      -156.0   1.629e-68      -156.0
3       -156.3      -156.1   1.282e-68      -156.3
4       -156.5      -156.2   1.084e-68      -156.4
5       -156.6     -156.3*   9.976e-69      -156.5
6       -156.6      -156.2   9.537e-69      -156.5
7       -156.7      -156.2   9.009e-69      -156.5
8       -156.7      -156.2   8.596e-69      -156.6
9       -156.8      -156.2   8.230e-69      -156.6
10      -156.8      -156.2   7.985e-69      -156.6
11      -156.8      -156.1   7.743e-69      -156.6
12      -156.9      -156.1   7.446e-69      -156.6
13      -156.9      -156.1   7.246e-69      -156.6
14      -156.9      -156.0   7.094e-69      -156.6
15      -156.9      -156.0   6.882e-69      -156.7
16   



For M5-class
  VAR Order Selection (* highlights the minimums)  
       AIC         BIC         FPE         HQIC   
--------------------------------------------------
0       -90.67      -90.67   4.174e-40      -90.67
1       -150.0      -150.0   6.885e-66      -150.0
2       -150.4      -150.3   4.729e-66      -150.4
3       -150.6      -150.4   3.840e-66      -150.6
4       -150.8      -150.6   3.260e-66      -150.7
5       -150.9     -150.6*   3.000e-66      -150.8
6       -150.9      -150.6   2.866e-66      -150.8
7       -151.0      -150.6   2.707e-66      -150.8
8       -151.0      -150.6   2.582e-66      -150.9
9       -151.1      -150.5   2.470e-66      -150.9
10      -151.1      -150.5   2.395e-66      -150.9
11      -151.1      -150.5   2.337e-66      -150.9
12      -151.2      -150.5   2.259e-66      -150.9
13      -151.2      -150.4   2.204e-66      -150.9
14      -151.2      -150.4   2.160e-66      -150.9
15      -151.2      -150.4   2.093e-66      -151.0
16      -151.3   



For C-class
  VAR Order Selection (* highlights the minimums)  
       AIC         BIC         FPE         HQIC   
--------------------------------------------------
0       -76.64      -76.64   5.195e-34      -76.64
1       -142.6      -142.5   1.228e-62      -142.5
2       -143.0      -142.8   8.146e-63      -142.9
3       -143.2      -143.0   6.523e-63      -143.1
4       -143.3      -143.1   5.623e-63      -143.3
5       -143.4     -143.1*   5.262e-63      -143.3
6       -143.4      -143.1   5.083e-63      -143.3
7       -143.5      -143.0   4.853e-63      -143.3
8       -143.5      -143.0   4.690e-63      -143.4
9       -143.5      -143.0   4.596e-63      -143.4
10      -143.6      -142.9   4.506e-63      -143.4
11      -143.6      -142.9   4.454e-63      -143.4
12      -143.6      -142.8   4.391e-63      -143.3
13      -143.6      -142.8   4.337e-63      -143.3
14      -143.6      -142.7   4.299e-63      -143.3
15      -143.6      -142.7   4.264e-63      -143.3
16      -143.6    

In [30]:
def vector_ar(train,valid,test,lags):
    pred_3=[]
    predictions_3=[]
    
    model3 = VAR(train[list(train.loc[:,'TOTUSJH':'MEANGBT'])+['label']])
    model_fitted = model3.fit(lags)
    model_fitted.summary()
    # Get the lag order
    lag_order = model_fitted.k_ar
    print(f'Lag order {lag_order}\n')

    # Input data for forecasting
    forecast_input =valid[list(valid.loc[:,'TOTUSJH':'MEANGBT'])+['label']].values[-lag_order:]
    test_forecast_input = test[list(test.loc[:,'TOTUSJH':'MEANGBT'])+['label']].values[-lag_order:]
    
    # Forecast on validation set
    fc = model_fitted.forecast(y=forecast_input, steps=len(valid))
    df_forecast = pd.DataFrame(fc, index=valid.index[0:len(valid)])
    df_forecast= df_forecast.rename(columns={0:'label'})
    
    # Test Forecast
    fc = model_fitted.forecast(y=test_forecast_input, steps=len(test))
    test_forecast = pd.DataFrame(fc, index=test.index[0:len(test)])
    test_forecast= test_forecast.rename(columns={0:'label'})
    
    #predictions for validation set

    for i in df_forecast['label'].values:
        if i<=0.212:
            pred_3.append(0)
        else:
            pred_3.append(1)
    
    #predictions for test set
    
    for i in test_forecast['label'].values:
        if i<=0.212:
            predictions_3.append(0)
        else:
            predictions_3.append(1)
    
    return (pred_3, predictions_3) 

In [25]:
#M-class
pred_3, predictions_3= vector_ar(M_train,M_valid,M_test,35)
#validation data
print ('Accuracy:', accuracy_score(M_valid['label'], pred_3))
print ('F1 score:', f1_score(M_valid['label'], pred_3))
print ('Recall:', recall_score(M_valid['label'], pred_3))
print ('Precision:', precision_score(M_valid['label'], pred_3))
print('-------------------------------\n')
#test data
print ('Test Accuracy:', accuracy_score(M_test['label'], predictions_3))
print ('Test F1 score:', f1_score(M_test['label'], predictions_3))
print ('Test Recall:', recall_score(M_test['label'], predictions_3))
print ('Test Precision:', precision_score(M_test['label'], predictions_3))



Lag order 35

Accuracy: 0.9376723454085294
F1 score: 0.04513888888888889
Recall: 0.028953229398663696
Precision: 0.10236220472440945
-------------------------------

Test Accuracy: 0.9663004318736155
Test F1 score: 0.02207792207792208
Test Recall: 0.013302034428794992
Test Precision: 0.0648854961832061


In [31]:
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score
#M5-class
pred_3, predict_3= vector_ar(M5_train,M5_valid,M5_test,34)
#validation data
print ('Accuracy:', accuracy_score(M5_valid['label'], pred_3))
print ('F1 score:', f1_score(M5_valid['label'], pred_3))
print ('Recall:', recall_score(M5_valid['label'], pred_3))
print ('Precision:', precision_score(M5_valid['label'], pred_3))
print('-------------------------------\n')
#test data
print ('Test Accuracy:', accuracy_score(M5_test['label'], predict_3))
print ('Test F1 score:', f1_score(M5_test['label'], predict_3))
print ('Test Recall:', recall_score(M5_test['label'], predict_3))
print ('Test Precision:', precision_score(M5_test['label'], predict_3))



Lag order 34

Accuracy: 0.9723491859630568
F1 score: 0.07341772151898733
Recall: 0.09931506849315068
Precision: 0.05823293172690763
-------------------------------

Test Accuracy: 0.9882968963279555
Test F1 score: 0.0
Test Recall: 0.0
Test Precision: 0.0


In [23]:
#C-class
pred_3, predictions_3= vector_ar(C_train,C_valid,C_test,30)
#validation data
print ('Accuracy:', accuracy_score(C_valid['label'], pred_3))
print ('F1 score:', f1_score(C_valid['label'], pred_3))
print ('Recall:', recall_score(C_valid['label'], pred_3))
print ('Precision:', precision_score(C_valid['label'], pred_3))
print('-------------------------------\n')
#test data
print ('Test Accuracy:', accuracy_score(C_test['label'], predictions_3))
print ('Test F1 score:', f1_score(C_test['label'], predictions_3))
print ('Test Recall:', recall_score(C_test['label'], predictions_3))
print ('Test Precision:', precision_score(C_test['label'], predictions_3))



Lag order 30

Accuracy: 0.7268537755448948
F1 score: 0.030307094005632293
Recall: 0.016017009213323885
Precision: 0.2810945273631841
-------------------------------

Test Accuracy: 0.8023450961086621
Test F1 score: 0.017354544443208365
Test Recall: 0.008932661475034357
Test Precision: 0.3035019455252918


## Gaussian Process Classifier

In [None]:
import sklearn.gaussian_process as gp
kernel =  gp.kernels.ConstantKernel(2.0) * gp.kernels.RBF(0.1)

model4 = gp.GaussianProcessClassifier(kernel=kernel).fit(M5_train.loc[:,'TOTUSJH':'MEANGBT'], M5_train['label'])
params = model4.kernel_.get_params()

In [None]:
y_pred = model4.predict_proba(M_valid.loc[:,'TOTUSJH':'MEANGBT'])

## LSTM

In [8]:
def convert_categorical(train,valid,test):
    train['label'] = train['label'].astype('category',copy=False)
    valid['label'] = valid['label'].astype('category',copy=False)
    test['label'] = test['label'].astype('category',copy=False)
    return (train['label'],valid['label'],test['label'])

In [9]:
#M class
M_train['label'],M_valid['label'],M_test['label'] = convert_categorical(M_train,M_valid,M_test)
#M5 class
M5_train['label'],M5_valid['label'],M5_test['label'] = convert_categorical(M5_train,M5_valid,M5_test)
#C class
C_train['label'],C_valid['label'],C_test['label'] = convert_categorical(C_train,C_valid,C_test)

In [64]:
def split_data(X, y, time_steps=1):
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        v = X.iloc[i:(i + time_steps)].values
        Xs.append(v)
        ys.append(y.iloc[i + time_steps])
    return np.array(Xs), np.array(ys)


In [65]:
time_steps = 40

# reshape to [samples, time_steps, n_features]
X_train_M, y_train_M = split_data(M_train.loc[:,'TOTUSJH':'MEANGBT'], M_train['label'], time_steps)
X_valid_M, y_valid_M = split_data(M_valid.loc[:,'TOTUSJH':'MEANGBT'], M_valid['label'], time_steps)
X_test_M, y_test_M = split_data(M_test.loc[:,'TOTUSJH':'MEANGBT'], M_test['label'], time_steps)

# M5 class
X_train_M5, y_train_M5 = split_data(M5_train.loc[:,'TOTUSJH':'MEANGBT'], M5_train['label'], time_steps)
X_valid_M5, y_valid_M5 = split_data(M5_valid.loc[:,'TOTUSJH':'MEANGBT'], M5_valid['label'], time_steps)
X_test_M5, y_test_M5 = split_data(M5_test.loc[:,'TOTUSJH':'MEANGBT'], M5_test['label'], time_steps)

# C class
X_train_C, y_train_C = split_data(C_train.loc[:,'TOTUSJH':'MEANGBT'], C_train['label'], time_steps)
X_valid_C, y_valid_C = split_data(C_valid.loc[:,'TOTUSJH':'MEANGBT'], C_valid['label'], time_steps)
X_test_C, y_test_C = split_data(C_test.loc[:,'TOTUSJH':'MEANGBT'], C_test['label'], time_steps)

In [49]:
RANDOM_SEED = 40

np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

def lstm(X_train,y_train,X_valid,X_test):
    
    pred_5=[]
    prediction_5=[]
    
    model5 = keras.Sequential()
    #LSTM
    model5.add(keras.layers.LSTM(
      units=10,
      input_shape=(X_train.shape[1], X_train.shape[2])
        ))
    #Dense Layer
    model5.add(keras.layers.Dense(units=200,activation='relu'))
    model5.add(keras.layers.Dropout(0.5))
    model5.add(keras.layers.Dense(units=2,activation='softmax'))
    model5.compile(
      loss='sparse_categorical_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])
    history = model5.fit(X_train, y_train,
            epochs=10,batch_size=256,verbose=1,shuffle=False)
    y_pred = model5.predict(X_valid)
    y_pred5 = model5.predict(X_test)
    
    #validation prediction
    
    for i in y_pred:
        if i[0] > i[1]:
            pred_5.append(0)
        else:
            pred_5.append(1)
    #for test data
    for i in y_pred5:
        if i[0] > i[1]:
            prediction_5.append(0)
        else:
            prediction_5.append(1)
    
    return (pred_5,prediction_5)

In [52]:
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score
#M class
pred_5, prediction_5 = lstm(X_train_M,y_train_M,X_valid_M,X_test_M)
#validation set
print ('\nAccuracy:', accuracy_score(y_valid_M, pred_5))
print ('F1 score:', f1_score(y_valid_M, pred_5))
print ('Recall:', recall_score(y_valid_M, pred_5))
print ('Precision:', precision_score(y_valid_M, pred_5))
print('-------------------------------\n')
#test set
print ('Test Accuracy:', accuracy_score(y_test_M, prediction_5))
print ('Test F1 score:', f1_score(y_test_M, prediction_5))
print ('Test Recall:', recall_score(y_test_M, prediction_5))
print ('Test Precision:', precision_score(y_test_M, prediction_5))

Train on 84537 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.9532024363485038
F1 score: 0.3640102827763496
Recall: 0.26280623608017817
Precision: 0.5919732441471572
-------------------------------

Test Accuracy: 0.9754977715066406
Test F1 score: 0.40608034744842564
Test Recall: 0.29264475743348983
Test Precision: 0.6631205673758865


In [53]:
#M5 class
pred_5, preidction_5 = lstm(X_train_M5,y_train_M5,X_valid_M5,X_test_M5)
#validation set
print ('\nAccuracy:', accuracy_score(y_valid_M5, pred_5))
print ('F1 score:', f1_score(y_valid_M5, pred_5))
print ('Recall:', recall_score(y_valid_M5, pred_5))
print ('Precision:', precision_score(y_valid_M5, pred_5))
print('-------------------------------\n')
#test set
print ('Test Accuracy:', accuracy_score(y_test_M5, prediction_5))
print ('Test F1 score:', f1_score(y_test_M5, prediction_5))
print ('Test Recall:', recall_score(y_test_M5, prediction_5))
print ('Test Precision:', precision_score(y_test_M5, prediction_5))

Train on 84537 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Accuracy: 0.9889532024363485
F1 score: 0.0
Recall: 0.0
Precision: 0.0
-------------------------------

Test Accuracy: 0.9866962305986696
Test F1 score: 0.20161290322580647
Test Recall: 0.4166666666666667
Test Precision: 0.13297872340425532


  _warn_prf(average, modifier, msg_start, len(result))


In [54]:
#C class
pred_5, preidction_5 = lstm(X_train_C,y_train_C,X_valid_C,X_test_C)
#validation set
print ('\nAccuracy:', accuracy_score(y_valid_C, pred_5))
print ('F1 score:', f1_score(y_valid_C, pred_5))
print ('Recall:', recall_score(y_valid_C, pred_5))
print ('Precision:', precision_score(y_valid_C, pred_5))
print('-------------------------------\n')
#test set
print ('Test Accuracy:', accuracy_score(y_test_C, prediction_5))
print ('Test F1 score:', f1_score(y_test_C, prediction_5))
print ('Test Recall:', recall_score(y_test_C, prediction_5))
print ('Test Precision:', precision_score(y_test_C, prediction_5))

Train on 84537 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Accuracy: 0.8077781560927628
F1 score: 0.5921168820743358
Recall: 0.5227498228206945
Precision: 0.6827101073676416
-------------------------------

Test Accuracy: 0.816166095545253
Test F1 score: 0.11398963730569948
Test Recall: 0.060689655172413794
Test Precision: 0.9361702127659575


## Bidirectional LSTM

In [67]:
def bi_lstm(X_train,y_train,X_valid,X_test,n_features=23):
    n_steps=40
    
    pred_6=[]
    predictions_test=[]
    # Bidirectional LSTM model
    model6 =keras.Sequential()
    model6.add(keras.layers.Bidirectional(keras.layers.LSTM(20), merge_mode='concat', input_shape=(n_steps, n_features)))
    model6.add(keras.layers.Dense(units=200,activation='relu'))
    model6.add(keras.layers.Dropout(0.5))
    model6.add(keras.layers.Dense(units=2,activation='softmax'))
    model6.compile(loss='sparse_categorical_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])
    
    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], n_features))
    # fit model
    model6.fit(X_train, y_train, epochs=15, verbose=0)
    # demonstrate prediction
    X_test = X_test.reshape((len(X_test), n_steps, n_features))
    #prediction for validation data
    y_hat = model6.predict(X_valid, verbose=0)
    #prediction on test set 
    y_pred6 = model6.predict(X_test, verbose=0)
    
    #predictions for validation data

    for i in y_hat:
        if i[0] > i[1]:
            pred_6.append(0)
        else:
            pred_6.append(1)
    #test predictions

    for i in y_pred6:
        if i[0] > i[1]:
            predictions_test.append(0)
        else:
            predictions_test.append(1)
            
    return (pred_6,predictions_test)

In [20]:
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score
#getting predictions for train and test set
pred_6, predictions_test = bi_lstm(X_train_M,y_train_M,X_valid_M,X_test_M)

print ('\nAccuracy:', accuracy_score(y_valid_M, pred_6))
print ('F1 score:', f1_score(y_valid_M, pred_6))
print ('Recall:', recall_score(y_valid_M, pred_6))
print ('Precision:', precision_score(y_valid_M, pred_6))
print('--------------------------\n')
print ('Test Accuracy:', accuracy_score(y_test_M, predictions_test))
print ('Test F1 score:', f1_score(y_test_M, predictions_test))
print ('Test Recall:', recall_score(y_test_M, predictions_test))
print ('Test Precision:', precision_score(y_test_M, predictions_test))


Accuracy: 0.9435175727310559
F1 score: 0.19860440150295222
Recall: 0.13734224201930215
Precision: 0.35852713178294576
--------------------------

Test Accuracy: 0.9622611928598624
Test F1 score: 0.18084589207583857
Test Recall: 0.14553990610328638
Test Precision: 0.23876765083440307


In [68]:
pred_6, predictions_test = bi_lstm(X_train_M5,y_train_M5,X_valid_M5,X_test_M5,n_features=22)

print ('\nAccuracy:', accuracy_score(y_valid_M5, pred_6))
print ('F1 score:', f1_score(y_valid_M5, pred_6))
print ('Recall:', recall_score(y_valid_M5, pred_6))
print ('Precision:', precision_score(y_valid_M5, pred_6))
print('--------------------------\n')
print ('Test Accuracy:', accuracy_score(y_test_M5, predictions_test))
print ('Test F1 score:', f1_score(y_test_M5, predictions_test))
print ('Test Recall:', recall_score(y_test_M5, predictions_test))
print ('Test Precision:', precision_score(y_test_M5, predictions_test))


Accuracy: 0.9846025801081981
F1 score: 0.16427104722792607
Recall: 0.136986301369863
Precision: 0.20512820512820512
--------------------------

Test Accuracy: 0.9912875988264015
Test F1 score: 0.010178117048346057
Test Recall: 0.011111111111111112
Test Precision: 0.009389671361502348


In [24]:
pred_6, predictions_test = bi_lstm(X_train_C,y_train_C,X_valid_C,X_test_C)
print ('\nAccuracy:', accuracy_score(y_valid_C, pred_6))
print ('F1 score:', f1_score(y_valid_C, pred_6))
print ('Recall:', recall_score(y_valid_C, pred_6))
print ('Precision:', precision_score(y_valid_C, pred_6))
print('--------------------------\n')
print ('Test Accuracy:', accuracy_score(y_test_C, predictions_test))
print ('Test F1 score:', f1_score(y_test_C, predictions_test))
print ('Test Recall:', recall_score(y_test_C, predictions_test))
print ('Test Precision:', precision_score(y_test_C, predictions_test))


Accuracy: 0.764120606817236
F1 score: 0.5437916148386623
Recall: 0.526718639262934
Precision: 0.5620084694494858
--------------------------

Test Accuracy: 0.8125601917176196
Test F1 score: 0.49471714061462296
Test Recall: 0.4709195402298851
Test Precision: 0.5210479460765611


## Cascaded bidirectional & unidirectional LSTM model

In [71]:
# Cascaded bidirectional and unidirectional model
def cascaded_lstm(X_train,y_train,X_valid,X_test,n_features=23):
    n_steps=40
    pred_7=[]
    pred_test7=[]
    
    model7 =keras.Sequential()
    model7.add(keras.layers.Bidirectional(keras.layers.LSTM(20, input_shape=(X_train.shape[1],X_train.shape[2]),return_sequences=True),merge_mode='concat'))
    model7.add(keras.layers.LSTM(10,input_shape=(X_train.shape[1],X_test.shape[2])))
    model7.add(keras.layers.Dense(units=200,activation='relu'))
    model7.add(keras.layers.Dropout(0.5))
    model7.add(keras.layers.Dense(units=2,activation='softmax'))
    model7.compile(loss='sparse_categorical_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])
    
    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], n_features))
    # fit model
    model7.fit(X_train, y_train, epochs=15, verbose=0)
    # demonstrate prediction
    X_test = X_test.reshape((len(X_test), n_steps, n_features))
    yhat = model7.predict(X_valid, verbose=0)
    #predicting for test set 
    y_pred7 = model7.predict(X_test, verbose=0)
    #predictions for validation data
    for i in yhat:
        if i[0] > i[1]:
            pred_7.append(0)
        else:
            pred_7.append(1)
    #predictions for test data
    for i in y_pred7:
        if i[0] > i[1]:
            pred_test7.append(0)
        else:
            pred_test7.append(1)
    return (pred_7, pred_test7)

In [29]:
#M class flares
pred_7, pred_test7 = cascaded_lstm(X_train_M,y_train_M,X_valid_M,X_test_M)
print ('\nAccuracy:', accuracy_score(y_valid_M, pred_7))
print ('F1 score:', f1_score(y_valid_M, pred_7))
print ('Recall:', recall_score(y_valid_M, pred_7))
print ('Precision:', precision_score(y_valid_M, pred_7))
print('--------------------------\n')
print ('Test Accuracy:', accuracy_score(y_test_M, pred_test7))
print ('Test F1 score:', f1_score(y_test_M, pred_test7))
print ('Test Recall:', recall_score(y_test_M, pred_test7))
print ('Test Precision:', precision_score(y_test_M, pred_test7))


Accuracy: 0.945371316157833
F1 score: 0.37597234226447707
Recall: 0.32293986636971045
Precision: 0.4498448810754912
--------------------------

Test Accuracy: 0.9610293623597393
Test F1 score: 0.25321888412017174
Test Recall: 0.23082942097026604
Test Precision: 0.2804182509505703


In [72]:
#M5 class flares
pred_7, pred_test7 = cascaded_lstm(X_train_M5,y_train_M5,X_valid_M5,X_test_M5,n_features=22)

print ('\nAccuracy:', accuracy_score(y_valid_M5, pred_7))
print ('F1 score:', f1_score(y_valid_M5, pred_7))
print ('Recall:', recall_score(y_valid_M5, pred_7))
print ('Precision:', precision_score(y_valid_M5, pred_7))
print('--------------------------\n')
print ('Test Accuracy:', accuracy_score(y_test_M5, pred_test7))
print ('Test F1 score:', f1_score(y_test_M5, pred_test7))
print ('Test Recall:', recall_score(y_test_M5, pred_test7))
print ('Test Precision:', precision_score(y_test_M5, pred_test7))


Accuracy: 0.9862671660424469
F1 score: 0.1768707482993197
Recall: 0.13356164383561644
Precision: 0.26174496644295303
--------------------------

Test Accuracy: 0.9920938878810276
Test F1 score: 0.005633802816901408
Test Recall: 0.005555555555555556
Test Precision: 0.005714285714285714


In [31]:
#C class flares
pred_7, pred_test7 = cascaded_lstm(X_train_C,y_train_C,X_valid_C,X_test_C)
print ('\nAccuracy:', accuracy_score(y_valid_C, pred_7))
print ('F1 score:', f1_score(y_valid_C, pred_7))
print ('Recall:', recall_score(y_valid_C, pred_7))
print ('Precision:', precision_score(y_valid_C, pred_7))
print('--------------------------\n')
print ('Test Accuracy:', accuracy_score(y_test_C, pred_test7))
print ('Test F1 score:', f1_score(y_test_C, pred_test7))
print ('Test Recall:', recall_score(y_test_C, pred_test7))
print ('Test Precision:', precision_score(y_test_C, pred_test7))


Accuracy: 0.7714599175273332
F1 score: 0.5725606736007924
Recall: 0.5734939759036145
Precision: 0.571630404068946
--------------------------

Test Accuracy: 0.8051020179623284
Test F1 score: 0.5016036655211913
Test Recall: 0.5033333333333333
Test Precision: 0.49988584474885844


## Table of comparision

## M-class

Models             | Accuracy     | Precision | Recall | F1-Score
-----------------: |-------------:|----------:|-------:|---------:
ARIMA              | 0.520 | 0.049 |  0.869 | 0.093  
LSTM               | 0.976 | 0.663 | 0.343 | __*0.452*__
AR                 | 0.792 | 0.096 | 0.748 | 0.171
VAR                | 0.966 | 0.065 | 0.013 | 0.022
Bidirectional LSTM | 0.967 | 0.337 | 0.144 | 0.203
Cascaded bidirectional and unidirectional based LSTM | 0.967 | 0.308 | 0.129 | 0.182

## M5 class

Models             | Accuracy     | Precision | Recall | F1-Score
-----------------: |-------------:|----------:|-------:|---------:
ARIMA              | 0.691 | 0.022 |  0.890 | 0.020  
LSTM               | 0.987 | 0.133 | 0.417 | __*0.202*__
AR                 | 0.933 | 0.036 | 0.6 | 0.067
VAR                | 0.988 | 0.0 | 0.0 | 0.0
Bidirectional LSTM | 0.992 | 0.009 | 0.011 | 0.010
Cascaded bidirectional and unidirectional based LSTM | 0.992 | 0.005 | 0.005 | 0.006

## C class

Models             | Accuracy     | Precision | Recall | F1-Score
-----------------: |-------------:|----------:|-------:|---------:
ARIMA              | .. | .. |  .. | ..  
LSTM               | 0.816 | 0.936 | 0.061 | 0.114
AR                 | 0.313 | 0.209 | 0.909 | 0.341
VAR                | 0.802 | 0.303 | 0.008 | 0.017
Bidirectional LSTM | 0.813 | 0.521 | 0.471 | 0.494
Cascaded bidirectional and unidirectional based LSTM | 0.805 | 0.499 | 0.503 | __*0.501*__