In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

import tensorflow
from tensorflow import keras
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Dense, Flatten, Input, Dropout, LSTM
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Pre-processing

In [3]:
df = pd.read_csv(r"/content/drive/MyDrive/Nasa_Space_Apps/data.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,index,BGSM1,BGSM2,BGSM3,BGSE1,BGSE2,BGSE3,Range,Label
0,0,0,6.396383,-10.167744,-2.116706,6.396373,-9.142916,-4.936532,2.0,0.0
1,1,1,6.330517,-7.975523,-8.170177,6.330477,-5.355678,-10.08282,2.0,0.0
2,2,2,8.211111,-8.562181,1.172102,8.211116,-8.563263,-1.225468,2.0,0.0
3,3,3,9.740716,-5.876574,1.38982,9.740722,-6.025173,-0.228289,2.0,0.0
4,4,4,8.951678,-10.19767,0.271943,8.951679,-9.935565,-2.366858,2.0,0.0


In [4]:
df = df.drop(['index', 'Unnamed: 0'], axis = 1)

In [5]:
df.head()

Unnamed: 0,BGSM1,BGSM2,BGSM3,BGSE1,BGSE2,BGSE3,Range,Label
0,6.396383,-10.167744,-2.116706,6.396373,-9.142916,-4.936532,2.0,0.0
1,6.330517,-7.975523,-8.170177,6.330477,-5.355678,-10.08282,2.0,0.0
2,8.211111,-8.562181,1.172102,8.211116,-8.563263,-1.225468,2.0,0.0
3,9.740716,-5.876574,1.38982,9.740722,-6.025173,-0.228289,2.0,0.0
4,8.951678,-10.19767,0.271943,8.951679,-9.935565,-2.366858,2.0,0.0


In [6]:
df['Label'].value_counts()

0.0    31304
1.0     1001
2.0      347
Name: Label, dtype: int64

In [7]:
y = df[['Label', 'Range']]
X = df.drop(['Label', 'Range'], axis=1)

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
y_train_range = y_train['Range']
y_train = y_train['Label']

y_test_range = y_test['Range']
y_test = y_test['Label']

In [10]:
y_train_range = pd.get_dummies(y_train_range).values
y_train = pd.get_dummies(y_train).values
y_test_range = pd.get_dummies(y_test_range).values
y_test = pd.get_dummies(y_test).values

In [11]:
X_train = np.array(X_train).reshape((X_train.shape[0],X_train.shape[1],1))
X_test = np.array(X_test).reshape((X_test.shape[0],X_test.shape[1],1))

## Model

In [12]:
input1 = Input(shape = (X_train.shape[1], 1))
x = LSTM(256, return_sequences=True)(input1)
x = Flatten()(x)
x = Dense(64)(x)
x = Dropout(0.5)(x)
x = Dense(32)(x)
output = Dense(3, activation='softmax', name='kp_type')(x)

model = Model(inputs=input1, outputs=output)

In [13]:
opt_adam = keras.optimizers.Adam(learning_rate=0.001)

model.compile(optimizer=opt_adam,
                  loss=['categorical_crossentropy'],
                  metrics=['accuracy'])

In [14]:
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
tensorflow.config.run_functions_eagerly(True)

In [15]:
model_history = model.fit(X_train, y_train,
         validation_data=(X_test, y_test),
         batch_size = 32,
         epochs = 15,
         callbacks=[es], verbose = 2)

Epoch 1/15


  "Even though the `tf.config.experimental_run_functions_eagerly` "


817/817 - 26s - loss: 0.1631 - accuracy: 0.9597 - val_loss: 0.1311 - val_accuracy: 0.9668 - 26s/epoch - 32ms/step
Epoch 2/15
817/817 - 21s - loss: 0.1268 - accuracy: 0.9648 - val_loss: 0.1191 - val_accuracy: 0.9677 - 21s/epoch - 26ms/step
Epoch 3/15
817/817 - 23s - loss: 0.1258 - accuracy: 0.9652 - val_loss: 0.1180 - val_accuracy: 0.9686 - 23s/epoch - 28ms/step
Epoch 4/15
817/817 - 31s - loss: 0.1233 - accuracy: 0.9666 - val_loss: 0.1204 - val_accuracy: 0.9677 - 31s/epoch - 38ms/step
Epoch 5/15
817/817 - 25s - loss: 0.1222 - accuracy: 0.9660 - val_loss: 0.1187 - val_accuracy: 0.9674 - 25s/epoch - 31ms/step
Epoch 6/15
817/817 - 27s - loss: 0.1194 - accuracy: 0.9665 - val_loss: 0.1181 - val_accuracy: 0.9688 - 27s/epoch - 33ms/step
Epoch 7/15
817/817 - 25s - loss: 0.1195 - accuracy: 0.9668 - val_loss: 0.1218 - val_accuracy: 0.9680 - 25s/epoch - 31ms/step
Epoch 8/15
817/817 - 23s - loss: 0.1170 - accuracy: 0.9669 - val_loss: 0.1160 - val_accuracy: 0.9678 - 23s/epoch - 28ms/step
Epoch 9/15


In [16]:
model.evaluate(X_test, y_test)



[0.11289668828248978, 0.9702954888343811]

In [17]:
model.predict(X_train[0].reshape(1, 6, 1))

array([[9.9589133e-01, 3.4771475e-03, 6.3145475e-04]], dtype=float32)

In [18]:
model.save("predication_model.h5")

In [19]:
y_pred = model.predict(X_test)
y_pred.shape

(6531, 3)

In [20]:
np.argmax(y_pred, axis = 1).shape

(6531,)

In [21]:
f1_score(np.argmax(y_test, axis = 1), np.argmax(y_pred, axis = 1), average='micro')

0.9702955137038738

In [25]:
from sklearn.metrics import confusion_matrix
confusion_matrix(np.argmax(y_test, axis = 1), np.argmax(y_pred, axis = 1))

array([[6252,   14,    0],
       [ 150,   39,    2],
       [  19,    9,   46]])