In [1]:
import os
import numpy as np
import pandas as pd
from datetime import datetime

In [2]:
# Load the training CSVs found in proc_data/total into a single frame `ace`,
# then load the frame we have to predict on (`problem`).
TOTAL_DIR = 'proc_data/total'

# os.listdir() returns entries in arbitrary, platform-dependent order. Sorting
# makes both the file dropped by [:-1] and the row order of `ace` deterministic
# (row order matters because the later train/test split is done unshuffled).
names = sorted(os.listdir(TOTAL_DIR))[:-1]

# Read every file first and concatenate once: calling pd.concat inside the loop
# re-copies the accumulated frame on each iteration.
frames = [
    pd.read_csv(f'{TOTAL_DIR}/{name}', index_col=0, parse_dates=['date'])
    for name in names
]
# pd.concat raises on an empty list, so keep the original "empty frame" result
# when the directory holds fewer than two files.
ace = pd.concat(frames, axis=0) if frames else pd.DataFrame()

problem = pd.read_csv("proc_data/problem.csv", index_col=0, parse_dates=['date'])
problem.head()

Unnamed: 0_level_0,Np,Tp,Vp,B_gsm_x,B_gsm_y,B_gsm_z,Bmag
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
01-01 00:00,1.225355,32956.035503,443.036509,5.142349,1.988692,-12.914,14.14332
01-01 03:00,1.613686,55713.597041,431.723491,3.574822,-2.570586,-9.271053,11.855373
01-01 06:00,1.191851,80571.958333,432.390536,4.361542,-5.262113,-7.125196,10.517149
01-01 09:00,1.1,149231.295858,428.213609,3.533574,-6.503805,4.220485,9.589148
01-01 12:00,1.1,77718.39645,413.764024,6.511308,-6.137467,-0.664426,9.313183


In [3]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping

# Report the TensorFlow version and the first GPU device (None when absent).
# tf.test.gpu_device_name() already returns an empty string when no GPU is
# found, so `or None` reproduces the old fallback without the extra
# tf.test.is_gpu_available() probe, which later TensorFlow releases deprecate.
print(tf.__version__, tf.test.gpu_device_name() or None)

from sklearn.model_selection import train_test_split

2.0.0-rc0 /device:GPU:0


In [4]:
def _with_channel_axis(array):
    """Append a trailing length-1 axis to a 2-D array and wrap it in a tf.constant."""
    return tf.constant(array[:, :, np.newaxis])


# Inputs are every column of `ace` except the last; the `target` column is
# one-hot encoded into 10 classes.
features = ace.iloc[:, :-1].values
labels = keras.utils.to_categorical(ace.target.values, 10)

# Hold out the trailing 20% of rows (unshuffled) as the test set ...
fit_features, test_features, fit_labels, test_labels = train_test_split(
    features, labels, test_size=0.2, random_state=1, shuffle=False
)
# ... then carve a shuffled 20% validation set out of what remains.
train_features, val_features, train_labels, val_labels = train_test_split(
    fit_features, fit_labels, test_size=0.2, random_state=1
)

# Convert everything to tensors; inputs gain a trailing axis of size 1.
problem_x = _with_channel_axis(problem.values)
ace_x = _with_channel_axis(features)
ace_y = tf.constant(labels)
x_train = _with_channel_axis(train_features)
y_train = tf.constant(train_labels)
x_val = _with_channel_axis(val_features)
y_val = tf.constant(val_labels)
x_test = _with_channel_axis(test_features)
y_test = tf.constant(test_labels)

print(ace_x.shape, ace_y.shape, problem_x.shape)
print(x_train.shape, x_val.shape, x_test.shape)

(40912, 7, 1) (40912, 10) (2920, 7, 1)
(26183, 7, 1) (6546, 7, 1) (8183, 7, 1)


In [7]:
# Build and compile the GRU classifier: GRU(64) -> BatchNorm -> Dense(48, tanh)
# -> Dense(10, linear), taking inputs of shape (7, 1).

# Seed TensorFlow's global generator before any layer is created so the initial
# weights are repeatable on a fresh kernel (the value mirrors the random_state=1
# already used for the data splits).
tf.random.set_seed(1)

optim = keras.optimizers.Adam()
# NOTE(review): mean squared error against one-hot targets with a linear output
# layer is kept as-is. Softmax + categorical cross-entropy is the usual pairing
# for a 10-class problem -- confirm the current choice is deliberate.
loss_fn = keras.losses.MeanSquaredError()

model = keras.models.Sequential([
    layers.GRU(64, input_shape=(7, 1)),
    layers.BatchNormalization(),
    layers.Dense(48, activation='tanh'),
    layers.Dense(10, activation='linear'),
])
model.compile(optimizer=optim, loss=loss_fn, metrics=['acc'])

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_1 (GRU)                  (None, 64)                12864     
_________________________________________________________________
batch_normalization_1 (Batch (None, 64)                256       
_________________________________________________________________
dense_2 (Dense)              (None, 48)                3120      
_________________________________________________________________
dense_3 (Dense)              (None, 10)                490       
Total params: 16,730
Trainable params: 16,602
Non-trainable params: 128
_________________________________________________________________


In [6]:
# Stop on the *validation* loss: a validation set is passed to fit(), and
# monitoring the training loss would let training run on while the model
# overfits. restore_best_weights rolls the model back to the best epoch rather
# than leaving it `patience` epochs past it.
early_stop = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)

hist = model.fit(
    x_train, y_train,
    batch_size=16,
    epochs=100,
    callbacks=[early_stop],
    validation_data=(x_val, y_val),
)
hist.params

Train on 26183 samples, validate on 6546 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
 2480/26183 [=>............................] - ETA: 11s - loss: 0.0702 - acc: 0.4101

KeyboardInterrupt: 

In [53]:
# Predict a class for every row of `problem_x`, score the model on the held-out
# test split, and write the predictions to a CSV whose name embeds a timestamp
# and the test metrics.

# Sequential.predict_classes() is gone from later TensorFlow releases; for a
# multi-unit output layer it was exactly an argmax over the last axis of
# predict().
pred = np.argmax(model.predict(problem_x), axis=-1)
loss, acc = model.evaluate(x_test, y_test, verbose=0)

timestamp = datetime.now().strftime('%Y-%m%d_%H%M')
# Lay the predictions out as 8 three-hour slots per row. The row count is
# inferred instead of being hard-coded to 365, so inputs of other lengths
# (still a multiple of 8) work too; rows are numbered from 1.
slots = pred.reshape(-1, 8)
pred = pd.DataFrame(
    slots,
    index=range(1, len(slots) + 1),
    columns=[f"kp_{_}h" for _ in range(0, 24, 3)],
)
# to_csv() does not create missing directories.
os.makedirs("models", exist_ok=True)
pred.to_csv(f"models/gru{timestamp}(loss{round(float(loss), 3)}_acc{round(float(acc), 2)}).csv")

print(f"loss: {loss} - acc: {acc}")

loss: 0.07022452567486982 - acc: 0.39386531710624695


In [8]:
# Refit on the complete `ace` data set, then export predictions for
# `problem_x` to a timestamped CSV.
model.fit(ace_x, ace_y, batch_size=16, epochs=25, verbose=2)

# NOTE(review): x_test is a slice of ace_x, which the model has just been
# trained on, so the loss/acc embedded in the file name below are in-sample
# figures, not a held-out estimate.
loss, acc = model.evaluate(x_test, y_test, verbose=0)
timestamp = datetime.now().strftime('%Y-%m%d_%H%M')

# Sequential.predict_classes() is gone from later TensorFlow releases; for a
# multi-unit output layer it was exactly an argmax over the last axis of
# predict().
pred = np.argmax(model.predict(problem_x), axis=-1)

# Lay the predictions out as 8 three-hour slots per row. The row count is
# inferred instead of being hard-coded to 365; rows are numbered from 1.
slots = pred.reshape(-1, 8)
pred = pd.DataFrame(
    slots,
    index=range(1, len(slots) + 1),
    columns=[f"kp_{_}h" for _ in range(0, 24, 3)],
)
# to_csv() does not create missing directories.
os.makedirs("models", exist_ok=True)
pred.to_csv(f"models/ace_gru{timestamp}(loss{round(float(loss), 3)}_acc{round(float(acc), 2)}).csv")

Train on 40912 samples
Epoch 1/25
40912/40912 - 15s - loss: 0.0771 - acc: 0.3551
Epoch 2/25
40912/40912 - 14s - loss: 0.0716 - acc: 0.3907
Epoch 3/25
40912/40912 - 14s - loss: 0.0703 - acc: 0.4034
Epoch 4/25
40912/40912 - 14s - loss: 0.0697 - acc: 0.4098
Epoch 5/25
40912/40912 - 14s - loss: 0.0692 - acc: 0.4141
Epoch 6/25
40912/40912 - 14s - loss: 0.0691 - acc: 0.4160
Epoch 7/25
40912/40912 - 14s - loss: 0.0688 - acc: 0.4167
Epoch 8/25
40912/40912 - 14s - loss: 0.0685 - acc: 0.4203
Epoch 9/25
40912/40912 - 14s - loss: 0.0682 - acc: 0.4245
Epoch 10/25
40912/40912 - 14s - loss: 0.0680 - acc: 0.4225
Epoch 11/25
40912/40912 - 14s - loss: 0.0679 - acc: 0.4276
Epoch 12/25
40912/40912 - 14s - loss: 0.0678 - acc: 0.4271
Epoch 13/25
40912/40912 - 14s - loss: 0.0676 - acc: 0.4283
Epoch 14/25
40912/40912 - 14s - loss: 0.0675 - acc: 0.4313
Epoch 15/25
40912/40912 - 14s - loss: 0.0671 - acc: 0.4381
Epoch 16/25
40912/40912 - 14s - loss: 0.0672 - acc: 0.4335
Epoch 17/25
40912/40912 - 14s - loss: 0.06