In [1]:
import os
import numpy as np
import pandas as pd
from datetime import datetime

In [2]:
ace = pd.DataFrame()
for name in os.listdir('proc_data/total')[:-1]:
    ins_data = pd.read_csv(f'proc_data/total/{name}', index_col=0, parse_dates=['date'])
    ace = pd.concat([ace, ins_data], axis=0)

problem = pd.read_csv("proc_data/problem.csv", index_col=0, parse_dates=['date'])
problem.head()

Unnamed: 0_level_0,Np,Tp,Vp,B_gsm_x,B_gsm_y,B_gsm_z,Bmag
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
01-01 00:00,1.225355,32956.035503,443.036509,5.142349,1.988692,-12.914,14.14332
01-01 03:00,1.613686,55713.597041,431.723491,3.574822,-2.570586,-9.271053,11.855373
01-01 06:00,1.191851,80571.958333,432.390536,4.361542,-5.262113,-7.125196,10.517149
01-01 09:00,1.1,149231.295858,428.213609,3.533574,-6.503805,4.220485,9.589148
01-01 12:00,1.1,77718.39645,413.764024,6.511308,-6.137467,-0.664426,9.313183


In [3]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping

print(tf.__version__, (tf.test.gpu_device_name() if tf.test.is_gpu_available() else None))

from sklearn.model_selection import train_test_split

2.0.0-rc0 /device:GPU:0


In [8]:
ace_x, ace_y = ace.iloc[:, :-1].values, \
    keras.utils.to_categorical(ace.target.values, 10)
problem_x = tf.constant(problem.values[:, :, np.newaxis])

x_train, x_test, y_train, y_test = \
    train_test_split(ace_x, ace_y, test_size=0.2, random_state=1, shuffle=False)
x_train, x_val, y_train, y_val = \
    train_test_split(x_train, y_train, test_size=0.2, random_state=1)

ace_x, ace_y, x_train, y_train, x_val, y_val, x_test, y_test = \
    tf.constant(ace_x[:, :, np.newaxis]), tf.constant(ace_y), \
    tf.constant(x_train[:, :, np.newaxis]), tf.constant(y_train), \
    tf.constant(x_val[:, :, np.newaxis]), tf.constant(y_val), \
    tf.constant(x_test[:, :, np.newaxis]), tf.constant(y_test)

print(ace_x.shape, ace_y.shape, problem_x.shape)
print(x_train.shape, x_val.shape, x_test.shape)

(40912, 7, 1) (40912, 10) (2920, 7, 1)
(26183, 7, 1) (6546, 7, 1) (8183, 7, 1)


In [14]:
optim = keras.optimizers.Adam()
loss_fn = keras.losses.MeanSquaredError()

model = keras.models.Sequential()
model.add(layers.GRU(64, input_shape=(7, 1)))
model.add(layers.BatchNormalization())
model.add(layers.Dense(48, activation='tanh'))
model.add(layers.Dense(32, activation='tanh'))
model.add(layers.Dense(24, activation='tanh'))
model.add(layers.Dense(10, activation='linear'))
model.compile(optimizer=optim, loss=loss_fn, metrics=['acc'])

model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_3 (GRU)                  (None, 64)                12864     
_________________________________________________________________
batch_normalization_3 (Batch (None, 64)                256       
_________________________________________________________________
dense_6 (Dense)              (None, 48)                3120      
_________________________________________________________________
dense_7 (Dense)              (None, 32)                1568      
_________________________________________________________________
dense_8 (Dense)              (None, 24)                792       
_________________________________________________________________
dense_9 (Dense)              (None, 10)                250       
Total params: 18,850
Trainable params: 18,722
Non-trainable params: 128
________________________________________________

In [10]:
early_stop = EarlyStopping(monitor='loss', patience=2)

hist = model.fit(x_train, y_train, batch_size=16, epochs=100, callbacks=[early_stop], validation_data=(x_val, y_val))
hist.params

Train on 26183 samples, validate on 6546 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100


{'batch_size': 16,
 'epochs': 100,
 'steps': 1637,
 'samples': 26183,
 'verbose': 0,
 'do_validation': True,
 'metrics': ['loss', 'acc', 'val_loss', 'val_acc']}

In [11]:
pred = model.predict_classes(problem_x)
loss, acc = model.evaluate(x_test, y_test, verbose=0)

timestamp = datetime.now().strftime('%Y-%m%d_%H%M')
pred = pd.DataFrame(pred.reshape(365, 8), index=range(1, 366), columns=[f"kp_{_}h" for _ in range(0, 24, 3)])
pred.to_csv(f"models/gru{timestamp}(loss{round(float(loss), 3)}_acc{round(float(acc), 2)}).csv")

print(f"loss: {loss} - acc: {acc}")

loss: 0.0698235612915749 - acc: 0.39520958065986633


In [15]:
model.fit(ace_x, ace_y, batch_size=16, epochs=25, verbose=2)
loss, acc = model.evaluate(x_test, y_test, verbose=0)
timestamp = datetime.now().strftime('%Y-%m%d_%H%M')
pred = model.predict_classes(problem_x)
pred = pd.DataFrame(pred.reshape(365, 8), index=range(1, 366), columns=[f"kp_{_}h" for _ in range(0, 24, 3)])
pred.to_csv(f"models/ace_gru{timestamp}(loss{round(float(loss), 3)}_acc{round(float(acc), 2)}).csv")

Train on 40912 samples
Epoch 1/25
40912/40912 - 16s - loss: 0.0735 - acc: 0.3849
Epoch 2/25
40912/40912 - 14s - loss: 0.0695 - acc: 0.4115
Epoch 3/25
40912/40912 - 14s - loss: 0.0689 - acc: 0.4183
Epoch 4/25
40912/40912 - 15s - loss: 0.0686 - acc: 0.4191
Epoch 5/25
40912/40912 - 14s - loss: 0.0683 - acc: 0.4214
Epoch 6/25
40912/40912 - 14s - loss: 0.0682 - acc: 0.4239
Epoch 7/25
40912/40912 - 14s - loss: 0.0680 - acc: 0.4264
Epoch 8/25
40912/40912 - 14s - loss: 0.0678 - acc: 0.4264
Epoch 9/25
40912/40912 - 14s - loss: 0.0676 - acc: 0.4307
Epoch 10/25
40912/40912 - 14s - loss: 0.0677 - acc: 0.4289
Epoch 11/25
40912/40912 - 15s - loss: 0.0680 - acc: 0.4257
Epoch 12/25
40912/40912 - 14s - loss: 0.0687 - acc: 0.4181
Epoch 13/25
40912/40912 - 14s - loss: 0.0678 - acc: 0.4274
Epoch 14/25
40912/40912 - 14s - loss: 0.0675 - acc: 0.4314
Epoch 15/25
40912/40912 - 14s - loss: 0.0674 - acc: 0.4323
Epoch 16/25
40912/40912 - 14s - loss: 0.0674 - acc: 0.4319
Epoch 17/25
40912/40912 - 14s - loss: 0.06