In [1]:
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM, GRU
from keras.models import Sequential
from keras import optimizers
import numpy as np
import random
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
from keras.layers.normalization import BatchNormalization

Using TensorFlow backend.


In [2]:
def split_train_val_id(n, val_percent=0.1, seed=None):
    """Randomly partition the indices ``0..n-1`` into train and validation ids.

    Parameters
    ----------
    n : int
        Number of samples; the ids ``0 .. n-1`` are shuffled and split.
    val_percent : float, default 0.1
        Fraction of ``n`` reserved for validation. The validation size is
        ``int(n * val_percent)`` but never less than 1.
    seed : int or None, default None
        When given, the shuffle uses a private ``random.Random(seed)`` so the
        split is reproducible and the global ``random`` state is untouched.
        When ``None`` the global ``random`` module is used, exactly as before.

    Returns
    -------
    dict
        ``{'train': [...], 'val': [...]}`` holding disjoint lists of ids.
    """
    ids = list(range(n))
    if seed is None:
        random.shuffle(ids)
    else:
        # A dedicated generator keeps the caller's global RNG state intact.
        random.Random(seed).shuffle(ids)
    # Always hold out at least one sample so the validation set is not empty.
    piv = max(1, int(n*val_percent))
    return {'train': ids[:-piv], 'val': ids[-piv:]}

In [3]:
# Read the training arrays from the current working directory.
# `datas` supplies the model inputs and `labels` the targets; the cells below
# index both with the same sample position.
datas = np.load(file='train_features.npy')
labels = np.load(file='train_labels.npy')

In [4]:
# --- Configuration -----------------------------------------------------------
NUM_DATA_USE = 5000                           # sample ids 0..NUM_DATA_USE-1 are split into train/val
INPUT_DIM_1D = datas.shape[1]*datas.shape[2]  # length of one sample once flattened
OUTPUT_DIM = labels.shape[1]-1                # label width after dropping column 0
val_percent = 0.2
SPLIT_SEED = 0                                # fixes the train/val partition across re-runs

# Fail early with a readable message instead of an IndexError while indexing.
assert NUM_DATA_USE <= min(datas.shape[0], labels.shape[0]), \
    "NUM_DATA_USE exceeds the number of loaded samples"

# --- Train / validation split ------------------------------------------------
# Seeding the global RNG makes this cell idempotent: re-running it yields the
# same ids, so the train and validation arrays stay disjoint and comparable.
random.seed(SPLIT_SEED)
train_val_id = split_train_val_id(NUM_DATA_USE, val_percent)
train_id = train_val_id['train']
val_id = train_val_id['val']

# --- Features ----------------------------------------------------------------
# Flatten every sample to 1-D and keep only its first 10000 values
# ("reduce data information"). Selecting rows and columns in one indexing step
# copies just the kept part instead of filling a full-width buffer row by row.
train_datas = datas.reshape(datas.shape[0], INPUT_DIM_1D)[train_id, :10000].astype(np.float64, copy=False)

# --- Labels ------------------------------------------------------------------
# Column 0 of `labels` is dropped (presumably an id/key column -- confirm).
train_labels = labels[train_id, 1:].astype(np.float64, copy=False)

# Seven per-day binary targets: day k covers label columns 4*(k-1) .. 4*k-1 and
# is 1.0 when those four columns do not sum to zero, else 0.0.
(day1_labels, day2_labels, day3_labels, day4_labels,
 day5_labels, day6_labels, day7_labels) = [
    (train_labels[:, 4*day:4*(day+1)].sum(axis=1) != 0).astype(np.float64)
    for day in range(7)
]

In [5]:
NUM_OF_VAL = len(val_id)

# --- Features ----------------------------------------------------------------
# Flatten every validation sample to 1-D and keep only its first 10000 values
# ("reduce data information"), the same truncation applied to the train set.
# Selecting rows and columns in one indexing step copies just the kept part
# instead of filling a full-width buffer row by row.
val_datas = datas.reshape(datas.shape[0], INPUT_DIM_1D)[val_id, :10000].astype(np.float64, copy=False)

# --- Labels ------------------------------------------------------------------
# Column 0 of `labels` is dropped (presumably an id/key column -- confirm).
val_labels = labels[val_id, 1:].astype(np.float64, copy=False)

# Seven per-day binary targets: day k covers label columns 4*(k-1) .. 4*k-1 and
# is 1.0 when those four columns do not sum to zero, else 0.0.
(val_day1_labels, val_day2_labels, val_day3_labels, val_day4_labels,
 val_day5_labels, val_day6_labels, val_day7_labels) = [
    (val_labels[:, 4*day:4*(day+1)].sum(axis=1) != 0).astype(np.float64)
    for day in range(7)
]

In [9]:
def proba2onehot(pred, length, threshold):
    """Binarise the first ``length`` scores of ``pred`` in place.

    Each of ``pred[0] .. pred[length-1]`` becomes 1 when it is greater than or
    equal to ``threshold`` and 0 otherwise. Elements past ``length`` are left
    untouched. ``pred`` itself is modified and also returned.
    """
    for pos in range(length):
        pred[pos] = 1 if pred[pos] >= threshold else 0
    return pred

In [65]:
# Two stacked GRU layers followed by a small dense head with batch
# normalisation; every stage is regularised with 20% dropout. The network
# consumes one timestep holding all features of a sample and emits one score.
# NOTE(review): the output unit is ReLU trained with MSE, so the score is not
# bounded to [0, 1] even though it is thresholded downstream -- confirm this
# is intended rather than sigmoid + binary cross-entropy.
model = Sequential([
    GRU(units=200, input_shape=(1, train_datas.shape[1]), return_sequences=True),
    Dropout(0.2),
    GRU(units=100, return_sequences=False),
    Dropout(0.2),
    Dense(64, activation="relu"),
    BatchNormalization(),
    Dropout(0.2),
    Dense(32, activation="relu"),
    BatchNormalization(),
    Dropout(0.2),
    Dense(1, activation="relu"),
])

model.compile(optimizer='adam', loss='mse')
model.summary()

# Training hyper-parameters read by the fit cell.
BATCH_SIZE = 32
NUM_EPOCH = 30


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_25 (GRU)                 (None, 1, 200)            6120600   
_________________________________________________________________
dropout_29 (Dropout)         (None, 1, 200)            0         
_________________________________________________________________
gru_26 (GRU)                 (None, 100)               90300     
_________________________________________________________________
dropout_30 (Dropout)         (None, 100)               0         
_________________________________________________________________
dense_19 (Dense)             (None, 64)                6464      
_________________________________________________________________
batch_normalization_5 (Batch (None, 64)                256       
_________________________________________________________________
dropout_31 (Dropout)         (None, 64)                0         
__________

In [66]:
# The GRU input layer was declared with input_shape=(1, n_features), so each
# flattened sample is presented as a sequence of exactly one timestep.
# These tensors are built here because the only other definition of
# X_train / X_val sits in a cell further down the notebook; without them this
# cell raises NameError on a fresh kernel run from top to bottom.
X_train = np.reshape(train_datas, (train_datas.shape[0], 1, train_datas.shape[1]))
X_val = np.reshape(val_datas, (val_datas.shape[0], 1, val_datas.shape[1]))
model.fit(X_train, day2_labels, batch_size=BATCH_SIZE, epochs=NUM_EPOCH, validation_data = (X_val, val_day2_labels))

Train on 4000 samples, validate on 1000 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7fdc42547630>

In [70]:
# Scores at or above this cut-off are reported as class 1.
THRESHOLD = 0.6

# Score the validation set, flatten the model output to one value per sample,
# then binarise it before comparing against the day-2 targets.
y_pred = proba2onehot(
    model.predict(X_val).reshape(NUM_OF_VAL),
    NUM_OF_VAL,
    THRESHOLD,
)
print(classification_report(val_day2_labels, y_pred))

              precision    recall  f1-score   support

         0.0       0.88      0.94      0.91       750
         1.0       0.76      0.61      0.68       250

    accuracy                           0.85      1000
   macro avg       0.82      0.77      0.79      1000
weighted avg       0.85      0.85      0.85      1000



In [39]:
# Smaller variant: two stacked GRU layers with 20% dropout feeding a single
# output unit directly (no dense head). Rebinding `model` discards any
# previously built network.
# NOTE(review): the output unit is ReLU trained with MSE, so the score is not
# bounded to [0, 1] even though it is thresholded downstream -- confirm this
# is intended rather than sigmoid + binary cross-entropy.
model = Sequential([
    GRU(units=200, input_shape=(1, train_datas.shape[1]), return_sequences=True),
    Dropout(0.2),
    GRU(units=100, return_sequences=False),
    Dropout(0.2),
    Dense(1, activation="relu"),
])

model.compile(optimizer='adam', loss='mse')
model.summary()

# Training hyper-parameters read by the fit cell.
BATCH_SIZE = 32
NUM_EPOCH = 20

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_15 (GRU)                 (None, 1, 200)            6120600   
_________________________________________________________________
dropout_15 (Dropout)         (None, 1, 200)            0         
_________________________________________________________________
gru_16 (GRU)                 (None, 100)               90300     
_________________________________________________________________
dropout_16 (Dropout)         (None, 100)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 1)                 101       
Total params: 6,211,001
Trainable params: 6,211,001
Non-trainable params: 0
_________________________________________________________________


In [40]:
# Insert a length-1 timestep axis so the 2-D feature matrices match the
# (1, n_features) input shape declared on the first GRU layer.
X_train = train_datas.reshape((train_datas.shape[0], 1, train_datas.shape[1]))
X_val = val_datas.reshape((val_datas.shape[0], 1, val_datas.shape[1]))

# Train on the day-7 targets and track the loss on the held-out split.
model.fit(
    X_train,
    day7_labels,
    validation_data=(X_val, val_day7_labels),
    epochs=NUM_EPOCH,
    batch_size=BATCH_SIZE,
)

Train on 4000 samples, validate on 1000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7faa06a02da0>

In [44]:
# Scores at or above this cut-off are reported as class 1.
THRESHOLD = 0.5

# Score the validation set, flatten the model output to one value per sample,
# then binarise it before comparing against the day-7 targets.
y_pred = proba2onehot(
    model.predict(X_val).reshape(NUM_OF_VAL),
    NUM_OF_VAL,
    THRESHOLD,
)
print(classification_report(val_day7_labels, y_pred))

              precision    recall  f1-score   support

         0.0       0.90      0.87      0.88       776
         1.0       0.59      0.65      0.62       224

    accuracy                           0.82      1000
   macro avg       0.74      0.76      0.75      1000
weighted avg       0.83      0.82      0.82      1000

