In [None]:
# google drive mount
# Makes the Drive contents available inside the Colab VM; the data and module
# paths used by the following cells all live below this mount point.
from os.path import join
from google.colab import drive

# Directory at which Google Drive is mounted.
ROOT = '/content/drive'
drive.mount(ROOT)

Mounted at /content/drive


In [None]:
import os
import sys

# Project folders on the mounted Drive: datasets and the hand-written DL modules.
DATA_PATH = '/content/drive/MyDrive/GitHub/DL_Study/datasets'
MODULE_PATH = '/content/drive/MyDrive/GitHub/DL_Study/Base'

# Prepend each folder only when it is missing, so re-running this cell does not
# stack duplicate entries in sys.path. Inserting DATA_PATH first and MODULE_PATH
# second leaves MODULE_PATH at the front, as before.
for extra_path in (DATA_PATH, MODULE_PATH):
    if extra_path not in sys.path:
        sys.path.insert(0, extra_path)
sys.path

['/content/drive/MyDrive/GitHub/DL_Study/Base',
 '/content/drive/MyDrive/GitHub/DL_Study/datasets',
 '',
 '/content',
 '/env/python',
 '/usr/lib/python37.zip',
 '/usr/lib/python3.7',
 '/usr/lib/python3.7/lib-dynload',
 '/usr/local/lib/python3.7/dist-packages',
 '/usr/lib/python3/dist-packages',
 '/usr/local/lib/python3.7/dist-packages/IPython/extensions',
 '/root/.ipython']

In [None]:
# import
# NOTE(review): `np` and `GPU` are used by later cells but are never defined or
# imported in this notebook, so they presumably come from this star import of
# the project-local `config` module — TODO confirm and import them explicitly.
from config import *
from layers import LSTM, TimeLSTM, FullyConnected
from optim import Adam

# for time series split
# Pins scikit-learn to 0.24.2; the cross-validation cell below calls
# TimeSeriesSplit(..., gap=...). The recorded output shows this replacing the
# preinstalled 0.22.2.post1.
!pip install scikit-learn==0.24.2

Collecting scikit-learn==0.24.2
[?25l  Downloading https://files.pythonhosted.org/packages/a8/eb/a48f25c967526b66d5f1fa7a984594f0bf0a5afafa94a8c4dbc317744620/scikit_learn-0.24.2-cp37-cp37m-manylinux2010_x86_64.whl (22.3MB)
[K     |████████████████████████████████| 22.3MB 1.6MB/s 
Collecting threadpoolctl>=2.0.0
  Downloading https://files.pythonhosted.org/packages/f7/12/ec3f2e203afa394a149911729357aa48affc59c20e2c1c8297a60f33f133/threadpoolctl-2.1.0-py3-none-any.whl
Installing collected packages: threadpoolctl, scikit-learn
  Found existing installation: scikit-learn 0.22.2.post1
    Uninstalling scikit-learn-0.22.2.post1:
      Successfully uninstalled scikit-learn-0.22.2.post1
Successfully installed scikit-learn-0.24.2 threadpoolctl-2.1.0


In [None]:
def MSE(y, t):
    """Return half of the mean squared difference between `y` and `t`.

    The mean is taken over every element of the difference.
    """
    diff = y - t
    return 0.5 * np.mean(diff ** 2)

class ReLU:
    """Rectified linear unit with a cached mask for back-propagation."""

    def __init__(self):
        # Boolean array marking the inputs that were <= 0 in the last forward call.
        self.mask = None

    def forward(self, x):
        """Return a copy of `x` with every non-positive entry set to 0.

        The positions that were zeroed are remembered in `self.mask` for
        the following `backward` call. `x` itself is not modified.
        """
        self.mask = (x <= 0)
        y = x.copy()
        y[self.mask] = 0

        return y

    def backward(self, dy):
        """Return the upstream gradient `dy` with masked positions zeroed.

        Works on a copy: the previous implementation wrote the zeros directly
        into the caller's `dy` array and returned that same object, silently
        corrupting any gradient buffer the caller still held.
        """
        dx = dy.copy()
        dx[self.mask] = 0

        return dx


class TimeFC:
    """Fully connected (affine) layer: ``y = x @ W + b`` for a 2-D input."""

    def __init__(self, W, b):
        self.params = [W, b]
        # Gradient buffers are allocated once and overwritten in place by backward().
        self.grads = [np.zeros_like(W), np.zeros_like(b)]
        self.x = None

    def forward(self, x):
        """Apply the affine map to `x` and cache `x` for the backward pass.

        `x` must be 2-D; unpacking its shape raises otherwise.
        """
        weight, bias = self.params
        batch, _ = x.shape

        flat_x = x.reshape(batch, -1)
        out = np.dot(flat_x, weight) + bias

        self.x = x
        return out.reshape(batch, -1)

    def backward(self, dy):
        """Store the gradients for W and b and return the gradient w.r.t. the input."""
        weight, _ = self.params
        cached = self.x
        batch, _ = cached.shape

        grad_out = dy.reshape(batch, -1)
        flat_x = cached.reshape(batch, -1)

        grad_b = np.sum(grad_out, axis=0)
        grad_x = np.matmul(grad_out, weight.T)
        grad_W = np.matmul(flat_x.T, grad_out)

        grad_x = grad_x.reshape(*cached.shape)

        # Write into the existing buffers so references held elsewhere stay valid.
        self.grads[0][...] = grad_W
        self.grads[1][...] = grad_b

        return grad_x

class TimeMSE:
    """Loss layer: ReLU on the network output followed by the halved MSE.

    The layer has no trainable parameters. The ReLU is part of this loss
    layer, so it is applied in `forward` here and nowhere else.
    """

    def __init__(self):
        self.params, self.grads = [], []
        self.activation = ReLU()
        # (targets, activated outputs, (N, V)) saved by forward() for backward().
        self.cache = None

    def forward(self, xs, ts):
        """Return the scalar loss for outputs `xs` and targets `ts`.

        `xs` must be 2-D with shape (N, V); `ts` is reshaped to match.
        """
        N, V = xs.shape
        xs = self.activation.forward(xs)
        ts = ts.reshape(N, V)

        loss = MSE(xs, ts)
        self.cache = (ts, xs, (N, V))

        return loss

    def backward(self, dy = 1):
        """Return the gradient of the loss w.r.t. the pre-activation outputs.

        The loss is 0.5 * mean over all N*V elements, so the derivative of
        each element is (x - t) / (N * V). The previous code divided by N
        only, which is correct just for V == 1 and V times too large otherwise.
        """
        ts, xs, (N, V) = self.cache

        dx = dy * (xs - ts) / (N * V)

        dx = self.activation.backward(dx)
        dx = dx.reshape(N, V)
        return dx

In [None]:
class LstmModelReg:
    """Regression network: TimeLSTM -> TimeFC(H, 64) -> TimeFC(64, 1), trained with TimeMSE.

    `params` and `grads` are flat lists collected from the layers in order,
    in the form the optimizer's `update(params, grads)` is called with.
    """

    def __init__(self, time_size, hidden_size, feature_size):
        """Create the layers and their initial weights.

        Args:
            time_size: accepted for interface compatibility; no layer built
                here reads it.
            hidden_size: number of LSTM hidden units (H).
            feature_size: number of input features per time step (F).
        """
        H, F = hidden_size, feature_size
        H2 = 64     # width of the hidden fully connected layer
        rand = np.random.randn

        # weights (Xavier)
        lstm_Wx = (rand(F, 4*H)/ np.sqrt(F)).astype('f')
        lstm_Wh = (rand(H, 4*H)/ np.sqrt(H)).astype('f')
        lstm_b = np.zeros(4*H).astype('f')

        # He initialization
        fc_W1 = (rand(H, H2)/ np.sqrt(H/2)).astype('f')
        fc_b1 = np.zeros(H2).astype('f')

        fc_W2 = (rand(H2, 1)/ np.sqrt(H2/2)).astype('f')
        fc_b2 = np.zeros(1).astype('f')

        # layers
        # NOTE(review): the LSTM is stateful while fit() shuffles the samples
        # every epoch — confirm that carrying state across batches is intended.
        self.layers = [
            TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=True),
            TimeFC(fc_W1, fc_b1),
            TimeFC(fc_W2, fc_b2)
        ]
        self.loss_layer = TimeMSE()

        self.params, self.grads = [], []

        for layer in self.layers:
            self.params += layer.params
            self.grads += layer.grads

    def predict(self, xs):
        """Run `xs` through every layer (loss layer excluded) and return the output.

        NOTE(review): the ReLU applied inside TimeMSE.forward during training
        is not applied here, so predictions may be negative — confirm intended.
        """
        xs = np.array(xs)
        for layer in self.layers:
            xs = layer.forward(xs)
        return xs

    def forward(self, xs, ts):
        """Return the training loss of the network output for `xs` against targets `ts`."""
        ts = np.array(ts)
        return self.loss_layer.forward(self.predict(xs), ts)

    def backward(self, dy = 1):
        """Back-propagate from the loss through all layers; returns the input gradient."""
        dy = self.loss_layer.backward(dy)
        for layer in reversed(self.layers):
            dy = layer.backward(dy)
        return dy

    def fit(self, train_X=None, train_y=None,learning_rate=0.01, epochs=10, batch_size=32, verbose=0):
        """Train with Adam on shuffled mini-batches.

        Samples beyond the last full batch are skipped in each epoch.

        Args:
            train_X, train_y: arrays indexed along axis 0 by sample.
            learning_rate: passed to Adam.
            epochs: number of passes over the data.
            batch_size: samples per update step.
            verbose: when truthy, print the time and mean loss of every epoch.

        Raises:
            ValueError: if the data is missing or holds fewer samples than
                one batch.
        """
        # Imported here so the class does not depend on a later notebook cell
        # having imported these modules before fit() is called.
        import time
        import numpy

        if train_X is None or train_y is None:
            raise ValueError('train_X and train_y are required')

        optimizer = Adam(learning_rate)

        data_size = train_X.shape[0]
        max_iters = data_size//batch_size
        if max_iters == 0:
            # Previously this surfaced as a ZeroDivisionError when averaging the loss.
            raise ValueError(
                f'batch_size ({batch_size}) is larger than the number of samples ({data_size})')

        for epoch in range(1, epochs+1):
            # Host-side numpy is used for the permutation, as in the original code.
            idx = numpy.random.permutation(numpy.arange(data_size))
            train_X = train_X[idx]
            train_y = train_y[idx]

            epoch_loss = 0
            start_time = time.time()

            for step in range(max_iters):
                begin, end = step*batch_size, (step+1)*batch_size

                loss = self.forward(train_X[begin:end], train_y[begin:end])
                self.backward()
                optimizer.update(self.params, self.grads)

                epoch_loss += loss
            avg_loss = epoch_loss/max_iters

            if verbose:
                # Elapsed time is now minus start; the operands were swapped
                # before, which printed a negative duration.
                duration = time.time() - start_time
                print(f'epoch:{epoch}/{epochs}, 시간:{duration:.2f}[s], loss:{avg_loss:.5f}')

    def reset_state(self):
        """Reset the recurrent state of the LSTM layer."""
        self.layers[0].reset_state()

In [None]:
# configuration setting
def model_config():
    """Build the hyper-parameter grid that the notebook searches.

    Returns:
        list: one ``[epochs, batch_size, learning_rate]`` list per
        combination, with epochs varying slowest and learning rate fastest.
    """
    # candidate values for the LSTM model
    epoch_options = [30]
    batch_options = [8, 16]
    lr_options = [0.01, 0.001]

    return [
        [n_epochs, n_batch, lr]
        for n_epochs in epoch_options
        for n_batch in batch_options
        for lr in lr_options
    ]

# function for fitting the LSTM model with one hyper-parameter config
def model_fit(train_X, train_y, config, time_size=8, hidden_size=100, feature_size=20):
    """Create a fresh LstmModelReg, train it and return it.

    Args:
        train_X, train_y: training inputs and targets, forwarded to `fit`.
        config: ``[epochs, batch_size, learning_rate]``.
        time_size, hidden_size, feature_size: model dimensions. The defaults
            are the values that were previously hard-coded here.

    Returns:
        The trained LstmModelReg instance.
    """
    # unpack config
    n_epochs, n_batch, learning_rate = config
    model = LstmModelReg(time_size=time_size, hidden_size=hidden_size,
                         feature_size=feature_size)
    # fit model and return
    model.fit(train_X=train_X, train_y=train_y, epochs=n_epochs,
              batch_size=n_batch, learning_rate=learning_rate)
    return model

def MAE_metric(x, t):
    """Mean absolute error between `x` and `t`, averaged over all elements."""
    absolute_error = np.abs(x - t)
    return np.mean(absolute_error)

def MSE_metric(x, t):
    """Mean squared error between `x` and `t`, averaged over all elements."""
    residual = x - t
    return np.mean(residual ** 2)

In [None]:
# dataset
import pandas as pd

# Build the CSV location from DATA_PATH (defined in the path-setup cell) instead
# of repeating the absolute Drive path a second time.
csv_path = os.path.join(DATA_PATH, 'hungary_chickenpox', 'hungary_chickenpox.csv')

# Drop the Date column while loading, so re-running the cell always starts from
# the file and never mutates a frame that an earlier run displayed.
df = pd.read_csv(csv_path).drop(columns='Date')
df.head()

Unnamed: 0,BUDAPEST,BARANYA,BACS,BEKES,BORSOD,CSONGRAD,FEJER,GYOR,HAJDU,HEVES,JASZ,KOMAROM,NOGRAD,PEST,SOMOGY,SZABOLCS,TOLNA,VAS,VESZPREM,ZALA
0,168,79,30,173,169,42,136,120,162,36,130,57,2,178,66,64,11,29,87,68
1,157,60,30,92,200,53,51,70,84,28,80,50,29,141,48,29,58,53,68,26
2,96,44,31,86,93,30,93,84,191,51,64,46,4,157,33,33,24,18,62,44
3,163,49,43,126,46,39,52,114,107,42,63,54,14,107,66,50,25,21,43,31
4,122,78,53,87,103,34,95,131,172,40,61,49,11,124,63,56,7,47,85,60


In [None]:
# Count the missing values in each column; the recorded output shows 0 for all of them.
df.isnull().sum()

BUDAPEST    0
BARANYA     0
BACS        0
BEKES       0
BORSOD      0
CSONGRAD    0
FEJER       0
GYOR        0
HAJDU       0
HEVES       0
JASZ        0
KOMAROM     0
NOGRAD      0
PEST        0
SOMOGY      0
SZABOLCS    0
TOLNA       0
VAS         0
VESZPREM    0
ZALA        0
dtype: int64

In [None]:
# task : budapest
# series data to img function
def series_to_img(dataset, time_step=1, n_out=1):
    """Frame a series as a table of lagged inputs followed by current/future values.

    Each output row holds the `time_step` previous observations (oldest
    first), then the observation at t, then — when `n_out` > 1 — the
    observations at t+1 .. t+n_out-1. Rows that would contain NaN (the
    edges produced by shifting, or NaN already present in the data) are
    dropped.

    Args:
        dataset: anything `pd.DataFrame` accepts; columns are the variables.
            1-D input is treated as a single variable.
        time_step: number of lagged steps to include.
        n_out: number of steps from t onwards to include. The default of 1
            reproduces the previous fixed behaviour, where only t was emitted
            and the code path naming the t+i columns could never run.

    Returns:
        pd.DataFrame with columns named ``var<k>(t-<i>)``, ``var<k>(t)`` and
        ``var<k>(t+<i>)``.
    """
    frame = pd.DataFrame(dataset)
    num = frame.shape[1]      # features num (read from the frame so lists work too)
    cols, names = [], []

    # sequence t-n to t-1
    for lag in range(time_step, 0, -1):
        cols.append(frame.shift(lag))
        names += ['var%d(t-%d)' % (j + 1, lag) for j in range(num)]

    # sequence t to t+n_out-1
    for lead in range(n_out):
        cols.append(frame.shift(-lead))
        suffix = '(t)' if lead == 0 else '(t+%d)' % lead
        names += ['var%d%s' % (j + 1, suffix) for j in range(num)]

    agg = pd.concat(cols, axis=1)
    agg.columns = names
    return agg.dropna()

n_inputs = 8        # number of lagged time steps per sample
n_features = 20     # number of columns per time step
dataset = df.values.astype('float')

# The first n_inputs*n_features columns are the lagged inputs and the next one
# is var1(t), the target. The n_features-1 columns after it hold the other
# variables at time t and are removed.
del_idx = n_inputs * n_features + 1
del_cols = list(range(del_idx, del_idx + n_features - 1))

new_df = series_to_img(dataset, n_inputs)
new_df = new_df.drop(columns=new_df.columns[del_cols])
new_df.head()

Unnamed: 0,var1(t-8),var2(t-8),var3(t-8),var4(t-8),var5(t-8),var6(t-8),var7(t-8),var8(t-8),var9(t-8),var10(t-8),var11(t-8),var12(t-8),var13(t-8),var14(t-8),var15(t-8),var16(t-8),var17(t-8),var18(t-8),var19(t-8),var20(t-8),var1(t-7),var2(t-7),var3(t-7),var4(t-7),var5(t-7),var6(t-7),var7(t-7),var8(t-7),var9(t-7),var10(t-7),var11(t-7),var12(t-7),var13(t-7),var14(t-7),var15(t-7),var16(t-7),var17(t-7),var18(t-7),var19(t-7),var20(t-7),...,var2(t-2),var3(t-2),var4(t-2),var5(t-2),var6(t-2),var7(t-2),var8(t-2),var9(t-2),var10(t-2),var11(t-2),var12(t-2),var13(t-2),var14(t-2),var15(t-2),var16(t-2),var17(t-2),var18(t-2),var19(t-2),var20(t-2),var1(t-1),var2(t-1),var3(t-1),var4(t-1),var5(t-1),var6(t-1),var7(t-1),var8(t-1),var9(t-1),var10(t-1),var11(t-1),var12(t-1),var13(t-1),var14(t-1),var15(t-1),var16(t-1),var17(t-1),var18(t-1),var19(t-1),var20(t-1),var1(t)
8,168.0,79.0,30.0,173.0,169.0,42.0,136.0,120.0,162.0,36.0,130.0,57.0,2.0,178.0,66.0,64.0,11.0,29.0,87.0,68.0,157.0,60.0,30.0,92.0,200.0,53.0,51.0,70.0,84.0,28.0,80.0,50.0,29.0,141.0,48.0,29.0,58.0,53.0,68.0,26.0,...,103.0,54.0,192.0,148.0,65.0,100.0,118.0,129.0,40.0,88.0,56.0,10.0,119.0,104.0,85.0,20.0,32.0,153.0,70.0,115.0,74.0,64.0,174.0,140.0,56.0,111.0,175.0,138.0,60.0,112.0,70.0,21.0,178.0,70.0,75.0,5.0,66.0,149.0,54.0,119.0
9,157.0,60.0,30.0,92.0,200.0,53.0,51.0,70.0,84.0,28.0,80.0,50.0,29.0,141.0,48.0,29.0,58.0,53.0,68.0,26.0,96.0,44.0,31.0,86.0,93.0,30.0,93.0,84.0,191.0,51.0,64.0,46.0,4.0,157.0,33.0,33.0,24.0,18.0,62.0,44.0,...,74.0,64.0,174.0,140.0,56.0,111.0,175.0,138.0,60.0,112.0,70.0,21.0,178.0,70.0,75.0,5.0,66.0,149.0,54.0,119.0,86.0,57.0,171.0,90.0,65.0,118.0,105.0,194.0,60.0,67.0,46.0,12.0,112.0,116.0,76.0,22.0,45.0,102.0,42.0,114.0
10,96.0,44.0,31.0,86.0,93.0,30.0,93.0,84.0,191.0,51.0,64.0,46.0,4.0,157.0,33.0,33.0,24.0,18.0,62.0,44.0,163.0,49.0,43.0,126.0,46.0,39.0,52.0,114.0,107.0,42.0,63.0,54.0,14.0,107.0,66.0,50.0,25.0,21.0,43.0,31.0,...,86.0,57.0,171.0,90.0,65.0,118.0,105.0,194.0,60.0,67.0,46.0,12.0,112.0,116.0,76.0,22.0,45.0,102.0,42.0,114.0,81.0,129.0,217.0,167.0,64.0,93.0,154.0,119.0,34.0,118.0,73.0,6.0,130.0,68.0,59.0,31.0,85.0,96.0,54.0,127.0
11,163.0,49.0,43.0,126.0,46.0,39.0,52.0,114.0,107.0,42.0,63.0,54.0,14.0,107.0,66.0,50.0,25.0,21.0,43.0,31.0,122.0,78.0,53.0,87.0,103.0,34.0,95.0,131.0,172.0,40.0,61.0,49.0,11.0,124.0,63.0,56.0,7.0,47.0,85.0,60.0,...,81.0,129.0,217.0,167.0,64.0,93.0,154.0,119.0,34.0,118.0,73.0,6.0,130.0,68.0,59.0,31.0,85.0,96.0,54.0,127.0,59.0,81.0,243.0,99.0,81.0,72.0,107.0,117.0,57.0,72.0,91.0,9.0,113.0,62.0,22.0,26.0,19.0,118.0,43.0,135.0
12,122.0,78.0,53.0,87.0,103.0,34.0,95.0,131.0,172.0,40.0,61.0,49.0,11.0,124.0,63.0,56.0,7.0,47.0,85.0,60.0,174.0,76.0,77.0,152.0,189.0,26.0,74.0,181.0,157.0,44.0,95.0,97.0,26.0,146.0,59.0,54.0,27.0,54.0,48.0,60.0,...,59.0,81.0,243.0,99.0,81.0,72.0,107.0,117.0,57.0,72.0,91.0,9.0,113.0,62.0,22.0,26.0,19.0,118.0,43.0,135.0,74.0,51.0,271.0,215.0,48.0,115.0,148.0,171.0,21.0,114.0,82.0,10.0,141.0,55.0,45.0,23.0,83.0,127.0,36.0,116.0


In [None]:
import numpy
import time
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler

n_splits = 10
# One extra split is requested and the first one is discarded, presumably so
# that every remaining training window is long enough to give up a validation
# tail of test_length rows. gap leaves n_inputs rows between train and test.
train_test_split = TimeSeriesSplit(n_splits=n_splits+1, gap=n_inputs).split(new_df)
next(train_test_split)

configs = model_config()
history = []        # per fold: validation RMSE of every config
best_error = []     # per fold: test RMSE of the selected config

print('config : epochs, batch_size, learning_rate')

# nested cross validation
for fold, (train_cv_indices, test_cv_indices) in enumerate(train_test_split, start=1):
    print(f'fold : {fold}/{n_splits}')

    # split x, y data (the last column is the target, the rest are inputs)
    train_cv_X_raw = new_df.iloc[train_cv_indices, :-1].values
    train_cv_y = new_df.iloc[train_cv_indices, -1].values
    test_cv_X_raw = new_df.iloc[test_cv_indices, :-1].values
    test_cv_y = new_df.iloc[test_cv_indices, -1].values

    # length for validation set
    test_length = len(test_cv_X_raw)

    # inner split: the tail of the training window is held out for validation
    train_X_raw, val_X_raw = train_cv_X_raw[:-test_length, :], train_cv_X_raw[-test_length:, :]
    train_y, val_y = train_cv_y[:-test_length], train_cv_y[-test_length:]

    # scaling data
    # inner loop: fit the scaler on the inner training rows only. Previously a
    # single scaler was fitted on the whole outer training window, validation
    # tail included, so validation statistics leaked into config selection.
    inner_scaler_x = MinMaxScaler()
    train_X = inner_scaler_x.fit_transform(train_X_raw)
    val_X = inner_scaler_x.transform(val_X_raw)

    # outer loop: fit on the full training window, apply to the test rows
    scaler_x = MinMaxScaler()
    train_cv_X = scaler_x.fit_transform(train_cv_X_raw)
    test_cv_X = scaler_x.transform(test_cv_X_raw)

    # reshape
    # inner loop
    train_X = train_X.reshape(-1,  n_inputs, n_features)
    val_X = val_X.reshape(-1, n_inputs, n_features)
    train_y = train_y.reshape(-1, 1)
    val_y = val_y.reshape(-1, 1)

    # outer loop
    train_cv_X = train_cv_X.reshape(-1,  n_inputs, n_features)
    test_cv_X = test_cv_X.reshape(-1, n_inputs, n_features)
    train_cv_y = train_cv_y.reshape(-1, 1)
    test_cv_y = test_cv_y.reshape(-1, 1)

    # model fit, inner
    errors = []
    param = 0                           # index of the best config seen so far
    best_val_error = float('inf')
    for idx, cfg in enumerate(configs):
        print(f' == train {cfg} model == ', end=' ')
        model = model_fit(train_X, train_y, cfg)
        model.reset_state()
        predicted = model.predict(val_X)
        if GPU:
            predicted = np.asnumpy(predicted)
        error = np.sqrt(MSE_metric(predicted, val_y))   # rmse
        print(f' error(RMSE):{error}')
        # Comparing against a running best that starts at +inf keeps the first
        # strict minimum, and a NaN error can no longer block later configs
        # (min() over a list starting with NaN returns NaN).
        if error < best_val_error:
            best_val_error = error
            param = idx
        errors.append(error)

    history.append(errors)

    # outer evaluation: retrain the selected config on the whole training window
    selected_model = model_fit(train_cv_X,train_cv_y, configs[param])
    selected_model.reset_state()
    predicted = selected_model.predict(test_cv_X)
    if GPU:
        predicted = np.asnumpy(predicted)
    error = np.sqrt(MSE_metric(predicted, test_cv_y))
    best_error.append(error)

    # model eval
    print(f'best_model => error(rmse) : {error}, param:{configs[param]}')

config : epochs, batch_size, learning_rate
fold : 1/10
 == train [30, 8, 0.01] model ==   error(RMSE):167.21779009537838
 == train [30, 8, 0.001] model ==   error(RMSE):102.7697439171926
 == train [30, 16, 0.01] model ==   error(RMSE):84.84705130577332
 == train [30, 16, 0.001] model ==   error(RMSE):112.46977758832696
best_model => error(rmse) : 65.83051872333382, param:2
fold : 2/10
 == train [30, 8, 0.01] model ==   error(RMSE):64.80174027209944
 == train [30, 8, 0.001] model ==   error(RMSE):62.39768961385993
 == train [30, 16, 0.01] model ==   error(RMSE):70.84605414762446
 == train [30, 16, 0.001] model ==   error(RMSE):87.41327511950276
best_model => error(rmse) : 74.48305595029728, param:1
fold : 3/10
 == train [30, 8, 0.01] model ==   error(RMSE):71.31769926115776
 == train [30, 8, 0.001] model ==   error(RMSE):75.45506887182951
 == train [30, 16, 0.01] model ==   error(RMSE):85.80418861232671
 == train [30, 16, 0.001] model ==   error(RMSE):68.46703751463416
best_model => err

In [None]:
# Average the per-fold test RMSE values collected by the cross-validation loop.
# Dividing by the number of folds that actually produced a score (rather than
# the configured n_splits) keeps the mean correct if the loop was interrupted.
if not best_error:
    raise RuntimeError('best_error is empty; run the cross-validation cell first')
model_evaluation = sum(best_error) / len(best_error)
print(f'evaluation [Mean RMSE] : {model_evaluation}')

evaluation [Mean RMSE] : 53.88225205093032


In [None]:
# Re-evaluate the model selected in the last fold on that fold's test rows.
selected_model.reset_state()
predicted = selected_model.predict(test_cv_X)
if GPU:
    predicted = np.asnumpy(predicted)

last_fold_mse = MSE_metric(predicted, test_cv_y)
print(f'MSE : {last_fold_mse}')
print(f'RMSE : {np.sqrt(last_fold_mse)}')

def MAE_metric(x, t):
    """Mean absolute error; replaces the earlier definition, taking the absolute value with `numpy`."""
    absolute_error = numpy.abs(x - t)
    return np.mean(absolute_error)
print(f'MAE : {MAE_metric(predicted, test_cv_y)}')

MSE : 4965.536574174417
RMSE : 70.46656351897981
MAE : 39.21686846869333
