# Convolutional Neural Network(CNN) 구현 & Time Series Data 학습

## 1. GPU 사용 여부 설정
GPU 사용 시 런타임 유형을 GPU로 설정

- True : cupy 사용
- False : numpy 사용

In [1]:
# Array backend switch: CuPy is bound to the name `np` when GPU is True,
# NumPy otherwise, so the code below only ever refers to `np`.
GPU = False
if GPU:
    import cupy as np
    # Make CuPy allocate device memory through a memory pool.
    np.cuda.set_allocator(np.cuda.MemoryPool().malloc)
else:
    import numpy as np

## 2. Optimizer 및 FC, Loss layer 구현
- optimizer : Adam
- FC : Fully Connected Layer
- Loss : Mean Squared Error(MSE)

In [2]:
# Optimizer Adam
class Adam:
    """Adam optimizer that updates a list of parameter arrays in place.

    Attributes:
        lr: base learning rate.
        beta1: decay factor of the first-moment (mean) estimate.
        beta2: decay factor of the second-moment (squared gradient) estimate.
        iter: number of ``update`` calls performed so far.
        m, v: per-parameter moment buffers, created on the first update.
    """

    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
        self.lr, self.beta1, self.beta2 = lr, beta1, beta2
        self.iter = 0
        self.m = None
        self.v = None

    def update(self, params, grads):
        """Apply one Adam step.

        Args:
            params: list of arrays to update; each entry is modified in place.
            grads: list of gradients, index-aligned with ``params``.
        """
        # Moment buffers are allocated lazily, shaped like the parameters.
        if self.m is None:
            self.m = [np.zeros_like(p) for p in params]
            self.v = [np.zeros_like(p) for p in params]

        self.iter += 1
        step = self.iter

        # Bias correction is folded into the step size.
        scaled = self.lr * np.sqrt(1.0 - self.beta2**step)
        lr_t = scaled / (1.0 - self.beta1**step)

        rate_m = 1 - self.beta1
        rate_v = 1 - self.beta2
        for k, _ in enumerate(params):
            grad = grads[k]
            # Exponential moving averages written as incremental updates.
            self.m[k] += rate_m * (grad - self.m[k])
            self.v[k] += rate_v * (grad**2 - self.v[k])

            denom = np.sqrt(self.v[k]) + 1e-7   # epsilon avoids division by zero
            params[k] -= lr_t * self.m[k] / denom

In [3]:
# Fully Connected Layer
class FullyConnected:
    """Affine layer ``y = x @ W + b`` that flattens each sample first.

    ``params`` holds ``[W, b]`` and ``grads`` holds same-shaped gradient
    buffers that ``backward`` overwrites in place.
    """

    def __init__(self, W, b):
        self.params = [W, b]
        self.grads = [np.zeros_like(W), np.zeros_like(b)]
        self.x = None

    def forward(self, x):
        """Return ``x @ W + b`` with ``x`` reshaped to (batch, -1)."""
        weight, bias = self.params
        self.x_shape = x.shape              # remembered to restore dx's shape

        flat = x.reshape(x.shape[0], -1)
        out = np.dot(flat, weight) + bias
        self.x = flat                       # flattened input, reused in backward

        return out

    def backward(self, dy):
        """Store dW/db in ``grads`` and return the gradient w.r.t. the input."""
        weight = self.params[0]
        grad_w, grad_b = self.grads

        bias_grad = np.sum(dy, axis=0)
        weight_grad = np.dot(self.x.T, dy)
        input_grad = np.dot(dy, weight.T)

        # Write into the existing buffers so external references stay valid.
        grad_w[...] = weight_grad
        grad_b[...] = bias_grad

        return input_grad.reshape(*self.x_shape)

# activation function (ReLU)
class ReLU:
    """Rectified linear unit: passes positive inputs, zeroes the rest."""

    def __init__(self):
        # Boolean mask of the positions that were <= 0 in the last forward pass.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with every entry ``<= 0`` replaced by 0."""
        self.mask = (x <= 0)
        y = x.copy()
        y[self.mask] = 0

        return y

    def backward(self, dy):
        """Return ``dy`` with the gradient blocked where the input was ``<= 0``.

        The upstream gradient is copied first so the caller's array is not
        modified as a side effect.
        """
        dx = dy.copy()
        dx[self.mask] = 0

        return dx
# MSE
def MSE(y, t):
    """Return half of the mean squared difference between ``y`` and ``t``."""
    squared_error = (y - t)**2
    return np.mean(squared_error) * 0.5

# ReLU with MSE
class ReluWithLoss:
    """Output head: ReLU activation followed by the MSE loss.

    The layer has no trainable parameters; ``params`` and ``grads`` are empty
    lists so it can be handled like the other layers.
    """

    def __init__(self):
        self.params, self.grads = [], []
        self.activation = ReLU()
        self.cache = None

    def forward(self, x, t):
        """Return the MSE between ``ReLU(x)`` and the targets ``t``.

        Args:
            x: 2-D array of raw outputs, shape (batch, output).
            t: targets; reshaped to the shape of ``x``.
        """
        batch, out_dim = x.shape

        scores = x.reshape(batch, out_dim)
        target = t.reshape(batch, out_dim)
        activated = self.activation.forward(scores)

        loss = MSE(activated, target)
        # Keep what backward needs: targets, activations and the shape.
        self.cache = (target, activated, (batch, out_dim))
        return loss

    def backward(self, dy = 1):
        """Return the gradient of the loss w.r.t. the raw outputs.

        NOTE(review): the gradient is divided by the batch size only, while
        MSE averages over every element; the two agree when output == 1.
        """
        target, activated, (batch, out_dim) = self.cache
        grad = dy*(activated - target) / batch

        grad = self.activation.backward(grad)
        return grad.reshape(batch, out_dim)

## 3. CNN Layer 구현
- im2col : image to column data
- col2im : column data to image



In [4]:
# CNN Layer (Conv, Pooling)
def im2col(data, filter_h, filter_w, stride = 1, padding = 0):
    """Unfold sliding windows of a 4-D batch into rows of a 2-D matrix.

    Args:
        data: array of shape (N, C, H, W).
        filter_h, filter_w: window height and width.
        stride: step between neighbouring windows.
        padding: number of zero rows/columns added on each side of H and W.

    Returns:
        Array of shape (N*out_h*out_w, C*filter_h*filter_w); each row is one
        window, ordered by sample, then output row, then output column.
    """
    batch, channels, height, width = data.shape

    out_h = (height + 2*padding - filter_h)//stride + 1
    out_w = (width + 2*padding - filter_w)//stride + 1

    # Zero-pad only the two spatial axes.
    pad_spec = [(0, 0), (0, 0), (padding, padding), (padding, padding)]
    padded = np.pad(data, pad_spec, 'constant')

    patches = np.zeros((batch, channels, filter_h, filter_w, out_h, out_w))
    for dy in range(filter_h):
        rows = slice(dy, dy + stride*out_h, stride)
        for dx in range(filter_w):
            cols = slice(dx, dx + stride*out_w, stride)
            # Gather offset (dy, dx) of every window in a single strided copy.
            patches[:, :, dy, dx, :, :] = padded[:, :, rows, cols]

    # (N, out_h, out_w, C, fh, fw) -> one row per window.
    return patches.transpose(0, 4, 5, 1, 2, 3).reshape(batch*out_h*out_w, -1)

def col2im(col, shape, filter_h, filter_w, stride=1, padding=0):
    """Fold window rows back into a 4-D batch, summing overlapping windows.

    This is the inverse layout operation of ``im2col`` and is used to map
    column-shaped gradients back onto the input.

    Args:
        col: array of shape (N*out_h*out_w, C*filter_h*filter_w).
        shape: shape (N, C, H, W) of the original (unpadded) data.
        filter_h, filter_w: window height and width.
        stride: step between neighbouring windows.
        padding: zero padding that was applied on each side of H and W.

    Returns:
        Array of shape (N, C, H, W). A position covered by several windows
        receives the sum of their contributions.
    """
    N, C, H, W = shape
    out_h = (H + 2*padding - filter_h)//stride + 1
    out_w = (W + 2*padding - filter_w)//stride + 1
    # Rows -> (N, C, filter_h, filter_w, out_h, out_w).
    col = col.reshape(N, out_h, out_w, C, filter_h, filter_w).transpose(0, 3, 4, 5, 1, 2)

    # Padded canvas, allocated with a stride-1 margin beyond the padded size.
    img = np.zeros((N, C, H + 2*padding + stride - 1, W + 2*padding + stride - 1))

    for y in range(filter_h):
        y_max = y + stride*out_h
        for x in range(filter_w):
            x_max = x + stride*out_w
            # Accumulate (+=): the original `=+` overwrote overlapping windows.
            img[:, :, y:y_max:stride, x:x_max:stride] += col[:, :, y, x, :, :]

    # Crop the padding (and margin) away.
    return img[:, :, padding:H+padding, padding:W+padding]


class Convolution:
    """2-D convolution layer implemented as im2col followed by a matrix product.

    ``params`` holds ``[W, b]`` where ``W`` has shape (FN, C, FH, FW);
    ``grads`` holds same-shaped buffers that ``backward`` overwrites in place.
    """

    def __init__(self, W, b, stride=1, padding=0):
        self.params = [W, b]
        self.grads = [np.zeros_like(W), np.zeros_like(b)]
        self.stride = stride
        self.padding = padding
        self.cache = None

    def forward(self, x):
        """Convolve ``x`` of shape (N, C, H, W); returns (N, FN, out_h, out_w)."""
        kernels, bias = self.params
        n_filters, _, k_h, k_w = kernels.shape
        batch, _, height, width = x.shape

        out_h = (height + 2*self.padding - k_h)//self.stride + 1
        out_w = (width + 2*self.padding - k_w)//self.stride + 1

        # One row per receptive field, one column per filter.
        patches = im2col(x, k_h, k_w, self.stride, self.padding)
        flat_kernels = kernels.reshape(n_filters, -1).T

        scores = np.dot(patches, flat_kernels) + bias
        out = scores.reshape(batch, out_h, out_w, -1).transpose(0, 3, 1, 2)

        # Saved for the backward pass.
        self.cache = (x, patches, flat_kernels)

        return out

    def backward(self, dy):
        """Store dW/db in ``grads`` and return the gradient w.r.t. the input."""
        kernels = self.params[0]
        x, patches, flat_kernels = self.cache
        n_filters, channels, k_h, k_w = kernels.shape

        # (N, FN, out_h, out_w) -> (N*out_h*out_w, FN), matching the im2col rows.
        grad_out = dy.transpose(0, 2, 3, 1).reshape(-1, n_filters)

        bias_grad = np.sum(grad_out, axis=0)
        kernel_grad = np.dot(patches.T, grad_out)
        kernel_grad = kernel_grad.transpose(1, 0).reshape(n_filters, channels, k_h, k_w)

        self.grads[0][...] = kernel_grad
        self.grads[1][...] = bias_grad

        grad_patches = np.dot(grad_out, flat_kernels.T)
        return col2im(grad_patches, x.shape, k_h, k_w, self.stride, self.padding)

class Pooling:
    """Max pooling over (pool_h, pool_w) windows, built on im2col/col2im.

    NOTE(review): padding is applied by ``im2col`` with zeros, so a padded
    cell can be selected as the maximum when a window holds only negatives.
    """

    def __init__(self, pool_h, pool_w, stride=1, padding=0):
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.padding = padding

    def forward(self, x):
        """Return the per-window maximum of ``x`` (N, C, H, W) as (N, C, out_h, out_w)."""
        N, C, H, W = x.shape
        # Output size must include the padding, because im2col pads its input;
        # the original formula ignored it and broke for padding != 0.
        out_h = (H + 2*self.padding - self.pool_h)//self.stride + 1
        out_w = (W + 2*self.padding - self.pool_w)//self.stride + 1

        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.padding)
        # One row per (sample, output position, channel) window.
        col = col.reshape(-1, self.pool_h*self.pool_w)

        arg_max = np.argmax(col, axis=1)
        y = np.max(col, axis=1)
        y = y.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)

        # The argmax positions route the gradient in backward.
        self.cache = (x, arg_max)

        return y

    def backward(self, dy):
        """Send each upstream gradient to the position that held the maximum."""
        x, arg_max = self.cache

        dy = dy.transpose(0, 2, 3, 1)      # -> N, out_h, out_w, C
        pool_size = self.pool_h*self.pool_w

        # One row per window, non-zero only at the winning position.
        dmax = np.zeros((dy.size, pool_size))
        dmax[np.arange(arg_max.size), arg_max.flatten()] = dy.flatten()
        dmax = dmax.reshape(dy.shape + (pool_size, ))
        dcol = dmax.reshape(dmax.shape[0]*dmax.shape[1]*dmax.shape[2], -1)
        dx = col2im(dcol, x.shape,
                    self.pool_h,
                    self.pool_w,
                    self.stride,
                    self.padding)

        return dx

## 4. Model 설계
### Model Architecture
- conv -> relu -> pool -> fc -> relu -> fc -> relu

In [5]:
# CNN Model
class Model:
    """Small CNN regressor: conv -> relu -> pool(2x2, stride 2) -> fc -> relu -> fc.

    A ``ReluWithLoss`` head (ReLU + MSE) is attached for training. ``predict``
    returns the output of the last fully connected layer, i.e. before the
    ReLU that lives inside the loss head.

    Args:
        input_dim: (channels, height, width) of one sample.
        params: dict with keys 'filter_num', 'filter_size', 'padding' and
            'stride' for the convolution; ``None`` selects
            30 filters of size 5, padding 0, stride 1.
        hidden_size: width of the hidden fully connected layer.
        output_size: number of outputs.
    """

    def __init__(self, input_dim = (1, 24, 8),
                 params=None,
                 hidden_size=100, output_size=1):
        # Built per call: a dict literal as a default would be shared between calls.
        if params is None:
            params = {'filter_num': 30, 'filter_size': 5,
                      'padding': 0, 'stride': 1}
        filter_num = params['filter_num']
        filter_size = params['filter_size']
        padding = params['padding']
        stride = params['stride']

        # The input is not square, so height and width are tracked separately.
        input_h = input_dim[1]
        input_w = input_dim[2]
        conv_output_size_h = (input_h - filter_size + 2*padding)//stride + 1
        conv_output_size_w = (input_w - filter_size + 2*padding)//stride + 1

        # Same floor-based formula as the Pooling(2, 2, stride=2) layer below,
        # so W1 matches the flattened pool output for odd conv sizes as well.
        pool_size, pool_stride = 2, 2
        pool_output_h = (conv_output_size_h - pool_size)//pool_stride + 1
        pool_output_w = (conv_output_size_w - pool_size)//pool_stride + 1
        pool_output_size = filter_num * pool_output_h * pool_output_w

        weights = {}
        rand = np.random.randn

        # Scaled Gaussian initialisation.
        # NOTE(review): cnn_W is scaled by sqrt(filter_num/2), not by the
        # kernel fan-in; kept exactly as in the original.
        weights['cnn_W'] = rand(filter_num, input_dim[0], filter_size, filter_size) / np.sqrt(filter_num/2)
        weights['cnn_b'] = np.zeros(filter_num)
        weights['W1'] = rand(pool_output_size, hidden_size)/np.sqrt(pool_output_size/2)
        weights['b1'] = np.zeros(hidden_size)
        weights['W2'] = rand(hidden_size, output_size)/np.sqrt(hidden_size/2)
        weights['b2'] = np.zeros(output_size)

        self.layers = [
            Convolution(weights['cnn_W'], weights['cnn_b'], stride, padding),
            ReLU(),
            Pooling(pool_h=pool_size, pool_w=pool_size, stride=pool_stride),
            FullyConnected(weights['W1'], weights['b1']),
            ReLU(),
            FullyConnected(weights['W2'], weights['b2']),
        ]
        self.loss_layer = ReluWithLoss()

        # Flat lists referencing the layers' own arrays, so in-place optimizer
        # updates are seen by the layers. Layers 0, 3 and 5 are the trainable ones.
        self.grads = []
        self.params = []
        for i in [0, 3, 5]:
            self.params += self.layers[i].params
            self.grads += self.layers[i].grads

    def predict(self, x):
        """Run ``x`` through every layer and return the last layer's output."""
        x = np.array(x)
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def forward(self, x, t):
        """Return the training loss for inputs ``x`` and targets ``t``."""
        x = np.array(x)
        t = np.array(t)
        x = self.predict(x)
        loss = self.loss_layer.forward(x, t)
        return loss

    def backward(self, dy=1):
        """Back-propagate from the loss through all layers, filling ``grads``."""
        dy = self.loss_layer.backward(dy)

        for layer in reversed(self.layers):
            dy = layer.backward(dy)
        return dy

    def fit(self, train_X=None, train_y=None, epochs=1, batch_size=1, verbose=0):
        """Train with Adam (learning rate 0.01) on shuffled mini-batches.

        Args:
            train_X: inputs, indexed by sample along axis 0.
            train_y: targets, index-aligned with ``train_X``.
            epochs: number of passes over the data.
            batch_size: samples per step; a trailing partial batch is dropped.
            verbose: when truthy, print time and mean loss after each epoch.
        """
        # Local import: this class is defined before the file imports `time`.
        import time

        optimizer = Adam(0.01)

        data_size = train_X.shape[0]
        # At least one step per epoch: when batch_size exceeds the data size
        # the single batch is the whole data set (previously ZeroDivisionError).
        max_iters = max(1, data_size // batch_size)

        for epoch in range(1, epochs+1):
            # Reshuffle at the start of every epoch.
            idx = np.random.permutation(np.arange(data_size))
            x_data = train_X[idx]
            y_data = train_y[idx]

            epoch_loss = 0
            start_time = time.time()

            for step in range(max_iters):
                batch_x = x_data[step*batch_size:(step+1)*batch_size]
                batch_y = y_data[step*batch_size:(step+1)*batch_size]

                loss = self.forward(batch_x, batch_y)
                self.backward()
                optimizer.update(self.params, self.grads)

                epoch_loss += loss
            avg_loss = epoch_loss / max_iters

            if verbose:
                duration = time.time()-start_time
                print(f'epoch:{epoch}/{epochs}, 시간:{duration:.2f}[s], loss:{avg_loss:.5f}')

In [6]:
import numpy as np
import pandas as pd
import time

# Demo of the nested time-series split on random data.
nRows = 365     # number of daily rows

df = pd.DataFrame(np.random.randint(0, 5, size=(nRows, 2)), columns=["X", "y"], index = pd.date_range("20210101", periods=nRows))
df.head()

from sklearn.model_selection import TimeSeriesSplit

n_splits = 3

# One extra split is requested because the first fold is discarded below.
trainTestSplit = TimeSeriesSplit(n_splits+1).split(df)
next(trainTestSplit) # skip the first fold

for trainCvIndices, testIndices in trainTestSplit:
    # Outer split: train+CV rows versus test rows.
    XTrainCv, yTrainCv = df.iloc[trainCvIndices, 0], df.iloc[trainCvIndices, 1]
    XTest, yTest = df.iloc[testIndices, 0], df.iloc[testIndices, 1]

    # Inner split: the last `test_length` train+CV rows form the CV fold.
    # trainCvSplit is built here but not read again in the visible code.
    test_length = len(XTest)
    trainCvSplit = [(list(range(trainCvIndices[0], trainCvIndices[-test_length])),
                     list(range(trainCvIndices[-test_length], trainCvIndices[-1]+1)))]

    print(f'Training : {XTrainCv.index[0].date()} -- {XTrainCv.index[-test_length-1].date()}\
          , Cv : {XTrainCv.index[-test_length].date()} -- {XTrainCv.index[-1].date()}\
          , Test : {XTest.index[0].date()} -- {XTest.index[-1].date()}')

Training : 2021-01-01 -- 2021-03-14          , Cv : 2021-03-15 -- 2021-05-26          , Test : 2021-05-27 -- 2021-08-07
Training : 2021-01-01 -- 2021-05-26          , Cv : 2021-05-27 -- 2021-08-07          , Test : 2021-08-08 -- 2021-10-19
Training : 2021-01-01 -- 2021-08-07          , Cv : 2021-08-08 -- 2021-10-19          , Test : 2021-10-20 -- 2021-12-31


In [19]:
import pandas as pd
import time
def series_to_img(dataset, time_step=1, n_out=1):
    """Turn a multivariate series into rows of lagged and current observations.

    Args:
        dataset: 2-D array-like of shape (samples, features).
        time_step: number of lagged steps (t-time_step ... t-1) per row.
        n_out: number of steps from t onwards (t, t+1, ..., t+n_out-1) per row.
            The default of 1 keeps only the current step.

    Returns:
        DataFrame with ``(time_step + n_out) * features`` columns named
        ``var<j>(t-<i>)``, ``var<j>(t)`` and ``var<j>(t+<i>)``. Rows containing
        any NaN (the shifted-in edges and missing source values) are dropped.
    """
    num = dataset.shape[1]      # number of features
    df = pd.DataFrame(dataset)
    cols, names = [], []

    # Lagged inputs, oldest first: t-time_step ... t-1.
    for i in range(time_step, 0, -1):
        cols.append(df.shift(i))
        names += [f'var{j+1}(t-{i})' for j in range(num)]

    # Current and future steps: t ... t+n_out-1.
    for i in range(n_out):
        cols.append(df.shift(-i))
        label = 't' if i == 0 else f't+{i}'
        names += [f'var{j+1}({label})' for j in range(num)]

    agg = pd.concat(cols, axis=1)
    agg.columns = names
    agg.dropna(inplace=True)
    return agg

def model_config():
    """Return every hyper-parameter combination to try.

    Each entry is ``[filter_num, filter_size, epochs, batch_size]``; the list
    is the Cartesian product of the candidate values, with ``filter_num``
    varying slowest.
    """
    # Candidate values for the CNN.
    filter_nums = [10, 30]
    filter_sizes = [5]
    epoch_counts = [5]
    batch_sizes = [64]

    return [
        [n_filters, size, n_epochs, n_batch]
        for n_filters in filter_nums
        for size in filter_sizes
        for n_epochs in epoch_counts
        for n_batch in batch_sizes
    ]

def fit_model(train_x, train_y, config):
    """Build a ``Model`` from ``config`` and train it.

    Args:
        train_x: training inputs passed to ``Model.fit``.
        train_y: training targets passed to ``Model.fit``.
        config: ``[filter_num, filter_size, epochs, batch_size]``.

    Returns:
        The trained ``Model``. Training progress is printed (verbose=1).
    """
    n_filters, kernel_size, n_epochs, n_batch = config

    # Padding and stride are fixed; only the filter settings are tuned.
    conv_params = {'filter_num': n_filters, 'filter_size': kernel_size,
                   'padding': 0, 'stride': 1}
    cnn = Model(params=conv_params)
    cnn.fit(train_X=train_x, train_y=train_y,
            epochs=n_epochs, batch_size=n_batch, verbose=1)
    return cnn

# Load the Beijing air pollution data set and run nested time-series
# cross-validation over the CNN configurations.
from datetime import datetime

df_parser = lambda x: datetime.strptime(x, '%Y %m %d %H')    # parse "year month day hour" strings
# Alternative source of the same data (UCI repository):
# data_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00381/PRSA_data_2010.1.1-2014.12.31.csv'
data_url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/pollution.csv'
# The four date columns are merged into a single datetime index.
df = pd.read_csv(data_url, sep=',', parse_dates=[['year', 'month', 'day', 'hour']], date_parser=df_parser, index_col=0)

del df['No']
df.columns = ['pm2.5', 'dewp', 'temp', 'pres', 'cbwd','wind_speed', 'snow', 'rain']
df = df[24:]       # drop the first 24 hours (NaN values, per the original note)

# scikit-learn helpers for splitting, encoding, scaling and scoring
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import mean_squared_error

dataset = df.values
label_encoder = LabelEncoder()
dataset[:, 4] = label_encoder.fit_transform(dataset[:, 4])  # encode column 4 ('cbwd', wind direction) as integers

n_inputs = 24       # number of lagged time steps per sample
n_features = 8      # number of features per time step
# After series_to_img the first n_inputs*n_features columns are the lags and
# the next n_features columns are the current step. Positions del_idx onward
# are var2(t)..var8(t); dropping them leaves var1(t) ('pm2.5') as the last
# column, which is used as the target below.
del_idx = n_inputs * n_features + 1
del_cols = [i for i in range(del_idx, del_idx+n_features-1)]
new_df = series_to_img(dataset, n_inputs)
new_df.drop(new_df.columns[del_cols], axis=1, inplace=True)


n_splits = 3

# Outer time-series split for nested cross-validation; one extra split is
# requested because the first fold is skipped.
train_test_split = TimeSeriesSplit(n_splits=n_splits+1, gap=n_inputs).split(new_df)
next(train_test_split)

for train_cv_indices, test_cv_indices in train_test_split:
    # All columns but the last are inputs; the last column is the target.
    train_cv_X, train_cv_y = new_df.iloc[train_cv_indices, :-1].values, new_df.iloc[train_cv_indices,-1].values
    test_cv_X, test_cv_y = new_df.iloc[test_cv_indices, :-1].values, new_df.iloc[test_cv_indices, -1].values

    # train_cv_split is built here but not read again in the visible code.
    test_length = len(test_cv_X)
    train_cv_split = [(list(range(train_cv_indices[0], train_cv_indices[-test_length])),
                       list(range(train_cv_indices[-test_length], train_cv_indices[-1]+1)))]

    # Scale inputs to [0, 1]; the scaler is fitted on the train+validation rows only.
    scaler_x = MinMaxScaler()
    train_cv_X = scaler_x.fit_transform(train_cv_X)
    test_cv_X = scaler_x.transform(test_cv_X)

    # Inner split: the last test_length rows are held out for validation.
    train_X, val_X = train_cv_X[:-test_length, :], train_cv_X[-test_length:, :]
    train_y, val_y = train_cv_y[:-test_length], train_cv_y[-test_length:]

    # Reshape flat rows into (samples, 1, n_inputs, n_features) "images".
    # inner loop data
    train_X = train_X.reshape(-1, 1, n_inputs, n_features)
    val_X = val_X.reshape(-1, 1, n_inputs, n_features)

    # outer loop data
    train_cv_X = train_cv_X.reshape(-1, 1, n_inputs, n_features)
    test_cv_X = test_cv_X.reshape(-1, 1, n_inputs, n_features)

    # Inner loop: fit every configuration and remember the index of the one
    # with the lowest validation RMSE.
    configs = model_config()
    errors = []
    for idx, cfg in enumerate(configs):
        model = fit_model(train_X, train_y, cfg)
        predicted = model.predict(val_X)
        error = np.sqrt(mean_squared_error(predicted, val_y))   # RMSE
        if errors:
            if error < min(errors):
                param = idx
        else:
            param = idx
        errors.append(error)

    # Outer loop: refit the selected configuration on train+validation data
    # and score it on the test fold.
    selected_model = fit_model(train_cv_X,train_cv_y, configs[param])
    predicted = selected_model.predict(test_cv_X)
    error = np.sqrt(mean_squared_error(predicted, test_cv_y))

    # Model evaluation report (disabled; `train_cv` is not defined in the visible code):
    # print(f'train : {train_cv.index[0].date()} -- {train_cv.index[-test_length-1].date()}, error : {error}, param:{param}')



epoch:1/5, 시간:16.78[s], loss:9904.78673
epoch:2/5, 시간:16.84[s], loss:9908.93429
epoch:3/5, 시간:16.82[s], loss:9889.35964
epoch:4/5, 시간:16.73[s], loss:9893.54354
epoch:5/5, 시간:16.77[s], loss:9903.31223
epoch:1/5, 시간:37.93[s], loss:3468.09924
epoch:2/5, 시간:38.53[s], loss:1925.19312
epoch:3/5, 시간:38.67[s], loss:1405.35273
epoch:4/5, 시간:38.74[s], loss:1032.47665
epoch:5/5, 시간:38.92[s], loss:993.86983
epoch:1/5, 시간:77.90[s], loss:2647.39309
epoch:2/5, 시간:77.79[s], loss:1294.85724
epoch:3/5, 시간:78.42[s], loss:983.88978
epoch:4/5, 시간:78.61[s], loss:765.29004
epoch:5/5, 시간:77.65[s], loss:643.45741


AttributeError: ignored

In [17]:
# Print the installed scikit-learn version.
# NOTE(review): presumably checked because TimeSeriesSplit(gap=...) above
# needs a newer release than the one reported here — confirm.
import sklearn
print(sklearn.__version__)

0.22.2.post1


In [21]:
!pip install scikit-learn==0.24.2

Collecting scikit-learn==0.24.2
[?25l  Downloading https://files.pythonhosted.org/packages/a8/eb/a48f25c967526b66d5f1fa7a984594f0bf0a5afafa94a8c4dbc317744620/scikit_learn-0.24.2-cp37-cp37m-manylinux2010_x86_64.whl (22.3MB)
[K     |████████████████████████████████| 22.3MB 1.5MB/s 
Installing collected packages: scikit-learn
  Found existing installation: scikit-learn 0.24.0
    Uninstalling scikit-learn-0.24.0:
      Successfully uninstalled scikit-learn-0.24.0
Successfully installed scikit-learn-0.24.2
