# LSTM Transfer Learning

In [1]:
# !pip install tensorflow
# !pip install tensorflow_hub

In [2]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import random
import tensorflow as tf
import seaborn as sns
import matplotlib.pyplot as plt
import os

In [3]:
print(tf.__version__)

2.0.0


In [4]:
# 재생산성을 위해 시드 고정
np.random.seed(7)
random.seed(7)
tf.random.set_seed(7)

In [5]:
train = pd.read_csv('data/train.csv', index_col = 'id')
test = pd.read_csv('data/test.csv', index_col = 'id')

In [6]:
train

Unnamed: 0_level_0,X00,X01,X02,X03,X04,X05,X06,X07,X08,X09,...,Y09,Y10,Y11,Y12,Y13,Y14,Y15,Y16,Y17,Y18
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,9.7,988.8,1.2,0.6,0.0,1009.3,989.6,12.2,1009.9,1009.8,...,7.0,7.5,7.0,9.0,10.0,9.5,9.0,8.0,9.0,
1,9.3,988.9,1.7,1.9,0.0,1009.3,989.6,12.1,1010.0,1009.9,...,6.5,7.5,7.0,8.5,10.0,9.5,9.0,7.5,9.0,
2,9.4,989.0,1.1,2.3,0.0,1009.2,989.7,12.1,1010.1,1010.1,...,6.5,7.5,6.5,8.0,9.5,9.5,8.5,7.5,8.5,
3,9.4,988.9,1.5,0.7,0.0,1009.2,989.6,12.0,1010.0,1010.0,...,6.0,7.0,6.0,8.0,9.5,9.0,8.5,7.5,8.5,
4,9.2,988.9,0.8,1.7,0.0,1009.2,989.7,12.0,1010.1,1010.0,...,6.0,7.0,6.0,7.5,9.5,9.0,8.5,7.5,8.5,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4747,19.9,987.6,0.9,0.8,0.0,1006.9,987.7,21.7,1007.5,1007.4,...,,,,,,,,,,21.5
4748,19.9,987.6,0.5,0.7,0.0,1006.8,987.7,21.6,1007.5,1007.4,...,,,,,,,,,,21.5
4749,19.7,987.7,0.9,0.6,0.0,1006.9,987.6,21.4,1007.4,1007.5,...,,,,,,,,,,21.5
4750,19.4,987.7,0.9,0.8,0.0,1006.9,987.8,21.3,1007.6,1007.5,...,,,,,,,,,,21.5


# 2. Data Cleasing & Pre-Processing

In [7]:
# 기상청 데이터만 추출
X_train = train.loc[:,'X00':'X39']

# standardization을 위해 평균과 표준편차 구하기
MEAN = X_train.mean()
STD = X_train.std()

# 정규화 / 표준편차가 0일 경우 대비하여 1e-07 추가 
X_train = (X_train - MEAN) / (STD + 1e-07)

In [8]:
X_train

Unnamed: 0_level_0,X00,X01,X02,X03,X04,X05,X06,X07,X08,X09,...,X30,X31,X32,X33,X34,X35,X36,X37,X38,X39
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,-2.236377,0.053243,-0.288997,-0.911345,-0.286742,0.284562,0.174782,-2.160752,0.315216,0.396435,...,0.159919,-2.109779,-1.766623,0.297981,-1.015881,0.482017,-0.274946,0.577720,0.240703,-0.234576
1,-2.321347,0.083006,0.177482,0.477320,-0.286742,0.284562,0.174782,-2.183598,0.343622,0.424634,...,0.215666,-2.091664,-1.842501,0.297981,-1.015881,0.070290,-0.274946,0.582841,0.289700,-0.234576
2,-2.300105,0.112769,-0.382293,0.904602,-0.286742,0.255107,0.204162,-2.183598,0.372029,0.481033,...,0.271413,-2.146009,-1.956318,0.268916,-1.015881,0.269124,-0.274946,0.582841,0.311477,-0.234576
3,-2.300105,0.083006,-0.009110,-0.804525,-0.286742,0.255107,0.174782,-2.206443,0.343622,0.452834,...,0.350389,-2.200355,-2.013226,0.297981,-1.015881,0.056231,-0.274946,0.593083,0.344142,-0.234576
4,-2.342589,0.083006,-0.662181,0.263679,-0.286742,0.255107,0.204162,-2.206443,0.372029,0.452834,...,0.401490,-2.254700,-2.013226,0.297981,-1.015881,-0.336416,-0.274946,0.618688,0.371363,-0.234576
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4747,-0.069655,-0.303908,-0.568885,-0.697704,-0.286742,-0.422352,-0.383432,0.009577,-0.366541,-0.280350,...,1.126205,-0.388845,-0.173188,-0.457686,0.930244,0.102425,-0.274946,0.838894,0.022936,-0.126289
4748,-0.069655,-0.303908,-0.942069,-0.804525,-0.286742,-0.451807,-0.383432,-0.013269,-0.366541,-0.280350,...,1.191244,-0.388845,-0.154219,-0.457686,0.930244,-0.468972,-0.274946,0.849136,0.050157,-0.126289
4749,-0.112139,-0.274146,-0.568885,-0.911345,-0.286742,-0.422352,-0.412812,-0.058960,-0.394948,-0.252150,...,1.140142,-0.370730,-0.154219,-0.457686,0.930244,0.459924,-0.274946,0.874742,0.039269,-0.126289
4750,-0.175867,-0.274146,-0.568885,-0.697704,-0.286742,-0.422352,-0.354052,-0.081806,-0.338135,-0.252150,...,1.135497,-0.388845,-0.135249,-0.428622,0.930244,0.919853,-0.274946,0.884984,0.088266,-0.126289


In [9]:
# ID로 시간 변수 생성
def make_trigonometric_func(df):

    minute = pd.Series((df.index%144).astype(int))
    hour= pd.Series((df.index%144/6).astype(int))

    # 삼각함수를 이용한 시간변수 생성
    min_in_day = 24*6
    hour_in_day = 24

    minute_sin = np.sin(np.pi*minute/min_in_day) 
    minute_cos = np.cos(np.pi*minute/min_in_day)

    hour_sin  = np.sin(np.pi*hour/hour_in_day)
    hour_cos  = np.cos(np.pi*hour/hour_in_day)

    df['minute_sin'] = minute_sin
    df['minute_cos'] = minute_cos

    df['hour_sin'] = hour_sin
    df['hour_cos'] = hour_cos
    return df

In [10]:
X_train = make_trigonometric_func(X_train)

In [11]:
# RNN 모델에 입력 할 수 있는 시계열 형태로 데이터 변환 
def convert_to_timeseries(df, interval):
    sequence_list = []
    target_list = []
    
    for i in tqdm(range(df.shape[0] - interval)):
        sequence_list.append(np.array(df.iloc[i:i+interval,:-1]))
        target_list.append(df.iloc[i+interval,-1])
    
    sequence = np.array(sequence_list)
    target = np.array(target_list)
    
    return sequence, target

[토론 글](https://dacon.io/competitions/official/235584/codeshare/707)에 기반하여, Y15와 Y16 지점의 데이터가 Y18과 상관관계가 높음. 그래서 두 지점의 데이터를 학습하기로 결정

In [12]:
y_columns = ['Y15','Y16']

In [13]:
# t시점 이전 120분의 데이터로 t시점의 온도를 추정할 수 있는 학습데이터 형성
sequence = np.empty((0, 12, 44))
target = np.empty((0,))
for column in y_columns :
    
    # concat.shape = (4752, 41)
    concat = pd.concat([X_train, train[column]], axis = 1)

    _sequence, _target = convert_to_timeseries(concat.head(6*24*30), interval = 12)

    sequence = np.vstack((sequence, _sequence))
    target = np.hstack((target, _target))

100%|████████████████████████████████████████████████████████████████████████████| 4308/4308 [00:01<00:00, 3048.50it/s]
100%|████████████████████████████████████████████████████████████████████████████| 4308/4308 [00:01<00:00, 2579.47it/s]


In [14]:
sequence.shape, _sequence.shape, target.shape, _target.shape

((8616, 12, 44), (4308, 12, 44), (8616,), (4308,))

In [15]:
# convert_to_timeseries 함수를 쓰기 위한 dummy feature 생성
X_train['dummy'] = 0

In [16]:
# train set에서 도출된 평균과 표준편차로 standardization 실시 
test = (test - MEAN) / (STD + 1e-07)

In [17]:
test = make_trigonometric_func(test)

In [18]:
# convert_to_timeseries 함수를 쓰기 위한 dummy feature 생성
test['dummy'] = 0

In [19]:
# train과 test 기간을 합쳐서 120분 간격으로 학습데이터 재구축
X_test, _ = convert_to_timeseries(pd.concat([X_train, test], axis = 0), interval=12)

100%|██████████████████████████████████████████████████████████████████████████| 16260/16260 [00:06<00:00, 2415.81it/s]


In [20]:
X_test.shape

(16260, 12, 44)

In [21]:
test.shape, X_train.shape, 11520+4752-12

((11520, 45), (4752, 45), 16260)

In [22]:
11520 / (6*24)

80.0

In [23]:
# test set 기간인 후반부 80일에 맞게 자르기 
X_test = X_test[-11520:, :, :]

In [24]:
X_test.shape

(11520, 12, 44)

In [25]:
X_train.drop('dummy', axis = 1, inplace = True)
test.drop('dummy', axis = 1, inplace = True)

# Feature Engineering & Initial Modeling

In [26]:
# 간단한 LSTM 모델 구축하기 
simple_lstm_model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(128, input_shape=sequence.shape[-2:]), # 128은 Unit이며, Output Space를 의미
    tf.keras.layers.Dense(256, activation='linear'),
    tf.keras.layers.Dense(128, activation='linear'),
    tf.keras.layers.Dense(1)
])

simple_lstm_model.compile(optimizer='adam', loss='mse')

In [27]:
simple_lstm_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 128)               88576     
_________________________________________________________________
dense (Dense)                (None, 256)               33024     
_________________________________________________________________
dense_1 (Dense)              (None, 128)               32896     
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 129       
Total params: 154,625
Trainable params: 154,625
Non-trainable params: 0
_________________________________________________________________


In [28]:
sequence.shape, sequence.shape[-2:]

((8616, 12, 44), (12, 44))

# Model Tuning & Evaluation

In [29]:
# loss가 4미만으로 떨어지면 학습 종료 시키는 기능
class myCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs = None):
        if(logs.get('loss') < 4):
            print('\n Loss is under 4, cancelling training')
            self.model.stop_training = True

In [30]:
callbacks = myCallback()

In [31]:
# 모델 학습
simple_lstm_model.fit(    
    sequence, target,
    epochs=60,
    batch_size=128,
    verbose=2,
    shuffle=False,
    callbacks = [callbacks]
)

Train on 8616 samples
Epoch 1/60
8616/8616 - 6s - loss: 116.7099
Epoch 2/60
8616/8616 - 2s - loss: 8.7310
Epoch 3/60
8616/8616 - 2s - loss: 5.6716
Epoch 4/60
8616/8616 - 2s - loss: 4.7186
Epoch 5/60
8616/8616 - 2s - loss: 4.4643
Epoch 6/60
8616/8616 - 2s - loss: 4.2816
Epoch 7/60
8616/8616 - 2s - loss: 4.0556
Epoch 8/60

 Loss is under 4, cancelling training
8616/8616 - 2s - loss: 3.9073


<tensorflow.python.keras.callbacks.History at 0x1b432c78898>

In [32]:
# LSTM 레이어는 고정
simple_lstm_model.layers[0].trainable = False

In [33]:
simple_lstm_model

<tensorflow.python.keras.engine.sequential.Sequential at 0x1b430ac1e10>

In [34]:
simple_lstm_model.layers

[<tensorflow.python.keras.layers.recurrent_v2.LSTM at 0x1b43025ceb8>,
 <tensorflow.python.keras.layers.core.Dense at 0x1b430a63668>,
 <tensorflow.python.keras.layers.core.Dense at 0x1b430a63ac8>,
 <tensorflow.python.keras.layers.core.Dense at 0x1b430ac1b00>]

In [35]:
simple_lstm_model.layers[0]

<tensorflow.python.keras.layers.recurrent_v2.LSTM at 0x1b43025ceb8>

In [36]:
# fine tuning 할 때 사용할 학습데이터 생성 (Y18)
finetune_X, finetune_y = convert_to_timeseries(pd.concat([X_train.tail(432), train['Y18'].tail(432)], axis = 1), interval=12)

100%|██████████████████████████████████████████████████████████████████████████████| 420/420 [00:00<00:00, 1474.60it/s]


In [37]:
# LSTM 레이어는 고정 시켜두고, DNN 레이어에 대해서 fine tuning 진행 (Transfer Learning)
finetune_history = simple_lstm_model.fit(
            finetune_X, finetune_y,
            epochs=20,
            batch_size=64,
            shuffle=False,
            verbose = 2)

Train on 420 samples
Epoch 1/20
420/420 - 0s - loss: 5.9472
Epoch 2/20
420/420 - 0s - loss: 12.0802
Epoch 3/20
420/420 - 0s - loss: 4.5658
Epoch 4/20
420/420 - 0s - loss: 3.8677
Epoch 5/20
420/420 - 0s - loss: 3.5950
Epoch 6/20
420/420 - 0s - loss: 2.1914
Epoch 7/20
420/420 - 0s - loss: 1.8924
Epoch 8/20
420/420 - 0s - loss: 1.5006
Epoch 9/20
420/420 - 0s - loss: 1.2774
Epoch 10/20
420/420 - 0s - loss: 1.1665
Epoch 11/20
420/420 - 0s - loss: 1.0209
Epoch 12/20
420/420 - 0s - loss: 0.8972
Epoch 13/20
420/420 - 0s - loss: 0.8562
Epoch 14/20
420/420 - 0s - loss: 0.7918
Epoch 15/20
420/420 - 0s - loss: 0.7256
Epoch 16/20
420/420 - 0s - loss: 0.6898
Epoch 17/20
420/420 - 0s - loss: 0.6612
Epoch 18/20
420/420 - 0s - loss: 0.6215
Epoch 19/20
420/420 - 0s - loss: 0.5924
Epoch 20/20
420/420 - 0s - loss: 0.5773


In [38]:
# 예측하기 
finetune_pred = simple_lstm_model.predict(X_test)

In [39]:
# 제출 파일 만들기
submit = pd.DataFrame({'id':range(144*33, 144*113),
              'Y18':finetune_pred.reshape(1,-1)[0]})

In [40]:
submit.to_csv('LSTM_Transfer_Learning.csv', index = False)