In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
from torch.autograd import Variable
from sklearn.preprocessing import MinMaxScaler
import math
import pandas as pd
import os

In [None]:
# Mount Google Drive inside the Colab runtime so files stored there can be read.
from google.colab import drive

# Mount point handed to drive.mount; later cells build paths relative to it.
root = '/content/drive'
drive.mount(root)

In [None]:
from os.path import join

# Project folder, expressed relative to the mount point `root`.
my_path = 'My Drive/Colab Notebooks/'
project_path = join(root, my_path)
# Show the resolved project folder.
print(project_path)

In [None]:
# IPython magic: switch the working directory to project_path, so relative paths
# used later in the notebook (e.g. './modeling_dataset/') resolve inside it.
%cd '{project_path}'

In [None]:
# Visualize how each variable changes over time: one figure per dataset file,
# one panel per variable.
dataset_dir = os.path.join(project_path, 'modeling_dataset')

# (column to plot, panel title) in subplot order on a 3x2 grid.
panels = [
    ('social_buzz', 'Social buzz'),
    ('minimum temperature(°C)', 'Minimum temperature(°C)'),
    ('maximum temperature(°C)', 'Maximum temperature(°C)'),
    ('humidity(%)', 'Humidity(%)'),
    ('precip_prob(%)', 'Precip_ probability(%)'),
]

# sorted() gives a stable figure order; os.listdir returns entries in arbitrary order.
for file_name in sorted(os.listdir(dataset_dir)):
    file_path = os.path.join(dataset_dir, file_name)
    # Skip hidden entries and sub-directories (e.g. '.ipynb_checkpoints'), which
    # pd.read_csv cannot open.
    if file_name.startswith('.') or not os.path.isfile(file_path):
        continue

    df = pd.read_csv(file_path)
    # Dataset name = file name up to the first dot.
    name = file_name.split('.')[0]

    fig = plt.figure(figsize=(35, 15))
    for position, (column, title) in enumerate(panels, start=1):
        ax = fig.add_subplot(3, 2, position)
        ax.plot(df['date'], df[column], label=column)
        # Only the first panel carries the dataset name in its title.
        ax.set_title(('<' + name + '>' + title) if position == 1 else title)

    plt.show()
    # Release the figure so memory does not grow with the number of datasets.
    plt.close(fig)

In [None]:
# Select the compute device: the first CUDA GPU when one is available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [None]:
# LSTM network
class LSTM1(nn.Module):
  """LSTM followed by a two-layer dense head.

  Args:
    num_classes: number of output values per sample.
    input_size: number of features per time step.
    hidden_size: width of the LSTM hidden state.
    num_layers: number of stacked LSTM layers.
    seq_length: sequence length; stored on the instance but not used by forward().
  """

  def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_length):
    super().__init__()
    # Number of outputs
    self.num_classes = num_classes
    # Number of stacked LSTM layers
    self.num_layers = num_layers
    # Number of input features
    self.input_size = input_size
    # Hidden-state width
    self.hidden_size = hidden_size
    # Sequence length
    self.seq_length = seq_length

    # LSTM; batch_first=True means inputs are (batch, seq, feature)
    self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                        num_layers=num_layers, batch_first=True)
    # First fully connected layer
    self.fc_1 = nn.Linear(hidden_size, 128)
    # Final fully connected layer
    self.fc = nn.Linear(128, num_classes)
    # Activation function: ReLU
    self.relu = nn.ReLU()

  def forward(self, x):
    """Map a (batch, seq, input_size) tensor to (batch, num_classes) outputs."""
    # With no initial state given, nn.LSTM starts from zero hidden/cell states
    # created on x's own device, so the module no longer depends on a global
    # `device` or on the deprecated `Variable` wrapper.
    _, (hn, _) = self.lstm(x)

    # hn is (num_layers, batch, hidden_size). Take the top layer's final hidden
    # state: flattening with view(-1, hidden_size) would merge the layer and
    # batch axes and return num_layers * batch rows whenever num_layers > 1.
    out = self.relu(hn[-1])
    # First dense layer
    out = self.relu(self.fc_1(out))
    # Final output
    return self.fc(out)

In [None]:
# CNN-LSTM network
class CNN_LSTM(nn.Module):
    """Two Conv1d layers along the time axis, one LSTM layer and a dense head.

    Args:
        num_classes: number of output values per sample.
        input_size: number of input features (the first Conv1d's in_channels).
        hidden_size: accepted so the signature matches LSTM1, but not used;
            the LSTM hidden width is fixed at 64.
        num_layers: accepted for the same reason, but not used; the LSTM has
            a single layer.
        seq_length: accepted for the same reason, but not used.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_length):
        super().__init__()
        # 1-D convolutions; kernel_size=3 with padding=1 keeps the sequence length.
        self.conv1d_1 = nn.Conv1d(in_channels=input_size,
                                  out_channels=16,
                                  kernel_size=3,
                                  stride=1,
                                  padding=1)
        self.conv1d_2 = nn.Conv1d(in_channels=16,
                                  out_channels=32,
                                  kernel_size=3,
                                  stride=1,
                                  padding=1)

        # LSTM over the 32 convolutional channels
        self.lstm = nn.LSTM(input_size=32,
                            hidden_size=64,
                            num_layers=1,
                            bias=True,
                            bidirectional=False,
                            batch_first=True)

        self.dropout = nn.Dropout(0.1)

        # NOTE(review): dense1/dense2 are never called in forward(). They are
        # kept only so the module's parameter names (state_dict keys) do not change.
        self.dense1 = nn.Linear(64, 32)
        self.dense2 = nn.Linear(32, 16)

        # Fully connected layer 1
        self.fc_layer1 = nn.Sequential(
            nn.Linear(64, 32),
            # Activation function: LeakyReLU
            nn.LeakyReLU(inplace=True),
            nn.Dropout(p=0.1)
        )
        # Fully connected layer 2 (output head). No Dropout here: dropping the
        # prediction itself zeroes/rescales outputs at random during training
        # and biases the scale the network learns. In eval mode Dropout is the
        # identity, so inference with existing weights is unaffected.
        self.fc_layer2 = nn.Sequential(
            nn.Linear(32, num_classes),
            # Activation function: LeakyReLU
            nn.LeakyReLU(inplace=True)
        )

    def forward(self, x):
        """Map a (B, S, F) batch of sequences to (B, num_classes) outputs."""
        # (B, S, F) -> (B, F, S): Conv1d expects channels before the sequence axis.
        x = x.transpose(1, 2)
        # (B, F, S) -> (B, 16, S)
        x = self.conv1d_1(x)
        # (B, 16, S) -> (B, 32, S)
        x = self.conv1d_2(x)
        # (B, 32, S) -> (B, S, 32): back to batch-first sequence layout for the LSTM.
        x = x.transpose(1, 2)

        self.lstm.flatten_parameters()
        # hidden is (num_layers, B, 64); only the final hidden state is used.
        _, (hidden, _) = self.lstm(x)
        # Last layer's final hidden state -> (B, 64)
        x = hidden[-1]
        x = self.dropout(x)

        # (B, 64) -> (B, 32)
        x = self.fc_layer1(x)
        # Final output: (B, 32) -> (B, num_classes)
        x = self.fc_layer2(x)

        return x

In [None]:
# Accumulators for the per-dataset scores of each model. The training loop
# appends the square root of an MSE loss, i.e. RMSE values.
lstm_temp = []
cnn_temp = []
# Empty frame that the summary cell fills with the per-model scores.
rmse = pd.DataFrame()

In [None]:
# Train an LSTM and a CNN-LSTM on every dataset file and record each model's RMSE.
# The features span very different ranges, hence the scalers.
from sklearn.preprocessing import MinMaxScaler,StandardScaler

DATASET_DIR = './modeling_dataset/'
TARGET_COLUMN = 'num_of_search'
# The first TRAIN_ROWS rows are the training set, the remaining rows the test set.
TRAIN_ROWS = 304
SEED = 42

LEARNING_RATE = 0.005
LSTM_EPOCHS = 10000
CNN_LSTM_EPOCHS = 4000
# Hidden-state width, number of stacked LSTM layers, number of outputs.
HIDDEN_SIZE = 4
NUM_LAYERS = 1
NUM_CLASSES = 1
# Print the training loss every LOG_EVERY epochs.
LOG_EVERY = 100


def _as_sequences(array_2d):
    """Turn a (rows, features) array into a float32 tensor shaped (rows, 1, features)."""
    tensor = torch.as_tensor(array_2d, dtype=torch.float32)
    return tensor.reshape(tensor.shape[0], 1, tensor.shape[1])


def _fit_model(model, inputs, targets, num_epochs, learning_rate):
    """Train `model` full-batch with Adam on an MSE loss, logging the loss periodically."""
    loss_function = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    # Move the data to the device once instead of on every epoch.
    inputs, targets = inputs.to(device), targets.to(device)

    model.train()
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        loss = loss_function(model(inputs), targets)
        loss.backward()
        optimizer.step()
        if epoch % LOG_EVERY == 0:
            print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))


def _predict_and_score(model, inputs, targets):
    """Return (predictions as a numpy array, RMSE against `targets`) in eval mode."""
    model.eval()
    # no_grad: inference only, so no autograd graph is needed.
    with torch.no_grad():
        predictions = model(inputs.to(device)).cpu()
        mse = torch.nn.MSELoss()(predictions, targets)
    return predictions.numpy(), math.sqrt(mse.item())


def _plot_predictions(actual, predicted, title, save_path):
    """Plot actual vs. predicted series, save the figure to `save_path` and show it."""
    fig = plt.figure(figsize=(10, 6))
    plt.plot(actual, label='Actual num_tourist')
    plt.plot(predicted, label='Predicted num_tourist')
    plt.title(title)
    plt.legend()
    plt.savefig(save_path)
    plt.show()
    # Release the figure so memory does not grow with the number of datasets.
    plt.close(fig)


# Start from empty score lists so re-running this cell does not duplicate entries.
lstm_temp.clear()
cnn_temp.clear()
# Fix the weight-initialisation / dropout randomness so runs are repeatable.
torch.manual_seed(SEED)

# sorted() gives a stable dataset order; os.listdir returns entries in arbitrary order.
for file_name in sorted(os.listdir(DATASET_DIR)):
    file_path = os.path.join(DATASET_DIR, file_name)
    # Skip hidden entries and sub-directories (e.g. '.ipynb_checkpoints'), which
    # pd.read_csv cannot open.
    if file_name.startswith('.') or not os.path.isfile(file_path):
        continue

    # Prepare the dataset
    df = pd.read_csv(file_path)
    df = df.drop(columns='poi').set_index('date')
    if len(df) <= TRAIN_ROWS:
        print("Skipping %s: %d rows leave no test data after the first %d"
              % (file_name, len(df), TRAIN_ROWS))
        continue

    X = df.drop(columns=TARGET_COLUMN)
    # Select the target by name so it always matches the column dropped from X
    # (a positional slice silently picks a feature if the column order changes).
    y = df[[TARGET_COLUMN]]

    # Split into train and test sets (chronological order is preserved).
    X_train_raw, X_test_raw = X.iloc[:TRAIN_ROWS], X.iloc[TRAIN_ROWS:]
    y_train_raw, y_test_raw = y.iloc[:TRAIN_ROWS], y.iloc[TRAIN_ROWS:]

    # Fit the scalers on the training rows only; fitting on all rows would leak
    # test-set statistics into training.
    ss = StandardScaler().fit(X_train_raw)
    mm = MinMaxScaler().fit(y_train_raw)
    X_train, X_test = ss.transform(X_train_raw), ss.transform(X_test_raw)
    y_train, y_test = mm.transform(y_train_raw), mm.transform(y_test_raw)

    # Convert to tensors the models can consume; each row becomes a length-1 sequence.
    X_train_tensors_final = _as_sequences(X_train)
    X_test_tensors_final = _as_sequences(X_test)
    y_train_tensors = torch.as_tensor(y_train, dtype=torch.float32)
    y_test_tensors = torch.as_tensor(y_test, dtype=torch.float32)

    # Number of features is taken from the data instead of being hard-coded.
    input_size = X_train.shape[1]
    seq_length = X_train_tensors_final.shape[1]
    # Dataset name = file name up to the first dot.
    name = file_name.split('.')[0]
    dataY_plot = y_test_tensors.numpy()

    # model1 - LSTM
    lstm1 = LSTM1(NUM_CLASSES, input_size, HIDDEN_SIZE, NUM_LAYERS, seq_length).to(device)
    _fit_model(lstm1, X_train_tensors_final, y_train_tensors, LSTM_EPOCHS, LEARNING_RATE)
    data_predict, lstm_rmse = _predict_and_score(lstm1, X_test_tensors_final, y_test_tensors)
    _plot_predictions(dataY_plot, data_predict, name+ '(LSTM)', './'+name+'_lstm')

    # RMSE of the test-set predictions plotted above (in scaled target units),
    # not the loss of the last training step.
    lstm_temp.append(lstm_rmse)
    print("RMSE: %s" % lstm_rmse)

    # model2 - CNN LSTM
    cnn_lstm = CNN_LSTM(NUM_CLASSES, input_size, HIDDEN_SIZE, NUM_LAYERS, seq_length).to(device)
    _fit_model(cnn_lstm, X_train_tensors_final, y_train_tensors, CNN_LSTM_EPOCHS, LEARNING_RATE)
    data_predict, cnn_rmse = _predict_and_score(cnn_lstm, X_test_tensors_final, y_test_tensors)
    _plot_predictions(dataY_plot, data_predict, name+ '(CNN LSTM)', './'+name+'_cnn')

    cnn_temp.append(cnn_rmse)
    print("RMSE: %s" % cnn_rmse)

In [None]:
# RMSE scores: one row per dataset for each model, followed by a final row
# holding each model's mean RMSE.
# The frame is rebuilt from the score lists, so re-running this cell gives the
# same table. pd.Series pads with NaN if the two lists differ in length.
rmse = pd.DataFrame({
    'LSTM': pd.Series(lstm_temp, dtype='float64'),
    'CNN_LSTM': pd.Series(cnn_temp, dtype='float64'),
})
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
# mean() skips the NaN padding, so each value is the mean of that model's scores.
mean_row = rmse.mean().to_frame().T
rmse = pd.concat([rmse, mean_row], ignore_index=True)