<a href="https://colab.research.google.com/github/Minh-A/Pytorch_DeepLearning/blob/main/LSTM_pytorch_12_23.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Library

In [1]:
# 구글드라이브 연결
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings(action='ignore')
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset,DataLoader
import torch.optim as optim
torch.manual_seed(1015) ###### <- 이거 뭐냐?

# define 'device' to upload tensor in gpu
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

#시간을 계산하는 함수
import time, datetime
def clock(start):
    sec = time.time() - start #현재시간 - 시스템초기시간
    times = str(datetime.timedelta(seconds = sec)).split(".") # 시간:분:초로 변환
    times = times[0]
    return times

# Data Load

In [3]:
train = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/LSTM_pytorch(Dacon)/input/train.csv", encoding = 'euc-kr')
train['DateTime'] = pd.to_datetime(train.DateTime)  # 데이터 타입을 object에서 datetime으로
train['date'] = train.DateTime.dt.date  # 날짜만 뽑음
train  = train.groupby('date').sum().reset_index()  # 날짜별로 묶음

In [4]:
train

Unnamed: 0,date,사용자,세션,신규방문자,페이지뷰
0,2018-09-09,281,266,73,1826
1,2018-09-10,264,247,51,2092
2,2018-09-11,329,310,58,1998
3,2018-09-12,300,287,45,2595
4,2018-09-13,378,344,50,3845
...,...,...,...,...,...
787,2020-11-04,4516,4472,1196,112683
788,2020-11-05,4155,4037,1044,102901
789,2020-11-06,3663,3576,825,88015
790,2020-11-07,2472,2417,531,57386


# Data Preprocessing

In [5]:
# scaling
mini = train.iloc[:,1:].min()
size = train.iloc[:,1:].max() - train.iloc[:,1:].min()
train.iloc[:,1:] = (train.iloc[:,1:] -  mini) / size

input_window = 30
output_window = 7

window_x = np.zeros((train.shape[0] - (input_window + output_window), input_window, 4))
window_y = np.zeros((train.shape[0] - (input_window + output_window), output_window, 4))

for start in range(train.shape[0] - (input_window + output_window)):
    end = start + input_window    
    window_x[start,:, :] = train.iloc[start : end                , 1: ].values
    window_y[start,:, :] = train.iloc[end   : end + output_window, 1: ].values

# Model

In [6]:
# Model 생성
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size = input_size,
                            hidden_size = hidden_size,
                            batch_first=True)
        self.hidden_lstm = nn.LSTM(input_size = hidden_size,
                                   hidden_size = hidden_size,
                                   batch_first=True)
        
        self.time_fc = nn.Linear(hidden_size, 4)
    
    def forward(self, x_time):
    
        out_time, _ = self.lstm(x_time)
        out_time, _ = self.hidden_lstm(out_time)
        out_time, _ = self.hidden_lstm(out_time)
        out_time, _ = self.hidden_lstm(out_time)
        
        
        out_time = self.time_fc(out_time[:,-7:, :])
        
        return out_time.view(-1,7,4)
    
model = LSTM(input_size = 4, hidden_size = 30).to(device)

In [8]:
print(model)

LSTM(
  (lstm): LSTM(4, 30, batch_first=True)
  (hidden_lstm): LSTM(30, 30, batch_first=True)
  (time_fc): Linear(in_features=30, out_features=4, bias=True)
)


In [9]:
print(window_x.shape)
print(window_y.shape)

(755, 30, 4)
(755, 7, 4)


In [10]:
# Model학습
window_x = torch.tensor(window_x).float().to(device)
window_y = torch.tensor(window_y).float().to(device)

# Train model
optimizer = torch.optim.Adam(model.parameters(), lr = 1e-2)
criterion = nn.MSELoss(size_average = True)
num_epochs  = 500
train_error = []
for t in range(num_epochs):
    train_pred = model(window_x)
    loss = criterion(train_pred, window_y) ### trend
    train_error.append(loss)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if t % 10 == 0 and t !=0:
        print(f"{t} Epochs train MSE: {loss.item():1.5f}")

10 Epochs train MSE: 0.02835
20 Epochs train MSE: 0.01247
30 Epochs train MSE: 0.01194
40 Epochs train MSE: 0.00882
50 Epochs train MSE: 0.00821
60 Epochs train MSE: 0.00813
70 Epochs train MSE: 0.00802
80 Epochs train MSE: 0.00795
90 Epochs train MSE: 0.00786
100 Epochs train MSE: 0.00776
110 Epochs train MSE: 0.00755
120 Epochs train MSE: 0.00709
130 Epochs train MSE: 0.00646
140 Epochs train MSE: 0.00647
150 Epochs train MSE: 0.00667
160 Epochs train MSE: 0.00626
170 Epochs train MSE: 0.00591
180 Epochs train MSE: 0.00571
190 Epochs train MSE: 0.00555
200 Epochs train MSE: 0.00548
210 Epochs train MSE: 0.00541
220 Epochs train MSE: 0.00530
230 Epochs train MSE: 0.00624
240 Epochs train MSE: 0.00527
250 Epochs train MSE: 0.00507
260 Epochs train MSE: 0.00486
270 Epochs train MSE: 0.00913
280 Epochs train MSE: 0.00610
290 Epochs train MSE: 0.00538
300 Epochs train MSE: 0.00517
310 Epochs train MSE: 0.00496
320 Epochs train MSE: 0.00482
330 Epochs train MSE: 0.00469
340 Epochs train MS

# Prediction & CSV 

In [11]:
train_pred

tensor([[[0.0423, 0.0432, 0.0428, 0.0060],
         [0.0415, 0.0422, 0.0418, 0.0054],
         [0.0408, 0.0413, 0.0409, 0.0048],
         ...,
         [0.0402, 0.0404, 0.0405, 0.0042],
         [0.0406, 0.0407, 0.0411, 0.0045],
         [0.0393, 0.0394, 0.0396, 0.0035]],

        [[0.0418, 0.0425, 0.0420, 0.0056],
         [0.0410, 0.0415, 0.0411, 0.0049],
         [0.0417, 0.0419, 0.0420, 0.0053],
         ...,
         [0.0408, 0.0409, 0.0412, 0.0046],
         [0.0394, 0.0396, 0.0397, 0.0036],
         [0.0384, 0.0384, 0.0383, 0.0029]],

        [[0.0412, 0.0419, 0.0413, 0.0051],
         [0.0419, 0.0422, 0.0422, 0.0055],
         [0.0405, 0.0408, 0.0408, 0.0044],
         ...,
         [0.0395, 0.0397, 0.0398, 0.0037],
         [0.0385, 0.0386, 0.0384, 0.0030],
         [0.0389, 0.0388, 0.0388, 0.0032]],

        ...,

        [[0.5735, 0.5763, 0.4519, 0.4923],
         [0.3426, 0.3409, 0.2390, 0.2771],
         [0.3035, 0.3015, 0.2078, 0.2535],
         ...,
         [0.8288, 0.8

In [12]:
train_pred.shape

torch.Size([755, 7, 4])

In [13]:
last_month = torch.tensor(window_x[-1,:,:][np.newaxis,...]).float().to(device)
last_month

tensor([[[0.2719, 0.2669, 0.1604, 0.2021],
         [0.2902, 0.2849, 0.1710, 0.2242],
         [0.3109, 0.3047, 0.2118, 0.2249],
         [0.5704, 0.5557, 0.4414, 0.4659],
         [0.5141, 0.5053, 0.3686, 0.4430],
         [0.5595, 0.5477, 0.4509, 0.4565],
         [0.4957, 0.4869, 0.3331, 0.3698],
         [0.3836, 0.3669, 0.1947, 0.3030],
         [0.3415, 0.3404, 0.2308, 0.2975],
         [0.3063, 0.2978, 0.1964, 0.2278],
         [0.4685, 0.4599, 0.3550, 0.3588],
         [0.5034, 0.5016, 0.4237, 0.3637],
         [0.4473, 0.4466, 0.3515, 0.3213],
         [0.3982, 0.3958, 0.3325, 0.2804],
         [0.3867, 0.3769, 0.2805, 0.2870],
         [0.3005, 0.2969, 0.2751, 0.1814],
         [0.3762, 0.3712, 0.3639, 0.2516],
         [0.4886, 0.4849, 0.3923, 0.3579],
         [0.5981, 0.6036, 0.6379, 0.5542],
         [0.4883, 0.4845, 0.4178, 0.4242],
         [0.5132, 0.5131, 0.4627, 0.4058],
         [0.4320, 0.4223, 0.3320, 0.3416],
         [0.3345, 0.3289, 0.2491, 0.2403],
         [0

In [14]:
last_month.shape

torch.Size([1, 30, 4])

In [49]:
submission = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/LSTM_pytorch(Dacon)/input/submission.csv", encoding = 'euc-kr')
last_month = torch.tensor(window_x[-1,:,:][np.newaxis,...]).float().to(device)

In [50]:
submission

Unnamed: 0,DateTime,사용자,세션,신규방문자,페이지뷰
0,2020-11-09,0,0,0,0
1,2020-11-10,0,0,0,0
2,2020-11-11,0,0,0,0
3,2020-11-12,0,0,0,0
4,2020-11-13,0,0,0,0
...,...,...,...,...,...
56,2021-01-04,0,0,0,0
57,2021-01-05,0,0,0,0
58,2021-01-06,0,0,0,0
59,2021-01-07,0,0,0,0


In [51]:
model(last_month)

tensor([[[0.3109, 0.3075, 0.2104, 0.2576],
         [0.8317, 0.8310, 0.7240, 0.7188],
         [0.8340, 0.8556, 0.7645, 0.7274],
         [0.8019, 0.8213, 0.6864, 0.6791],
         [0.8205, 0.8413, 0.6917, 0.7049],
         [0.7203, 0.7382, 0.5990, 0.6443],
         [0.3676, 0.3605, 0.2497, 0.3002]]], device='cuda:0',
       grad_fn=<ViewBackward>)

In [52]:
next_week = model(last_month)
last_month = torch.cat([last_month[-7:], next_week], axis = 1)

In [53]:
last_month

tensor([[[0.2719, 0.2669, 0.1604, 0.2021],
         [0.2902, 0.2849, 0.1710, 0.2242],
         [0.3109, 0.3047, 0.2118, 0.2249],
         [0.5704, 0.5557, 0.4414, 0.4659],
         [0.5141, 0.5053, 0.3686, 0.4430],
         [0.5595, 0.5477, 0.4509, 0.4565],
         [0.4957, 0.4869, 0.3331, 0.3698],
         [0.3836, 0.3669, 0.1947, 0.3030],
         [0.3415, 0.3404, 0.2308, 0.2975],
         [0.3063, 0.2978, 0.1964, 0.2278],
         [0.4685, 0.4599, 0.3550, 0.3588],
         [0.5034, 0.5016, 0.4237, 0.3637],
         [0.4473, 0.4466, 0.3515, 0.3213],
         [0.3982, 0.3958, 0.3325, 0.2804],
         [0.3867, 0.3769, 0.2805, 0.2870],
         [0.3005, 0.2969, 0.2751, 0.1814],
         [0.3762, 0.3712, 0.3639, 0.2516],
         [0.4886, 0.4849, 0.3923, 0.3579],
         [0.5981, 0.6036, 0.6379, 0.5542],
         [0.4883, 0.4845, 0.4178, 0.4242],
         [0.5132, 0.5131, 0.4627, 0.4058],
         [0.4320, 0.4223, 0.3320, 0.3416],
         [0.3345, 0.3289, 0.2491, 0.2403],
         [0

In [54]:
pred_week = next_week.cpu().detach().numpy().reshape(output_window,4)
pred_week = pred_week * size.values + mini.values
pred_week = pred_week.astype(int)

print(pred_week)

[[  1684   1661    356  40045]
 [  4506   4487   1224 111733]
 [  4518   4620   1293 113067]
 [  4344   4435   1161 105551]
 [  4445   4543   1169 109567]
 [  3902   3986   1013 100150]
 [  1992   1947    422  46656]]


In [55]:
pred_week.shape

(7, 4)

In [56]:
(len(submission) - output_window)//7 + 1

8

In [57]:
len(submission)

61

In [58]:
# output_window는 일주일인 7일을 의미
output_window

7

In [59]:
# 예측 및 제출파일 생성
# submission = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/LSTM_pytorch(Dacon)/input/submission.csv", encoding = 'euc-kr')
# last_month = torch.tensor(window_x[-1,:,:][np.newaxis,...]).float().to(device)
for start in range((len(submission) - output_window)//7 + 2):
    start = start * 7
    print(start)
    next_week = model(last_month)
    last_month = torch.cat([last_month[-7:], next_week], axis = 1)
    
    pred_week = next_week.cpu().detach().numpy().reshape(output_window,4)
    pred_week = pred_week * size.values + mini.values
    pred_week = pred_week.astype(int)
    
    if start/7 == (len(submission) - output_window)//7 + 1:
        submission.iloc[start :, 1:] = pred_week[-submission.iloc[start :, 1:].shape[0]:,:]
    else:
        submission.iloc[start : start + output_window, 1:] = pred_week
submission

0
7
14
21
28
35
42
49
56


Unnamed: 0,DateTime,사용자,세션,신규방문자,페이지뷰
0,2020-11-09,1796,1755,379,44250
1,2020-11-10,3979,3913,1037,105128
2,2020-11-11,3804,3789,1019,98884
3,2020-11-12,3520,3558,916,88109
4,2020-11-13,3162,3229,811,80104
...,...,...,...,...,...
56,2021-01-04,3455,3591,885,84135
57,2021-01-05,3069,3127,721,70538
58,2021-01-06,3088,3121,732,73360
59,2021-01-07,1883,1845,373,42193


In [60]:
submission.to_csv('/content/drive/MyDrive/Colab Notebooks/LSTM_pytorch(Dacon)/input/제출1.csv', index = False, encoding = 'euc-kr')