<a href="https://colab.research.google.com/github/100jy/dacon_ts_forecasting/blob/main/Seq2Seq.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install torchcontrib

Collecting torchcontrib
  Downloading https://files.pythonhosted.org/packages/72/36/45d475035ab35353911e72a03c1c1210eba63b71e5a6917a9e78a046aa10/torchcontrib-0.0.2.tar.gz
Building wheels for collected packages: torchcontrib
  Building wheel for torchcontrib (setup.py) ... [?25l[?25hdone
  Created wheel for torchcontrib: filename=torchcontrib-0.0.2-cp36-none-any.whl size=7531 sha256=b4cb32806eb59c4f0c5185868752bf63e895d1158dec1e3a09eff0a40e992233
  Stored in directory: /root/.cache/pip/wheels/06/06/7b/a5f5920bbf4f12a2c927e438fac17d4cd9560f8336b00e9a99
Successfully built torchcontrib
Installing collected packages: torchcontrib
Successfully installed torchcontrib-0.0.2


In [3]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import Dataset
import torch.optim.adam
from torchcontrib.optim import SWA
import datetime
import matplotlib.pyplot as plt 
from tqdm import tqdm

# feature 생성 및 preprocessing

In [4]:
train = pd.read_csv("./drive/MyDrive/데이콘/train.csv", encoding = 'euc-kr')

# 시간 관련 변수들
train['DateTime'] = pd.to_datetime(train.DateTime)
#일자
train['Date'] = train.DateTime.dt.date

# 요일 혹은 분기정보
train['DayOfWeek'] = (train.DateTime.dt.weekday)/6
train['DayOfMon'] = ((train.DateTime).dt.day)/31
train['Quarter'] = ((train.DateTime).dt.quarter)/4

train['Year'] = ((train.DateTime.dt.year) -2019)
train['Days'] = (train.DateTime.max() - train.DateTime).dt.days + 1


left = train.iloc[:,:5].groupby(train['Date']).sum().reset_index()
right = train.iloc[:,5:].groupby(train['Date']).mean().reset_index()
train  = pd.merge(left, right, on='Date')

def log_trans(x):
  return np.log(1+x)

train['Days'] = log_trans(train['Days'])


# ts feature 생성 
for target in ['사용자', '세션', '신규방문자', '페이지뷰']:
    train[f'{target}CumSum'] = train[target].cumsum()
    # log하고 rolling mean
    train[target] = log_trans(train[target])
    
    for k in [3,7,14,21]:
        train[f'{target}RollingMean{k}'] =  (train[target].rolling(k).mean())

    train[f'{target}RollingStd21'] =  (train[target].rolling(21).std().round(0))
    train[f'{target}DaysSince10000'] = (train[f'{target}CumSum'] > 10000) * 1
    train[f'{target}DaysSince100000'] = (train[f'{target}CumSum'] > 100000) * 1

    train[f'{target}RollingMeanDiff2w'] = train[f'{target}RollingMean7'] / (train[f'{target}RollingMean14'] + 1) - 1
    train[f'{target}RollingMeanDiff3w'] = train[f'{target}RollingMean7'] / (train[f'{target}RollingMean21'] + 1) - 1


    train[f'{target}CumSum'] = log_trans(train[target].cumsum())
    
    
train_df = train.dropna()

# Dataset 정의

In [28]:
MAX_LENGTH = 180
class DatasetWindows(Dataset):
  def __init__(self, df=train_df):
    self.df = df.iloc[:,1:]
    self.max_len = len(df)

  def __len__(self):
    return 1
    
  def __getitem__(self, idx):

    #choose length(90, 200)
    seq_length = np.random.randint(90,181)
    
    # choose start point
    x_point = np.random.randint(0, self.max_len - seq_length)
    # half as x, half as y
    y_point = x_point + seq_length//2

    x = torch.tensor(self.df.iloc[x_point:y_point, :].values, dtype=torch.float32).cuda()
    y = torch.tensor(self.df.iloc[y_point:x_point+seq_length, :4].values, dtype=torch.float32).cuda()
    # L x 49
    return x, y

# Model 정의

In [29]:
class Embedding(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(Embedding, self).__init__()
     
        self.embedding = nn.Sequential(nn.Linear(input_size, hidden_size))

    def forward(self, input):
        embedded = self.embedding(input)
        return embedded

class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.gru(output, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size).cuda()


class AttnDecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        attn_weights = F.softmax(
            self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        output = self.out(output[0])
        return output, hidden, attn_weights

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size).cuda()



In [30]:
def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    # L x 32
    encoder_outputs = torch.zeros(max_length, encoder.hidden_size).cuda()

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(
            input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output

    # input last day
    decoder_input = input_tensor[-1,:4].unsqueeze(0)
    # last hidden state as initial
    decoder_hidden = encoder_hidden


    for di in range(target_length):
        decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
        decoder_input = decoder_output.squeeze().detach()
        loss += criterion(decoder_output, target_tensor[di])

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [31]:
import time
import math
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (- %s)' % (asMinutes(s), asMinutes(rs))


def showPlot(points):
    plt.figure()
    fig, ax = plt.subplots()
    # this locator puts ticks at regular intervals
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    plt.plot(points)

In [32]:
def evaluate(encoder, decoder, input_seq, max_length=MAX_LENGTH):
    with torch.no_grad():
        input_tensor = torch.tensor(input_seq, dtype=torch.float32).cuda()
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.initHidden()

        encoder_outputs = torch.zeros(max_length, encoder.hidden_size).cuda()

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                     encoder_hidden)
            encoder_outputs[ei] = encoder_output

        decoder_input = input_tensor[-1,:4].unsqueeze(0)

        decoder_hidden = encoder_hidden

        decoded =  torch.zeros(1, 4).cuda()
        decoder_attentions = torch.zeros(max_length, max_length)

        for di in range(max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            decoder_attentions[di] = decoder_attention.data
            decoded = torch.cat([decoded, decoder_output],dim=0)
            decoder_input = decoder_output.squeeze().detach()
        return decoded[1:]

In [33]:
def inverse_log(x):
    # 32bit 사용시 단위문제 발생..
    return (np.exp(x)-1).astype(np.int64)

def make_val_plot(encoder, attn_decoder, inp, val_df, only_loss=True):
  pred = evaluate(encoder, attn_decoder, inp.iloc[-120:, :].values)[:61,:]
  # 120일치중 61일까지
  pred = inverse_log(pred.detach().cpu().numpy())
  label =inverse_log(val_df)

  def dacon_rmse(true, pred):  
    w0 = 1095.214646
    w1 = 1086.728535
    w2 = 268.070707
    w3 = 24236.194444

    score = (np.sqrt(np.mean(np.square(true[:,0] - pred[:,0]))) / w0 + 
              np.sqrt(np.mean(np.square(true[:,1] - pred[:,1]))) / w1 + 
              np.sqrt(np.mean(np.square(true[:,2] - pred[:,2]))) / w2 + 
              np.sqrt(np.mean(np.square(true[:,3] - pred[:,3]))) / w3  )
    return score

  if only_loss:
    return dacon_rmse(label.iloc[:,:4].values, pred)

  for idx, key in enumerate(val_df.columns):
    plt.plot(figsize=(20,10))
    plt.plot(label.index,pred[:,idx])
    plt.plot(label[key])
    plt.legend(['predict', 'label'])
    plt.show()
    
  loss = dacon_rmse(label.iloc[:,:4].values, pred)
  print('RMSE : ' + str(loss))

In [36]:
def trainIters(encoder, decoder, n_iters, dataset, print_every=100, plot_every=100, learning_rate=0.01, val=True):
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    adam_1 =  torch.optim.Adam(encoder.parameters(), lr=learning_rate)
    adam_2 =  torch.optim.Adam(decoder.parameters(), lr=learning_rate)
 
    encoder_optimizer = SWA(adam_1, swa_start=10, swa_freq=5, swa_lr=learning_rate/2)
    decoder_optimizer = SWA(adam_2, swa_start=10, swa_freq=5, swa_lr=learning_rate/2)
    criterion = nn.MSELoss()

    inp = train_df.iloc[:-61,1:]
    label = train_df.iloc[-61:,1:5]
    best_val = float('inf')

    for iter in range(1, n_iters + 1):
        input_tensor, target_tensor = dataset.__getitem__(0)

        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            if val:
              val_loss =  make_val_plot(encoder, decoder, inp, label)
              print('%s (%d %d%%) (%.4f %.4f)' % (timeSince(start, iter / n_iters),
                                          iter, iter / n_iters * 100, print_loss_avg,val_loss))
            else:
              print('%s (%d %d%%) (%.4f)' % (timeSince(start, iter / n_iters),
                                          iter, iter / n_iters * 100))
            
            if val_loss < best_val:
              best = (encoder.state_dict() ,decoder.state_dict())

        if iter % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0
    showPlot(plot_losses)

    #다끝나면 구글 드라이브에 저장
    torch.save(best[0], './drive/MyDrive/데이콘/encoder.ckpt')
    torch.save(best[1],  './drive/MyDrive/데이콘/decoder.ckpt')

In [None]:
input_size = 49
hidden_size = 256
output_size = 4
dropout_p = 0.8
dataset = DatasetWindows(train_df.iloc[:-61,:])

encoder = EncoderRNN(input_size, hidden_size).cuda()
attn_decoder = AttnDecoderRNN(hidden_size, output_size, dropout_p=dropout_p).cuda()

trainIters(encoder, attn_decoder, 10000, dataset, learning_rate=0.001, print_every=500)

  return F.mse_loss(input, target, reduction=self.reduction)


2m 17s (- 43m 37s) (500 5%) (1.5985 7.0720)
4m 33s (- 41m 3s) (1000 10%) (0.4640 4.1117)
6m 52s (- 38m 57s) (1500 15%) (0.4070 3.8042)
9m 8s (- 36m 34s) (2000 20%) (0.3457 3.8944)
11m 28s (- 34m 26s) (2500 25%) (0.3379 4.0281)
13m 47s (- 32m 10s) (3000 30%) (0.3184 3.6857)


In [None]:
input_size = 49
hidden_size = 256
output_size = 4
dropout_p = 0.1
dataset = DatasetWindows(train_df)

encoder = EncoderRNN(input_size, hidden_size).cuda()
attn_decoder = AttnDecoderRNN(hidden_size, output_size, dropout_p=dropout_p).cuda()

trainIters(encoder, attn_decoder, 3000, dataset, print_every=500, learning_rate=0.001,val=False)