## Import

In [1]:
import random
import os
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
from sklearn.preprocessing import LabelEncoder

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split

In [2]:
# Mount Google Drive at /content/drive (Colab-only helper).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Hyperparameter Setting

In [4]:
# Global hyperparameters shared by the dataset, the model and the training loop.
# NOTE: the UNet in this notebook halves the sequence four times with
# MaxPool1d(kernel_size=2) and concatenates skip connections, so
# TRAIN_WINDOW_SIZE should stay a multiple of 16.
CFG = {
    'TRAIN_WINDOW_SIZE':80, # train on 80 days of history
    'PREDICT_SIZE':21, # forecast the next 21 days
    'EPOCHS':5,
    'LEARNING_RATE':1e-4,
    'BATCH_SIZE':20,
    'SEED':41
}

In [5]:
def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices), exports ``PYTHONHASHSEED`` and configures cuDNN for
    reproducible runs.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark mode auto-tunes the convolution algorithm per input shape and
    # may select different algorithms between runs, which defeats the
    # deterministic setting above; it must be off for reproducibility.
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED']) # fix the seed for reproducibility

### 데이터 불러오기

In [6]:
# Load the training table and drop the 'ID' and '제품' columns, which are not
# used downstream. The path is relative, so this assumes the working
# directory is the one containing the mounted `drive` folder.
train_data = pd.read_csv('./drive/MyDrive/train.csv').drop(columns=['ID', '제품'])

### 데이터 전처리

In [7]:
# Earlier row-by-row implementation of the scaling, kept for reference only;
# it is superseded by the vectorised version below.
# # Data Scaling
# scale_max_dict = {}
# scale_min_dict = {}

# for idx in tqdm(range(len(train_data))):
#     maxi = np.max(train_data.iloc[idx,4:])
#     mini = np.min(train_data.iloc[idx,4:])

#     if maxi == mini :
#         train_data.iloc[idx,4:] = 0
#     else:
#         train_data.iloc[idx,4:] = (train_data.iloc[idx,4:] - mini) / (maxi - mini)

#     scale_max_dict[idx] = maxi
#     scale_min_dict[idx] = mini

# Min-max scale the numeric columns (everything after the first 4 columns).
# NOTE: this overwrites train_data in place; re-running the cell without
# reloading the data would rescale already-scaled values and overwrite the
# stored min/max, breaking the inverse scaling later on.
numeric_cols = train_data.columns[4:]
# Per-row (axis=1) minimum and maximum, i.e. one pair per series
min_values = train_data[numeric_cols].min(axis=1)
max_values = train_data[numeric_cols].max(axis=1)
# Per-row range (max - min); a range of 0 is replaced by 1 to avoid dividing by zero
ranges = max_values - min_values
ranges[ranges == 0] = 1
# Apply the min-max scaling row by row
train_data[numeric_cols] = (train_data[numeric_cols].subtract(min_values, axis=0)).div(ranges, axis=0)
# Keep the per-row min and max (keyed by the DataFrame index) for inverse scaling
scale_min_dict = min_values.to_dict()
scale_max_dict = max_values.to_dict()

In [8]:
# Label-encode the categorical columns, replacing each column with its integer codes.
label_encoder = LabelEncoder()
categorical_columns = ['대분류', '중분류', '소분류', '브랜드']

for column in categorical_columns:
    # The same encoder object is re-fitted for every column, so after the loop
    # it only remembers the classes of the last column.
    train_data[column] = label_encoder.fit_transform(train_data[column])

### Custom Dataset

In [9]:
class CustomDataset(Dataset):
    """Sliding-window dataset over a table with one series per row.

    Every row of ``data`` is read as 4 leading encoded columns followed by the
    series values. Each sample pairs ``train_size`` consecutive values (with
    the 4 encoded columns repeated on every time step) with the
    ``predict_size`` values that follow them.

    Args:
        data: DataFrame whose ``.values`` array is indexed as described above.
        train_size: Number of time steps fed to the model.
        predict_size: Number of time steps to predict.
        is_inference: When True, one sample per row is produced from the last
            ``train_size`` values and no target is returned.
    """

    def __init__(self, data, train_size=CFG['TRAIN_WINDOW_SIZE'], predict_size=CFG['PREDICT_SIZE'], is_inference=False):
        self.data = data.values  # work on the underlying numpy array
        self.train_size = train_size
        self.predict_size = predict_size
        self.window_size = self.train_size + self.predict_size
        self.is_inference = is_inference

    def _windows_per_row(self):
        """Number of full windows that fit in one row.

        (columns - 4 encoded columns) - window_size + 1 == columns - window_size - 3
        """
        return self.data.shape[1] - self.window_size - 3

    def _attach_codes(self, codes, history):
        """Return a (train_size, 5) array: the 4 codes on every step plus the history value."""
        repeated_codes = np.tile(codes, (self.train_size, 1))
        return np.column_stack((repeated_codes, history))

    def __len__(self):
        if not self.is_inference:
            return self.data.shape[0] * self._windows_per_row()
        return len(self.data)

    def __getitem__(self, idx):
        if self.is_inference:
            # Inference: the most recent train_size values of the row are the input.
            return self._attach_codes(self.data[idx, :4], self.data[idx, -self.train_size:])

        # Training: decode the flat index into (row, window start offset).
        row, start = divmod(idx, self._windows_per_row())
        series = self.data[row, 4:]
        window = series[start : start + self.window_size]
        input_data = self._attach_codes(self.data[row, :4], window[:self.train_size])
        target_data = window[self.train_size:]
        return input_data, target_data

In [10]:
# Build the sliding-window training dataset
dataset = CustomDataset(train_data)

# Total number of samples in the dataset
total_size = len(dataset)

# Sizes of the 80% / 20% train / validation split
train_size = int(total_size * 0.8)
val_size = total_size - train_size

# Split the dataset at random with random_split
# NOTE(review): windows taken from the same row overlap heavily, so a random
# split places near-identical windows in both subsets and the validation loss
# is likely optimistic; a time-based split may be more informative.
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Create the DataLoader instances
train_loader = DataLoader(train_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False)

### 모델 선언

In [11]:
class DoubleConv(nn.Module):
    """Two consecutive Conv1d -> BatchNorm1d -> ReLU stages.

    Both convolutions use kernel_size=3 with padding=1, so the sequence length
    is unchanged; only the channel count goes from ``in_channel`` to
    ``out_channel``.
    """

    def __init__(self, in_channel, out_channel):
        super().__init__()
        layers = []
        # First stage maps in_channel -> out_channel, second keeps out_channel.
        for stage_in in (in_channel, out_channel):
            layers.extend([
                nn.Conv1d(in_channels=stage_in, out_channels=out_channel,
                          kernel_size=3, padding=1),
                nn.BatchNorm1d(num_features=out_channel),
                nn.ReLU(),
            ])
        self.double_conv = nn.Sequential(*layers)

    def forward(self, x):
        return self.double_conv(x)


class Down(nn.Module):
    """Encoder stage: a DoubleConv followed by max-pooling with kernel size 2.

    ``forward`` returns a pair ``(skip, pooled)``: the DoubleConv output kept
    for the skip connection, and the same tensor after pooling.
    """

    def __init__(self, in_channel, out_channel):
        super().__init__()
        self.DConv = DoubleConv(in_channel, out_channel)
        self.maxpool = nn.MaxPool1d(kernel_size=2)

    def forward(self, x):
        features = self.DConv(x)
        pooled = self.maxpool(features)
        return features, pooled

class Up(nn.Module):
    """Decoder stage: a DoubleConv followed by a stride-2 transposed convolution.

    The DoubleConv maps ``in_channel`` to ``out_channel``; the transposed
    convolution (kernel_size=2, stride=2) then halves the channel count to
    ``out_channel // 2``.
    """

    def __init__(self, in_channel, out_channel):
        super().__init__()
        self.DConv = DoubleConv(in_channel, out_channel)
        self.upsample = nn.ConvTranspose1d(
            in_channels=out_channel,
            out_channels=out_channel // 2,
            kernel_size=2,
            stride=2,
            bias=True,
        )

    def forward(self, x):
        refined = self.DConv(x)
        return self.upsample(refined)

class UNet(nn.Module):
    """1-D U-Net over the time axis followed by a small MLP forecasting head.

    With ``C = out_channel`` as the base width, the encoder goes
    in_channel -> C -> 2C -> 4C -> 8C, the bottleneck reaches 16C, and the
    decoder mirrors the encoder back down to ``in_channel`` channels. The last
    of those channels is then passed through the fully connected head.

    Args:
        in_channel: Number of input channels (features per time step).
        out_channel: Base channel width ``C`` of the first encoder stage.
        batch_size: Accepted for interface compatibility; not used.
        window_size: Input sequence length. It is the input size of the head's
            first Linear layer; it should be a multiple of 16 so that the four
            poolings and upsamplings line up with the skip connections.
        output_size: Number of values predicted per sample.

    ``forward`` expects ``x`` shaped (batch, in_channel, window_size) and
    returns a tensor shaped (batch, output_size).
    """

    def __init__(self, in_channel, out_channel, batch_size=CFG['BATCH_SIZE'], window_size = CFG['TRAIN_WINDOW_SIZE'], output_size=CFG['PREDICT_SIZE']):
        super().__init__()
        base = out_channel

        # Encoder: each stage is followed by a pooling step inside Down.
        self.down1 = Down(in_channel, base)      # in_channel -> C
        self.down2 = Down(base, base * 2)        # C  -> 2C
        self.down3 = Down(base * 2, base * 4)    # 2C -> 4C
        self.down4 = Down(base * 4, base * 8)    # 4C -> 8C

        # Bottleneck and the first upsampling step.
        self.down5 = DoubleConv(base * 8, base * 16)   # 8C -> 16C
        self.unpool = nn.ConvTranspose1d(in_channels=base * 16, out_channels=base * 8,
                                         kernel_size=2, stride=2, bias=True)

        # Decoder: the input of every stage is [skip, upsampled] concatenated
        # on the channel axis, hence twice the width of the matching skip.
        self.up1 = Up(base * 16, base * 8)       # 16C -> 8C (-> 4C after upsampling)
        self.up2 = Up(base * 8, base * 4)        # 8C  -> 4C (-> 2C after upsampling)
        self.up3 = Up(base * 4, base * 2)        # 4C  -> 2C (-> C after upsampling)
        self.up4 = DoubleConv(base * 2, base)    # 2C  -> C
        self.up5 = nn.Conv1d(in_channels=base, out_channels=in_channel, kernel_size=3, padding=1)  # C -> in_channel

        # Forecasting head applied along the time axis.
        head_hidden = window_size // 2
        self.fc = nn.Sequential(
            nn.Linear(window_size, head_hidden),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(head_hidden, output_size),
        )
        self.actv = nn.ReLU()

    def forward(self, x):
        # Encoder pass: remember every pre-pooling feature map for the skips.
        skips = []
        for encoder_stage in (self.down1, self.down2, self.down3, self.down4):
            skip, x = encoder_stage(x)
            skips.append(skip)

        # Bottleneck convolution, then the first upsampling.
        x = self.unpool(self.down5(x))

        # Decoder pass: concatenate the matching skip (deepest first) on the
        # channel axis before each stage to preserve localisation information.
        for decoder_stage in (self.up1, self.up2, self.up3, self.up4):
            x = decoder_stage(torch.cat((skips.pop(), x), dim=1))

        x = self.up5(x)

        # Keep only the last channel and map its time axis to the forecast.
        last_channel = x[:, -1, :]
        output = self.actv(self.fc(last_channel))

        return output.squeeze(1)


# class Encoder(nn.Module):
#     def __init__(self, input_size, hidden_size, sequence_length, batch_size):
#         super(Encoder, self).__init__()
#         self.batch_size = batch_size
#         self.hidden_size = hidden_size
#         self.sequence_length = sequence_length
#         self.encoder1 = nn.LSTM(input_size, hidden_size, batch_first=True)
#         self.encoder2 = nn.LSTM(hidden_size, hidden_size//2, batch_first=True)

#     def init_hidden(self, batch_size, device):
#         # Initialize hidden state and cell state
#         return (torch.zeros(1, batch_size, self.hidden_size, device=device),
#                 torch.zeros(1, batch_size, self.hidden_size, device=device))

#     def forward(self, x):
#         # x shape: (B, TRAIN_WINDOW_SIZE, 5)
#         batch_size = x.size(0)
#         hidden = self.init_hidden(batch_size, x.device)

#         # x = (Batch_size, TRAIN_WINDOW_SIZE, 5)
#         encoded_output1, _ = self.encoder1(x, hidden)
#         _, hidden_state = self.encoder2(encoded_output1)
#         reshaped_tensor = hidden_state[0].reshape(self.batch_size, 1, self.hidden_size//2)
#         expanded_tensor = reshaped_tensor.expand(-1, self.sequence_length, -1)
#         return expanded_tensor


# class Decoder(nn.Module):
#     def __init__(self, input_size, hidden_size, output_size):
#         super(Decoder, self).__init__()
#         self.decoder1 = nn.LSTM(hidden_size//2, hidden_size//2, batch_first=True)
#         self.decoder2 = nn.LSTM(hidden_size//2, hidden_size, batch_first=True)
#         # self.timedistributed = TimeDistributed(nn.Linear(hidden_size*2, input_size))
#         self.fc = nn.Sequential(
#             nn.Linear(hidden_size, hidden_size//2),
#             nn.ReLU(),
#             nn.Dropout(),
#             nn.Linear(hidden_size//2, output_size)
#         )

#         self.actv = nn.ReLU()

#     def forward(self, x):
#         decoded_output1, _ = self.decoder1(x)   # (3, 10, 64)
#         decoded_output2, _ = self.decoder2(decoded_output1) # (3, 10, 128)
#         # distributer_output = self.timedistributed(decoded_output2)  # (3, 10, 2)
#         # Only use the last output sequence
#         last_output = decoded_output2[:, -1, :]

#         # Fully connected layer
#         output = self.actv(self.fc(last_output))

#         return output.squeeze(1)


# class LSTM_Autoencoder(nn.Module):
#     def __init__(self, input_size=5, hidden_size=512, sequence_length=5, batch_size=CFG['BATCH_SIZE'], output_size=CFG['PREDICT_SIZE']):
#         super(LSTM_Autoencoder, self).__init__()
#         self.encoder = Encoder(input_size, hidden_size, sequence_length, batch_size)
#         self.decoder = Decoder(input_size, hidden_size, output_size)

#     def forward(self, x):
#         encoded_output = self.encoder(x)
#         decoded_output = self.decoder(encoded_output)

#         return decoded_output

# class BaseModel(nn.Module):
#     def __init__(self, input_size=5, hidden_size=512, output_size=CFG['PREDICT_SIZE']):
#         super(BaseModel, self).__init__()
#         self.hidden_size = hidden_size
#         self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)

#         self.fc = nn.Sequential(
#             nn.Linear(hidden_size, hidden_size//2),
#             nn.ReLU(),
#             nn.Dropout(),
#             nn.Linear(hidden_size//2, output_size)
#         )

#         self.actv = nn.ReLU()

#     def forward(self, x):
#         # x shape: (B, TRAIN_WINDOW_SIZE, 5)
#         batch_size = x.size(0)
#         hidden = self.init_hidden(batch_size, x.device)

#         # LSTM layer
#         lstm_out, _ = self.lstm(x, hidden)
#         lstm_out2, hidden = self.lstm2(lstm_out)

#         # Only use the last output sequence
#         last_output = lstm_out[:, -1, :]

#         # Fully connected layer
#         output = self.actv(self.fc(last_output))

#         return output.squeeze(1)

#     def init_hidden(self, batch_size, device):
#         # Initialize hidden state and cell state
#         return (torch.zeros(1, batch_size, self.hidden_size, device=device),
#                 torch.zeros(1, batch_size, self.hidden_size, device=device))

# # class BaseModel(nn.Module):
# #     def __init__(self, input_size=5, hidden_size=512, output_size=CFG['PREDICT_SIZE']):
# #         super(BaseModel, self).__init__()
# #         self.hidden_size = hidden_size
# #         self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
# #         self.fc = nn.Sequential(
# #             nn.Linear(hidden_size, hidden_size//2),
# #             nn.ReLU(),
# #             nn.Dropout(),
# #             nn.Linear(hidden_size//2, output_size)
# #         )

# #         self.actv = nn.ReLU()

# #     def forward(self, x):
# #         # x shape: (B, TRAIN_WINDOW_SIZE, 5)
# #         batch_size = x.size(0)
# #         hidden = self.init_hidden(batch_size, x.device)

# #         # LSTM layer
# #         lstm_out, hidden = self.lstm(x, hidden)

# #         # Only use the last output sequence
# #         last_output = lstm_out[:, -1, :]

# #         # Fully connected layer
# #         output = self.actv(self.fc(last_output))

# #         return output.squeeze(1)

# #     def init_hidden(self, batch_size, device):
# #         # Initialize hidden state and cell state
# #         return (torch.zeros(1, batch_size, self.hidden_size, device=device),
# #                 torch.zeros(1, batch_size, self.hidden_size, device=device))

### 모델 학습

In [12]:
def train(model, optimizer, train_loader, val_loader, device):
    """Train ``model`` for ``CFG['EPOCHS']`` epochs and return it with its best weights.

    After every epoch the model is evaluated with ``validation``. Whenever the
    validation loss improves, a copy of the weights is stored; once training
    finishes, those weights are loaded back into ``model``.

    Storing a reference to the model instead (``best_model = model``) does not
    work: it aliases the object that keeps training, so the returned model
    would always carry the weights of the last epoch.

    Args:
        model: The network to train; moved to ``device`` in place.
        optimizer: Optimizer already bound to ``model``'s parameters.
        train_loader: Yields ``(X, Y)`` batches, with ``X`` shaped
            (batch, time, features); axes 1 and 2 are swapped before the
            forward pass.
        val_loader: Same format as ``train_loader``, used for validation.
        device: Device on which the computation runs.

    Returns:
        ``model`` with the weights of the epoch with the lowest validation
        loss, or ``None`` if no epoch produced a comparable (non-NaN) loss.
    """
    model.to(device)
    criterion = nn.MSELoss().to(device)
    best_loss = float('inf')
    best_state = None

    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for X, Y in tqdm(train_loader):
            # (batch, time, features) -> (batch, features, time)
            X = X.transpose(1, 2).float().to(device)
            Y = Y.float().to(device)

            optimizer.zero_grad()

            output = model(X)
            loss = criterion(output, Y)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        val_loss = validation(model, val_loader, criterion, device)
        print(f'Epoch : [{epoch}] Train Loss : [{np.mean(train_loss):.5f}] Val Loss : [{val_loss:.5f}]')

        if best_loss > val_loss:
            best_loss = val_loss
            # state_dict() returns references to the live tensors, so clone
            # them; the CPU copy avoids holding a second set of weights on the GPU.
            best_state = {name: tensor.detach().cpu().clone()
                          for name, tensor in model.state_dict().items()}
            print('Model Saved')

    if best_state is None:
        return None
    model.load_state_dict(best_state)
    return model

In [13]:
def validation(model, val_loader, criterion, device):
    """Return the mean per-batch loss of ``model`` over ``val_loader``.

    The model is switched to eval mode and gradients are disabled. Each batch
    ``X`` has axes 1 and 2 swapped before the forward pass.
    """
    model.eval()
    batch_losses = []

    with torch.no_grad():
        for features, targets in tqdm(iter(val_loader)):
            features = features.swapaxes(1, 2).float().to(device)
            targets = targets.float().to(device)

            batch_loss = criterion(model(features), targets)
            batch_losses.append(batch_loss.item())

    return np.mean(batch_losses)

## Run !!

In [14]:
# 5 input channels: the 4 encoded category columns plus the scaled series value
# (see CustomDataset, which stacks them per time step).
input_size = 5;
# NOTE(review): the UNet base channel width is taken from CFG['BATCH_SIZE'],
# so changing the batch size also changes the architecture. This is probably
# unintended; consider a dedicated config entry.
hidden_size = CFG['BATCH_SIZE'];

model = UNet(input_size, hidden_size)
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
infer_model = train(model, optimizer, train_loader, val_loader, device)

  0%|          | 0/228181 [00:00<?, ?it/s]

  0%|          | 0/57046 [00:00<?, ?it/s]

Epoch : [1] Train Loss : [0.01906] Val Loss : [0.02856]
Model Saved


  0%|          | 0/228181 [00:00<?, ?it/s]

  0%|          | 0/57046 [00:00<?, ?it/s]

Epoch : [2] Train Loss : [0.01765] Val Loss : [0.02001]
Model Saved


  0%|          | 0/228181 [00:00<?, ?it/s]

  0%|          | 0/57046 [00:00<?, ?it/s]

Epoch : [3] Train Loss : [0.01754] Val Loss : [0.01795]
Model Saved


  0%|          | 0/228181 [00:00<?, ?it/s]

  0%|          | 0/57046 [00:00<?, ?it/s]

Epoch : [4] Train Loss : [0.01747] Val Loss : [0.01829]


  0%|          | 0/228181 [00:00<?, ?it/s]

  0%|          | 0/57046 [00:00<?, ?it/s]

Epoch : [5] Train Loss : [0.01741] Val Loss : [0.01691]
Model Saved


## 모델 추론

In [15]:
# Inference dataset: one sample per row, built from the last TRAIN_WINDOW_SIZE
# values of the (already scaled) training table. shuffle=False keeps the row order.
test_dataset = CustomDataset(data=train_data, is_inference=True)
test_loader = DataLoader(test_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False)

In [16]:
def inference(model, test_loader, device):
    """Run ``model`` over ``test_loader`` and collect the predictions.

    The model is moved to ``device`` and switched to eval mode here, so the
    result does not depend on whichever mode the caller left it in. This
    matters because the network contains Dropout and BatchNorm layers.

    Args:
        model: Trained network.
        test_loader: Yields input batches ``X`` shaped (batch, time, features);
            axes 1 and 2 are swapped before the forward pass.
        device: Device on which the computation runs.

    Returns:
        A numpy array with one row of predictions per input sample, in loader order.
    """
    model.to(device)
    model.eval()
    predictions = []

    with torch.no_grad():
        for X in tqdm(test_loader):
            # (batch, time, features) -> (batch, features, time)
            X = X.transpose(1, 2).float().to(device)

            output = model(X)

            # Move the output to the CPU and convert it to a numpy array
            predictions.extend(output.cpu().numpy())

    return np.array(predictions)

In [17]:
# Predict the next PREDICT_SIZE values for every row of the inference dataset.
pred = inference(infer_model, test_loader, device)

  0%|          | 0/795 [00:00<?, ?it/s]

In [18]:
# Undo the per-row min-max scaling of the predictions.
# The scale dicts are keyed by the DataFrame index, while idx is a row position,
# so this assumes train_data kept its default 0..n-1 index.
for idx in range(len(pred)):
    # For rows whose max equals min the factor is 0, so the prediction becomes that constant.
    pred[idx, :] = pred[idx, :] * (scale_max_dict[idx] - scale_min_dict[idx]) + scale_min_dict[idx]

# Post-processing: round to whole numbers and cast to int
pred = np.round(pred, 0).astype(int)

In [23]:
# Sanity check: expect (number of rows, PREDICT_SIZE).
pred.shape

(15890, 21)

## Submission

In [20]:
# Load the sample submission file to use as a template for the output.
submit = pd.read_csv('./drive/MyDrive/sample_submission.csv')
submit.head()

Unnamed: 0,ID,2023-04-05,2023-04-06,2023-04-07,2023-04-08,2023-04-09,2023-04-10,2023-04-11,2023-04-12,2023-04-13,...,2023-04-16,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21,2023-04-22,2023-04-23,2023-04-24,2023-04-25
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [21]:
# Overwrite every column after the first one (the ID column in the preview above)
# with the predictions; the row order must match the order of train_data.
submit.iloc[:,1:] = pred
submit.head()

Unnamed: 0,ID,2023-04-05,2023-04-06,2023-04-07,2023-04-08,2023-04-09,2023-04-10,2023-04-11,2023-04-12,2023-04-13,...,2023-04-16,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21,2023-04-22,2023-04-23,2023-04-24,2023-04-25
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
2,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [22]:
# Write the submission to the current working directory without the index column.
submit.to_csv('./baseline_submit_colab.csv', index=False)