In [2]:
import random
import os
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
from sklearn.preprocessing import LabelEncoder

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import warnings
warnings.filterwarnings('ignore')

In [3]:
CFG = {
    'TRAIN_WINDOW_SIZE':400, # 일치로 학습
    'PREDICT_SIZE':21, # 21일치 예측
    'EPOCHS':20,
    'LEARNING_RATE':1e-4,
    'BATCH_SIZE': 2048,
    'SEED':41
}

In [4]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

In [5]:
train_data = pd.read_csv('/Users/leeshinhaeng/Desktop/open/train.csv').drop(columns=['ID', '제품'])

In [6]:
train_data.head()

Unnamed: 0,대분류,중분류,소분류,브랜드,2022-01-01,2022-01-02,2022-01-03,2022-01-04,2022-01-05,2022-01-06,...,2023-03-26,2023-03-27,2023-03-28,2023-03-29,2023-03-30,2023-03-31,2023-04-01,2023-04-02,2023-04-03,2023-04-04
0,B002-C001-0002,B002-C002-0007,B002-C003-0038,B002-00001,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-00002,0,0,0,0,0,0,...,0,0,0,1,3,2,0,0,2,0
2,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-00002,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,B002-C001-0003,B002-C002-0008,B002-C003-0044,B002-00002,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,B002-C001-0001,B002-C002-0001,B002-C003-0003,B002-00003,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
# Data Scaling
scale_max_dict = {}
scale_min_dict = {}

for idx in tqdm(range(len(train_data))):
    # 4번째 열부터 마지막 열까지 추출
    maxi = np.max(train_data.iloc[idx,4:])
    mini = np.min(train_data.iloc[idx,4:])
    # 최대, 최소 계산 (동일한 경우 0, 아니면 MinMax)
    if maxi == mini :
        train_data.iloc[idx,4:] = 0
    else:
        train_data.iloc[idx,4:] = (train_data.iloc[idx,4:] - mini) / (maxi - mini)
    # 각 행의 최대, 최소 저장
    scale_max_dict[idx] = maxi
    scale_min_dict[idx] = mini

  0%|          | 0/15890 [00:00<?, ?it/s]

In [8]:
# Label Encoding
label_encoder = LabelEncoder()
categorical_columns = ['대분류', '중분류', '소분류', '브랜드']

# 범주형 칼럼별로 레이블 인코딩
for col in categorical_columns:
    label_encoder.fit(train_data[col])
    train_data[col] = label_encoder.transform(train_data[col])

In [9]:
def make_train_data(data, train_size=CFG['TRAIN_WINDOW_SIZE'], predict_size=CFG['PREDICT_SIZE']):
    '''
    학습 기간 블럭, 예측 기간 블럭의 세트로 데이터를 생성
    data : 일별 판매량
    train_size : 학습에 활용할 기간
    predict_size : 추론할 기간
    '''
    # 데이터 행 수 저장
    num_rows = len(data)
    # 전체 시간 간격 (학습 기간 + 예측 기간)
    window_size = train_size + predict_size
    
    # 빈 배열로 초기화
    input_data = np.empty((num_rows * (len(data.columns) - window_size + 1), train_size, len(data.iloc[0, :4]) + 1))
    target_data = np.empty((num_rows * (len(data.columns) - window_size + 1), predict_size))
    
    # 학습 데이터, 레이블 데이터 학습
    for i in tqdm(range(num_rows)):
        # 정보를 나타내는 4개 열을 encode_info로
        encode_info = np.array(data.iloc[i, :4])
        sales_data = np.array(data.iloc[i, 4:])
        # 학습 기간 블럭 : temp_data에 저장, input_data에 추가
        # 예측 기간 블럭 : windorw[train_size:]로 추출, target_data에 추가
        for j in range(len(sales_data) - window_size + 1):
            window = sales_data[j : j + window_size]
            temp_data = np.column_stack((np.tile(encode_info, (train_size, 1)), window[:train_size]))
            input_data[i * (len(data.columns) - window_size + 1) + j] = temp_data
            target_data[i * (len(data.columns) - window_size + 1) + j] = window[train_size:]
    
    return input_data, target_data

In [10]:
def make_predict_data(data, train_size=CFG['TRAIN_WINDOW_SIZE']):
    '''
    평가 데이터(Test Dataset)를 추론하기 위한 Input 데이터를 생성
    data : 일별 판매량
    train_size : 추론을 위해 필요한 일별 판매량 기간 (= 학습에 활용할 기간)
    '''
    num_rows = len(data)
    
    # 초기 빈 배열로 초기화
    input_data = np.empty((num_rows, train_size, len(data.iloc[0, :4]) + 1))
    
    for i in tqdm(range(num_rows)):
        # 정보를 나타내는 4개 열을 encode_info로
        encode_info = np.array(data.iloc[i, :4])
        sales_data = np.array(data.iloc[i, -train_size:])
        
        window = sales_data[-train_size : ]
        temp_data = np.column_stack((np.tile(encode_info, (train_size, 1)), window[:train_size]))
        input_data[i] = temp_data
    
    return input_data

In [11]:
train_input, train_target = make_train_data(train_data)
test_input = make_predict_data(train_data)

  0%|          | 0/15890 [00:00<?, ?it/s]

  0%|          | 0/15890 [00:00<?, ?it/s]

In [12]:
# Train / Validation Split (70,15,15)
data_len = len(train_input)
val_input = train_input[-int(data_len*0.15):]
val_target = train_target[-int(data_len*0.15):]
train_input = train_input[:-int(data_len*0.15)]
train_target = train_target[:-int(data_len*0.15)]

In [13]:
train_input.shape, train_target.shape, val_input.shape, val_target.shape, test_input.shape

((1053507, 365, 5),
 (1053507, 21),
 (185913, 365, 5),
 (185913, 21),
 (15890, 365, 5))

In [14]:
len(train_input[0])

365

In [15]:
train_target[0]

array([0.        , 0.        , 0.41666667, 0.41666667, 0.41666667,
       0.5       , 0.5       , 0.16666667, 0.16666667, 0.58333333,
       0.16666667, 0.33333333, 0.5       , 0.        , 0.33333333,
       0.        , 0.08333333, 0.16666667, 0.16666667, 0.5       ,
       0.16666667])

In [16]:
# 주어진 데이터셋을 X와 Y로 나누어 입력과 타겟을 각각 저장
# 학습과 평가를 위한 데이터셋을 편리하게 관리
class CustomDataset(Dataset):
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y
        
    def __getitem__(self, index):
        if self.Y is not None:
            return torch.Tensor(self.X[index]), torch.Tensor(self.Y[index])
        return torch.Tensor(self.X[index])
    
    def __len__(self):
        return len(self.X)

In [17]:
train_dataset = CustomDataset(train_input, train_target)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

val_dataset = CustomDataset(val_input, val_target)
val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [116]:
# # 1
# class Encoder(nn.Module):
#     def __init__(self, input_size, hidden_size):
#         super(Encoder, self).__init__()
        
#         self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size)
#         self.relu = nn.ReLU()
        
#     def forward(self, x):
#         output, hidden = self.lstm(x)
# #         output = self.fc(output[:, -1, :]) # 완전 연결층 적용
# #         output = self.relu(output)  # ReLU 적용
#         print('Encoder execute')
#         return output, hidden

# class Decoder(nn.Module):
#     def __init__(self, input_size, output_size, hidden_size):
#         super(Decoder, self).__init__()
#         self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size)
#         self.fc = nn.Linear(hidden_size, output_size)  # 변경된 부분: output_size로 출력 크기 조정
#         self.relu = nn.ReLU()
        
#     def forward(self, x, hidden):
#         output, hidden = self.lstm(x, hidden)
#         # output을 변환하여 최종적인 출력 크기 조정
#         prediction = self.fc(output[:, -1, :])  # 마지막 스텝의 출력만 사용하여 (batch_size, output_size) 크기로 변환
#         prediction = self.relu(prediction)
#         print('Decoder execute')
#         return prediction, hidden

# class LSTMAutoEncoder(nn.Module):
#     def __init__(self, input_dim, latent_dim, window_size, output_size):
#         super(LSTMAutoEncoder, self).__init__()
#         self.latent_dim = latent_dim
#         self.input_dim = input_dim
#         self.window_size = window_size
#         self.output_size = output_size
#         self.encoder = Encoder(input_size=input_dim, hidden_size=latent_dim)
#         self.decoder = Decoder(input_size=input_dim, output_size=output_size, hidden_size=latent_dim)

#     def forward(self, x):
#         encoded, hidden = self.encoder(x)
#         reconstructed, _ = self.decoder(x, hidden)
#         print('LSTMAuto ev')
#         return reconstructed  # 여기서 두 번째 반환값인 src를 반환하지 않고, 첫 번째 값인 reconstruct_output만 반환합니다.

#     def loss_function(self, *args, **kwargs) -> dict:
#         recons = args[0]
#         input = args[1]

#         # MSE loss(Mean squared Error)
#         loss = F.mse_loss(recons, input)
#         return loss

In [395]:
class Encoder(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, dropout):
        super(Encoder, self).__init__()
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, dropout=dropout)
        self.fc = nn.Linear(hidden_size, hidden_size)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        output, (hidden, cell) = self.lstm(x)
        hidden = self.fc(hidden)
        hidden = self.relu(hidden)
        print('Encoder execute')
        return output, (hidden, cell)


class Decoder(nn.Module):
    def __init__(self, input_size, output_size, hidden_size, num_layers, dropout):
        super(Decoder, self).__init__()
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, dropout=dropout)
        self.fc = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
        
    def forward(self, x, hidden):
        output, (hidden, cell) = self.lstm(x, hidden)
        prediction = self.fc(output[:, -1, :])
        prediction = self.relu(prediction)
        print('Decoder execute')
        return prediction, (hidden, cell)

class LSTMAutoEncoder(nn.Module):
    def __init__(self, input_dim, latent_dim, window_size, output_size, hidden_size=128, num_layers=2, dropout=0.1):
        super(LSTMAutoEncoder, self).__init__()
        self.latent_dim = latent_dim
        self.input_dim = input_dim
        self.window_size = window_size
        self.output_size = output_size
        self.encoder = Encoder(input_size=input_dim, hidden_size=hidden_size, num_layers=num_layers, dropout=dropout)
        self.decoder = Decoder(input_size=hidden_size, output_size=output_size, hidden_size=hidden_size, num_layers=num_layers, dropout=dropout)
        
        
    def forward(self, x):
        encoder_output, (hidden, cell) = self.encoder(x)
        reconstructed, _ = self.decoder(encoder_output, (hidden, cell))
        print('lstm-ae execute')
        return reconstructed

    def loss_function(self, *args, **kwargs) -> dict:
        recons = args[0]
        input = args[1]

        # MSE loss(Mean squared Error)
        loss = F.mse_loss(recons, input)
        return loss


In [396]:
# 모델 생성 및 학습 설정
input_dim = train_input.shape[-1]
output_size = train_target.shape[-1]
latent_dim = 128
window_size = train_input.shape[1]
num_layers = 2

In [397]:
for sample in train_loader:
    print(sample[0].shape)
    print(sample[1].shape)
    break

torch.Size([256, 365, 5])
torch.Size([256, 21])


In [398]:
model = LSTMAutoEncoder(input_dim=input_dim, latent_dim=latent_dim, window_size=window_size, output_size=output_size)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
optimizer = optim.Adam(model.parameters(), lr=CFG["LEARNING_RATE"])
criterion = nn.MSELoss()

In [399]:
model

LSTMAutoEncoder(
  (encoder): Encoder(
    (lstm): LSTM(5, 128, num_layers=2, dropout=0.1)
    (fc): Linear(in_features=128, out_features=128, bias=True)
    (relu): ReLU()
  )
  (decoder): Decoder(
    (lstm): LSTM(128, 128, num_layers=2, dropout=0.1)
    (fc): Linear(in_features=128, out_features=21, bias=True)
    (relu): ReLU()
  )
)

In [400]:
# 학습
model.to(device)
num_epochs = CFG['EPOCHS']

for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0

    for X, Y in tqdm(train_loader):
        X = X.to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        output = model(X)
        print(output.shape, Y.shape)
        loss = criterion(output, Y)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # 학습 손실 출력
    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss/len(train_loader):.4f}")

  0%|          | 0/4116 [00:00<?, ?it/s]

Encoder execute
Decoder execute
lstm-ae execute
torch.Size([256, 21]) torch.Size([256, 21])
Encoder execute
Decoder execute
lstm-ae execute
torch.Size([256, 21]) torch.Size([256, 21])


KeyboardInterrupt: 

In [46]:
# 학습
model.to(device)
num_epochs = CFG['EPOCHS']

for epoch in range(num_epochs): 
    # 검증
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X, Y in tqdm(val_loader):
            X = X.to(device)
            Y = Y.to(device)

            output = model(X)
            loss = criterion(output, Y)

            val_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Validation Loss: {val_loss/len(val_loader):.4f}")

  0%|          | 0/727 [00:00<?, ?it/s]

Encoder execute


RuntimeError: input.size(-1) must be equal to input_size. Expected 5, got 256

In [37]:
# 모델 추론
test_dataset = CustomDataset(test_input, None)
test_loader = DataLoader(test_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [42]:
def inference(model, test_loader):
    predictions = []
    
    with torch.no_grad():
        # gradient 계산하지 않도록
        for X in tqdm(iter(test_loader)):
            # 입력 데이터 전달
            output = model(X)
            
            # 모델 출력인 output을 CPU로 이동하고 numpy 배열로 변환
            output = output.cpu().numpy()
            
            predictions.extend(output)
    
    return np.array(predictions)

In [54]:
# 학습이 완료된 모델을 평가 모드로 전환
model.eval()

# test_loader를 사용하여 예측 수행
predictions = inference(model, test_loader)

  0%|          | 0/4 [00:00<?, ?it/s]

Encoder execute
Decoder execute
LSTMAuto ev
Encoder execute
Decoder execute
LSTMAuto ev
Encoder execute
Decoder execute
LSTMAuto ev
Encoder execute
Decoder execute
LSTMAuto ev


In [55]:
predictions[0]

array([ 0.0003418 , -0.2426741 , -0.02012178,  0.09120721,  0.18221998,
       -0.03150156,  0.08831762, -0.02035991,  0.06683018,  0.06009536,
        0.11842229, -0.07690976,  0.02734233,  0.00249619,  0.01447545,
       -0.01075963, -0.11198274, -0.02678966,  0.02268494,  0.05320893,
       -0.13502839], dtype=float32)

In [56]:
for idx in range(len(predictions)):
    predictions[idx, :] = predictions[idx, :] * (scale_max_dict[idx] - scale_min_dict[idx]) + scale_min_dict[idx]
    
# 결과 후처리
predictions = np.round(predictions, 0).astype(int)

In [57]:
len(predictions[0])

21

In [58]:
predictions.shape

(15890, 21)

In [59]:
submit = pd.read_csv('/Users/leeshinhaeng/Desktop/open/sample_submission.csv')
submit.head()

Unnamed: 0,ID,2023-04-05,2023-04-06,2023-04-07,2023-04-08,2023-04-09,2023-04-10,2023-04-11,2023-04-12,2023-04-13,...,2023-04-16,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21,2023-04-22,2023-04-23,2023-04-24,2023-04-25
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [60]:
submit.iloc[:,1:] = predictions
submit.head()

Unnamed: 0,ID,2023-04-05,2023-04-06,2023-04-07,2023-04-08,2023-04-09,2023-04-10,2023-04-11,2023-04-12,2023-04-13,...,2023-04-16,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21,2023-04-22,2023-04-23,2023-04-24,2023-04-25
0,0,0,-3,0,1,2,0,1,0,1,...,-1,0,0,0,0,-1,0,0,1,-2
1,1,1,-1,-1,1,2,-1,1,0,1,...,-1,0,0,0,0,0,-1,0,0,-1
2,2,4,-2,-4,2,6,-7,5,0,2,...,-5,-1,0,-1,-3,2,-3,3,2,-5
3,3,8,-3,-8,4,12,-14,11,0,4,...,-9,-2,1,-3,-5,4,-6,5,4,-10
4,4,4,2,0,0,4,-4,2,2,0,...,1,0,0,-2,-2,0,-3,2,-2,-4


In [61]:
submit.to_csv('rawLSTMAE_submit.csv', index=False, encoding='utf-8-sig')