In [13]:
pip install --upgrade torch torchvision torchaudio

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [14]:
import torch
print(torch.__version__)

2.0.1+cu117


In [1]:
# from google.colab import drive
# drive.mount('/content/drive')

In [6]:
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
from sklearn.preprocessing import LabelEncoder

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, GradScaler

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# [Hyperparameter Setting]
CFG = {
    'TRAIN_WINDOW_SIZE': 90,  # 90일치로 학습
    'PREDICT_SIZE': 21,  # 21일치 예측
    'EPOCHS': 10,
    'LEARNING_RATE': 1e-4,
    'BATCH_SIZE': 512,
    'SEED': 41,
    'HIDDEN_SIZE': 512,  # Hidden size of GRU model
    'STACKED_GRU_LAYERS': 2  # Number of stacked GRU layers
}


# [Import data]
train_data = pd.read_csv('data/train.csv').drop(columns=['ID', '제품'])

# Data Scaling
# 숫자형 변수들의 min-max scaling을 수행하는 코드입니다.
numeric_cols = train_data.columns[4:]
# 칵 column의 min 및 max 계산
min_values = train_data[numeric_cols].min(axis=1)
max_values = train_data[numeric_cols].max(axis=1)
# 각 행의 범위(max-min)를 계산하고, 범위가 0인 경우 1로 대체
ranges = max_values - min_values
ranges[ranges == 0] = 1
# min-max scaling 수행
train_data[numeric_cols] = (train_data[numeric_cols].subtract(min_values, axis=0)).div(ranges, axis=0)
# max와 min 값을 dictionary 형태로 저장
scale_min_dict = min_values.to_dict()
scale_max_dict = max_values.to_dict()

label_encoder = LabelEncoder()
categorical_columns = ['대분류', '중분류', '소분류', '브랜드']

for col in categorical_columns:
   label_encoder.fit(train_data[col])
   train_data[col] = label_encoder.transform(train_data[col])

In [7]:
# holiday effect df만들기

from datetime import datetime, timedelta

start_date = datetime(2022, 1, 1)
end_date = datetime(2023, 4, 4)

date_list = []
day_type_list = []

current_date = start_date
while current_date <= end_date:
    date_list.append(current_date.strftime('%Y-%m-%d'))
    day_type_list.append(1 if current_date.weekday() in [5, 6] else 0)
    current_date += timedelta(days=1)

# Create a DataFrame
date_df = pd.DataFrame({'Date': date_list, 'Day_Type': day_type_list})

print(date_df.tail())



holiday = train_data.copy()
holiday.drop(['브랜드', '대분류','중분류','소분류'], axis = 1, inplace = True)
i = 0
for date in holiday:
    holiday[date] = day_type_list[i]
    i +=1
    if i  > len(day_type_list):
        break

combined_df = pd.merge(train_data, holiday,left_index = True, right_index = True, how = 'left')
combined_df
train_data = combined_df.copy()
combinded_df = None

           Date  Day_Type
454  2023-03-31         0
455  2023-04-01         1
456  2023-04-02         1
457  2023-04-03         0
458  2023-04-04         0


In [8]:
train_data

Unnamed: 0,대분류,중분류,소분류,브랜드,2022-01-01_x,2022-01-02_x,2022-01-03_x,2022-01-04_x,2022-01-05_x,2022-01-06_x,...,2023-03-26_y,2023-03-27_y,2023-03-28_y,2023-03-29_y,2023-03-30_y,2023-03-31_y,2023-04-01_y,2023-04-02_y,2023-04-03_y,2023-04-04_y
0,1,6,37,0,0.0,0.0,0.0,0.0,0.0,0.0,...,1,0,0,0,0,0,1,1,0,0
1,2,7,43,1,0.0,0.0,0.0,0.0,0.0,0.0,...,1,0,0,0,0,0,1,1,0,0
2,2,7,43,1,0.0,0.0,0.0,0.0,0.0,0.0,...,1,0,0,0,0,0,1,1,0,0
3,2,7,43,1,0.0,0.0,0.0,0.0,0.0,0.0,...,1,0,0,0,0,0,1,1,0,0
4,0,0,2,2,0.0,0.0,0.0,0.0,0.0,0.0,...,1,0,0,0,0,0,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15885,2,7,41,3169,0.0,0.0,0.0,0.0,0.0,0.0,...,1,0,0,0,0,0,1,1,0,0
15886,2,7,43,3169,0.0,0.0,0.0,0.0,0.0,0.0,...,1,0,0,0,0,0,1,1,0,0
15887,2,7,43,3169,0.0,0.0,0.0,0.0,0.0,0.0,...,1,0,0,0,0,0,1,1,0,0
15888,2,7,43,3169,0.0,0.0,0.0,0.0,0.0,0.0,...,1,0,0,0,0,0,1,1,0,0


In [9]:
def make_train_data(data, train_size=CFG['TRAIN_WINDOW_SIZE'], predict_size=CFG['PREDICT_SIZE']):
    '''
    학습 기간 블럭, 예측 기간 블럭의 세트로 데이터를 생성
    data : 일별 판매량, 브랜드, 그리고 브랜드 키워드 카운트 정보가 있는 데이터 프레임
    train_size : 학습에 활용할 기간
    predict_size : 추론할 기간
    '''
    num_rows = len(data)
    holiday_start_index = data.columns.get_loc('2022-01-01_y')  # Find the index of the first '2022-01-01_y' column
    window_size = train_size + predict_size

    input_data = np.empty((num_rows * (459 - window_size + 1), train_size, 6)) # 6 features: 대분류, 중분류, 소분류, 브랜드, sales. holiday
    target_data = np.empty((num_rows * (459 - window_size + 1), predict_size))



    for i in tqdm(range(num_rows)):
        encode_info = np.array(data.iloc[i, :4])  # Extract encoding info ('대분류', '중분류', '소분류', '브랜드')
        sales_data = np.array(data.iloc[i, 4: holiday_start_index-1])
        holiday_data = np.array(data.iloc[i, holiday_start_index:])


        for j in range(len(sales_data) - window_size + 1):
            window = sales_data[j : j + window_size]
            temp_data = np.column_stack((np.tile(encode_info, (train_size, 1)), window[:train_size], holiday_data[j : j + train_size]))
            input_data[i * (459 - window_size + 1) + j] = temp_data
            target_data[i * (459 - window_size + 1) + j] = window[train_size:]


    return input_data, target_data

# 확인 완료

def make_predict_data(data, train_size=CFG['TRAIN_WINDOW_SIZE']):
    '''
    평가 데이터(Test Dataset)를 추론하기 위한 Input 데이터를 생성
    data : 일별 판매량, 브랜드, 그리고 holiday 정보가 있는 데이터 프레임
    train_size : 추론을 위해 필요한 일별 판매량 기간 (= 학습에 활용할 기간)
    '''
    num_rows = len(data)
    holiday_start_index = data.columns.get_loc('2022-01-01_y')  # Find the index of the first '2022-01-01_y' column

    input_data = np.empty((num_rows, train_size, 6))


    for i in tqdm(range(num_rows)):
        encode_info = np.array(data.iloc[i, :4])  # Extract encoding info ('대분류', '중분류', '소분류', '브랜드')
        sales_data = np.array(data.iloc[i, holiday_start_index-91:holiday_start_index-1])  # Extract daily sales data for the past 'train_size' days
        holiday_data = np.array(data.iloc[i, -train_size:])  # Extract holiday data for the past 'train_size' days

        window = sales_data[-train_size : ]
        temp_data = np.column_stack((np.tile(encode_info, (train_size, 1)), window[:train_size], holiday_data))
        #temp_data = np.column_stack((encode_info, sales_data, holiday_data))
        input_data[i] = temp_data

    return input_data


train_input, train_target = make_train_data(train_data)
test_input = make_predict_data(train_data)

  0%|          | 0/15890 [00:00<?, ?it/s]

  0%|          | 0/15890 [00:00<?, ?it/s]

In [5]:
#np.savez_compressed('/content/drive/MyDrive/Colab Notebooks/aimers/data/weekendtrain_input', train_input)
#np.savez_compressed('/content/drive/MyDrive/Colab Notebooks/aimers/data/weekendtrain_target', train_target)
#np.savez_compressed('/content/drive/MyDrive/Colab Notebooks/aimers/data/weekendtest_input', test_input)

In [4]:
#train_input = np.load('/content/drive/MyDrive/Colab Notebooks/aimers/data/weekendtrain_input.npz')
#train_target = np.load('/content/drive/MyDrive/Colab Notebooks/aimers/data/weekendtrain_target.npz')
#test_input = np.load('/content/drive/MyDrive/Colab Notebooks/aimers/data/weekendtest_input.npz')


In [5]:
#train_input = train_input['arr_0']
#train_target = train_target['arr_0']
#test_input = test_input['arr_0']

In [16]:
# Train / Validation Split
data_len = len(train_input)
val_input = train_input[-int(data_len * 0.2):]
val_target = train_target[-int(data_len * 0.2):]
train_input = train_input[:-int(data_len * 0.2)]
train_target = train_target[:-int(data_len * 0.2)]

# [Custom Dataset]
class CustomDataset(Dataset):
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y

    def __getitem__(self, index):
        if self.Y is not None:
            return torch.Tensor(self.X[index]), torch.Tensor(self.Y[index])
        return torch.Tensor(self.X[index])

    def __len__(self):
        return len(self.X)

train_dataset = CustomDataset(train_input, train_target)
train_loader = DataLoader(train_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

val_dataset = CustomDataset(val_input, val_target)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [17]:
class AttentionGRU(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(AttentionGRU, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.gru(x, h0)
        return out


class StackedGRUWithAttentionModel(nn.Module):
    def __init__(self, input_size=6, hidden_size=512, output_size=CFG['PREDICT_SIZE'], num_layers=CFG['STACKED_GRU_LAYERS']):
        super(StackedGRUWithAttentionModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.attention_gru = AttentionGRU(input_size, hidden_size, num_layers)
        self.attention_w = nn.Linear(hidden_size, 1)
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 2),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(hidden_size // 2, output_size)
        )
        self.actv = nn.ReLU()

    def forward(self, x):
        batch_size = x.size(0)
        hidden = self.init_hidden(batch_size, x.device)

        # Attention GRU layer
        gru_out = self.attention_gru(x)
        attention_weights = F.softmax(self.attention_w(gru_out), dim=1)
        attention_out = torch.sum(attention_weights * gru_out, dim=1)

        # Fully connected layer
        output = self.actv(self.fc(attention_out))

        return output.squeeze(1)

    def init_hidden(self, batch_size, device):
        # Initialize hidden state
        return torch.zeros(self.num_layers, batch_size, self.hidden_size, device=device)



# [Model Training]
def train_stacked_gru_with_attention(model, optimizer, train_loader, val_loader, device):
    model.to(device)
    criterion = nn.MSELoss().to(device)
    best_loss = 9999999
    best_model = None

    for epoch in range(1, CFG['EPOCHS'] + 1):
        model.train()
        train_loss = []

        for X, Y in tqdm(iter(train_loader)):
            X = X.to(device)
            Y = Y.to(device)

            optimizer.zero_grad()

            output = model(X)
            loss = criterion(output, Y)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        val_loss = validation_stacked_gru_with_attention(model, val_loader, criterion, device)
        print(f'Epoch : [{epoch}] Train Loss : [{np.mean(train_loss):.5f}] Val Loss : [{val_loss:.5f}]')

        if best_loss > val_loss:
            best_loss = val_loss
            best_model = model
            print('Model Saved')

    return best_model

def validation_stacked_gru_with_attention(model, val_loader, criterion, device):
    model.eval()
    val_loss = []

    with torch.no_grad():
        for X, Y in tqdm(iter(val_loader)):
            X = X.to(device)
            Y = Y.to(device)

            output = model(X)
            loss = criterion(output, Y)

            val_loss.append(loss.item())

    return np.mean(val_loss)



In [18]:
# [Run !!]
stacked_gru_with_attention_model = StackedGRUWithAttentionModel()
optimizer = torch.optim.Adam(params=stacked_gru_with_attention_model.parameters(), lr=CFG["LEARNING_RATE"])
trained_stacked_gru_with_attention_model = train_stacked_gru_with_attention(stacked_gru_with_attention_model, optimizer, train_loader, val_loader, device)

# [Model Inference]
test_dataset = CustomDataset(test_input, None)
test_loader = DataLoader(test_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

def inference(model, test_loader, device):
    predictions = []

    with torch.no_grad():
        for X in tqdm(iter(test_loader)):
            X = X.to(device)

            output = model(X)

            # Move model output to CPU and convert to numpy array
            output = output.cpu().numpy()

            predictions.extend(output)

    return np.array(predictions)

stacked_gru_with_attention_pred = inference(trained_stacked_gru_with_attention_model, test_loader, device)

# [Inverse Scaling and Post-processing]
for idx in range(len(stacked_gru_with_attention_pred)):
    stacked_gru_with_attention_pred[idx, :] = stacked_gru_with_attention_pred[idx, :] * (scale_max_dict[idx] - scale_min_dict[idx]) + scale_min_dict[idx]

# Post-processing
stacked_gru_with_attention_pred = np.round(stacked_gru_with_attention_pred, 0).astype(int)

# [Submission]
submit = pd.read_csv('data/sample_submission.csv')
submit.iloc[:, 1:] = stacked_gru_with_attention_pred
submit.to_csv('data/stacked_gru_with_attention_weekend_submit.csv', index=False)


  0%|          | 0/6933 [00:00<?, ?it/s]

  0%|          | 0/1734 [00:00<?, ?it/s]

Epoch : [1] Train Loss : [0.02601] Val Loss : [0.01928]
Model Saved


  0%|          | 0/6933 [00:00<?, ?it/s]

  0%|          | 0/1734 [00:00<?, ?it/s]

Epoch : [2] Train Loss : [0.01800] Val Loss : [0.03344]


  0%|          | 0/6933 [00:00<?, ?it/s]

  0%|          | 0/1734 [00:00<?, ?it/s]

Epoch : [3] Train Loss : [0.01774] Val Loss : [0.01695]
Model Saved


  0%|          | 0/6933 [00:00<?, ?it/s]

  0%|          | 0/1734 [00:00<?, ?it/s]

Epoch : [4] Train Loss : [0.01741] Val Loss : [0.01661]
Model Saved


  0%|          | 0/6933 [00:00<?, ?it/s]

  0%|          | 0/1734 [00:00<?, ?it/s]

Epoch : [5] Train Loss : [0.01725] Val Loss : [0.01651]
Model Saved


  0%|          | 0/6933 [00:00<?, ?it/s]

  0%|          | 0/1734 [00:00<?, ?it/s]

Epoch : [6] Train Loss : [0.01711] Val Loss : [0.01649]
Model Saved


  0%|          | 0/6933 [00:00<?, ?it/s]

  0%|          | 0/1734 [00:00<?, ?it/s]

Epoch : [7] Train Loss : [0.01694] Val Loss : [0.01686]


  0%|          | 0/6933 [00:00<?, ?it/s]

  0%|          | 0/1734 [00:00<?, ?it/s]

Epoch : [8] Train Loss : [0.01678] Val Loss : [0.01795]


  0%|          | 0/6933 [00:00<?, ?it/s]

  0%|          | 0/1734 [00:00<?, ?it/s]

Epoch : [9] Train Loss : [0.01658] Val Loss : [0.01782]


  0%|          | 0/6933 [00:00<?, ?it/s]

  0%|          | 0/1734 [00:00<?, ?it/s]

Epoch : [10] Train Loss : [0.01632] Val Loss : [0.01671]


  0%|          | 0/32 [00:00<?, ?it/s]

In [None]:
## csv 이름 변경 --> _all이라고 해서 브랜드랑 대분류 다 포함한거라는걸 인지

In [8]:

def inference(model, test_loader, device):
    predictions = []

    with torch.no_grad():
        for X in tqdm(iter(test_loader)):
            X = X.to(device)

            output = model(X)

            # Move model output to CPU and convert to numpy array
            output = output.cpu().numpy()

            predictions.extend(output)

    return np.array(predictions)

stacked_gru_with_attention_pred = inference(trained_stacked_gru_with_attention_model, test_loader, device)

# [Inverse Scaling and Post-processing]
for idx in range(len(stacked_gru_with_attention_pred)):
    stacked_gru_with_attention_pred[idx, :] = stacked_gru_with_attention_pred[idx, :] * (scale_max_dict[idx] - scale_min_dict[idx]) + scale_min_dict[idx]

# Post-processing
stacked_gru_with_attention_pred = np.round(stacked_gru_with_attention_pred, 0).astype(int)

# [Submission]
submit = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/aimers/data/sample_submission.csv')
submit.iloc[:, 1:] = stacked_gru_with_attention_pred
submit.to_csv('/content/drive/MyDrive/Colab Notebooks/aimers/submit/stacked_gru_with_attention_submit.csv', index=False)


  0%|          | 0/32 [00:00<?, ?it/s]