## Import

In [119]:
import random
import os
import pandas as pd
import numpy as np

from tqdm.auto import tqdm
from sklearn.preprocessing import LabelEncoder

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

In [120]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [121]:
torch.cuda.get_device_name(0)

'NVIDIA GeForce RTX 3090'

In [122]:
torch.cuda.is_available()

True

In [123]:
torch.__version__

'2.0.1+cu118'

In [124]:
# torch.cuda.empty_cache()

## Hyperparameter Setting

In [161]:
# Global hyperparameters shared by the data-building, model and training cells
CFG = {
    'TRAIN_WINDOW_SIZE':90, # train on 90 days of history (the initial setting was also 90 days)
    'PREDICT_SIZE':21, # forecast 21 days
    'EPOCHS':50, # upper bound on training epochs
    'LEARNING_RATE':0.0001, # initial learning rate handed to the optimizer
    'BATCH_SIZE':4096, # batch size used by every DataLoader
    'SEED':41 # seed passed to seed_everything
}

In [162]:
def seed_everything(seed):
    '''
    Seed every random number generator used in this notebook.

    seed : integer seed applied to Python's `random`, PYTHONHASHSEED,
           NumPy, and PyTorch (CPU and all CUDA devices)
    '''
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick a different algorithm on each run, which
    # defeats the deterministic flag above, so it has to stay off
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED']) # Seed 고정

### 데이터 불러오기

In [163]:
# Load the training table and drop the 'ID', '제품', '대분류', '중분류' and '소분류' columns.
# NOTE(review): this is an absolute local Windows path; point it at your own data directory before running elsewhere.
train_data = pd.read_csv('E:/LG/LG_data/LG/train.csv').drop(columns=['ID',"제품",'대분류',"중분류","소분류"])

In [164]:
train_data.head()

Unnamed: 0,브랜드,쇼핑몰,2022-01-01,2022-01-02,2022-01-03,2022-01-04,2022-01-05,2022-01-06,2022-01-07,2022-01-08,...,2023-04-15,2023-04-16,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21,2023-04-22,2023-04-23,2023-04-24
0,B002-00001,S001-00001,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,B002-00002,S001-00001,0,0,0,0,0,0,0,0,...,2,0,2,0,2,2,1,0,0,0
2,B002-00002,S001-00001,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,B002-00002,S001-00001,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,B002-00002,S001-00010,0,0,0,0,0,0,0,0,...,10,6,4,4,4,0,0,0,0,8


# EDA

In [165]:
train_data.head(2)

Unnamed: 0,브랜드,쇼핑몰,2022-01-01,2022-01-02,2022-01-03,2022-01-04,2022-01-05,2022-01-06,2022-01-07,2022-01-08,...,2023-04-15,2023-04-16,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21,2023-04-22,2023-04-23,2023-04-24
0,B002-00001,S001-00001,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,B002-00002,S001-00001,0,0,0,0,0,0,0,0,...,2,0,2,0,2,2,1,0,0,0


In [166]:
train_data.isnull().sum()

브랜드           0
쇼핑몰           0
2022-01-01    0
2022-01-02    0
2022-01-03    0
             ..
2023-04-20    0
2023-04-21    0
2023-04-22    0
2023-04-23    0
2023-04-24    0
Length: 481, dtype: int64

In [167]:
train_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28894 entries, 0 to 28893
Columns: 481 entries, 브랜드 to 2023-04-24
dtypes: int64(479), object(2)
memory usage: 106.0+ MB


In [168]:
train_data.shape

(28894, 481)

In [169]:
train_data.size

13898014

### 데이터 전처리

In [170]:
import pandas as pd

# Small constant added to every row's range so the division below never divides by zero
EPSILON = 1e-8

numeric_cols = train_data.columns[2:]

# Minimum and maximum of each row (axis=1) over the numeric columns
min_values = train_data[numeric_cols].min(axis=1)
max_values = train_data[numeric_cols].max(axis=1)

# Range (max - min) of each row; EPSILON keeps it non-zero for constant rows
ranges = max_values - min_values + EPSILON
# Row-wise min-max scaling
scaled_data = (train_data[numeric_cols].subtract(min_values, axis=0)).div(ranges, axis=0)

# Overwrite the numeric columns with the scaled values.
# NOTE(review): train_data is mutated in place, so re-running this cell rescales
# already-scaled data and overwrites the two dicts below with the scaled min/max.
train_data[numeric_cols] = scaled_data

# Keep each row's min and max as dicts keyed by the row index
scale_min_dict = min_values.to_dict()
scale_max_dict = max_values.to_dict()


In [171]:
train_data.columns

Index(['브랜드', '쇼핑몰', '2022-01-01', '2022-01-02', '2022-01-03', '2022-01-04',
       '2022-01-05', '2022-01-06', '2022-01-07', '2022-01-08',
       ...
       '2023-04-15', '2023-04-16', '2023-04-17', '2023-04-18', '2023-04-19',
       '2023-04-20', '2023-04-21', '2023-04-22', '2023-04-23', '2023-04-24'],
      dtype='object', length=481)

In [172]:
# Replace the categorical columns with integer codes
label_encoder = LabelEncoder()
categorical_columns = ['쇼핑몰', '브랜드']

for column_name in categorical_columns:
    # The single encoder is refit on every column, so after the loop it only
    # holds the classes of the last column processed
    train_data[column_name] = label_encoder.fit_transform(train_data[column_name])

In [187]:
# def make_train_data(data, train_size=CFG['TRAIN_WINDOW_SIZE'], predict_size=CFG['PREDICT_SIZE']):
#     '''
#     학습 기간 블럭, 예측 기간 블럭의 세트로 데이터를 생성
#     data : 일별 판매량
#     train_size : 학습에 활용할 기간
#     predict_size : 추론할 기간
#     '''
#     num_rows = len(data)
#     window_size = train_size + predict_size
    
#     input_data = np.empty((num_rows * (len(data.columns) - window_size + 1), train_size, len(data.iloc[0, :2]) + 1))
#     target_data = np.empty((num_rows * (len(data.columns) - window_size + 1), predict_size))
    
#     for i in tqdm(range(num_rows)):
#         encode_info = np.array(data.iloc[i, :2])
#         sales_data = np.array(data.iloc[i, 2:])
        
#         for j in range(len(sales_data) - window_size + 1):
#             window = sales_data[j : j + window_size]
#             temp_data = np.column_stack((np.tile(encode_info, (train_size, 1)), window[:train_size]))
#             input_data[i * (len(data.columns) - window_size + 1) + j] = temp_data
#             target_data[i * (len(data.columns) - window_size + 1) + j] = window[train_size:]
    
#     return input_data, target_data

In [197]:
def make_train_data(data, train_size=CFG['TRAIN_WINDOW_SIZE'], predict_size=CFG['PREDICT_SIZE'], overlap=21):
    '''
    Build (input window, target window) training pairs with a sliding window.

    data : DataFrame whose first two columns are the encoded categorical
           features and whose remaining columns are the daily sales values
    train_size : number of days fed to the model in each sample
    predict_size : number of days to forecast in each sample
    overlap : step, in days, between the starts of consecutive windows

    Returns (input_data, target_data) with shapes
    (num_rows * num_windows, train_size, 3) and
    (num_rows * num_windows, predict_size).
    Raises ValueError when overlap is not a positive integer.
    '''
    if overlap <= 0:
        raise ValueError('overlap must be a positive number of days')

    num_meta = 2  # leading categorical columns
    num_rows = len(data)
    window_size = train_size + predict_size

    # Count windows over the sales columns only. Counting over all columns
    # (including the two categorical ones) can allocate one slot more than the
    # loop fills, leaving uninitialized np.empty rows in the result.
    num_days = len(data.columns) - num_meta
    num_windows = max((num_days - window_size) // overlap + 1, 0)

    input_data = np.empty((num_rows * num_windows, train_size, num_meta + 1))
    target_data = np.empty((num_rows * num_windows, predict_size))

    # Convert once instead of calling .iloc for every row
    meta = data.iloc[:, :num_meta].to_numpy()
    sales = data.iloc[:, num_meta:].to_numpy()

    for i in tqdm(range(num_rows)):
        for w, start in enumerate(range(0, num_days - window_size + 1, overlap)):
            window = sales[i, start:start + window_size]
            slot = i * num_windows + w
            # The categorical codes are repeated on every time step of the window
            input_data[slot, :, :num_meta] = meta[i]
            input_data[slot, :, num_meta] = window[:train_size]
            target_data[slot] = window[train_size:]

    return input_data, target_data


In [198]:
# def make_predict_data(data, train_size=CFG['TRAIN_WINDOW_SIZE']):
#     '''
#     평가 데이터(Test Dataset)를 추론하기 위한 Input 데이터를 생성
#     data : 일별 판매량
#     train_size : 추론을 위해 필요한 일별 판매량 기간 (= 학습에 활용할 기간)
#     '''
#     num_rows = len(data)
    
#     input_data = np.empty((num_rows, train_size, len(data.iloc[0, :2]) + 1))
    
#     for i in tqdm(range(num_rows)):
#         encode_info = np.array(data.iloc[i, :2])
#         sales_data = np.array(data.iloc[i, -train_size:])
        
#         window = sales_data[-train_size : ]
#         temp_data = np.column_stack((np.tile(encode_info, (train_size, 1)), window[:train_size]))
#         input_data[i] = temp_data
    
#     return input_data

In [199]:
def make_predict_data(data, train_size=CFG['TRAIN_WINDOW_SIZE'], overlap=21):
    '''
    Build the model input used to forecast the evaluation period.

    For every row, the last `train_size` daily values are paired with the
    row's two categorical codes, giving exactly one sample per row.

    data : DataFrame whose first two columns are the encoded categorical
           features and whose remaining columns are the daily sales values
    train_size : number of most recent days used as model input
    overlap : unused. The previous implementation looped over window starts
              with this step but only ever accepted the first window; the
              parameter is kept so existing calls keep working.

    Returns an array of shape (num_rows, train_size, 3).
    Raises ValueError when train_size does not fit inside the sales columns.
    '''
    num_meta = 2  # leading categorical columns
    num_days = data.shape[1] - num_meta
    if train_size < 1 or train_size > num_days:
        # Without this check the negative slice below would silently reach
        # into the categorical columns
        raise ValueError(f'train_size must be between 1 and {num_days}, got {train_size}')

    meta = data.iloc[:, :num_meta].to_numpy()
    recent_sales = data.iloc[:, -train_size:].to_numpy()

    # Repeat the categorical codes on every time step: (rows, train_size, 2)
    tiled_meta = np.repeat(meta[:, np.newaxis, :], train_size, axis=1)
    input_data = np.concatenate((tiled_meta, recent_sales[:, :, np.newaxis]), axis=2)

    return input_data


In [200]:
train_data.tail()

Unnamed: 0,브랜드,쇼핑몰,2022-01-01,2022-01-02,2022-01-03,2022-01-04,2022-01-05,2022-01-06,2022-01-07,2022-01-08,...,2023-04-15,2023-04-16,2023-04-17,2023-04-18,2023-04-19,2023-04-20,2023-04-21,2023-04-22,2023-04-23,2023-04-24
28889,2893,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
28890,2894,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
28891,2894,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00813,0.01626,0.004065,0.0,0.0,0.0,0.01626,0.0,0.00813,0.0
28892,2894,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
28893,2894,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.035714,0.0,0.035714,0.0,0.035714,0.071429,0.0


In [201]:
train_input, train_target = make_train_data(train_data)
test_input = make_predict_data(train_data)

  0%|          | 0/28894 [00:00<?, ?it/s]

100%|██████████| 28894/28894 [00:56<00:00, 512.14it/s]
100%|██████████| 28894/28894 [00:44<00:00, 653.98it/s]


In [202]:
data_len = len(train_input)
val_ratio = 0.2

# Hold out the trailing val_ratio share of the samples for validation
val_len = int(data_len * val_ratio)

# Slice with an explicit split position. Slicing with -val_len is wrong when
# val_len == 0: [-0:] selects everything and [:-0] selects nothing.
split_idx = data_len - val_len

val_input = train_input[split_idx:]
val_target = train_target[split_idx:]

# NOTE: train_input / train_target are overwritten here, so re-running this
# cell without rebuilding them shrinks the training set again
train_input = train_input[:split_idx]
train_target = train_target[:split_idx]


In [203]:
train_input.shape, train_target.shape, val_input.shape, val_target.shape, test_input.shape

((416074, 90, 3), (416074, 21), (104018, 90, 3), (104018, 21), (28894, 90, 3))

### Custom Dataset

In [204]:
class CustomDataset(Dataset):
    '''
    Dataset over an array of inputs and an optional array of targets.

    X : indexable collection of input samples
    Y : indexable collection of targets aligned with X, or None for
        inference-only data
    '''

    def __init__(self, X, Y):
        self.X, self.Y = X, Y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, index):
        # Samples are converted to tensors lazily, one item at a time
        features = torch.Tensor(self.X[index])
        if self.Y is None:
            return features
        return features, torch.Tensor(self.Y[index])

In [205]:
# Training loader: shuffle=True
train_dataset = CustomDataset(train_input, train_target)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True, num_workers=0)

# Validation loader: shuffle=False keeps the sample order fixed
val_dataset = CustomDataset(val_input, val_target)
val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

### 모델 선언

# Layer Normalization

In [206]:
import torch
import torch.nn as nn

class ImprovedModelWithCNN(nn.Module):
    '''
    1D-CNN feature extractor followed by a GRU and a small MLP head.

    input_size : number of features per time step
    hidden_size : channel count of the conv layers and width of the GRU
    num_layers : number of stacked GRU layers
    output_size : number of values predicted per sample
    dropout_prob : dropout probability applied to the GRU outputs
    '''

    def __init__(self, input_size=3, hidden_size=512, num_layers=2, output_size=CFG['PREDICT_SIZE'], dropout_prob=0.2):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Three identical stages of Conv1d -> ReLU -> MaxPool1d; only the
        # first convolution reads the raw input features
        conv_stack = []
        in_channels = input_size
        for _ in range(3):
            conv_stack.append(nn.Conv1d(in_channels, hidden_size, kernel_size=20, padding=1, stride=1))
            conv_stack.append(nn.ReLU())
            conv_stack.append(nn.MaxPool1d(kernel_size=5, stride=1))
            in_channels = hidden_size
        self.cnn = nn.Sequential(*conv_stack)

        self.gru = nn.GRU(hidden_size, hidden_size, num_layers=num_layers, batch_first=True)
        self.ln = nn.LayerNorm(hidden_size)  # layer normalization over the GRU features
        self.dropout = nn.Dropout(dropout_prob)
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 2),
            nn.ReLU(),
            nn.Linear(hidden_size // 2, output_size),
        )
        self.actv = nn.ReLU()

    def init_hidden(self, batch_size, device):
        '''Return an all-zero initial GRU state of shape (num_layers, batch_size, hidden_size).'''
        return torch.zeros((self.num_layers, batch_size, self.hidden_size), device=device)

    def forward(self, x):
        '''
        x : tensor laid out as (batch, time, features)

        Returns the ReLU-activated head output computed from the last time
        step; dimension 1 is squeezed away only when it has size 1.
        '''
        h0 = self.init_hidden(x.size(0), x.device)

        # Conv1d expects (batch, channels, time); swap back for the batch_first GRU
        conv_in = x.permute(0, 2, 1)
        conv_out = self.cnn(conv_in).permute(0, 2, 1)

        seq_out, _ = self.gru(conv_out, h0)
        seq_out = self.ln(self.dropout(seq_out))

        final_step = seq_out[:, -1, :]
        prediction = self.actv(self.fc(final_step))
        return prediction.squeeze(1)


### 모델 학습

In [207]:
def train(model, optimizer, train_loader, val_loader, device, scheduler=None):
    '''
    Train `model` with MSE loss and return it carrying its best weights.

    model : network to optimize (moved to `device`)
    optimizer : optimizer already bound to the model's parameters
    train_loader : iterable of (X, Y) training batches
    val_loader : iterable of (X, Y) validation batches
    device : device the batches and model are moved to
    scheduler : optional scheduler stepped with the validation loss after
                every epoch

    Runs at most CFG['EPOCHS'] epochs and stops early after 7 consecutive
    epochs without a new best validation loss. The weights from the epoch
    with the lowest validation loss are restored before returning; if no
    epoch ever improved on the initial bound, the model is returned with its
    current weights.
    '''
    model.to(device)
    criterion = nn.MSELoss().to(device)
    best_loss = float('inf')
    best_state = None
    early_stopping_counter = 0
    max_early_stopping = 7  # Max number of consecutive epochs without a new best validation loss

    for epoch in range(1, CFG['EPOCHS'] + 1):
        model.train()
        train_loss = []

        for X, Y in tqdm(iter(train_loader)):
            X = X.to(device)
            Y = Y.to(device)

            optimizer.zero_grad()

            output = model(X)
            loss = criterion(output, Y)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        val_loss = validation(model, val_loader, criterion, device)
        print(f'Epoch : [{epoch}] Train Loss : [{np.mean(train_loss):.5f}] Val Loss : [{val_loss:.5f}]')

        if scheduler is not None:
            scheduler.step(val_loss)  # Update learning rate based on scheduler's policy

        if val_loss < best_loss:
            best_loss = val_loss
            # Copy the tensors: keeping a reference to `model` would keep
            # tracking later epochs and end up holding the last weights
            best_state = {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}
            early_stopping_counter = 0
        else:
            early_stopping_counter += 1
            if early_stopping_counter >= max_early_stopping:
                print(f'Early stopping triggered at epoch {epoch}')
                break

    if best_state is not None:
        model.load_state_dict(best_state)

    return model



In [208]:
def validation(model, val_loader, criterion, device):
    '''
    Evaluate `model` on `val_loader` and return the mean per-batch loss.

    The model is switched to eval mode and gradients are disabled while the
    batches are scored with `criterion`.
    '''
    model.eval()
    batch_losses = []

    with torch.no_grad():
        for features, targets in tqdm(iter(val_loader)):
            features, targets = features.to(device), targets.to(device)
            batch_losses.append(criterion(model(features), targets).item())

    return np.mean(batch_losses)

## Run !!

In [209]:
import torch.optim.lr_scheduler as lr_scheduler

model = ImprovedModelWithCNN()
optimizer = torch.optim.RAdam(params=model.parameters(), lr=CFG["LEARNING_RATE"])
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2, factor=0.7, verbose=True)

# train() already loops over CFG["EPOCHS"] epochs (with early stopping), so it
# is called exactly once. Wrapping it in another epoch loop would restart
# training up to EPOCHS times on the same model, optimizer and scheduler.
infer_model = train(model, optimizer, train_loader, val_loader, device, scheduler)

# Save the weights of the model returned by train(), creating the target
# directory first so torch.save does not fail on a fresh checkout
model_save_path = './models/infer_model.pt'
os.makedirs(os.path.dirname(model_save_path), exist_ok=True)
torch.save(infer_model.state_dict(), model_save_path)


100%|██████████| 102/102 [01:17<00:00,  1.31it/s]
100%|██████████| 26/26 [00:07<00:00,  3.61it/s]


Epoch : [1] Train Loss : [0.02915] Val Loss : [0.02527]


100%|██████████| 102/102 [00:59<00:00,  1.73it/s]
100%|██████████| 26/26 [00:06<00:00,  4.23it/s]


Epoch : [2] Train Loss : [0.02635] Val Loss : [0.02465]


100%|██████████| 102/102 [00:59<00:00,  1.72it/s]
100%|██████████| 26/26 [00:06<00:00,  4.31it/s]


Epoch : [3] Train Loss : [0.02522] Val Loss : [0.02441]


100%|██████████| 102/102 [00:58<00:00,  1.75it/s]
100%|██████████| 26/26 [00:05<00:00,  4.38it/s]


Epoch : [4] Train Loss : [0.02500] Val Loss : [0.02452]


100%|██████████| 102/102 [00:58<00:00,  1.75it/s]
100%|██████████| 26/26 [00:06<00:00,  4.02it/s]


Epoch : [5] Train Loss : [0.02489] Val Loss : [0.02443]


 30%|███       | 31/102 [00:17<00:40,  1.74it/s]

## 마지막에 이 모델을 최근 연구에서 나온 NLinear, 그리고 LSTM과 앙상블한다

## 그리고 모델을 pafa???그거 하는 법알자


## 데이터 전처리 과정에서 갑자기 0으로 변하는 값을 보간하자

## 데이터에 새로운 컬럼추가 ex) 주 별로 만들었을걸?? 

## 모델 추론

In [111]:
# Inference loader: targets are None, so each item is just the input tensor;
# shuffle=False keeps the batch order aligned with the order of test_input
test_dataset = CustomDataset(test_input, None)
test_loader = DataLoader(test_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [112]:
def inference(model, test_loader, device):
    '''
    Run `model` over every batch of `test_loader` and collect the outputs.

    model : trained network; it is moved to `device` and left in eval mode
    test_loader : iterable yielding input batches (no targets)
    device : device the model and batches are moved to

    Returns a NumPy array with the batch outputs concatenated along the
    first axis, or an empty array when the loader yields no batches.
    '''
    model.to(device)
    # Dropout must be off for prediction; the model can still be in train
    # mode here, e.g. when training was interrupted in the middle of an epoch
    model.eval()

    predictions = []

    with torch.no_grad():
        for X in tqdm(iter(test_loader)):
            X = X.to(device)

            output = model(X)

            # Move the batch output to the CPU and convert it to a NumPy array
            predictions.append(output.cpu().numpy())

    if not predictions:
        return np.array(predictions)

    return np.concatenate(predictions, axis=0)

In [113]:
pred = inference(infer_model, test_loader, device)

100%|██████████| 8/8 [00:00<00:00,  9.90it/s]


In [114]:
# Undo the row-wise min-max scaling on the predictions.
# NOTE(review): assumes row idx of pred corresponds to key idx of the scale dicts — confirm.
# NOTE(review): the forward scaling divided by (max - min + EPSILON) while this multiplies by (max - min).
# NOTE(review): pred is modified in place, so re-running this cell rescales it again.
for idx in range(len(pred)):
    pred[idx, :] = pred[idx, :] * (scale_max_dict[idx] - scale_min_dict[idx]) + scale_min_dict[idx]
    
# Post-processing: round to 0 decimals and cast to int
pred = np.round(pred, 0).astype(int)

In [115]:
pred.shape

(28894, 21)

## Submission

In [116]:
submit = pd.read_csv('E:/LG/LG_data/LG/sample_submission.csv')
submit.head()

Unnamed: 0,ID,2023-04-25,2023-04-26,2023-04-27,2023-04-28,2023-04-29,2023-04-30,2023-05-01,2023-05-02,2023-05-03,...,2023-05-06,2023-05-07,2023-05-08,2023-05-09,2023-05-10,2023-05-11,2023-05-12,2023-05-13,2023-05-14,2023-05-15
0,SAMPLE_00000,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,SAMPLE_00001,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,SAMPLE_00002,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,SAMPLE_00003,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,SAMPLE_00004,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [117]:
submit.iloc[:,1:] = pred
submit.head()

Unnamed: 0,ID,2023-04-25,2023-04-26,2023-04-27,2023-04-28,2023-04-29,2023-04-30,2023-05-01,2023-05-02,2023-05-03,...,2023-05-06,2023-05-07,2023-05-08,2023-05-09,2023-05-10,2023-05-11,2023-05-12,2023-05-13,2023-05-14,2023-05-15
0,SAMPLE_00000,0,0,0,0,2,3,0,0,0,...,0,0,4,0,3,0,2,0,5,2
1,SAMPLE_00001,0,0,0,0,1,3,0,0,0,...,0,1,2,0,1,0,3,0,5,2
2,SAMPLE_00002,0,0,0,0,5,11,0,0,0,...,0,6,8,0,6,0,12,0,21,8
3,SAMPLE_00003,0,0,0,0,11,22,0,0,0,...,0,12,15,0,13,0,25,0,42,17
4,SAMPLE_00004,0,0,0,0,43,44,0,0,0,...,0,50,91,0,63,0,55,0,136,35


In [118]:
submit.to_csv('E:/LG/LG_data/models/036_ReLU_layer4.csv', index=False)

In [None]:
### 시도할 경우의 수는 임베딩이 안된다 후하 그럼 이제는 다른 것을 적용해보자  //음... 

# 1. 학습률 다르게 해보기 

# 1. 기준 조건에서 150 일로 증가  
# 2. 다른 특징 추가로 넣기
# 3. 