// Browser-console keep-alive snippet (JavaScript, not part of the Python code).
// It periodically clicks the element matching "colab-toolbar-button#connect".
function ClickConnect() {
  console.log("Working");
  const connectButton = document.querySelector("colab-toolbar-button#connect");
  // querySelector returns null when nothing matches; skip the click instead of
  // throwing a TypeError on every tick.
  if (connectButton) {
    connectButton.click();
  }
}
setInterval(ClickConnect, 2400000); // 2,400,000 ms = 40 minutes

# Import

In [None]:
import random
import os
import pandas as pd
import numpy as np
from tqdm.auto import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

# Hyperparameter Setting

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Global experiment configuration; several functions below read it for their default arguments.
CFG = {
    'TRAIN_WINDOW_SIZE': 100,  # number of time steps in one model input window
    'PREDICT_SIZE': 21,  # number of future time steps predicted per sample
    'EPOCHS': 10,  # epochs run by one call to train()
    'LEARNING_RATE': 1e-4,  # learning rate passed to Adam
    'BATCH_SIZE': 1024,  # batch size of every DataLoader
    'HIDDEN_SIZE': 256,  # hidden size of the LSTM/GRU
    'SEED': 41,  # seed passed to seed_everything()
    'data_scaling': 'minmax',  # scaling mode: 'minmax' or 'std'
    'enc_in':7  # number of input features per time step
}

In [None]:
def seed_everything(seed):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices), exports ``PYTHONHASHSEED`` and configures cuDNN for
    deterministic behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode lets cuDNN auto-tune and pick kernels per run, which can
    # differ between runs; it must be off for reproducible results.
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED']) # Fix the random seed

# data

### Load dataset

In [None]:
# Training table; the 'ID' and '제품' columns are dropped right after loading.
train_data = pd.read_csv('train.csv', encoding='utf-8').drop(columns=['ID', '제품'])
# Price table; the first CSV column becomes the index.
price = pd.read_csv('price.csv', encoding='utf-8', index_col=0)
# Brand keyword counts; rows are looked up later through the '브랜드' column.
b_keyword = pd.read_csv('brand_keyword_cnt.csv', encoding='utf-8')

### Data preprocessing

In [None]:
def data_scaling(df, t=CFG['data_scaling']):
    """Scale every row of ``df`` independently.

    Args:
        df: DataFrame whose rows are scaled one by one (statistics are taken
            along ``axis=1``).
        t: ``'minmax'`` for (x - min) / (max - min) or ``'std'`` for
            (x - mean) / std.

    Returns:
        A tuple ``(scaled_df, dict_a, dict_b)``. For ``'minmax'`` the dicts
        map row index -> row minimum and row index -> row maximum; for
        ``'std'`` they map row index -> row mean and row index -> row std.

    Raises:
        ValueError: If ``t`` is neither ``'minmax'`` nor ``'std'``.
    """
    if t == 'minmax':
        min_v, max_v = df.min(axis=1), df.max(axis=1)
        scale_min_dict, scale_max_dict = min_v.to_dict(), max_v.to_dict()

        # Constant rows have a zero range; divide by 1 instead so they scale
        # to 0 rather than NaN.
        value_range = max_v - min_v
        value_range = value_range.mask(value_range == 0, 1)
        data_minmax = df.sub(min_v, axis=0).div(value_range, axis=0)

        return data_minmax, scale_min_dict, scale_max_dict

    if t == 'std':
        mean_v, std_v = df.mean(axis=1), df.std(axis=1)
        scale_mean_dict, scale_std_dict = mean_v.to_dict(), std_v.to_dict()

        # Rows with zero std produce 0/0 = NaN, which is replaced by 0.
        data_std = df.sub(mean_v, axis=0).div(std_v, axis=0).fillna(0)

        return data_std, scale_mean_dict, scale_std_dict

    # Previously an unknown mode fell through and returned None, which only
    # failed later when the caller unpacked the result.
    raise ValueError(f"unknown scaling type {t!r}; expected 'minmax' or 'std'")

In [None]:
def text2float(data, cat="대분류"):
    """Map each label of column ``cat`` to a float derived from its sales.

    The columns from position 4 onward are treated as the sales columns
    (the function slices them with ``iloc[:, 4:]``).

    * For '대분류', '중분류' and '소분류' the per-label sales are summed per
      column, averaged across columns and min-max normalised to [0, 1].
    * For any other column (intended for '브랜드') labels are ranked by their
      total sales (ties broken by label) and mapped to ``rank / n_labels``.

    Args:
        data: DataFrame holding the label column ``cat`` and the sales columns.
        cat: Name of the label column to encode.

    Returns:
        dict mapping every distinct label to a float rounded to 5 decimals.
    """
    # Aggregate over the label column and the sales columns only, so that
    # other label columns never leak into the totals.
    sales_data = pd.concat([data.loc[:, cat], data.iloc[:, 4:]], axis=1)
    totals = sales_data.groupby(cat).sum()

    if cat in ['대분류', '중분류', '소분류']:
        mean_sales = totals.mean(axis=1)
        span = mean_sales.max() - mean_sales.min()
        if span == 0:
            # A single label (or identical means) would give 0/0 = NaN.
            return {label: 0.0 for label in mean_sales.index}
        return ((mean_sales - mean_sales.min()) / span).round(5).to_dict()

    # Brand-style encoding: position in the ascending sales ranking.
    label_totals = totals.sum(axis=1)
    ranked = sorted(label_totals.index, key=lambda label: (label_totals.loc[label], label))
    n_labels = len(ranked)
    return {label: round(1/n_labels*(i+1), 5) for i, label in enumerate(ranked)}

In [None]:
# ===== Remove outliers and missing values =====
# Row by row, compare the non-zero sales entries with the non-zero price entries.
for i in range(len(train_data)):
    x, y = np.array(train_data.iloc[i, 4:]), np.array(price.iloc[i, :])
    x, y = x[x[:] != 0], y[y[:] != 0]
    # Only rows whose number of non-zero sales differs from the number of non-zero prices are touched.
    if x.shape != y.shape:
        t = train_data.iloc[i, 4:]
        s = price.iloc[i, :]
        # Columns where the price is 0 but the sales value is not; those sales are reset to 0.
        tmp = list(set(s[s==0].index)-set(t[t==0].index))
        train_data.loc[i, tmp] = 0.0

# Missing keyword counts are replaced by 0.
b_keyword = b_keyword.fillna(0.0)

In [None]:
# ===== text label -> float =====
# Each label column is replaced in place by the float returned from text2float.
cat_L = text2float(train_data, cat='대분류') # 5 labels
train_data['대분류'] = train_data['대분류'].apply(lambda x: cat_L[x])

cat_M = text2float(train_data, cat='중분류') # 11 labels
train_data['중분류'] = train_data['중분류'].apply(lambda x: cat_M[x])

cat_S = text2float(train_data, cat='소분류') # 53 labels
train_data['소분류'] = train_data['소분류'].apply(lambda x: cat_S[x])

cat_B = text2float(train_data, cat='브랜드') # 3170 labels
train_data['브랜드'] = train_data['브랜드'].apply(lambda x: cat_B[x])
# The keyword table uses the same brand encoding so rows can be matched on '브랜드'.
b_keyword['브랜드'] = b_keyword['브랜드'].apply(lambda x: cat_B[x])

In [None]:
# ===== data scaling =====
# The two dictionaries returned for the sales columns are kept; inverse_scaling reads them.
train_data.iloc[:, 4:], scale_dict1, scale_dict2 = data_scaling(train_data.iloc[:, 4:])
# For the price and keyword tables only the scaled values are kept.
price, _, _ = data_scaling(price)
b_keyword.iloc[:, 1:], _, _ = data_scaling(b_keyword.iloc[:, 1:])

### Dataset split

In [None]:
def make_train_data(data, price, brand, train_size=CFG['TRAIN_WINDOW_SIZE'], predict_size=CFG['PREDICT_SIZE'], enc_in=CFG['enc_in']):
    """Build the flat per-time-step feature table and the target windows.

    Args:
        data: DataFrame whose first 4 columns are the encoded labels and whose
            remaining columns are the sales series (one row per series).
        price: DataFrame of prices, row-aligned with ``data``.
        brand: DataFrame of brand keyword counts; column '브랜드' identifies the
            brand and the remaining columns hold the series.
        train_size: Length of an input window.
        predict_size: Length of a target window.
        enc_in: Number of features per time step.

    Returns:
        Tuple ``(input_data, Y)``:
        ``input_data`` has shape (rows * (n_steps - predict_size), enc_in) and
        stores, per row of ``data``, one feature vector per time step
        (labels, price, brand keyword count, sales);
        ``Y`` has shape (rows * (n_steps - window + 1), predict_size) and
        stores the ``predict_size`` sales values following each input window.
    """
    num_rows = len(data)
    window_size = train_size + predict_size
    n_steps = len(data.columns) - 4
    n_inputs = n_steps - predict_size        # feature rows stored per series
    n_targets = n_steps - window_size + 1    # sliding windows per series

    input_data = np.empty((num_rows * n_inputs, enc_in))
    Y = np.empty((num_rows * n_targets, predict_size))

    for i in tqdm(range(num_rows)):
        encode_info = np.array(data.iloc[i, :4])
        price_data = np.array(price.iloc[i, :])
        # Use the `brand` and `data` arguments (the previous code ignored them
        # and read the module-level b_keyword / train_data instead).
        brand_value = data['브랜드'].iloc[i]
        brand_data = np.array(brand[brand['브랜드'] == brand_value].iloc[0, 1:])
        sales_data = np.array(data.iloc[i, 4:])

        # One feature vector per time step: labels repeated, then price,
        # brand keyword count and sales for that step.
        input_data[i*n_inputs:(i+1)*n_inputs] = np.column_stack((
            np.tile(encode_info, (n_inputs, 1)),
            price_data[:n_inputs],
            brand_data[:n_inputs],
            sales_data[:n_inputs],
        ))

        for j in range(n_targets):
            # Target = the predict_size values right after the input window.
            Y[i*n_targets + j] = sales_data[j + train_size : j + window_size]

    return input_data, Y

In [None]:
def make_predict_data(data, price, brand, train_size=CFG['TRAIN_WINDOW_SIZE'], enc_in=CFG['enc_in']):
    """Build one inference window per row from the last ``train_size`` steps.

    Args:
        data: DataFrame whose first 4 columns are the encoded labels and whose
            remaining columns are the sales series.
        price: DataFrame of prices, row-aligned with ``data``.
        brand: DataFrame of brand keyword counts; column '브랜드' identifies the
            brand.
        train_size: Length of the input window.
        enc_in: Number of features per time step.

    Returns:
        Array of shape (rows, train_size, enc_in) with the features ordered as
        labels, price, brand keyword count, sales.
    """
    num_rows = len(data)

    input_data = np.empty((num_rows, train_size, enc_in))

    for i in tqdm(range(num_rows)):
        encode_info = np.array(data.iloc[i, :4])
        price_data = np.array(price.iloc[i, -train_size:])
        # Use the `brand` and `data` arguments (the previous code ignored them
        # and read the module-level b_keyword / train_data instead).
        brand_value = data['브랜드'].iloc[i]
        brand_data = np.array(brand[brand['브랜드'] == brand_value].iloc[0, -train_size:])
        sales_data = np.array(data.iloc[i, -train_size:])

        input_data[i] = np.column_stack((np.tile(encode_info, (train_size, 1)), price_data, brand_data, sales_data))

    return input_data

In [None]:
# Build the training features/targets and the inference windows from the preprocessed tables.
train_input, train_target = make_train_data(train_data, price, b_keyword)
test_input = make_predict_data(train_data, price, b_keyword)
print(train_input.shape, train_target.shape, test_input.shape)

### Custom Dataset

In [None]:
class CustomDataset(Dataset):
    """Dataset over the flat feature table (training) or ready-made windows (inference).

    In training mode (``Y`` is not None) ``X`` is the flat table in which each
    series contributes ``total_days - predict_size`` consecutive feature rows,
    and ``Y`` holds ``total_days - train_size - predict_size + 1`` target
    windows per series. Input windows are sliced out of ``X`` on the fly.

    In inference mode (``Y`` is None) ``X`` is indexed directly.

    Args:
        X: Feature array.
        Y: Target array, or None for inference.
        train_size: Length of an input window.
        predict_size: Length of a target window.
        total_days: Number of time steps per series. Defaults to 459, the
            value that was previously hard-coded.
    """

    def __init__(self, X, Y, train_size=CFG['TRAIN_WINDOW_SIZE'], predict_size=CFG['PREDICT_SIZE'], total_days=459):
        self.X = X
        self.Y = Y
        self.train_size = train_size
        # n1: target windows per series; n2: feature rows per series in X.
        self.n1 = total_days - train_size - predict_size + 1
        self.n2 = total_days - predict_size

    def __getitem__(self, index):
        """Return ``X[index]`` (inference) or an ``(input_window, target)`` pair."""
        if self.Y is None:
            return torch.Tensor(self.X[index])

        # index // n1 selects the series, index % n1 the window offset inside
        # it; the series' rows start at series * n2 in the flat table.
        start = (index // self.n1) * self.n2 + index % self.n1
        window = self.X[start:start + self.train_size]
        return torch.Tensor(window), torch.Tensor(self.Y[index])

    def __len__(self):
        """Number of samples: rows of ``X`` for inference, rows of ``Y`` otherwise."""
        if self.Y is None:
            return len(self.X)
        return len(self.Y)

In [None]:
dataset = CustomDataset(train_input, train_target)

# Size of the full dataset
total_size = len(dataset)

# Sizes of the train / validation splits (80% / 20%)
train_size = int(total_size * 0.8)
val_size = total_size - train_size

# Model

### LSTM (baseline)

In [None]:
class LSTM_Model(nn.Module):
    """LSTM encoder followed by a fully connected head (baseline model)."""

    def __init__(self, input_size=CFG['enc_in'], hidden_size=CFG['HIDDEN_SIZE'], output_size=CFG['PREDICT_SIZE'], num_layers=1):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers, batch_first=True)

        half, quarter = hidden_size//2, hidden_size//4
        head_layers = [
            nn.Linear(hidden_size, half),
            nn.BatchNorm1d(half),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(half, quarter),
            nn.Linear(quarter, output_size),
            nn.ReLU(),
        ]
        self.linear = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the LSTM over ``x`` and map its last time step to the output.

        Shapes:
            x: (b_s, TRAIN_WINDOW_SIZE, enc_in)
            LSTM output: (b_s, seq_len, h_s)
            initial state: ((num_layers, b_s, h_s), (num_layers, b_s, h_s))
            returned output: (b_s, o_s)
        """
        initial_state = self.init_hidden(x.size(0), x.device)

        # Recurrent pass; the final state returned by the LSTM is not needed.
        sequence_out, _ = self.lstm(x, initial_state)

        # Keep only the output of the last time step.
        final_step = sequence_out[:, -1, :]

        # squeeze(1) drops dimension 1 when it has size 1.
        return self.linear(final_step).squeeze(1)

    def init_hidden(self, batch_size, device):
        """Return zero-filled (hidden state, cell state) tensors on ``device``."""
        state_shape = (self.num_layers, batch_size, self.hidden_size)
        h0 = torch.zeros(*state_shape, device=device)
        c0 = torch.zeros(*state_shape, device=device)
        return (h0, c0)

### GRU

In [None]:
class GRU_Model(nn.Module):
    """GRU encoder followed by a fully connected head."""

    def __init__(self, input_size=CFG['enc_in'], hidden_size=CFG['HIDDEN_SIZE'], output_size=CFG['PREDICT_SIZE'], num_layers=1):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers=num_layers, batch_first=True)

        # FC head: BatchNorm1d layers were added and dropout was lowered from
        # the default p=0.5 to p=0.2.
        half, quarter = hidden_size//2, hidden_size//4
        head_layers = [
            nn.Linear(hidden_size, half),
            nn.BatchNorm1d(half),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(half, quarter),
            nn.BatchNorm1d(quarter),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(quarter, output_size),
            nn.ReLU(),
        ]
        self.linear = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the GRU over ``x`` and map its last time step to the output.

        Shapes:
            x: (b_s, TRAIN_WINDOW_SIZE, enc_in)
            GRU output: (b_s, seq_len, h_s)
            initial hidden state: (num_layers, b_s, h_s)
            returned output: (b_s, o_s)
        """
        initial_state = self.init_hidden(x.size(0), x.device)

        # Recurrent pass; the final hidden state returned by the GRU is not needed.
        sequence_out, _ = self.gru(x, initial_state)

        # Keep only the output of the last time step.
        final_step = sequence_out[:, -1, :]

        # squeeze(1) drops dimension 1 when it has size 1.
        return self.linear(final_step).squeeze(1)

    def init_hidden(self, batch_size, device):
        """Return a zero-filled initial hidden state on ``device``."""
        state_shape = (self.num_layers, batch_size, self.hidden_size)
        return torch.zeros(*state_shape, device=device)

# Train & Inference

### train & validation

In [None]:
def train(model, optimizer, dataset, device, last_epoch=0):
    """Train ``model`` for ``CFG['EPOCHS']`` epochs and checkpoint improvements.

    Every epoch the dataset is re-split 80/20 into train/validation with a
    generator seeded from the epoch number, so the split is reproducible.
    A checkpoint ``epoch{N}_model.pth`` is written whenever the validation
    loss improves, and for the final epoch if it did not improve.

    NOTE(review): because the split changes every epoch, samples validated in
    one epoch are trained on in another, so the validation loss is not a
    strict hold-out estimate.

    Args:
        model: Network to train; moved to ``device``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        dataset: Dataset yielding ``(X, Y)`` pairs.
        device: Device used for training.
        last_epoch: Number of epochs already completed (epoch numbering and
            checkpoint names continue from here).

    Returns:
        Tuple ``(best_model, train_losses, val_losses)`` where ``best_model``
        is a snapshot of the model at its lowest validation loss (the model
        itself if no epoch produced a comparable loss) and the lists hold one
        mean loss per epoch.
    """
    import copy

    model.to(device)
    criterion = nn.MSELoss().to(device)
    best_loss = float('inf')
    best_model = None
    train_losses = []
    val_losses = []

    # 80/20 split sizes derived from the dataset that is actually passed in.
    n_train = int(len(dataset) * 0.8)
    n_val = len(dataset) - n_train
    final_epoch = last_epoch + CFG['EPOCHS']

    for epoch in range(last_epoch+1, final_epoch+1):
        # Split the dataset; the generator must be handed to random_split,
        # otherwise the seed has no effect on the split.
        generator = torch.Generator().manual_seed(epoch%CFG['SEED'])
        train_dataset, val_dataset = torch.utils.data.random_split(dataset, [n_train, n_val], generator=generator)

        # Create the DataLoader instances
        train_loader = DataLoader(train_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False)

        model.train()
        train_loss = []
        for X, Y in tqdm(iter(train_loader)):
            X = X.to(device)
            Y = Y.to(device)

            optimizer.zero_grad()

            output = model(X)
            loss = criterion(output, Y)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        val_loss = validation(model, val_loader, criterion, device)
        print(f'Epoch : [{epoch}] Train Loss : [{np.mean(train_loss):.5f}] Val Loss : [{val_loss:.5f}]')
        train_losses.append(np.mean(train_loss))
        val_losses.append(val_loss)

        if best_loss > val_loss:
            best_loss = val_loss
            # Snapshot the weights; keeping a plain reference would make
            # "best_model" silently follow every later training step.
            best_model = copy.deepcopy(model)
            torch.save({
                'epoch': epoch,
                'model_state_dict': best_model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }, f'epoch{epoch}_model.pth')
            print('Model Saved')
        elif epoch == final_epoch:
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }, f'epoch{epoch}_model.pth')
            print('last epoch Model Saved')

    if best_model is None:
        # No epoch ran or every validation loss was NaN; return the model
        # itself rather than None.
        best_model = model

    return best_model, train_losses, val_losses

In [None]:
def validation(model, val_loader, criterion, device):
    """Return the mean of the per-batch losses of ``model`` on ``val_loader``.

    The model is switched to eval mode and gradients are disabled.
    """
    model.eval()
    batch_losses = []

    with torch.no_grad():
        for inputs, targets in tqdm(iter(val_loader)):
            inputs, targets = inputs.to(device), targets.to(device)
            batch_loss = criterion(model(inputs), targets)
            batch_losses.append(batch_loss.item())

    return np.mean(batch_losses)

### Run

LSTM n_layers=2, hs=512, bs=4096
- Epoch : [16] Train Loss : [0.01645] Val Loss : [0.01695] **0.5285677486**

GRU n_layers=2, hs=256, train_window_size=100, bs=4096
- Epoch : [40] Train Loss : [0.01598] Val Loss : [0.01564] **best - 0.5441720878**
- Epoch : [60] Train Loss : [0.01490] Val Loss : [0.01460]

GRU n_layers=2, hs=256, train_window_size=90, linear, batchnorm1d 추가, dropout p=0.2로 변경, bs=1024
- Epoch : [30] Train Loss : [0.01454] Val Loss : [0.01429] **0.5317649514**
- Epoch : [40] Train Loss : [0.01336] Val Loss : [0.01300]

In [None]:
# ===== model & optimizer =====
# Switch between the two architectures by (un)commenting the lines below.
# model = LSTM_Model(num_layers=2)
model = GRU_Model(num_layers=2)
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])

# ===== train =====
# infer_model is the model returned by train() and is used for inference below.
infer_model, train_losses, val_losses = train(model, optimizer, dataset, device)

In [None]:
import matplotlib.pyplot as plt

# Per-epoch loss curves: blue = training loss, green = validation loss.
plt.figure(figsize=(13,5))
plt.title('train & validation loss')
plt.plot(train_losses, color='b', alpha=0.5)
plt.plot(val_losses, color='g', alpha=0.5)
plt.show()

### Inference

In [None]:
def inference(model, test_loader, device):
    """Run ``model`` over ``test_loader`` and collect the predictions.

    Args:
        model: Trained network, expected to already live on ``device``.
        test_loader: DataLoader yielding input batches only (no targets).
        device: Device the batches are moved to.

    Returns:
        NumPy array with one prediction row per input sample, in loader order.
    """
    predictions = []

    # Dropout and BatchNorm behave differently in training mode; make sure
    # the model predicts in eval mode regardless of the state it was left in.
    model.eval()

    with torch.no_grad():
        for X in tqdm(iter(test_loader)):
            X = X.to(device)

            output = model(X)

            # Move the model output to the CPU and convert it to a NumPy array.
            output = output.cpu().numpy()

            predictions.extend(output)

    return np.array(predictions)

In [None]:
def inverse_scaling(pred, t=CFG['data_scaling']):
    """Undo the row-wise scaling of ``pred`` in place and return it.

    Row ``i`` of ``pred`` is restored with entry ``i`` of the module-level
    ``scale_dict1`` / ``scale_dict2``: (min, max) for ``"minmax"`` and
    (mean, std) for ``"std"``. Any other ``t`` leaves ``pred`` unchanged.
    """
    if t == "minmax":
        for row in range(len(pred)):
            spread = scale_dict2[row] - scale_dict1[row]
            pred[row, :] = pred[row, :] * spread + scale_dict1[row]
    elif t == "std":
        for row in range(len(pred)):
            pred[row, :] = pred[row, :] * scale_dict2[row] + scale_dict1[row]

    return pred

In [None]:
# Inference dataset: no targets, so each item is a ready-made input window.
test_dataset = CustomDataset(test_input, None)
test_loader = DataLoader(test_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

pred = inference(infer_model, test_loader, device)

# Inverse-scale the predictions
pred = inverse_scaling(pred)

# Post-processing: round to the nearest integer
pred = np.round(pred, 0).astype(int)

# Save the result (every column after the first is overwritten with the predictions)
submit = pd.read_csv('sample_submission.csv')
submit.iloc[:,1:] = pred
submit.to_csv('baseline_submit.csv', index=False)

## load_state_dict

In [None]:
# Load a saved checkpoint.
# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was written on a GPU can also be loaded on a CPU-only machine.
checkpoint = torch.load('epoch30_model.pth', map_location=device)
model = GRU_Model(num_layers=2).to(device)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
# Make sure every tensor in the optimizer state lives on the training device.
for state in optimizer.state.values():
    for k, v in state.items():
        if torch.is_tensor(v):
            state[k] = v.to(device)
# Epoch stored in the checkpoint; training continues numbering from here.
last_epoch = checkpoint['epoch']

In [None]:
# ===== Continue training =====
# Epoch numbering continues after the epoch stored in the loaded checkpoint.
infer_model, train_losses, val_losses = train(model, optimizer, dataset, device, last_epoch=last_epoch)

In [None]:
# Inference dataset: no targets, so each item is a ready-made input window.
test_dataset = CustomDataset(test_input, None)
test_loader = DataLoader(test_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

pred = inference(infer_model, test_loader, device)

# Inverse-scale the predictions
pred = inverse_scaling(pred)

# Post-processing: round to the nearest integer
pred = np.round(pred, 0).astype(int)

# Save the result (every column after the first is overwritten with the predictions)
submit = pd.read_csv('sample_submission.csv')
submit.iloc[:,1:] = pred
submit.to_csv('GRU_nlayers2_hs256.csv', index=False)

# PSFA

In [None]:
# Map every distinct value of the '대분류' column to the list of row labels carrying it.
# At this point the column holds the float encodings, so the dict keys are floats.
indexs_bigcat={}
for bigcat in train_data['대분류'].unique():
    indexs_bigcat[bigcat] = list(train_data.loc[train_data['대분류']==bigcat].index)

# Display the keys (notebook cell output).
indexs_bigcat.keys()

dict_keys([1.0, 0.0114, 0.40577, 0.05596, 0.0])

In [None]:
def PSFA(pred, target, groups=None):
    """Compute the PSFA score of ``pred`` against ``target``.

    For every group and every day, each item's relative error
    ``|target - pred| / max(target, pred)`` is weighted by the item's share of
    the group's total sales on that day; the weighted errors are averaged
    over all (group, day) pairs and subtracted from 1.

    Args:
        pred: Array of shape (n_items, n_days) with predicted sales.
        target: Array of shape (n_items, n_days) with actual sales.
        groups: Mapping group key -> list of item row indices. Defaults to the
            module-level ``indexs_bigcat``.

    Returns:
        The score; 1 means a perfect prediction.
    """
    if groups is None:
        groups = indexs_bigcat

    pred = np.asarray(pred)
    target = np.asarray(target)
    n_days = target.shape[1]
    n_groups = len(groups)

    score = 1
    # Iterate over the stored index lists: the dict keys are the encoded
    # category values (floats), so looking them up with 0..4 raises KeyError.
    for ids in groups.values():
        ids = list(ids)
        for day in range(n_days):
            target_values = target[ids, day]  # actual sales on this day
            pred_values = pred[ids, day]  # predicted sales on this day
            total_sell = np.sum(target_values)  # total sales of the group on this day

            # When actual and predicted sales are both 0 the error counts as 0.
            denominator = np.maximum(target_values, pred_values)
            abs_error = np.abs(target_values - pred_values)
            diffs = np.divide(
                abs_error,
                denominator,
                out=np.zeros(abs_error.shape, dtype=float),
                where=denominator != 0,
            )

            if total_sell != 0:
                sell_weights = target_values / total_sell  # item share of the day's total sales
            else:
                sell_weights = np.ones_like(target_values) / len(ids)  # uniform weights instead

            if not np.isnan(diffs).any():  # update the score only when diffs has no NaN
                score -= np.sum(diffs * sell_weights) / (n_days * n_groups)

    return score

In [None]:
def psfa_validation(model, dataset, epoch, device):
    """Score ``model`` with PSFA on the last window of every series.

    ``inverse_scaling`` and ``PSFA`` both expect exactly one row per series
    (row i is restored with the scale entry i and addressed by the row
    indices stored in ``indexs_bigcat``). The dataset, however, contains
    ``dataset.n1`` windows per series, so only the last window of each series
    is evaluated here.

    NOTE(review): those windows are also part of the training data, so this
    is not a hold-out score.

    Args:
        model: Trained network, expected to already live on ``device``.
        dataset: ``CustomDataset`` in training mode (provides ``n1``).
        epoch: Unused; kept for interface compatibility.
        device: Device the batches are moved to.

    Returns:
        The PSFA score of the rounded, inverse-scaled predictions.
    """
    # Samples are laid out series by series with n1 windows each, so the last
    # window of series p sits at index p * n1 + n1 - 1.
    windows_per_series = dataset.n1
    n_series = len(dataset) // windows_per_series
    last_window_ids = [p * windows_per_series + windows_per_series - 1 for p in range(n_series)]
    subset = torch.utils.data.Subset(dataset, last_window_ids)
    data_loader = DataLoader(subset, batch_size=CFG['BATCH_SIZE'], shuffle=False)

    pred = []
    target = []

    # Dropout/BatchNorm must run in eval mode for scoring.
    model.eval()
    with torch.no_grad():
        # Iterate the loader built above (the previous code referenced a
        # `val_loader` name that does not exist in this scope).
        for X, Y in tqdm(iter(data_loader)):
            X = X.to(device)

            target.extend(Y.numpy())

            output = model(X)
            pred.extend(output.cpu().numpy())

    pred = np.array(pred)
    target = np.array(target)

    # Inverse-scale predictions and targets
    pred = inverse_scaling(pred)
    target = inverse_scaling(target)

    # Post-processing: round to the nearest integer
    pred = np.round(pred, 0).astype(int)
    target = np.round(target, 0).astype(int)

    return PSFA(pred, target)

In [None]:
# Load the best GRU checkpoint and compute its PSFA score.
model_path = '/content/drive/MyDrive/checkpoint/GRU_nlayers2_hs256_bs4096/0.5441720878/epoch40_model.pth'
# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was written on a GPU can also be loaded on a CPU-only machine.
checkpoint = torch.load(model_path, map_location=device)
model_ = GRU_Model(num_layers=2).to(device)
model_.load_state_dict(checkpoint['model_state_dict'])
optimizer_ = torch.optim.Adam(params = model_.parameters(), lr = CFG["LEARNING_RATE"])
optimizer_.load_state_dict(checkpoint['optimizer_state_dict'])
# Make sure every tensor in the optimizer state lives on the target device.
for state in optimizer_.state.values():
    for k, v in state.items():
        if torch.is_tensor(v):
            state[k] = v.to(device)
epoch = checkpoint['epoch']

# Score the checkpoint that was just loaded (model_), not the earlier `model`.
score = psfa_validation(model_, dataset, epoch, device)
print(score)

  0%|          | 0/264 [00:00<?, ?it/s]

1077342


KeyError: ignored