In [1]:
import os
import glob
import re
import random
import numpy as np
import pandas as pd
from datetime import timedelta

import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm

  from pandas.core import (



### Seed & Hyperparams


In [2]:
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
set_seed(42)
LOOKBACK, PREDICT = 28, 7
BATCH_SIZE, EPOCHS = 16, 40
LR = 1e-3
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


### Utils

In [13]:
def _read_train():
    path = './train/train.csv'
    if not os.path.exists(path):
        path = './train.csv'
    df = pd.read_csv(path)
    # ensure datetime
    df['영업일자'] = pd.to_datetime(df['영업일자'])
    return df

In [14]:
def _split_restaurant(name: str) -> str:
    # restaurant is the prefix before last underscore
    if isinstance(name, str) and '_' in name:
        return name.rsplit('_', 1)[0]
    return name

In [15]:
def _dow_one_hot(dates: np.ndarray) -> np.ndarray:
    # Monday=0..Sunday=6 -> one-hot size 7
    dows = pd.to_datetime(dates).weekday.values if hasattr(dates, "values") else pd.to_datetime(dates).weekday
    out = np.zeros((len(dates), 7), dtype=np.float32)
    for i, d in enumerate(dows):
        out[i, int(d)] = 1.0
    return out

In [16]:
def same_dow_median_from_last28(dates_28: np.ndarray, values_28: np.ndarray, horizon=7):
    dates_28 = pd.to_datetime(dates_28)
    med = {}
    for dow in range(7):
        mask = (dates_28.weekday == dow)
        arr = values_28[mask]
        if len(arr) > 0:
            med[dow] = float(np.median(arr))
    # fallback overall median
    overall = float(np.median(values_28)) if len(values_28) else 0.0
    last_date = dates_28.max()
    preds = []
    for h in range(1, horizon+1):
        dow = int((last_date + timedelta(days=h)).weekday())
        preds.append(med.get(dow, overall))
    return np.array(preds, dtype=np.float32)


In [17]:
def postprocess_from_last28(yhat: np.ndarray, last28_vals: np.ndarray, eps_floor=1.0, cap_quantile=99, cap_scale=1.2):
    # negatives -> 0
    yhat = np.maximum(yhat, 0.0)
    # epsilon floor (helps SMAPE when true > 0 but pred = 0)
    yhat = np.where(yhat == 0.0, eps_floor, yhat)
    # gentle cap to avoid huge over-forecast
    if len(last28_vals) > 0:
        cap = np.percentile(last28_vals, cap_quantile) * cap_scale
        if np.isfinite(cap) and cap > 0:
            yhat = np.minimum(yhat, cap)
    return yhat

### Model

In [18]:
class MultiOutputLSTM(nn.Module):
    def __init__(self, input_dim=8, hidden_dim=64, num_layers=2, output_dim=7, dropout=0.1):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out, _ = self.lstm(x)     # (B, T, H)
        out = self.fc(out[:, -1]) # (B, O)
        return out

### Training per item

In [19]:
def train_lstm(train_df: pd.DataFrame):
    trained = {}
    for key, g in tqdm(train_df.groupby('영업장명_메뉴명'), desc='Training LSTM (per item)'):
        g = g.sort_values('영업일자').copy()
        if len(g) < LOOKBACK + PREDICT:
            continue

        # scaler on qty only
        scaler = MinMaxScaler()
        qty = g[['매출수량']].values.astype(np.float32)
        qty_scaled = scaler.fit_transform(qty)  # shape (N,1)

        # DOW features for each step (no scaling)
        dows = _dow_one_hot(g['영업일자'].values)  # (N,7)

        # build windows
        X_list, Y_list = [], []
        for i in range(len(g) - LOOKBACK - PREDICT + 1):
            x_qty = qty_scaled[i:i+LOOKBACK]              # (28,1)
            x_dow = dows[i:i+LOOKBACK]                    # (28,7)
            X = np.concatenate([x_qty, x_dow], axis=1)    # (28,8)
            y = qty_scaled[i+LOOKBACK:i+LOOKBACK+PREDICT, 0]  # (7,)
            X_list.append(X)
            Y_list.append(y)

        X_train = torch.tensor(np.stack(X_list)).float().to(DEVICE)  # (B,28,8)
        y_train = torch.tensor(np.stack(Y_list)).float().to(DEVICE)  # (B,7)

        model = MultiOutputLSTM(input_dim=8, output_dim=PREDICT).to(DEVICE)
        opt = torch.optim.Adam(model.parameters(), lr=LR)
        criterion = nn.MSELoss()

        model.train()
        for epoch in range(EPOCHS):
            idx = torch.randperm(X_train.size(0), device=DEVICE)
            for i in range(0, X_train.size(0), BATCH_SIZE):
                b = idx[i:i+BATCH_SIZE]
                xb, yb = X_train[b], y_train[b]
                pred = model(xb)
                loss = criterion(pred, yb)
                opt.zero_grad()
                loss.backward()
                opt.step()

        trained[key] = {
            'model': model.eval(),
            'scaler': scaler,
        }
    return trained

### Prediction on test windows

In [20]:
def predict_lstm(test_df: pd.DataFrame, trained_models: dict, test_prefix: str, alpha=0.6):
    results = []
    for key, g in test_df.groupby('영업장명_메뉴명'):
        if key not in trained_models:
            continue

        g = g.sort_values('영업일자').copy()
        if len(g) < LOOKBACK:
            continue

        model = trained_models[key]['model']
        scaler = trained_models[key]['scaler']

        # last 28
        last28_vals = g['매출수량'].values[-LOOKBACK:].astype(np.float32)
        last28_dates = pd.to_datetime(g['영업일자']).values[-LOOKBACK:]

        # features
        x_qty_scaled = scaler.transform(last28_vals.reshape(-1,1))           # (28,1)
        x_dow = _dow_one_hot(last28_dates)                                   # (28,7)
        x_input = np.concatenate([x_qty_scaled, x_dow], axis=1)[None, ...]   # (1,28,8)

        with torch.no_grad():
            pred_scaled = model(torch.tensor(x_input).float().to(DEVICE)).squeeze(0).cpu().numpy()  # (7,)

        # inverse transform
        restored = []
        for s in pred_scaled:
            tmp = scaler.inverse_transform(np.array([[s]], dtype=np.float32))[0,0]
            restored.append(float(max(tmp, 0.0)))
        restored = np.array(restored, dtype=np.float32)

        # DOW-median baseline from last 28 only
        dow_med = same_dow_median_from_last28(last28_dates, last28_vals, horizon=PREDICT)

        # blend + postprocess
        blended = alpha * restored + (1 - alpha) * dow_med
        blended = postprocess_from_last28(blended, last28_vals, eps_floor=1.0, cap_quantile=99, cap_scale=1.2)

        # pack rows
        pred_dates = [f"{test_prefix}+{i+1}일" for i in range(PREDICT)]
        for d, val in zip(pred_dates, blended.tolist()):
            results.append({'영업일자': d, '영업장명_메뉴명': key, '매출수량': val})
    return pd.DataFrame(results)


### Submission

In [21]:
def convert_to_submission_format(pred_df: pd.DataFrame, sample_submission: pd.DataFrame):
    pred_df['영업일자'] = pred_df['영업일자'].astype(str)
    pred_df['영업장명_메뉴명'] = pred_df['영업장명_메뉴명'].astype(str)
    pred_dict = dict(zip(zip(pred_df['영업일자'], pred_df['영업장명_메뉴명']), pred_df['매출수량']))
    final_df = sample_submission.copy()
    for row_idx in final_df.index:
        date = final_df.loc[row_idx, '영업일자']
        for col in final_df.columns[1:]:
            final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
    return final_df

In [22]:
def main():
    train_df = _read_train()
    trained = train_lstm(train_df)

    all_preds = []
    test_files = sorted(glob.glob('./test/TEST_*.csv'))
    for path in test_files:
        test_df = pd.read_csv(path)
        # ensure datetime for DOW
        try:
            test_df['영업일자'] = pd.to_datetime(test_df['영업일자'])
        except Exception:
            pass
        test_prefix = re.search(r'(TEST_\d+)', os.path.basename(path)).group(1)
        pred_df = predict_lstm(test_df, trained, test_prefix)
        all_preds.append(pred_df)

    full_pred_df = pd.concat(all_preds, ignore_index=True) if len(all_preds) else pd.DataFrame(columns=['영업일자','영업장명_메뉴명','매출수량'])

    sample_path = './sample_submission.csv'
    if not os.path.exists(sample_path):
        sample_path = '../sample_submission.csv'
    sample = pd.read_csv(sample_path)
    submission = convert_to_submission_format(full_pred_df, sample)
    submission.to_csv('baseline_submission_cv.csv', index=False, encoding='utf-8-sig')

if __name__ == "__main__":
    main()


Training LSTM (per item): 100%|███████████████| 193/193 [31:37<00:00,  9.83s/it]
  final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
  final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
  final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
  final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
  final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
  final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
  final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
  final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
  final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
  final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
  final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
  final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
  final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
  final_df.loc[row_idx, col] = pred_dict.get((date, col), 0.0)
  final_df.loc[row_idx, col] = pred_d