In [1]:
class PatchMixerLayer(nn.Module):
    def __init__(self, dim, a, kernel_size = 8):
        super().__init__()
        self.Resnet = nn.Sequential(
            nn.Conv1d(dim, dim, kernel_size = kernel_size, groups = dim, padding = 'same')
            nn.GELU()
            nn.BatchNorm1d(dim)
        )

        self.Conv_1x1 = nn.Sequential(
            nn.Conv1d(dim, a, kernel_size = 1),
            nn.GELU()
            nn.BatchNorm1d(a)
        )

    def forward(self, x):
        x += self.Resnet(x)
        x = self.Conv_1x1(x)
        return x

class PatchMixerBackbone(nn.Module):
    def __init__(self, configs, revin = True, affine = True, subtract_last = False):
        super().__init__()
        self.n_vals = configs.enc_in
        self.lookback = configs.seq_len
        self.forecasting = configs.pred_len
        self.patch_size = configs.patch_len
        self.stride = configs.stride
        self.kernel_size: int = configs.mixer_kernel_size

        self.PatchMixer_blocks = nn.ModuleList([])
        self.padding_patch_layer = nn.ReplicationPad1d((0, self.stride))
        self.patch_num = int((self.lookback - self.patch_size) / self.stride + 1) + 1
        self.a = self.patch_num
        self.d_model = configs.d_model
        self.dropout_rate = configs.head_dropout
        self.depth = configs.e_layers

        for _ in range(self.depth):
            self.PatchMixer_blocks.append(PatchMixerLayer(dim = self.patch_num, a = self.a, kernel_size = self.kernel_size))
            self.W_P = nn.Linear(self.patch_size, self.d_model)
            self.flatten = nn.Flatten(start_dim = -2)

            self.dropout = nn.Dropout(self.dropout_rate)
            self.revin = revin
            if self.revin:
                self.revin_layer = RevIn(self.n_vals, affine = affine, subtract_last = subtract_last)

    def forward(self, x):
        bs = x.shape[0]
        n_vars = x.shape[-1]

        if self.revin:
            x = self.revin_layer(x, 'norm')

        x = x.permute(0, 2, 1)
        x_lookback = self.padding_patch_layer(x)
        x = x_lookback.unfold(dimension = -1, size = self.patch_size, step = self.stride)

        x = self.W_P(x)
        x = torch.reshape(x, (x.shape[0] * x.shape[1], x.shape[2], x.shape[3]))
        x = self.dropout(x)
        
        for PatchMixer_block in self.PatchMixer_blocks:
            x = PatchMixer_block(x)

        # Global representation (flatten for merge)
        x = self.flatten(x) # shape: (batch * n_vars, a * d_models)
        x = x.view(bs, n_vars, -1)
        x = x.mean(dim = 1) # aggregate across variables
        return x # shape: (batch, patch_num * d_models)

# Full Model: PatchMixer + Feature Branch
class PatchMixerFeatureModel(nn.Module):
    def __init__(self, configs, feature_dim = 7):
        super().__init__()
        self.backbone = PatchMixerBackbone(configs)

        # Feature Branch
        self.feature_mlp = nn.Sequential(
            nn.Linear(feature_dim, 16),
            nn.ReLU(),
            nn.Linear(16, feature_dim),
            nn.ReLU()
        )

        # Combined Head
        patch_repr_dim = self.backbone.a * self.backbone.d_model

        self.proj = nn.Sequential(
            nn.Linear(patch_repr_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.1)
        )
        combined_dim = 128 + feature_dim

        self.fc = nn.Sequential(
            nn.Linear(combined_dim, 64),
            nn.ReLU(),
            nn.Linear(64, configs.pred_len)
        )

    def forward(self, ts_input, feature_input):
        # 1. PatchMixer Backbone으로 시계열 입력 처리
        patch_repr = self.backbone(ts_input) # shape: (batch, patch_repr_dim)
        # print(f'patch_repr: {patch_repr.shape}')

        # 2. Projection layer 차원 축소
        patch_repr_proj = self.proj(patch_repr)
        # print(f'patch_repr_proj: {patch_repr_proj.shape}')

        # 3. Feature branch processing
        feature_repr = self.feature_mlp(feature_input)
        # print(f'feature_repr: {feature_repr.shape}')

        # 4. Concatenate
        combined = torch.cat([patch_repr_proj, feature_repr], dim = 1)
        # print(f'combined: {combined.shape}')

        # 5. Fully connected layers -> 최종 예측
        out = self.fc(combined) # shape: (batch, pred_len)
        # print(f'out: {out.shape}')
        return out

SyntaxError: incomplete input (3911924358.py, line 1)

In [None]:
sclaer_X = MinMaxScaler()
X_ts = np.vstack(feature_result['X_ts'].to_list())
X_ts_scaled = scaler_X.fit_transform(X_ts.reshape(-1, 1)).reshape(X_ts.shape)

y_ts = np.vstack(feature_result['y_ts'].to_list())
y_ts_log = np.log1p(y_ts)
scaler_y = MinMaxScaler()
y_ts_scaled = scaler_y.fit_transform(y_ts_log.reshape(-1, 1)).reshape(y_ts_log.shape)

scaler_feat = StandardScaler()
X_feat = np.vstack(feature_result['X_features'].to_list())
X_features_scaled = scaler_feat.fit_transform(X_feat)

In [None]:
class DemandDataset(Dataset):
    def __init__(self, X_ts, X_feat, y_ts):
        self.X_ts = torch.tensor(X_ts, dtype = torch.float32).unsqueeze(-1)
        self.X_feat = torch.tensor(X_feat, dtype = torch.float32)
        self.y_ts = torch.tensor(y_ts, dtype = torch.float32)

    def __len__(self):
        return len(self.X_ts)

    def __getitem__(self, idx):
        return self.X_ts[idx], self.X_feat[idx], self.y_ts[idx]

X_train, X_val, F_train, F_val, y_train, y_val = train_test_split(X_ts_scaled, X_features_scaled, y_ts_scaled, test_size = 0.2, random_state = 42)

train_dataset = DemandDataset(X_train, F_train, y_train)
val_dataset = DemandDataset(X_val, F_val, y_val)

train_loader = DataLoader(train_dataset, batch_size = 32, shuffle = True)
val_loader = DataLoader(val_dataset, batch_size = 32, shuffle = True)

from domain.models.dl_models.models.PatchMixer import PatchMixerFeatureModel

class Config:
    enc_in = 1
    seq_len = lookback_window
    pred_len = horizon
    patch_len = 16
    stride = 8
    mixer_kernel_size = 8
    # d_model = 32
    d_model = 16
    head_dropout = 0.1
    # e_layers = 3
    e_layers = 2

config = Config()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = PatchMixerFeatureModel(config).to(device)

def custom_loss(pred, target, alpha = 0.7):
    baes_loss = nn.functional.smooth_l1_loss(pred, target)
    # rel_error = torch.abs(pred_target) / (target + 1e-6)
    # rel_loss = rel_error.mean()
    # return alpha * base_loss + (1-alpha) * rel_loss
    return base_loss

beta = 0.001
def combined_loss(pred, target):
    base = custom_loss(pred, target)
    smoothness = torch.mean(torch.diff(pred, n = 2, dim = 1) ** 2)
    return base + beta * smoothness

optimizer = torch.optim.AdamW(model.parameters(), lr = 1e-3, weight_decay = 1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max = 50)
max_grad_norm = 1.0

patience = 30
best_loss = float('inf')
counter = 0
best_model_path = 'best_model_20250801.pth'
num_epoch = 5000

In [None]:
for epoch in range(num_epoch):
    model.train()
    total_train_loss = 0.0

    for X_ts_batch, X_feat_batch, y_batch in train_loader:
        X_ts_batch, X_feat_batch, y_batch = X_ts_batch.to(device), X_feat_batch.to(device), y_batch.to(device)

        preds = model(X_ts_batch, X_feat_batch)
        loss = combined_loss(preds, y_batch)

        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm = max_grad_norm)
        optimizer.step()
        total_train_loss += loss.item()

    avg_train_loss = total_train_loss / len(train_loader)

    model.eval()
    total_val_loss = 0.0
    
    with torch.no_grad():
        for X_ts_batch, X_feat_batch, y_batch in val_loader:
            X_ts_batch, X_feat_batch, y_batch = X_ts_batch.to(device), X_feat_batch.to(device), y_batch.to(device)

            preds = model(X_ts_batch, X_feat_batch)
            val_loss = criterion(preds, y_batch)
            total_val_loss += val_loss.item()
    avg_val_loss = total_val_loss / len(val_loader)
    scheduler.step()
    print(f"Epoch [{epoch + 1}/{num_epoch}], Train Loss: {avg_train_loss:.8f}, Val Loss: {avg_val_loss:.8f}")

    # Early Stopping
    if avg_val_loss < best_loss:
        best_loss = avg_val_loss
        counter = 0
        torch.save(model.state_dict(), best_model_path)

    else:
        counter += 1
        if counter >= patience:
            print('Early Stopping Triggered')
            break

model.load_state_dict(torch.load(best_model_path))

In [None]:
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

start = 0
target = len(val_dataset)
n_cols = 3
n_rows = (len(target) + n_cols - 1) // n_cols
fig, axes = plt.subplots(n_rows, n_cols, figsize = (7 * n_cols, 4 * n_rows), sharex = False)
axes = axes.flatten()

model.eval()
for i in tqdm(range(0, len(target)):
    ax = axes[i]
    index = target[i]
    X_ts_sample, X_feat_sample, y_true_sample = val_dataset[index]
    X_ts_sample = X_ts_sample.numpy().squeeze()
    y_true_sample = y_true_sample.numpy()

    with torch.no_grad():
        X_ts_tensor = torch.tensor(X_ts_sample, dtype = torch.float32).unsqueeze(0).unsqueeze(-1).to(device)
        X_feat_tensor = X_feat_sample.unsqueeze(0).to(device)
        y_pred_scaled = model(X_ts_tensor, X_feat_tensor).cpu().numpy().flatten()


    y_pred_log = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()
    y_pred = np.expm1(y_pred_log)

    y_true_log = scaler_y.inverse_transform(y_true_sample.reshape(-1, 1)).flatten()
    y_true = np.expm1(y_true_log)

    lookback_series = scaler_X.inverse_transform(X_ts_sample.reshape(-1, 1)).flatten()

    mae = mean_absolute_error(y_true, y_pred)
    mape = mean_absolute_percentage_error(y_true, y_pred)

    y_true_cumsum = np.cumsum(y_true)
    y_pred_cumsum = np.cumsum(y_pred)

    ax.plot(range(len(lookback_series)), lookback_series, label = 'History (Lookback)', color = 'blue')
    ax.plot(range(len(lookback_series), len(lookback_series) + len(y_true)), y_true, label = 'Actual(Horizon)', color = 'green')
    ax.plot(range(len(lookback_series), len(lookback_series) + len(y_pred)), y_pred, label = 'Predicted (Horizon)', color = 'red', linestyle = 'dashed')

    ax.axvline(len(lookback_series) - 1, color = 'gray', linestyle = '--', alpha = 0.5)
    ax.set_title(f"sample {i} | True: {y_true.sum():.2f} PRED: {y_pred.sum():.2f}\n {y_pred.sum() / y_true.sum() * 100}")
    ax.legend()
    ax.grid(True)
    ax.yaxis.set_major_formatter(ScalarFormatter(useOffset = False))
    ax.yaxis.get_major_formatter().set_scientific(False)

plt.tight_layout()
plt.show()