In [None]:
import os
import sys
import time
import logging

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, ParameterGrid

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# -----------------------------------------------------------------------------
# Configuration
# -----------------------------------------------------------------------------
# Input data: a feather file that main() filters down to rows of TARGET_PDU,
# keeping only FEATURE_COLUMNS as model inputs.
INPUT_FEATHER_FILE = 'merged_data_cell_a_pdu6_pdu7_approx100machines_30d.feather'
TARGET_PDU = 'pdu6'
FEATURE_COLUMNS = [
    'pdu_sum_cpu_usage',
    'production_power_util',
]

# Hyperparameter grid for tuning; main() trains one model per combination.
PARAM_GRID = dict(
    window_size=[64, 96],
    latent_dim=[16, 32],
    learning_rate=[1e-3, 5e-4],
    batch_size=[16, 32],
)

# Training schedule: upper bound on epochs and early-stopping patience.
EPOCHS = 100
PATIENCE = 10

# Diffusion noise schedule: number of steps and the beta range endpoints.
NOISE_STEPS = 1000
BETA_START = 1e-4
BETA_END = 0.02

# Directory that receives model checkpoints; created at import time.
OUTPUT_DIR = 'diffusion_model_output'
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Logging: INFO and above, written to stdout.
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# -----------------------------------------------------------------------------
# Dataset for time series windows
# -----------------------------------------------------------------------------
class TimeSeriesWindowDataset(Dataset):
    """Sliding-window dataset over an array sliced along its first axis.

    Item ``i`` is ``data[i:i + window_size]`` cast to ``float32``. Every
    returned window has exactly ``window_size`` rows.

    Args:
        data: Source array; windows are taken along axis 0.
        window_size: Number of consecutive rows per window.

    Raises:
        ValueError: If ``window_size`` is not a positive number.
    """

    def __init__(self, data: np.ndarray, window_size: int):
        if window_size <= 0:
            raise ValueError(f"window_size must be positive, got {window_size}")
        self.data = data
        self.window_size = window_size

    def __len__(self):
        # N rows contain N - W + 1 full windows (the last one ends on the
        # final row). Clamp at zero so len() stays valid when the data is
        # shorter than a single window.
        return max(0, len(self.data) - self.window_size + 1)

    def __getitem__(self, idx):
        count = len(self)
        if idx < 0:
            # Support negative indices the way sequences do.
            idx += count
        if not 0 <= idx < count:
            # Without this check numpy slicing would silently hand back a
            # window shorter than window_size.
            raise IndexError(f"window index out of range for {count} windows")
        window = self.data[idx:idx + self.window_size]
        return window.astype(np.float32)

# -----------------------------------------------------------------------------
# Time embedding for diffusion steps
# -----------------------------------------------------------------------------
class SinusoidalPosEmb(nn.Module):
    """Sinusoidal embedding of integer diffusion steps.

    For a 1-D tensor ``t`` of length ``batch`` the output has shape
    ``(batch, dim)``: the first ``dim // 2`` columns are sines, the next
    ``dim // 2`` columns are cosines, and an odd ``dim`` gets one trailing
    zero column so the width always equals ``dim``.

    Args:
        dim: Width of the embedding produced by ``forward``.
    """

    def __init__(self, dim):
        super().__init__()
        self.dim = dim

    def forward(self, t):
        device = t.device
        half_dim = self.dim // 2
        # The frequency spacing divides by (half_dim - 1); for dim 2 or 3
        # that is zero and would turn the whole embedding into NaN, so the
        # denominator is floored at 1.
        scale = np.log(10000) / max(half_dim - 1, 1)
        freqs = torch.exp(torch.arange(half_dim, device=device) * -scale)
        angles = t[:, None] * freqs[None, :]
        emb = torch.cat([torch.sin(angles), torch.cos(angles)], dim=1)
        if self.dim % 2 == 1:
            # sin/cos halves only cover 2 * (dim // 2) columns; pad the last
            # one so consumers sized for `dim` inputs do not hit a shape
            # mismatch.
            emb = nn.functional.pad(emb, (0, 1))
        return emb

# -----------------------------------------------------------------------------
# Simple MLP denoiser model
# -----------------------------------------------------------------------------
class Denoiser(nn.Module):
    """Three-layer MLP mapping a flattened window plus step embedding
    back to a vector the size of the flattened window.

    Args:
        window_size: Rows per window.
        feature_dim: Features per row.
        latent_dim: Width of the step embedding concatenated to the input.
    """

    def __init__(self, window_size, feature_dim, latent_dim):
        super().__init__()
        flat_dim = window_size * feature_dim
        width = 128
        layers = [
            nn.Linear(flat_dim + latent_dim, width),
            nn.ReLU(),
            nn.Linear(width, width),
            nn.ReLU(),
            nn.Linear(width, flat_dim),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x_noisy, t_emb):
        # x_noisy: (batch, window_size*feature_dim)
        # t_emb:   (batch, latent_dim)
        # Both are joined along the feature axis before entering the MLP.
        return self.net(torch.cat((x_noisy, t_emb), dim=1))

# -----------------------------------------------------------------------------
# Diffusion model training and evaluation
# -----------------------------------------------------------------------------
class TimeSeriesDiffusionModel:
    """Noise-prediction diffusion model over flattened time-series windows.

    Wraps a ``Denoiser`` network, a linear beta schedule of ``NOISE_STEPS``
    steps, an Adam optimizer, and the training / sampling loops.

    Args:
        window_size: Rows per window.
        feature_dim: Features per row.
        latent_dim: Width of the diffusion-step embedding.
        lr: Adam learning rate.
        batch_size: Stored on the instance; not read by any method here.
        device: Torch device holding the schedule and the network.
    """

    def __init__(self, window_size, feature_dim, latent_dim, lr, batch_size, device):
        self.window_size = window_size
        self.feature_dim = feature_dim
        self.latent_dim = latent_dim
        self.lr = lr
        self.batch_size = batch_size
        self.device = device

        # Beta schedule: linear from BETA_START to BETA_END.
        self.betas = torch.linspace(BETA_START, BETA_END, NOISE_STEPS).to(device)
        self.alphas = 1 - self.betas
        self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)

        # Model and optimizer
        self.model = Denoiser(window_size, feature_dim, latent_dim).to(device)
        self.pos_emb = SinusoidalPosEmb(latent_dim).to(device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=lr)

    def _flatten(self, windows):
        """Flatten a batch of windows to (batch, window_size*feature_dim) on the device."""
        # reshape (not view) so a non-contiguous batch does not raise.
        return windows.reshape(-1, self.window_size * self.feature_dim).to(self.device)

    def q_sample(self, x0, t, noise):
        """Return ``x0`` noised to step ``t`` using the cumulative alpha schedule.

        Args:
            x0: Clean batch, (batch, window_size*feature_dim).
            t: 1-D tensor of step indices, one per batch row.
            noise: Tensor shaped like ``x0``.
        """
        sqrt_alphas_cumprod = self.alphas_cumprod[t]**0.5
        sqrt_one_minus_alphas = (1 - self.alphas_cumprod[t])**0.5
        return sqrt_alphas_cumprod[:, None] * x0 + sqrt_one_minus_alphas[:, None] * noise

    def p_loss(self, x0):
        """Return the MSE between the injected noise and the network's prediction.

        A random step in ``[0, NOISE_STEPS)`` and fresh Gaussian noise are
        drawn per batch row on every call, so the value is stochastic.
        """
        batch_size = x0.size(0)
        # Sample random timesteps
        t = torch.randint(0, NOISE_STEPS, (batch_size,), device=self.device)
        noise = torch.randn_like(x0)
        x_noisy = self.q_sample(x0, t, noise)
        t_emb = self.pos_emb(t)
        pred_noise = self.model(x_noisy, t_emb)
        loss = nn.functional.mse_loss(pred_noise, noise, reduction='mean')
        return loss

    def train(self, train_loader, val_loader):
        """Train for up to ``EPOCHS`` epochs with early stopping.

        The network's state dict is written to ``OUTPUT_DIR`` whenever the
        epoch-average validation loss improves; training stops after
        ``PATIENCE`` consecutive epochs without improvement.

        Args:
            train_loader: Iterable of window batches used for optimization.
            val_loader: Iterable of window batches used for validation.

        Returns:
            The lowest epoch-average validation loss seen (``inf`` if no
            epoch improved on the initial value).
        """
        # NOTE: the file name encodes window size, latent dim and learning
        # rate but not batch size, so runs that differ only in batch size
        # write to the same path.
        checkpoint_path = os.path.join(OUTPUT_DIR, f"best_diffusion_w{self.window_size}_ld{self.latent_dim}_lr{self.lr}.pth")

        best_val = float('inf')
        patience_counter = 0
        for epoch in range(EPOCHS):
            # Training
            self.model.train()
            train_losses = []
            for windows in train_loader:
                x0 = self._flatten(windows)
                self.optimizer.zero_grad()
                loss = self.p_loss(x0)
                loss.backward()
                self.optimizer.step()
                train_losses.append(loss.item())
            avg_train = np.mean(train_losses)

            # Validation
            self.model.eval()
            val_losses = []
            with torch.no_grad():
                for windows in val_loader:
                    x0 = self._flatten(windows)
                    val_losses.append(self.p_loss(x0).item())
            avg_val = np.mean(val_losses)

            logging.info(f"Epoch {epoch+1}/{EPOCHS} | Train Loss: {avg_train:.6f} | Val Loss: {avg_val:.6f}")

            # Early stopping
            if avg_val < best_val:
                best_val = avg_val
                torch.save(self.model.state_dict(), checkpoint_path)
                patience_counter = 0
            else:
                patience_counter += 1
                if patience_counter >= PATIENCE:
                    logging.info("Early stopping triggered.")
                    break

        # NOTE: self.model keeps the weights of the last epoch run; the best
        # weights are only in the checkpoint file.
        return best_val

    @torch.no_grad()
    def sample(self, num_samples):
        """Generate synthetic sequences using reverse diffusion.

        Runs under ``torch.no_grad()``: the loop calls the network once per
        schedule step, and tracking gradients through all of them would
        retain every intermediate activation for no benefit.

        Args:
            num_samples: Number of windows to generate.

        Returns:
            Tensor of shape (num_samples, window_size, feature_dim).
        """
        self.model.eval()
        x = torch.randn(num_samples, self.window_size * self.feature_dim).to(self.device)

        for t in reversed(range(len(self.betas))):  # NOISE_STEPS
            t_tensor = torch.full((num_samples,), t, device=self.device, dtype=torch.long)
            t_emb = self.pos_emb(t_tensor)
            pred_noise = self.model(x, t_emb)

            alpha = self.alphas[t]
            alpha_hat = self.alphas_cumprod[t]
            beta = self.betas[t]

            # No noise is injected on the final (t == 0) step.
            if t > 0:
                noise = torch.randn_like(x)
            else:
                noise = torch.zeros_like(x)

            x = (1 / alpha**0.5) * (x - ((1 - alpha) / (1 - alpha_hat)**0.5) * pred_noise) + (beta**0.5) * noise

        return x.view(-1, self.window_size, self.feature_dim)

# -----------------------------------------------------------------------------
# Main execution with grid search
# -----------------------------------------------------------------------------
def main():
    """Grid-search diffusion-model hyperparameters on one PDU's time series.

    Loads ``INPUT_FEATHER_FILE``, keeps rows of ``TARGET_PDU`` sorted by
    ``datetime``, min-max scales ``FEATURE_COLUMNS``, then trains one
    ``TimeSeriesDiffusionModel`` per ``PARAM_GRID`` combination and logs the
    combination with the lowest validation loss.

    Raises:
        ValueError: If the file has no rows for ``TARGET_PDU``.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    logging.info(f"Using device: {device}")

    # 1. Load and filter data
    df = pd.read_feather(INPUT_FEATHER_FILE)
    df = df[df['pdu'] == TARGET_PDU].copy()
    if df.empty:
        # Fail here with a clear message instead of inside the scaler.
        raise ValueError(f"No rows for pdu '{TARGET_PDU}' in {INPUT_FEATHER_FILE}")
    df.set_index('datetime', inplace=True)
    df.sort_index(inplace=True)
    data = df[FEATURE_COLUMNS].values

    # 2. Normalize
    scaler = MinMaxScaler()
    data_scaled = scaler.fit_transform(data)

    results = []
    for params in ParameterGrid(PARAM_GRID):
        logging.info(f"Training with params: {params}")
        window_size = params['window_size']

        if len(data_scaled) <= window_size:
            logging.warning(f"Skipping params {params}: only {len(data_scaled)} rows for window_size {window_size}")
            continue

        # Prepare windows
        dataset = TimeSeriesWindowDataset(data_scaled, window_size)
        n_windows = len(dataset)
        train_size = int(n_windows * 0.8)

        # Chronological split. Neighbouring windows share all but one row,
        # so a random split would put near-copies of training windows into
        # the validation set. Validation starts window_size - 1 windows
        # after the last training window so the two sets share no rows.
        val_start = train_size + window_size - 1
        if train_size == 0 or val_start >= n_windows:
            logging.warning(f"Skipping params {params}: {n_windows} windows are too few for a train/validation split")
            continue
        train_ds = torch.utils.data.Subset(dataset, range(train_size))
        val_ds = torch.utils.data.Subset(dataset, range(val_start, n_windows))

        train_loader = DataLoader(train_ds, batch_size=params['batch_size'], shuffle=True)
        val_loader = DataLoader(val_ds, batch_size=params['batch_size'], shuffle=False)

        # Initialize model
        model = TimeSeriesDiffusionModel(
            window_size=window_size,
            feature_dim=len(FEATURE_COLUMNS),
            latent_dim=params['latent_dim'],
            lr=params['learning_rate'],
            batch_size=params['batch_size'],
            device=device
        )
        # Train
        val_loss = model.train(train_loader, val_loader)
        results.append((params, val_loss))

    if not results:
        logging.error("No parameter combination could be trained.")
        return

    # Find best
    best_params, best_loss = min(results, key=lambda item: item[1])
    logging.info(f"Best params: {best_params} with Val Loss: {best_loss:.6f}")

if __name__ == '__main__':
    main()


2025-05-10 12:11:25,562 - INFO - Using device: cuda
2025-05-10 12:11:26,172 - INFO - Training with params: {'batch_size': 16, 'latent_dim': 16, 'learning_rate': 0.001, 'window_size': 64}
2025-05-10 12:11:29,011 - INFO - Epoch 1/100 | Train Loss: 0.913502 | Val Loss: 0.786424
2025-05-10 12:11:29,462 - INFO - Epoch 2/100 | Train Loss: 0.663425 | Val Loss: 0.576862
2025-05-10 12:11:30,009 - INFO - Epoch 3/100 | Train Loss: 0.519312 | Val Loss: 0.487597
2025-05-10 12:11:30,471 - INFO - Epoch 4/100 | Train Loss: 0.443991 | Val Loss: 0.412525
2025-05-10 12:11:30,929 - INFO - Epoch 5/100 | Train Loss: 0.390275 | Val Loss: 0.364140
2025-05-10 12:11:31,344 - INFO - Epoch 6/100 | Train Loss: 0.350962 | Val Loss: 0.340567
2025-05-10 12:11:31,758 - INFO - Epoch 7/100 | Train Loss: 0.320169 | Val Loss: 0.302712
2025-05-10 12:11:32,220 - INFO - Epoch 8/100 | Train Loss: 0.302282 | Val Loss: 0.304831
2025-05-10 12:11:32,629 - INFO - Epoch 9/100 | Train Loss: 0.286440 | Val Loss: 0.270625
2025-05-10 1