# In [1]:
# ===============================================================
# Single-cell: Advanced Time Series Forecasting (Seq2Seq + Attention + Baselines)
# Cleaned, corrected, improved version
# ===============================================================

import os
import logging
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import tensorflow as tf
from tensorflow.keras import layers, Model, Input
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
import statsmodels.api as sm
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# ---------------------------
# Logging & reproducibility
# ---------------------------
# Configure the root logger: INFO and above, each record prefixed with a
# timestamp and its level name.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
# Named logger that the functions in this file write their messages to.
logger = logging.getLogger("ts_project")
# Seed NumPy's and TensorFlow's global random generators with a fixed value.
np.random.seed(42)
tf.random.set_seed(42)

# ---------------------------
# Metrics
# ---------------------------
def rmse(y_true, y_pred):
    """Return the root-mean-squared error of ``y_pred`` against ``y_true`` as a float."""
    squared_error = mean_squared_error(y_true, y_pred)
    root = np.sqrt(squared_error)
    return float(root)

def mae(y_true, y_pred):
    """Return the mean absolute error of ``y_pred`` against ``y_true`` as a float."""
    abs_error = mean_absolute_error(y_true, y_pred)
    return float(abs_error)

def mape(y_true, y_pred):
    """Return the mean absolute percentage error, in percent, as a float.

    Target magnitudes smaller than 1e-8 are replaced by 1e-8 in the
    denominator so that (near-)zero targets do not cause a division by zero.
    """
    actual = np.array(y_true)
    magnitude = np.abs(actual)
    safe_denominator = np.where(magnitude < 1e-8, 1e-8, magnitude)
    relative_error = np.abs((actual - y_pred) / safe_denominator)
    return float(np.mean(relative_error) * 100)

# ---------------------------
# Synthetic multivariate data
# ---------------------------
def generate_multivariate_series(n_steps=3000, n_features=5, seed=42):
    """Generate a synthetic multivariate series as a DataFrame.

    Each column ``f{i}`` is the sum of a per-feature offset plus a shared
    power-law trend, two sinusoids (periods ``24 + 3*i`` and ``168 + 5*i``)
    whose amplitude is modulated over time, and Gaussian noise.

    Args:
        n_steps: Number of rows (time steps) to generate.
        n_features: Number of columns to generate.
        seed: Value passed to ``np.random.seed``; note this reseeds NumPy's
            global random generator.

    Returns:
        DataFrame of shape ``(n_steps, n_features)`` with columns
        ``f0 .. f{n_features-1}`` and an index named ``"t"``.
    """
    np.random.seed(seed)
    t = np.arange(n_steps)
    trend = 0.001 * (t ** 1.2)
    phase = 2 * np.pi * t  # shared numerator of every sinusoid argument

    columns = {}
    for i in range(n_features):
        short_period = 24 + 3 * i
        long_period = 168 + 5 * i

        short_wave = 2 * np.sin(phase / short_period)
        long_wave = 1 * np.sin(phase / long_period)
        # Slowly varying amplitude envelope; the phase shift differs per feature.
        envelope = 1 + 0.001 * t * np.sin(phase / 1000 + i)
        level = 0.5 * (i + 1) + 0.3 * trend
        # One noise draw per feature, in feature order.
        noise = 0.5 * np.random.randn(n_steps)

        columns[f"f{i}"] = level + envelope * (short_wave + long_wave) + noise

    frame = pd.DataFrame(columns)
    frame.index.name = "t"
    logger.info("Generated dataset: %s", frame.shape)
    return frame

# ---------------------------
# Sliding window generator
# ---------------------------
def make_windows(df, input_len, output_len, target_col="f0"):
    """Slice ``df`` into overlapping (input, target) windows with stride 1.

    Args:
        df: DataFrame whose rows are consecutive time steps.
        input_len: Number of rows in each input window.
        output_len: Number of rows of ``target_col`` that follow each input
            window and form its target.
        target_col: Name of the column to forecast.

    Returns:
        Tuple ``(X, y, starts)``:
        ``X`` is float32 of shape ``(n_samples, input_len, n_columns)``,
        ``y`` is float32 of shape ``(n_samples, output_len)``,
        ``starts`` holds the first row index of each input window.

    Raises:
        ValueError: If ``df`` is too short for even one window, or if
            ``target_col`` is not one of ``df``'s columns.
    """
    values = df.values
    n_windows = len(df) - input_len - output_len + 1
    if n_windows <= 0:
        raise ValueError("Not enough data for input/output lengths")

    target_pos = list(df.columns).index(target_col)

    # Row-index grids: one row per window, one column per step in the window.
    starts = np.arange(n_windows, dtype=int)
    input_rows = starts[:, None] + np.arange(input_len)
    target_rows = starts[:, None] + input_len + np.arange(output_len)

    X = values[input_rows].astype(np.float32)
    y = values[target_rows, target_pos].astype(np.float32)
    return X, y, starts

# ---------------------------
# Seq2Seq with Global Attention
# ---------------------------
def build_seq2seq_attention(input_len, n_features, output_len=24, enc_units=64, dec_units=64, dropout=0.2):
    """Build and compile the encoder/attention/decoder forecasting model.

    The input passes through a bidirectional LSTM encoder; a Keras
    ``Attention`` layer is applied with the encoder output as both of its
    inputs; the result is averaged over time into one context vector, which
    is repeated ``output_len`` times and fed to an LSTM decoder. A
    time-distributed ``Dense(1)`` produces one value per output step.

    Args:
        input_len: Number of time steps in each input window.
        n_features: Number of features per time step.
        output_len: Number of steps the model predicts.
        enc_units: Units of the encoder LSTM (per direction).
        dec_units: Units of the decoder LSTM.
        dropout: Dropout rate passed to both LSTM layers.

    Returns:
        A compiled Keras ``Model`` (Adam with learning rate 0.001, MSE loss,
        MAE metric) mapping ``(input_len, n_features)`` to ``(output_len,)``.
        The model summary is written to the module logger at INFO level.
    """
    history = Input(shape=(input_len, n_features))

    # Encoder: keep the full output sequence so attention can see every step.
    encoder = layers.Bidirectional(
        layers.LSTM(enc_units, return_sequences=True, dropout=dropout)
    )
    encoded = encoder(history)

    # Self-attention over the encoder states, pooled into one context vector.
    attended = layers.Attention()([encoded, encoded])
    context_vector = layers.GlobalAveragePooling1D()(attended)

    # Decoder: the same context vector is presented at every output step.
    decoder_input = layers.RepeatVector(output_len)(context_vector)
    decoder = layers.LSTM(dec_units, return_sequences=True, dropout=dropout)
    decoded = decoder(decoder_input)

    # One scalar per output step, then drop the trailing singleton axis.
    per_step = layers.TimeDistributed(layers.Dense(1))(decoded)
    forecast = layers.Reshape((output_len,))(per_step)

    network = Model(history, forecast)
    network.compile(optimizer=Adam(0.001), loss="mse", metrics=["mae"])
    network.summary(print_fn=logger.info)
    return network

# ---------------------------
# Baselines
# ---------------------------
def baseline_sarimax(train_series, steps):
    """Forecast ``steps`` values with SARIMAX, falling back to simpler models.

    Tries, in order, a seasonal (1,1,1)x(1,1,1,24) model, a non-seasonal
    (1,1,1) model and an AR(1) model, returning the forecast of the first one
    that fits. If every configuration fails, the last observed value is
    repeated ``steps`` times.

    Args:
        train_series: pandas Series of training observations.
        steps: Number of future steps to forecast.

    Returns:
        1-D NumPy array of length ``steps``.
    """
    configs = [
        ((1, 1, 1), (1, 1, 1, 24)),
        ((1, 1, 1), (0, 0, 0, 0)),
        ((1, 0, 0), (0, 0, 0, 0)),
    ]
    for order, seasonal_order in configs:
        try:
            candidate = sm.tsa.SARIMAX(
                train_series,
                order=order,
                seasonal_order=seasonal_order,
                enforce_stationarity=False,
                enforce_invertibility=False,
            )
            fitted = candidate.fit(disp=False)
            return fitted.get_forecast(steps).predicted_mean.values
        except Exception as exc:
            # Best-effort baseline: a failed fit moves on to the next, simpler
            # configuration. Catching Exception (not a bare except) lets
            # KeyboardInterrupt/SystemExit propagate, and the failure is logged
            # instead of being silently discarded.
            logger.warning(
                "SARIMAX order=%s seasonal_order=%s failed: %s",
                order, seasonal_order, exc,
            )

    logger.warning("All SARIMAX configurations failed; repeating last observed value")
    return np.repeat(train_series.iloc[-1], steps)

def baseline_exp_smoothing(train_series, steps):
    """Forecast ``steps`` values with Holt-Winters exponential smoothing.

    Uses an additive trend, plus an additive seasonal component with period
    24 when the series has more than 48 observations. If the model cannot be
    built or fitted, the last observed value is repeated ``steps`` times.

    Args:
        train_series: pandas Series of training observations.
        steps: Number of future steps to forecast.

    Returns:
        1-D NumPy array of length ``steps``.
    """
    # Only request a seasonal component when there are more than two full
    # 24-step cycles of data.
    seasonal = "add" if len(train_series) > 48 else None
    try:
        smoother = ExponentialSmoothing(
            train_series,
            trend="add",
            seasonal=seasonal,
            seasonal_periods=24,
        )
        fitted = smoother.fit()
        return fitted.forecast(steps).values
    except Exception as exc:
        # Best-effort baseline: fall back to a naive forecast, but let
        # KeyboardInterrupt/SystemExit propagate and record why the fit failed.
        logger.warning(
            "Exponential smoothing failed (%s); repeating last observed value", exc
        )
        return np.repeat(train_series.iloc[-1], steps)

# ---------------------------
# Data validation
# ---------------------------
def validate_data(df):
    """Return a cleaned copy of ``df``; the input frame is not modified.

    Missing values are filled forward and then backward. Columns whose
    standard deviation is exactly zero get Gaussian jitter (standard
    deviation 1e-6, drawn from NumPy's global random generator) added so
    they are no longer constant.

    Args:
        df: DataFrame to clean.

    Returns:
        The cleaned DataFrame copy.
    """
    cleaned = df.copy()

    has_missing = cleaned.isnull().any().any()
    if has_missing:
        cleaned = cleaned.ffill().bfill()

    column_std = cleaned.std()
    constant_columns = cleaned.columns[column_std == 0]
    n_rows = len(cleaned)
    for col in constant_columns:
        jitter = np.random.normal(0, 1e-6, n_rows)
        cleaned[col] = cleaned[col] + jitter

    return cleaned

# ---------------------------
# Full pipeline
# ---------------------------
def train_and_evaluate(df, input_len=168, output_len=24, epochs=30, batch_size=32, target_col="f0"):
    """Train the seq2seq attention model and evaluate it on a held-out tail.

    The rows of ``df`` are split chronologically into roughly 80% train,
    10% validation and 10% test. Features are standardized with statistics
    fitted on the training rows only. Sliding windows are assigned to a split
    by where their *target* rows fall, so no training or validation target
    ever comes from a later split; windows whose target straddles a split
    boundary are dropped. Input windows of validation/test samples may reach
    back into earlier splits (history only).

    Args:
        df: DataFrame of consecutive time steps, one column per feature.
        input_len: Number of past steps fed to the model.
        output_len: Number of future steps predicted.
        epochs: Maximum number of training epochs.
        batch_size: Training batch size.
        target_col: Name of the column to forecast.

    Returns:
        Tuple ``(model, y_test_orig, y_pred_orig, starts_test, result_metrics)``:
        the trained model, the test targets and predictions in the original
        (unscaled) units with shape ``(n_test_windows, output_len)``, the
        first input row of each test window, and a dict with ``"rmse"``,
        ``"mae"`` and ``"mape"``.

    Raises:
        ValueError: If the data is too short to produce at least one window
            in each of the train, validation and test splits.
    """
    df = validate_data(df)

    # Chronological split expressed as row boundaries:
    # train = [0, train_limit), val = [train_limit, val_limit), test = rest.
    n = len(df)
    test_n = int(0.1 * n)
    val_n = int(0.1 * n)
    train_limit = n - test_n - val_n
    val_limit = n - test_n

    # Fit the scaler on training rows only so later rows do not leak into it.
    scaler = StandardScaler().fit(df.iloc[:train_limit])
    df_scaled = pd.DataFrame(scaler.transform(df), columns=df.columns, index=df.index)

    X, y, starts = make_windows(df_scaled, input_len, output_len, target_col)

    # Each window's target occupies rows [target_start, target_end).
    # Assigning by target span (rather than by where the input ends) keeps
    # every target inside its own split.
    target_start = starts + input_len
    target_end = target_start + output_len
    train_idx = np.flatnonzero(target_end <= train_limit)
    val_idx = np.flatnonzero((target_start >= train_limit) & (target_end <= val_limit))
    test_idx = np.flatnonzero(target_start >= val_limit)

    for split_name, split_idx in (("train", train_idx), ("validation", val_idx), ("test", test_idx)):
        if split_idx.size == 0:
            raise ValueError(
                f"Not enough data to build any {split_name} window "
                f"(rows={n}, input_len={input_len}, output_len={output_len})"
            )

    X_train, y_train = X[train_idx], y[train_idx]
    X_val, y_val = X[val_idx], y[val_idx]
    X_test, y_test = X[test_idx], y[test_idx]
    starts_test = starts[test_idx]

    # Build model
    model = build_seq2seq_attention(input_len, df.shape[1], output_len=output_len)

    # Make sure the directory that will hold the checkpoint exists.
    ckpt = "seq2seq_attention_best.h5"
    os.makedirs(os.path.dirname(os.path.abspath(ckpt)), exist_ok=True)

    callbacks = [
        EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True),
        ModelCheckpoint(ckpt, save_best_only=True)
    ]

    model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=epochs, batch_size=batch_size,
        callbacks=callbacks, verbose=1
    )

    # Predictions (still in standardized units)
    y_pred_scaled = model.predict(X_test)

    # Undo the target column's standardization to report errors in original units.
    idx = list(df.columns).index(target_col)
    mean_t = scaler.mean_[idx]
    scale_t = scaler.scale_[idx]

    y_test_orig = y_test * scale_t + mean_t
    y_pred_orig = y_pred_scaled * scale_t + mean_t

    # Metrics over all test windows and horizons pooled together
    flat_true = y_test_orig.flatten()
    flat_pred = y_pred_orig.flatten()
    result_metrics = {
        "rmse": rmse(flat_true, flat_pred),
        "mae": mae(flat_true, flat_pred),
        "mape": mape(flat_true, flat_pred)
    }

    return model, y_test_orig, y_pred_orig, starts_test, result_metrics


# ---------------------------
# MAIN
# ---------------------------
# Script entry point: generate the synthetic data, plot the target feature,
# train/evaluate the model, and print the test metrics.
if __name__ == "__main__":
    # Synthetic dataset with the generator's default size and seed.
    df = generate_multivariate_series()

    # Quick visual check of the column that is forecast by default.
    plt.plot(df.index, df["f0"])
    plt.title("Feature f0")
    plt.show()

    # Runs the full pipeline with its default window lengths and epochs.
    model, y_true, y_pred, starts_test, metrics = train_and_evaluate(df)

    # One line per metric, name left-padded to 10 characters, 4 decimals.
    print("\nFinal Metrics:")
    for k, v in metrics.items():
        print(f"{k:10}: {v:.4f}")

# Output: ModuleNotFoundError: No module named 'tensorflow'