# In [None]:  (Jupyter notebook cell marker left over from export)

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout, Input
from sklearn.preprocessing import RobustScaler
import os
import json
import matplotlib.pyplot as plt
from datetime import timedelta
import matplotlib.dates as mdates  # For better date formatting on the x-axis

# SMAPE calculation function
def smape(yTrue, yPred):
    """Return the symmetric mean absolute percentage error (SMAPE), in percent.

    Computes ``mean(2 * |yPred - yTrue| / (|yTrue| + |yPred|)) * 100``.
    Positions where both values are zero contribute 0 to the mean instead of
    triggering a division by zero.

    Args:
        yTrue: Observed values (NumPy array, list, or scalar).
        yPred: Predicted values, broadcastable against ``yTrue``.

    Returns:
        The SMAPE as a float; 0 means a perfect match and 200 is the maximum
        for real-valued inputs.
    """
    # Coerce to float arrays so plain lists work and unsigned-integer inputs
    # cannot wrap around when the prediction is smaller than the actual value.
    actual = np.asarray(yTrue, dtype=float)
    predicted = np.asarray(yPred, dtype=float)

    denominator = np.abs(actual) + np.abs(predicted)
    # A zero denominator implies a zero numerator, so dividing by 1 yields 0.
    safe_denominator = np.where(denominator == 0, 1, denominator)
    return np.mean(2 * np.abs(predicted - actual) / safe_denominator) * 100

# Exponential Smoothing for Time Series
def exponential_smoothing(series, alpha):
    """Return the simple exponentially smoothed version of ``series``.

    The first output equals the first observation; every later output is
    ``alpha * observation + (1 - alpha) * previous_output``.

    Args:
        series: Indexable, non-empty sequence of numeric observations.
        alpha: Smoothing factor applied to the current observation.

    Returns:
        NumPy array with one smoothed value per observation.
    """
    previous = series[0]
    smoothed = [previous]
    for position in range(1, len(series)):
        # Blend the new observation with the running smoothed value.
        previous = alpha * series[position] + (1 - alpha) * previous
        smoothed.append(previous)
    return np.array(smoothed)

# Double Exponential Smoothing for Time Series
def double_exponential_smoothing(series, alpha, beta):
    """Apply double (level + trend) exponential smoothing to a time series.

    The level is smoothed with ``alpha`` and the trend with ``beta``. The
    first output is the first observation; each later output is the updated
    ``level + trend`` for that step.

    Args:
        series: Indexable sequence of numeric observations.
        alpha: Smoothing factor for the level.
        beta: Smoothing factor for the trend.

    Returns:
        NumPy array with one value per observation. An empty series yields an
        empty array, and a single observation is returned unchanged.
    """
    n_points = len(series)
    if n_points == 0:
        return np.array([])

    level = series[0]
    # The initial trend needs two observations; with only one there is no
    # slope to estimate, so start from a flat trend instead of indexing past
    # the end of the series.
    trend = series[1] - series[0] if n_points > 1 else 0

    result = [level]
    for n in range(1, n_points):
        value = series[n]
        last_level, level = level, alpha * value + (1 - alpha) * (level + trend)
        trend = beta * (level - last_level) + (1 - beta) * trend
        result.append(level + trend)
    return np.array(result)

# Prepare the data for LSTM input
def prepare_data(data, n_input):
    """Slice a series into sliding-window (input, target) pairs.

    Each input is a run of ``n_input`` consecutive items of ``data`` and its
    target is the item that immediately follows that run.

    Args:
        data: Sliceable sequence (e.g. a NumPy array) of observations.
        n_input: Number of time steps in each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays holding the windows and their
        targets; both are empty when ``data`` has at most ``n_input`` items.
    """
    n_windows = len(data) - n_input
    windows = [data[start:start + n_input] for start in range(n_windows)]
    targets = [data[start + n_input] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

# Build LSTM Model with Monte Carlo Dropout for Uncertainty Estimation
def build_mc_dropout_model(n_input, units, dropout_rate):
    """Build and compile a stacked LSTM regressor with a Dropout layer after each LSTM.

    Args:
        n_input: Number of time steps in each input sequence; the model input
            shape is ``(n_input, 1)`` (one feature per step).
        units: Sequence of LSTM layer sizes, one entry per stacked LSTM layer.
        dropout_rate: Rate passed to the Dropout layer that follows every LSTM.

    Returns:
        A compiled Keras ``Sequential`` model (Adam optimizer, MSE loss) with a
        single-unit Dense output.

    Raises:
        ValueError: If ``units`` is empty.
    """
    if len(units) == 0:
        raise ValueError("units must contain at least one LSTM layer size")

    model = Sequential()
    model.add(Input(shape=(n_input, 1)))  # Input: sequence of n_input time steps with one feature

    last_index = len(units) - 1
    for index, unit in enumerate(units):
        # Every LSTM except the last must return the full sequence, otherwise
        # the following LSTM layer receives 2-D input and the stack cannot be
        # built when three or more layers are requested.
        model.add(LSTM(unit, activation='relu', return_sequences=index < last_index))
        model.add(Dropout(dropout_rate))  # Dropout after each recurrent layer

    model.add(Dense(1))  # Output layer for regression
    model.compile(optimizer='adam', loss='mse')  # Mean squared error loss
    return model

# Monte Carlo Dropout prediction function
def mc_dropout_predict(model, X, n_iter=100):
    """Estimate a prediction and its spread from repeated stochastic forward passes.

    The model is called ``n_iter`` times with ``training=True`` so that its
    dropout layers stay active, and the outputs are aggregated element-wise.

    Args:
        model: Callable accepting ``(X, training=True)`` and returning an
            array-like prediction.
        X: Input batch forwarded unchanged to ``model`` on every pass.
        n_iter: Number of stochastic forward passes.

    Returns:
        Tuple ``(mean, std)`` computed across the passes (axis 0).
    """
    stochastic_passes = []
    for _ in range(n_iter):
        stochastic_passes.append(model(X, training=True))  # Dropout active during inference
    samples = np.array(stochastic_passes)
    return np.mean(samples, axis=0), np.std(samples, axis=0)

# Generate future dates for forecasting
def generate_future_dates(start_date, periods):
    """Return ``periods`` monthly dates that follow ``start_date``.

    Dates advance by whole calendar months, so they stay aligned with a
    monthly index instead of drifting the way fixed 30-day steps do. The first
    returned date is one month after ``start_date``; ``start_date`` itself is
    not included, so callers can prepend it without duplicating a point.

    Args:
        start_date: Last known date (e.g. a ``pandas.Timestamp``).
        periods: Number of future monthly dates to generate.

    Returns:
        List of ``periods`` dates; empty when ``periods`` is 0.
    """
    return [start_date + pd.DateOffset(months=step) for step in range(1, periods + 1)]

# Step-by-step forecasting using Monte Carlo Dropout
def step_by_step_mc_dropout_forecasting(attack_data, model, scaler, n_input, forecast_horizon, n_iterations=100):
    """Forecast recursively, one step at a time, with Monte Carlo Dropout.

    The last ``n_input`` values of ``attack_data`` seed a rolling window. At
    each step the window is passed to ``mc_dropout_predict``; the mean
    prediction is recorded and then pushed into the window as the newest
    value for the following step.

    Args:
        attack_data: Scaled series; its last ``n_input`` entries are reshaped
            to ``(1, n_input, 1)``.
        model: Model forwarded to ``mc_dropout_predict``.
        scaler: Object whose ``inverse_transform`` maps scaled values back to
            the original scale.
        n_input: Window length fed to the model.
        forecast_horizon: Number of steps to forecast.
        n_iterations: Stochastic forward passes per step.

    Returns:
        Tuple ``(forecasts, lower_bounds, upper_bounds)`` of 1-D arrays on the
        original scale, where the bounds are mean -/+ 1.96 standard deviations.
    """
    def _to_original_scale(values):
        # The scaler expects a 2-D column; flatten back to 1-D afterwards.
        return scaler.inverse_transform(np.array(values).reshape(-1, 1)).flatten()

    window = attack_data[-n_input:].reshape(1, n_input, 1)
    point_forecasts, lows, highs = [], [], []

    for _ in range(forecast_horizon):
        step_mean, step_std = mc_dropout_predict(model, window, n_iter=n_iterations)
        point = step_mean[0, 0]
        point_forecasts.append(point)

        # 95% interval under a normal approximation of the MC samples
        lows.append(point - 1.96 * step_std[0, 0])
        highs.append(point + 1.96 * step_std[0, 0])

        # Drop the oldest value and place the new forecast at the end
        window = np.roll(window, -1, axis=1)
        window[0, -1, 0] = point

    return (
        _to_original_scale(point_forecasts),
        _to_original_scale(lows),
        _to_original_scale(highs),
    )

# Plot forecast and actual data with overlap
def forecast_univariate_mc_dropout(target_variable, attack_data, scaler, model, forecast_horizon, data, last_date, output_plot_dir):
    """Forecast ``target_variable`` with Monte Carlo Dropout, then plot and save the result.

    Args:
        target_variable: Column of ``data`` being forecast; also used in the
            plot title and the output file name.
        attack_data: Scaled series passed to
            ``step_by_step_mc_dropout_forecasting``.
        scaler: Scaler used to map forecasts back to the original scale.
        model: Trained model; its ``input_shape[1]`` gives the window length.
        forecast_horizon: Number of future steps to forecast.
        data: DataFrame holding the historical series, indexed by date.
        last_date: Date of the last historical observation.
        output_plot_dir: Directory in which the PNG plot is written.

    Returns:
        None. The plot is saved, shown, and its path is printed.
    """
    # Take the window length from the model itself instead of relying on a
    # module-level ``best_params`` global being defined by the caller.
    n_input = model.input_shape[1]

    forecasts_inv, lower_bounds_inv, upper_bounds_inv = step_by_step_mc_dropout_forecasting(
        attack_data, model, scaler, n_input, forecast_horizon
    )

    # Generate future dates for the forecast
    future_dates = generate_future_dates(last_date, forecast_horizon)
    plot_dates = [last_date] + future_dates

    # Seamless overlap: start the forecast and both bounds at the last
    # historical value so the curves connect to the actual data.
    last_actual = data[target_variable].iloc[-1]
    seamless_forecast = np.insert(forecasts_inv, 0, last_actual)
    lower_bounds_inv = np.insert(lower_bounds_inv, 0, last_actual)
    upper_bounds_inv = np.insert(upper_bounds_inv, 0, last_actual)

    # Plot historical data and forecast with overlap and confidence intervals
    figure = plt.figure(figsize=(15, 8))
    plt.plot(data.index, data[target_variable], label='Actual Data', color='blue', linestyle='-')
    plt.plot(plot_dates, seamless_forecast, label='Forecast', color='red', linestyle='-')
    plt.fill_between(plot_dates, lower_bounds_inv, upper_bounds_inv, color='green', alpha=0.3, label='95% Confidence Interval')

    plt.title(f'{target_variable} - Univariate')
    plt.xlabel('Year')
    plt.ylabel('Incident Count')
    plt.legend()

    plt.tight_layout()

    # Save the plot for the original forecast
    plot_name = f'{target_variable}_forecast_mc_dropout.png'
    plot_path = os.path.join(output_plot_dir, plot_name)
    # A target name containing a path separator (e.g. "A/B-ALL") places the
    # file in a subdirectory; create it so savefig does not fail.
    plot_parent = os.path.dirname(plot_path)
    if plot_parent:
        os.makedirs(plot_parent, exist_ok=True)
    plt.savefig(plot_path)
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(figure)

    print(f"Forecast plot for {target_variable} saved to {plot_path}.")

# Function to run seed-based forecasting and show results
def run_seed_forecasting(target_variable, selected_data, scaler, model, forecast_horizon, data, last_date, seed=None):
    """Run a recursive Monte Carlo Dropout forecast under an optional seed and plot it.

    When ``seed`` is given, the NumPy and TensorFlow global seeds are set
    before forecasting. The forecast is shown on screen only; nothing is
    saved or returned.

    Args:
        target_variable: Column of ``data`` being forecast; used in labels.
        selected_data: Scaled series whose last ``model.input_shape[1]`` rows
            seed the rolling input window.
        scaler: Scaler used to map forecasts back to the original scale.
        model: Trained model; ``input_shape[1]`` is the window length and
            ``input_shape[2]`` the number of features.
        forecast_horizon: Number of future steps to forecast.
        data: DataFrame holding the historical series, indexed by date.
        last_date: Date of the last historical observation.
        seed: Optional integer seed.
    """
    if seed is not None:
        np.random.seed(seed)  # Seed NumPy's global RNG
        tf.random.set_seed(seed)  # Seed TensorFlow's global RNG
        print(f"Running forecast with seed {seed}")

    n_steps = model.input_shape[1]
    n_features = model.input_shape[2]

    # Recursive forecasting: each mean prediction becomes the newest input
    window = selected_data[-n_steps:]
    point_forecasts, lows, highs = [], [], []
    for _ in range(forecast_horizon):
        batch = window.reshape(1, n_steps, n_features)
        step_mean, step_std = mc_dropout_predict(model, batch)
        point = step_mean[0, 0]
        point_forecasts.append(point)

        # 95% interval on the scaled values
        lows.append(point - 1.96 * step_std[0, 0])
        highs.append(point + 1.96 * step_std[0, 0])

        window = np.roll(window, -1, axis=0)
        window[-1, -1] = point

    # Inverse transform: the target occupies the last column of a zero-padded
    # array whose width matches the model's feature count.
    padded = np.zeros((len(point_forecasts), n_features))
    rescaled = []
    for scaled_values in (point_forecasts, lows, highs):
        padded[:, -1] = scaled_values
        rescaled.append(scaler.inverse_transform(padded)[:, -1])
    forecasts_inv, lower_bounds_inv, upper_bounds_inv = rescaled

    # Dates for the plot: the last historical date followed by the future dates
    future_dates = [last_date] + generate_future_dates(last_date, forecast_horizon)

    # Start every curve at the last historical value for a seamless plot
    last_actual = data[target_variable].iloc[-1]
    seamless_forecast = np.insert(forecasts_inv, 0, last_actual)
    lower_bounds_inv = np.insert(lower_bounds_inv, 0, last_actual)
    upper_bounds_inv = np.insert(upper_bounds_inv, 0, last_actual)

    # Plot the historical data and forecast
    plt.figure(figsize=(15, 8))
    plt.plot(data.index, data[target_variable], label='Actual Data', color='blue', linestyle='-')
    plt.plot(future_dates, seamless_forecast, label=f'Forecast (Seed {seed})', color='red', linestyle='--')
    plt.fill_between(future_dates, lower_bounds_inv, upper_bounds_inv, color='green', alpha=0.3, label='95% Confidence Interval')

    plt.title(f'{target_variable} - Forecast with Seed {seed}')
    plt.xlabel('Year')
    plt.ylabel('Incident Count')
    plt.legend()

    # One tick per year, labelled with the four-digit year
    date_axis = plt.gca().xaxis
    date_axis.set_major_locator(mdates.YearLocator())
    date_axis.set_major_formatter(mdates.DateFormatter('%Y'))
    plt.gcf().autofmt_xdate()  # Rotate dates for better readability

    plt.tight_layout()
    plt.show()




# Main execution for univariate forecasting with Monte Carlo Dropout
# Script entry point: for every attack type, train an LSTM with the stored
# hyperparameters, save a Monte Carlo Dropout forecast plot, and then show
# three additional seed-based forecasts.
if __name__ == "__main__":
    # Load the dataset and index it by date; the 'Date' column is parsed with
    # the '%b-%y' format (abbreviated month name and two-digit year).
    data = pd.read_csv('FinalDataset.csv')
    data['Date'] = pd.to_datetime(data['Date'], format='%b-%y')
    data.set_index('Date', inplace=True)

    # Columns of the dataset to forecast, one model per column
    attacks = ['DDoS-ALL', 'Phishing-ALL', 'Ransomware-ALL', 'Password Attack-ALL', 'SQL Injection-ALL', 'Account Hijacking-ALL', 
            'Defacement-ALL', 'Trojan-ALL', 'Vulnerability-ALL', 'Zero-day-ALL', 'Malware-ALL', 'Advanced persistent threat-ALL', 
            'XSS-ALL', 'Data Breach-ALL', 'Disinformation/Misinformation-ALL', 'Targeted Attack-ALL','Adware-ALL',
            'Brute Force Attack-ALL', 'Malvertising-ALL', 'Backdoor-ALL', 'Botnet-ALL', 'Cryptojacking-ALL',
            'Worms-ALL', 'Spyware-ALL']

    # Input directory with one '<attack>_best_params.json' file per attack,
    # and output directory for the saved forecast plots (created if missing)
    param_dir = 'univariateparam25'
    output_plot_dir = 'univariate_forecast_plots_36months_U'
    os.makedirs(output_plot_dir, exist_ok=True)

    n_forecast = 36  # Forecast horizon (36 months)

    for attack in attacks:
        print(f"Forecasting for attack: {attack}")

        # Load best parameters for the current attack; the JSON file stores
        # them under the 'Best Parameters' key
        with open(os.path.join(param_dir, f'{attack}_best_params.json'), 'r') as f:
            best_params = json.load(f)['Best Parameters']

        # Scale the series as a single-column 2-D array; the scaler is fitted
        # on the entire series for this attack
        attack_data = data[attack].values.reshape(-1, 1)
        scaler = RobustScaler()
        attack_data_scaled = scaler.fit_transform(attack_data)

        # Build the model using the best hyperparameters with Monte Carlo Dropout
        model = build_mc_dropout_model(best_params['n_input'], best_params['units'], best_params['dropout_rate'])

        # Build sliding-window training pairs and shape the inputs as
        # (samples, time steps, 1 feature)
        X_train, y_train = prepare_data(attack_data_scaled, best_params['n_input'])
        X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))

        # Train on the full series; the batch size is fixed at 40 and the
        # epoch count comes from the stored parameters
        model.fit(X_train, y_train, epochs=best_params['n_epochs'], batch_size=40, verbose=0)

        # Forecast future values using the original model and save the plot
        forecast_univariate_mc_dropout(attack, attack_data_scaled, scaler, model, n_forecast, data, data.index[-1], output_plot_dir)

        # Perform seed-based forecasting (showing results only)
        for seed in [1, 2, 3]:
            run_seed_forecasting(attack, attack_data_scaled, scaler, model, n_forecast, data, data.index[-1], seed=seed)

# NOTE(review): this print sits outside the ``if __name__ == "__main__"``
# guard, so it also runs when the module is imported — confirm that is intended.
print("Univariate forecasting seed-based forecasting completed for all attacks.")
