In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from scipy.stats import boxcox, invboxcox
import tensorflow as tf
import tensorflow_probability as tfp

# Set random seed for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

def load_and_prepare_data(file_path, attack_column):
    data = pd.read_csv(file_path)
    attack_data = data[attack_column].values
    return attack_data

def exponential_smoothing(series, alpha):
    result = [series[0]]
    for n in range(1, len(series)):
        result.append(alpha * series[n] + (1 - alpha) * result[n-1])
    return np.array(result)

def double_exponential_smoothing(series, alpha, beta):
    result = [series[0]]
    level, trend = series[0], series[1] - series[0]
    for n in range(1, len(series)):
        value = series[n]
        last_level, level = level, alpha * value + (1 - alpha) * (level + trend)
        trend = beta * (level - last_level) + (1 - beta) * trend
        result.append(level + trend)
    return np.array(result)

def check_stationarity(timeseries):
    result = adfuller(timeseries)
    return result[1] <= 0.05

def preprocess_data(data, alpha=0.2, beta=0.1):
    # Apply Box-Cox transformation
    data_boxcox, lambda_param = boxcox(data + 1)  # Add 1 to avoid log(0)
    
    # Apply double exponential smoothing
    smoothed_data = double_exponential_smoothing(data_boxcox, alpha, beta)
    
    # Check stationarity
    if not check_stationarity(smoothed_data):
        # If not stationary, take first difference
        smoothed_data = np.diff(smoothed_data)
    
    # Normalize the data
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(smoothed_data.reshape(-1, 1))
    
    return scaled_data, scaler, lambda_param

def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:(i + seq_length)])
        y.append(data[i + seq_length])
    return np.array(X), np.array(y)

def build_bayesian_lstm_model(seq_length, n_features=1):
    model = Sequential([
        tfp.layers.DenseVariational(64, activation='relu', input_shape=(seq_length, n_features)),
        tfp.layers.LSTMCellReparameterization(64, recurrent_dropout=0.2),
        tfp.layers.DenseVariational(1)
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])
    return model

def create_model(seq_length, n_features=1):
    def model():
        return build_bayesian_lstm_model(seq_length, n_features)
    return model

def train_model(X_train, y_train, X_val, y_val, seq_length):
    model = KerasRegressor(build_fn=create_model(seq_length), epochs=100, batch_size=32, verbose=0)
    
    # Define hyperparameters to search
    param_dist = {
        'epochs': [50, 100, 150],
        'batch_size': [16, 32, 64],
        'learning_rate': [0.001, 0.0001, 0.00001]
    }
    
    # Perform random search
    random_search = RandomizedSearchCV(estimator=model, param_distributions=param_dist, n_iter=10, cv=3, verbose=1, n_jobs=-1)
    random_search_result = random_search.fit(X_train, y_train, validation_data=(X_val, y_val), callbacks=[EarlyStopping(monitor='val_loss', patience=10)])
    
    best_model = random_search_result.best_estimator_.model
    return best_model, random_search_result.best_params_

def evaluate_model(model, X_test, y_test, scaler, lambda_param):
    predictions = model.predict(X_test)
    
    # Inverse transform predictions and actual values
    predictions = scaler.inverse_transform(predictions)
    y_test = scaler.inverse_transform(y_test.reshape(-1, 1))
    
    # Inverse Box-Cox transform
    predictions = invboxcox(predictions, lambda_param) - 1
    y_test = invboxcox(y_test, lambda_param) - 1
    
    mae = mean_absolute_error(y_test, predictions)
    rmse = np.sqrt(mean_squared_error(y_test, predictions))
    
    return predictions, mae, rmse

def plot_results(actual, predicted, title):
    plt.figure(figsize=(12, 6))
    plt.plot(actual, label='Actual')
    plt.plot(predicted, label='Predicted')
    plt.title(title)
    plt.legend()
    plt.show()

def forecast_future(model, last_sequence, n_future, scaler, lambda_param):
    future_predictions = []
    current_sequence = last_sequence.copy()
    
    for _ in range(n_future):
        next_pred = model.predict(current_sequence.reshape(1, current_sequence.shape[0], 1))
        future_predictions.append(next_pred[0, 0])
        current_sequence = np.roll(current_sequence, -1)
        current_sequence[-1] = next_pred
    
    future_predictions = np.array(future_predictions).reshape(-1, 1)
    future_predictions = scaler.inverse_transform(future_predictions)
    future_predictions = invboxcox(future_predictions, lambda_param) - 1
    
    return future_predictions

# Main execution
if __name__ == "__main__":
    file_path = 'FinalDataset.csv'
    attack_column = 'DDoS-ALL'  # Change this to the desired attack type
    
    # Load and preprocess data
    raw_data = load_and_prepare_data(file_path, attack_column)
    processed_data, scaler, lambda_param = preprocess_data(raw_data)
    
    # Create sequences
    seq_length = 12  # Use 12 months of historical data to predict the next month
    X, y = create_sequences(processed_data, seq_length)
    
    # Split the data
    train_size = int(len(X) * 0.7)
    val_size = int(len(X) * 0.15)
    X_train, y_train = X[:train_size], y[:train_size]
    X_val, y_val = X[train_size:train_size+val_size], y[train_size:train_size+val_size]
    X_test, y_test = X[train_size+val_size:], y[train_size+val_size:]
    
    # Train the model
    model, best_params = train_model(X_train, y_train, X_val, y_val, seq_length)
    print("Best hyperparameters:", best_params)
    
    # Evaluate the model
    predictions, mae, rmse = evaluate_model(model, X_test, y_test, scaler, lambda_param)
    
    print(f"Mean Absolute Error: {mae:.2f}")
    print(f"Root Mean Squared Error: {rmse:.2f}")
    
    # Plot results
    plot_results(y_test, predictions, f"{attack_column} - Actual vs Predicted")
    
    # Make future predictions (36 months)
    future_months = 36
    last_sequence = X_test[-1]
    future_predictions = forecast_future(model, last_sequence, future_months, scaler, lambda_param)
    
    print("Predictions for the next 36 months:")
    for i, pred in enumerate(future_predictions):
        print(f"Month {i+1}: {pred[0]:.2f}")
    
    # Plot future predictions
    plt.figure(figsize=(12, 6))
    plt.plot(range(len(y_test)), y_test, label='Historical Data')
    plt.plot(range(len(y_test), len(y_test) + future_months), future_predictions, label='Future Predictions')
    plt.title(f"{attack_column} - Historical Data and Future Predictions")
    plt.legend()
    plt.show()

ModuleNotFoundError: No module named 'tensorflow.keras.wrappers'

In [6]:
pip install scikeras

Collecting scikeras
  Downloading scikeras-0.13.0-py3-none-any.whl.metadata (3.1 kB)
Collecting joblib>=1.2.0 (from scikit-learn>=1.4.2->scikeras)
  Using cached joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Downloading scikeras-0.13.0-py3-none-any.whl (26 kB)
Using cached joblib-1.4.2-py3-none-any.whl (301 kB)
Installing collected packages: joblib, scikeras
  Attempting uninstall: joblib
    Found existing installation: joblib 1.1.1
    Uninstalling joblib-1.1.1:
      Successfully uninstalled joblib-1.1.1
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
pandas-profiling 3.2.0 requires joblib~=1.1.0, but you have joblib 1.4.2 which is incompatible.[0m[31m
[0mSuccessfully installed joblib-1.4.2 scikeras-0.13.0
Note: you may need to restart the kernel to use updated packages.


In [9]:
pip install joblib==1.1.0


Collecting joblib==1.1.0
  Downloading joblib-1.1.0-py2.py3-none-any.whl.metadata (5.2 kB)
Downloading joblib-1.1.0-py2.py3-none-any.whl (306 kB)
Installing collected packages: joblib
  Attempting uninstall: joblib
    Found existing installation: joblib 1.4.2
    Uninstalling joblib-1.4.2:
      Successfully uninstalled joblib-1.4.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
scikit-learn 1.5.1 requires joblib>=1.2.0, but you have joblib 1.1.0 which is incompatible.[0m[31m
[0mSuccessfully installed joblib-1.1.0
Note: you may need to restart the kernel to use updated packages.


In [10]:
pip check


moviepy 1.0.3 has requirement decorator<5.0,>=4.0.2, but you have decorator 5.1.1.
scikit-learn 1.5.1 has requirement joblib>=1.2.0, but you have joblib 1.1.0.
pydantic-settings 2.4.0 has requirement pydantic>=2.7.0, but you have pydantic 1.10.2.
Note: you may need to restart the kernel to use updated packages.
