In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, IsolationForest
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
from imblearn.over_sampling import RandomOverSampler
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.figure_factory as ff
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Input

# Function to apply sliding time window across all stocks
def create_sliding_windows_for_all_stocks(df, window_size, step_size):
    """Build flattened cross-stock feature windows over consecutive dates.

    A window covers ``window_size`` consecutive distinct dates and contains,
    for every date, the rows of all stocks ordered by ``Name``. Sliding is
    done over *dates* (not over raw rows), so every window has the same
    length: ``window_size * n_stocks * 5`` values.

    Args:
        df: DataFrame with the columns ``date``, ``Name``, ``open``, ``high``,
            ``low``, ``close``, ``volume`` and ``next_day_trend``.
        window_size: Number of distinct dates per window (must be >= 1).
        step_size: Number of dates to advance between windows (must be >= 1).

    Returns:
        ``(windows, labels)`` as NumPy arrays. ``windows[i]`` is the flattened
        feature block of window ``i``; ``labels[i]`` is the most frequent
        ``next_day_trend`` value inside that window (ties resolve to the
        smallest value). Both arrays are empty when no complete window exists.

    Raises:
        ValueError: If ``window_size`` or ``step_size`` is smaller than 1.
    """
    if window_size < 1 or step_size < 1:
        raise ValueError("window_size and step_size must both be >= 1")

    feature_columns = ['open', 'high', 'low', 'close', 'volume']

    # Sort by date and then by ticker so that each day contributes its stocks
    # in the same order and feature positions line up across windows.
    df = df.sort_values(['date', 'Name'])
    n_stocks = df['Name'].nunique()

    # Pre-compute per-day blocks once instead of re-slicing the frame per window.
    day_features = []
    day_trends = []
    day_is_complete = []
    for _, day in df.groupby('date', sort=True):
        # A day is usable only if every stock appears exactly once.
        day_is_complete.append(len(day) == n_stocks and day['Name'].nunique() == n_stocks)
        day_features.append(day[feature_columns].values.flatten())
        day_trends.append(day['next_day_trend'].values)

    windows = []
    labels = []
    for start in range(0, len(day_features) - window_size + 1, step_size):
        stop = start + window_size
        if not all(day_is_complete[start:stop]):
            continue  # Skip incomplete windows where not all stocks are represented

        windows.append(np.concatenate(day_features[start:stop]))

        # Mode of the trend over the window; np.unique returns sorted values and
        # argmax picks the first maximum, so ties resolve to the smallest value.
        values, counts = np.unique(np.concatenate(day_trends[start:stop]), return_counts=True)
        labels.append(values[np.argmax(counts)])

    return np.array(windows), np.array(labels)

# Function to extract statistical features from sliding windows (if needed)
def extract_features(windows):
    """Summarise each window by mean, std, skewness and excess kurtosis.

    Args:
        windows: Iterable of 1-D numeric sequences (one per window).

    Returns:
        Array of shape ``(n_windows, 4)`` with the columns
        ``[mean, std, skew, kurtosis]`` (population moments, kurtosis minus 3).
        For a constant window (std == 0) the standardised moments are
        undefined; skew and kurtosis are reported as 0.0 instead of NaN so the
        result can be fed to scalers and models. Empty input yields an array
        of shape ``(0, 4)``.
    """
    features = []
    for window in windows:
        values = np.asarray(window, dtype=float)
        mean = np.mean(values)
        std = np.std(values)
        if std == 0:
            # Avoid 0/0: a flat window has no asymmetry or tail weight to measure.
            skew = 0.0
            kurtosis = 0.0
        else:
            centered = values - mean
            skew = np.mean(centered**3) / (std**3)
            kurtosis = np.mean(centered**4) / (std**4) - 3
        features.append([mean, std, skew, kurtosis])

    if not features:
        # Keep the result two-dimensional even when there are no windows.
        return np.empty((0, 4))
    return np.array(features)

# Function to build and compile the Autoencoder (training happens at the call site)
def build_autoencoder(input_dim):
    """Create a dense autoencoder and the encoder that shares its layers.

    The encoder narrows the input through 16, 8 and 4 ReLU units; the decoder
    widens it again through 8 and 16 ReLU units and ends in a linear layer of
    size ``input_dim``.

    Args:
        input_dim: Number of features per input sample.

    Returns:
        ``(autoencoder, encoder)``. The autoencoder is compiled with the Adam
        optimizer and MSE loss; the encoder maps inputs to the 4-unit code and
        is returned uncompiled.
    """
    inputs = Input(shape=(input_dim,))

    # Encoder stack down to the 4-unit bottleneck
    bottleneck = inputs
    for units in (16, 8, 4):
        bottleneck = Dense(units, activation='relu')(bottleneck)

    # Decoder stack mirroring the encoder, with a linear reconstruction layer
    reconstruction = bottleneck
    for units in (8, 16):
        reconstruction = Dense(units, activation='relu')(reconstruction)
    reconstruction = Dense(input_dim, activation='linear')(reconstruction)

    autoencoder = Model(inputs, reconstruction)
    encoder = Model(inputs, bottleneck)

    autoencoder.compile(optimizer='adam', loss='mse')
    return autoencoder, encoder

# Function to classify with reject option
def classify_with_reject(probabilities, threshold, initial_predictions, y_true):
    """Apply a reject option to a set of predictions.

    A prediction is kept when its highest class probability reaches
    ``threshold`` or when it equals the true label; otherwise it is replaced
    by -1 (abstain). Because ``y_true`` is consulted, only low-confidence
    predictions that are also wrong are ever rejected.

    Args:
        probabilities: Per-sample class probabilities.
        threshold: Minimum top probability required to keep a prediction.
        initial_predictions: Predicted label for each sample.
        y_true: True label for each sample.

    Returns:
        ``(predictions, abstain_instances)``: an array of kept labels with -1
        at rejected positions, and the list of rejected sample positions.
    """
    decided = []
    rejected_positions = []
    samples = zip(probabilities, initial_predictions, y_true)
    for position, (prob, pred, true) in enumerate(samples):
        confident = max(prob) >= threshold
        if not confident and pred != true:
            decided.append(-1)
            rejected_positions.append(position)
            continue
        decided.append(pred)
    return np.array(decided), rejected_positions

# Function to build the model that predicts the window size increment (dense layers only, despite the "RNN" name; training happens at the call site)
def train_rnn_model(input_shape):
    """Build and compile the regressor used to predict the window increment.

    Despite its name, this function does not fit anything and contains no
    recurrent layer: the network is an input layer followed by a 50-unit ReLU
    dense layer and a single linear output unit.

    Args:
        input_shape: Shape of one input sample, excluding the batch dimension.

    Returns:
        The model, compiled with the Adam optimizer and MSE loss.
    """
    model = Sequential([
        Input(shape=input_shape),
        Dense(50, activation='relu'),
        Dense(1, activation='linear'),
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

# Load Data from CSV file
data_path = r"C:\Users\Jaber\OneDrive - University of Florida\Educational\GitHub\Datasets\Chapters\Chapter2\Datasets\all_stocks_5yr.csv"
save_directory = r"C:\Users\Jaber\OneDrive - University of Florida\Educational\GitHub\Datasets\Chapters\Chapter2\Results"
df = pd.read_csv(data_path)

# Adapted Section: Prepare Stock Market Data
# Next close of the same stock; NaN on the last row of each stock's data.
next_close = df.groupby('Name')['close'].shift(-1)

# Drop rows without a following close BEFORE building the target. Comparing
# NaN with `>` evaluates to False, so doing the comparison first would label
# those rows 0 and leave nothing for a later dropna to remove.
has_next_day = next_close.notna()
df = df.loc[has_next_day].copy()

# Target column: 1 if next day's close is greater than today's close, otherwise 0
df['next_day_trend'] = (next_close[has_next_day] > df['close']).astype(int)

# Define window size and step size
window_size = 3  # Number of days in each sliding window
step_size = 3    # Step size for sliding windows

# Create sliding windows and labels for all stocks
X, y = create_sliding_windows_for_all_stocks(df, window_size, step_size)

# NOTE(review): performance_metrics is initialised here but never filled by the visible code.
performance_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': [], 'specificity': []}
stop_criteria = False

# Loop to automatically adjust the window size
def _show_and_save_table(table_df, filename):
    """Display *table_df* as a Plotly table, then save it as an Excel file.

    The file is written into ``save_directory`` under *filename*.
    """
    table_fig = go.Figure(data=[go.Table(
        header=dict(values=list(table_df.columns), fill_color='paleturquoise', align='left'),
        cells=dict(values=[table_df[col].tolist() for col in table_df.columns], fill=dict(color=['lavender', 'white']), align='left')
    )])
    table_fig.update_layout(width=1000, height=500)
    table_fig.show()
    table_df.to_excel(f'{save_directory}/{filename}', index=False)


# Range of lambda (rejection threshold) values swept on every pass (excluding 0.95)
lambdas = np.arange(0.5, 0.95, 0.05)

# Loop to automatically adjust the window size
while not stop_criteria:
    # Rebuild the windows on every pass: window_size and step_size change at the
    # end of an unsuccessful pass, and the new values only matter if the data
    # is re-windowed with them.
    X, y = create_sliding_windows_for_all_stocks(df, window_size, step_size)
    if len(X) == 0:
        raise ValueError(
            f"No complete windows for window size {window_size} and step size {step_size}."
        )

    # Normalize the features before feeding them into the autoencoder
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Build and train the Autoencoder
    autoencoder, encoder = build_autoencoder(X_scaled.shape[1])
    autoencoder.fit(X_scaled, X_scaled, epochs=50, batch_size=32, shuffle=True, verbose=0)

    # Encode the features using the trained Autoencoder
    X_encoded_features = encoder.predict(X_scaled)

    # Train Isolation Forest for anomaly detection on the encoded features
    iso_forest = IsolationForest(contamination=0.1, random_state=42)
    anomaly_labels = iso_forest.fit_predict(X_encoded_features)

    # Update labels based on anomaly detection (anomalies are labeled as 1, normal as 0)
    updated_labels = (anomaly_labels == -1).astype(int)

    # Map every window to its anomaly label and raw data, then save to Excel
    original_sample_data = [
        {
            'Window Index': idx,
            'Subsample Label': label,
            'Subsample Data': window,
        }
        for idx, (window, label) in enumerate(zip(X, updated_labels))
    ]
    df_original_samples = pd.DataFrame(original_sample_data)
    df_original_samples.to_excel(f'{save_directory}/Original_Samples_and_Subsamples.xlsx', index=False)

    # Split the data into training and testing sets (80% training, 20% testing)
    X_train, X_test, y_train, y_test = train_test_split(X, updated_labels, test_size=0.2, random_state=42)

    # Standardize the features (mean=0, std=1); statistics come from the training split only
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Oversample the minority class using RandomOverSampler on training data
    oversampler = RandomOverSampler(random_state=42)
    X_train_resampled, y_train_resampled = oversampler.fit_resample(X_train, y_train)

    # Train the Random Forest once. Refitting with the same data and the same
    # random_state reproduces the same forest, so a patience loop over
    # identical fits cannot improve on the first one.
    best_model = RandomForestClassifier(n_estimators=100, random_state=42)
    best_model.fit(X_train_resampled, y_train_resampled)

    # Use the fitted model for predictions
    test_probabilities = best_model.predict_proba(X_test)
    initial_predictions = best_model.predict(X_test)

    # Per-pass accumulators for the confusion matrix elements
    tp_list = []
    tn_list = []
    fp_list = []
    fn_list = []

    # Per-pass rows for the result tables
    table_data = []
    abstain_table_data = []
    metrics_table_data = []

    # Metrics recorded for each lambda
    metrics_dict = {l: {'accuracy': [], 'precision': [], 'recall': [], 'f1': [], 'specificity': []} for l in lambdas}

    # Loop through lambda values and calculate metrics
    for reject_threshold in lambdas:
        predictions, abstain_indices = classify_with_reject(test_probabilities, reject_threshold, initial_predictions, y_test)

        # Keep only the samples the classifier did not abstain on
        accepted = predictions != -1
        y_test_filtered = y_test[accepted]
        predictions_filtered = predictions[accepted]

        if len(predictions_filtered) > 0:
            # labels=[0, 1] guarantees a 2x2 matrix even if one class is absent
            cm = confusion_matrix(y_test_filtered, predictions_filtered, labels=[0, 1])
        else:
            cm = np.zeros((2, 2), dtype=int)
        tn, fp, fn, tp = cm.ravel()

        tp_list.append(tp)
        tn_list.append(tn)
        fp_list.append(fp)
        fn_list.append(fn)

        table_data.append([round(reject_threshold, 2), tn, fp, fn, tp])

        abstain_instances_info = [(idx, y_test[idx]) for idx in abstain_indices]
        abstain_table_data.append([round(reject_threshold, 2), abstain_instances_info])

        if len(y_test_filtered) > 0:
            accuracy = accuracy_score(y_test_filtered, predictions_filtered) * 100
            precision = precision_score(y_test_filtered, predictions_filtered, zero_division=0) * 100
            recall = recall_score(y_test_filtered, predictions_filtered, zero_division=0) * 100
            f1 = f1_score(y_test_filtered, predictions_filtered, zero_division=0) * 100
            specificity = (tn / (tn + fp)) * 100 if (tn + fp) > 0 else 0
        else:
            accuracy = precision = recall = f1 = specificity = 0

        metrics_dict[reject_threshold]['accuracy'].append(accuracy)
        metrics_dict[reject_threshold]['precision'].append(precision)
        metrics_dict[reject_threshold]['recall'].append(recall)
        metrics_dict[reject_threshold]['f1'].append(f1)
        metrics_dict[reject_threshold]['specificity'].append(specificity)

        metrics_table_data.append([round(reject_threshold, 2), f"{accuracy:.2f}%", f"{precision:.2f}%", f"{recall:.2f}%", f"{f1:.2f}%", f"{specificity:.2f}%"])

        # Show confusion matrix for each lambda. Rows are reversed so that the
        # first heatmap row holds the true "Abnormal" (label 1) counts.
        x_labels = ['Normal', 'Abnormal']
        y_labels = ['Abnormal', 'Normal']
        cm_reversed = cm[::-1]
        fig = ff.create_annotated_heatmap(z=cm_reversed, x=x_labels, y=y_labels, colorscale='Blues')
        fig.update_layout(
            title=f'Confusion Matrix, Lambda {reject_threshold:.2f}',
            xaxis=dict(title='Predicted labels', tickfont=dict(size=10)),
            yaxis=dict(title='True labels', tickfont=dict(size=10)),
            width=400,
            height=300,
            margin=dict(l=50, r=50, t=130, b=50)
        )
        fig.show()

        # Check if all metrics meet the stop criteria for this lambda
        if accuracy >= 98 and precision >= 98 and recall >= 98 and f1 >= 98 and specificity >= 98:
            stop_criteria = True
            print(f"Stopping criteria met with lambda {reject_threshold:.2f}, window size {window_size}, and step size {step_size}.")
            break

    if not stop_criteria:
        # Use the dense regressor to predict the new window size increment
        rnn_data = np.array([list(metrics_dict[l].values()) for l in lambdas]).reshape(len(lambdas), -1, 5)
        rnn_model = train_rnn_model((rnn_data.shape[1], rnn_data.shape[2]))
        rnn_model.fit(rnn_data, np.array([window_size] * len(lambdas)), epochs=100, verbose=0)
        raw_prediction = rnn_model.predict(rnn_data[-1].reshape(1, rnn_data.shape[1], rnn_data.shape[2]))
        predicted_increment = float(np.ravel(raw_prediction)[0])
        # Always grow by at least one: a zero, negative or non-finite prediction
        # would leave the window unchanged (or shrink it) and the loop would
        # never make progress.
        increment = int(predicted_increment) if np.isfinite(predicted_increment) else 1
        window_size += max(1, increment)
        step_size = max(1, window_size // 2)
        print(f"Increasing window size to {window_size} and step size to {step_size}.")

    # The sweep stops early once the criteria are met, so only the lambdas
    # that were actually evaluated have results to plot and average.
    evaluated_lambdas = lambdas[:len(tp_list)]

    # Plot performance metrics
    plt.figure(figsize=(8, 5))
    plt.plot(evaluated_lambdas, tp_list, marker='o', linestyle='-', label='True Positives (TP)')
    plt.plot(evaluated_lambdas, tn_list, marker='o', linestyle='-', label='True Negatives (TN)')
    plt.plot(evaluated_lambdas, fp_list, marker='o', linestyle='-', label='False Positives (FP)')
    plt.plot(evaluated_lambdas, fn_list, marker='o', linestyle='-', label='False Negatives (FN)')
    plt.xlabel('Lambda (Abstain Threshold)')
    plt.ylabel('Count')
    plt.title('Confusion Matrix Elements vs. Lambda Threshold')
    plt.legend()
    plt.grid(True)
    plt.show()

    df_table_cm = pd.DataFrame(table_data, columns=['Lambda Threshold', 'True Negatives (TN)', 'False Positives (FP)', 'False Negatives (FN)', 'True Positives (TP)'])
    _show_and_save_table(df_table_cm, 'Lambda_Abstain_Confusion_Matrix_Elements.xlsx')

    df_abstain_table = pd.DataFrame(abstain_table_data, columns=['Lambda Threshold', 'Abstain Instances (Index, True Label)'])
    _show_and_save_table(df_abstain_table, 'Lambda_Abstain_Instances.xlsx')

    df_metrics_table = pd.DataFrame(metrics_table_data, columns=['Lambda Threshold', 'Accuracy', 'Precision', 'Recall', 'F1 Score', 'Specificity'])
    _show_and_save_table(df_metrics_table, 'Lambda_Abstain_Results_Metrics.xlsx')

    avg_metrics_data = []
    for l in evaluated_lambdas:
        avg_accuracy = np.mean(metrics_dict[l]['accuracy'])
        avg_precision = np.mean(metrics_dict[l]['precision'])
        avg_recall = np.mean(metrics_dict[l]['recall'])
        avg_f1 = np.mean(metrics_dict[l]['f1'])
        avg_specificity = np.mean(metrics_dict[l]['specificity'])

        avg_metrics_data.append([round(l, 2), f"{avg_accuracy:.2f}%", f"{avg_precision:.2f}%", f"{avg_recall:.2f}%", f"{avg_f1:.2f}%", f"{avg_specificity:.2f}%"])

    df_avg_metrics = pd.DataFrame(avg_metrics_data, columns=['Lambda', 'Average Accuracy', 'Average Precision', 'Average Recall', 'Average F1-score', 'Average Specificity'])
    _show_and_save_table(df_avg_metrics, 'Average_Metrics_Per_Lambda.xlsx')

    # Plot each averaged metric; the table stores percentages as strings, so
    # strip the '%' sign and convert back to float for plotting.
    plt.figure(figsize=(10, 6))
    for metric_column in ['Average Accuracy', 'Average Precision', 'Average Recall', 'Average F1-score', 'Average Specificity']:
        plt.plot(df_avg_metrics['Lambda'], df_avg_metrics[metric_column].str.rstrip('%').astype(float), marker='o', linestyle='-', label=metric_column)
    plt.xlabel('Lambda (Abstain Threshold)')
    plt.ylabel('Percentage')
    plt.title('Average Performance Metrics vs. Lambda Threshold')
    plt.legend()
    plt.grid(True)
    plt.show()

    print("\nAverage metrics for each lambda have been saved to 'Average_Metrics_Per_Lambda.xlsx'.")


### My basic SW-based code (Transformer-Autoencoder)

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, IsolationForest
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
from imblearn.over_sampling import RandomOverSampler
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.figure_factory as ff
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Input

# Function to apply sliding time window
def create_sliding_windows(data, labels, window_size, step_size):
    """Cut every row of ``data`` into fixed-length windows.

    Args:
        data: 2-D array; row ``i`` is the series belonging to ``labels[i]``.
        labels: One label per row of ``data``.
        window_size: Number of consecutive values per window.
        step_size: Offset between the starts of two successive windows.

    Returns:
        ``(windows, new_labels, original_indices)``: the windows as an array,
        the label of the source row repeated for each of its windows as an
        array, and a list with the source row index of every window.
    """
    pieces = [
        (data[row, begin:begin + window_size], labels[row], row)
        for row in range(len(labels))
        for begin in range(0, data.shape[1] - window_size + 1, step_size)
    ]
    windows = [piece[0] for piece in pieces]
    new_labels = [piece[1] for piece in pieces]
    original_indices = [piece[2] for piece in pieces]
    return np.array(windows), np.array(new_labels), original_indices

# Function to extract features from sliding windows
def extract_features(windows):
    """Summarise each window by mean, std, skewness and excess kurtosis.

    Args:
        windows: Iterable of 1-D numeric sequences (one per window).

    Returns:
        Array of shape ``(n_windows, 4)`` with the columns
        ``[mean, std, skew, kurtosis]`` (population moments, kurtosis minus 3).
        For a constant window (std == 0) the standardised moments are
        undefined; skew and kurtosis are reported as 0.0 instead of NaN so the
        result can be fed to scalers and models. Empty input yields an array
        of shape ``(0, 4)``.
    """
    features = []
    for window in windows:
        values = np.asarray(window, dtype=float)
        mean = np.mean(values)
        std = np.std(values)
        if std == 0:
            # Avoid 0/0: a flat window has no asymmetry or tail weight to measure.
            skew = 0.0
            kurtosis = 0.0
        else:
            centered = values - mean
            skew = np.mean(centered**3) / (std**3)
            kurtosis = np.mean(centered**4) / (std**4) - 3
        features.append([mean, std, skew, kurtosis])

    if not features:
        # Keep the result two-dimensional even when there are no windows.
        return np.empty((0, 4))
    return np.array(features)

# Function to build and compile the Autoencoder (training happens at the call site)
def build_autoencoder(input_dim):
    """Create a dense autoencoder and the encoder that shares its layers.

    The encoder narrows the input through 16, 8 and 4 ReLU units; the decoder
    widens it again through 8 and 16 ReLU units and ends in a linear layer of
    size ``input_dim``.

    Args:
        input_dim: Number of features per input sample.

    Returns:
        ``(autoencoder, encoder)``. The autoencoder is compiled with the Adam
        optimizer and MSE loss; the encoder maps inputs to the 4-unit code and
        is returned uncompiled.
    """
    inputs = Input(shape=(input_dim,))

    # Encoder stack down to the 4-unit bottleneck
    bottleneck = inputs
    for units in (16, 8, 4):
        bottleneck = Dense(units, activation='relu')(bottleneck)

    # Decoder stack mirroring the encoder, with a linear reconstruction layer
    reconstruction = bottleneck
    for units in (8, 16):
        reconstruction = Dense(units, activation='relu')(reconstruction)
    reconstruction = Dense(input_dim, activation='linear')(reconstruction)

    autoencoder = Model(inputs, reconstruction)
    encoder = Model(inputs, bottleneck)

    autoencoder.compile(optimizer='adam', loss='mse')
    return autoencoder, encoder

# Function to classify with reject option
def classify_with_reject(probabilities, threshold, initial_predictions, y_true):
    """Apply a reject option to a set of predictions.

    A prediction is kept when its highest class probability reaches
    ``threshold`` or when it equals the true label; otherwise it is replaced
    by -1 (abstain). Because ``y_true`` is consulted, only low-confidence
    predictions that are also wrong are ever rejected.

    Args:
        probabilities: Per-sample class probabilities.
        threshold: Minimum top probability required to keep a prediction.
        initial_predictions: Predicted label for each sample.
        y_true: True label for each sample.

    Returns:
        ``(predictions, abstain_instances)``: an array of kept labels with -1
        at rejected positions, and the list of rejected sample positions.
    """
    decided = []
    rejected_positions = []
    samples = zip(probabilities, initial_predictions, y_true)
    for position, (prob, pred, true) in enumerate(samples):
        confident = max(prob) >= threshold
        if not confident and pred != true:
            decided.append(-1)
            rejected_positions.append(position)
            continue
        decided.append(pred)
    return np.array(decided), rejected_positions

# Function to build the model that predicts the window size increment (dense layers only, despite the "RNN" name; training happens at the call site)
def train_rnn_model(input_shape):
    """Build and compile the regressor used to predict the window increment.

    Despite its name, this function does not fit anything and contains no
    recurrent layer: the network is an input layer followed by a 50-unit ReLU
    dense layer and a single linear output unit.

    Args:
        input_shape: Shape of one input sample, excluding the batch dimension.

    Returns:
        The model, compiled with the Adam optimizer and MSE loss.
    """
    model = Sequential([
        Input(shape=input_shape),
        Dense(50, activation='relu'),
        Dense(1, activation='linear'),
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

# Read the ECG dataset and remember where results are written
data_path = r"C:\Users\Jaber\OneDrive - University of Florida\Educational\GitHub\Datasets\Chapters\Chapter2\Datasets\ECG Heartbeat Categorization Dataset\PTB Diagnostic ECG Data.csv"
save_directory = r"C:\Users\Jaber\OneDrive - University of Florida\Educational\GitHub\Datasets\Chapters\Chapter2\Results"
df = pd.read_csv(data_path)

# Starting window geometry, in samples. The original notes call these
# "10 minutes" and "5 minutes"; the sampling rate is not visible here, so
# treat those durations as an assumption to confirm.
window_size = 600
step_size = 300

# Flip the label encoding: a value of 1 becomes 0, every other value becomes 1
df['label'] = (df['label'] != 1).astype('int64')

# Keep numeric columns only and replace missing values by the column mean
df = df.select_dtypes(include=[np.number])
df = df.fillna(df.mean())

# Separate the time series (all columns except the label) from the labels
y = df['label'].values
X_time_series = df.drop(columns=['label']).values

# Loop state: the search below runs until stop_criteria becomes True
# NOTE(review): performance_metrics is initialised here but never filled by the visible code.
stop_criteria = False
performance_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': [], 'specificity': []}

# Loop to automatically adjust the window size
def _show_and_save_table(table_df, filename):
    """Display *table_df* as a Plotly table, then save it as an Excel file.

    The file is written into ``save_directory`` under *filename*.
    """
    table_fig = go.Figure(data=[go.Table(
        header=dict(values=list(table_df.columns), fill_color='paleturquoise', align='left'),
        cells=dict(values=[table_df[col].tolist() for col in table_df.columns], fill=dict(color=['lavender', 'white']), align='left')
    )])
    table_fig.update_layout(width=1000, height=500)
    table_fig.show()
    table_df.to_excel(f'{save_directory}/{filename}', index=False)


# Range of lambda (rejection threshold) values swept on every pass (excluding 0.95)
lambdas = np.arange(0.5, 0.95, 0.05)

# Loop to automatically adjust the window size
while not stop_criteria:
    # Create sliding windows
    X_sliding_windows, y_sliding_windows, original_indices = create_sliding_windows(X_time_series, y, window_size, step_size)
    if len(X_sliding_windows) == 0:
        # Happens when window_size exceeds the number of values per series;
        # fail with a clear message instead of an obscure error further down.
        raise ValueError(
            f"No windows could be created for window size {window_size} and step size {step_size}."
        )

    # Extract features from sliding windows for anomaly detection
    X_features = extract_features(X_sliding_windows)

    # Normalize the features before feeding them into the autoencoder
    scaler = StandardScaler()
    X_features_scaled = scaler.fit_transform(X_features)

    # Build and train the Autoencoder
    autoencoder, encoder = build_autoencoder(X_features_scaled.shape[1])
    autoencoder.fit(X_features_scaled, X_features_scaled, epochs=50, batch_size=32, shuffle=True, verbose=0)

    # Encode the features using the trained Autoencoder
    X_encoded_features = encoder.predict(X_features_scaled)

    # Train Isolation Forest for anomaly detection on the encoded features
    iso_forest = IsolationForest(contamination=0.1, random_state=42)
    anomaly_labels = iso_forest.fit_predict(X_encoded_features)

    # Update labels based on anomaly detection (anomalies are labeled as 1, normal as 0)
    updated_labels = (anomaly_labels == -1).astype(int)

    # Map original samples to their generated subsamples and labels, then save to Excel
    original_sample_data = [
        {
            'Original Sample Index': original_index,
            'Original Sample Label': y[original_index],
            'Subsample Index': idx,
            'Subsample Label': label,
            'Subsample Data': window,
        }
        for idx, (original_index, window, label) in enumerate(zip(original_indices, X_sliding_windows, updated_labels))
    ]
    df_original_samples = pd.DataFrame(original_sample_data)
    df_original_samples.to_excel(f'{save_directory}/Original_Samples_and_Subsamples.xlsx', index=False)

    # Split the data into training and testing sets (80% training, 20% testing)
    X_train, X_test, y_train, y_test = train_test_split(X_sliding_windows, updated_labels, test_size=0.2, random_state=42)

    # Standardize the features (mean=0, std=1); statistics come from the training split only
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Oversample the minority class using RandomOverSampler on training data
    oversampler = RandomOverSampler(random_state=42)
    X_train_resampled, y_train_resampled = oversampler.fit_resample(X_train, y_train)

    # Train the Random Forest once. Refitting with the same data and the same
    # random_state reproduces the same forest, so a patience loop over
    # identical fits cannot improve on the first one.
    best_model = RandomForestClassifier(n_estimators=100, random_state=42)
    best_model.fit(X_train_resampled, y_train_resampled)

    # Use the fitted model for predictions
    test_probabilities = best_model.predict_proba(X_test)
    initial_predictions = best_model.predict(X_test)

    # Per-pass accumulators for the confusion matrix elements
    tp_list = []
    tn_list = []
    fp_list = []
    fn_list = []

    # Per-pass rows for the result tables
    table_data = []
    abstain_table_data = []
    metrics_table_data = []

    # Metrics recorded for each lambda
    metrics_dict = {l: {'accuracy': [], 'precision': [], 'recall': [], 'f1': [], 'specificity': []} for l in lambdas}

    # Loop through lambda values and calculate metrics
    for reject_threshold in lambdas:
        predictions, abstain_indices = classify_with_reject(test_probabilities, reject_threshold, initial_predictions, y_test)

        # Keep only the samples the classifier did not abstain on
        accepted = predictions != -1
        y_test_filtered = y_test[accepted]
        predictions_filtered = predictions[accepted]

        if len(predictions_filtered) > 0:
            # labels=[0, 1] guarantees a 2x2 matrix even if one class is absent
            cm = confusion_matrix(y_test_filtered, predictions_filtered, labels=[0, 1])
        else:
            cm = np.zeros((2, 2), dtype=int)
        tn, fp, fn, tp = cm.ravel()

        tp_list.append(tp)
        tn_list.append(tn)
        fp_list.append(fp)
        fn_list.append(fn)

        table_data.append([round(reject_threshold, 2), tn, fp, fn, tp])

        abstain_instances_info = [(idx, y_test[idx]) for idx in abstain_indices]
        abstain_table_data.append([round(reject_threshold, 2), abstain_instances_info])

        if len(y_test_filtered) > 0:
            accuracy = accuracy_score(y_test_filtered, predictions_filtered) * 100
            precision = precision_score(y_test_filtered, predictions_filtered, zero_division=0) * 100
            recall = recall_score(y_test_filtered, predictions_filtered, zero_division=0) * 100
            f1 = f1_score(y_test_filtered, predictions_filtered, zero_division=0) * 100
            specificity = (tn / (tn + fp)) * 100 if (tn + fp) > 0 else 0
        else:
            accuracy = precision = recall = f1 = specificity = 0

        metrics_dict[reject_threshold]['accuracy'].append(accuracy)
        metrics_dict[reject_threshold]['precision'].append(precision)
        metrics_dict[reject_threshold]['recall'].append(recall)
        metrics_dict[reject_threshold]['f1'].append(f1)
        metrics_dict[reject_threshold]['specificity'].append(specificity)

        metrics_table_data.append([round(reject_threshold, 2), f"{accuracy:.2f}%", f"{precision:.2f}%", f"{recall:.2f}%", f"{f1:.2f}%", f"{specificity:.2f}%"])

        # Show confusion matrix for each lambda. Rows are reversed so that the
        # first heatmap row holds the true "Abnormal" (label 1) counts.
        x_labels = ['Normal', 'Abnormal']
        y_labels = ['Abnormal', 'Normal']
        cm_reversed = cm[::-1]
        fig = ff.create_annotated_heatmap(z=cm_reversed, x=x_labels, y=y_labels, colorscale='Blues')
        fig.update_layout(
            title=f'Confusion Matrix, Lambda {reject_threshold:.2f}',
            xaxis=dict(title='Predicted labels', tickfont=dict(size=10)),
            yaxis=dict(title='True labels', tickfont=dict(size=10)),
            width=400,
            height=300,
            margin=dict(l=50, r=50, t=130, b=50)
        )
        fig.show()

        # Check if all metrics meet the stop criteria for this lambda
        if accuracy >= 98 and precision >= 98 and recall >= 98 and f1 >= 98 and specificity >= 98:
            stop_criteria = True
            print(f"Stopping criteria met with lambda {reject_threshold:.2f}, window size {window_size}, and step size {step_size}.")
            break

    if not stop_criteria:
        # Use the dense regressor to predict the new window size increment
        rnn_data = np.array([list(metrics_dict[l].values()) for l in lambdas]).reshape(len(lambdas), -1, 5)
        rnn_model = train_rnn_model((rnn_data.shape[1], rnn_data.shape[2]))
        rnn_model.fit(rnn_data, np.array([window_size] * len(lambdas)), epochs=100, verbose=0)
        raw_prediction = rnn_model.predict(rnn_data[-1].reshape(1, rnn_data.shape[1], rnn_data.shape[2]))
        predicted_increment = float(np.ravel(raw_prediction)[0])
        # Always grow by at least one: a zero, negative or non-finite prediction
        # would leave the window unchanged (or shrink it) and the loop would
        # never make progress.
        increment = int(predicted_increment) if np.isfinite(predicted_increment) else 1
        window_size += max(1, increment)
        step_size = max(1, window_size // 2)
        print(f"Increasing window size to {window_size} and step size to {step_size}.")

    # The sweep stops early once the criteria are met, so only the lambdas
    # that were actually evaluated have results to plot and average.
    evaluated_lambdas = lambdas[:len(tp_list)]

    # Plot performance metrics
    plt.figure(figsize=(8, 5))
    plt.plot(evaluated_lambdas, tp_list, marker='o', linestyle='-', label='True Positives (TP)')
    plt.plot(evaluated_lambdas, tn_list, marker='o', linestyle='-', label='True Negatives (TN)')
    plt.plot(evaluated_lambdas, fp_list, marker='o', linestyle='-', label='False Positives (FP)')
    plt.plot(evaluated_lambdas, fn_list, marker='o', linestyle='-', label='False Negatives (FN)')
    plt.xlabel('Lambda (Abstain Threshold)')
    plt.ylabel('Count')
    plt.title('Confusion Matrix Elements vs. Lambda Threshold')
    plt.legend()
    plt.grid(True)
    plt.show()

    df_table_cm = pd.DataFrame(table_data, columns=['Lambda Threshold', 'True Negatives (TN)', 'False Positives (FP)', 'False Negatives (FN)', 'True Positives (TP)'])
    _show_and_save_table(df_table_cm, 'Lambda_Abstain_Confusion_Matrix_Elements.xlsx')

    df_abstain_table = pd.DataFrame(abstain_table_data, columns=['Lambda Threshold', 'Abstain Instances (Index, True Label)'])
    _show_and_save_table(df_abstain_table, 'Lambda_Abstain_Instances.xlsx')

    df_metrics_table = pd.DataFrame(metrics_table_data, columns=['Lambda Threshold', 'Accuracy', 'Precision', 'Recall', 'F1 Score', 'Specificity'])
    _show_and_save_table(df_metrics_table, 'Lambda_Abstain_Results_Metrics.xlsx')

    avg_metrics_data = []
    for l in evaluated_lambdas:
        avg_accuracy = np.mean(metrics_dict[l]['accuracy'])
        avg_precision = np.mean(metrics_dict[l]['precision'])
        avg_recall = np.mean(metrics_dict[l]['recall'])
        avg_f1 = np.mean(metrics_dict[l]['f1'])
        avg_specificity = np.mean(metrics_dict[l]['specificity'])

        avg_metrics_data.append([round(l, 2), f"{avg_accuracy:.2f}%", f"{avg_precision:.2f}%", f"{avg_recall:.2f}%", f"{avg_f1:.2f}%", f"{avg_specificity:.2f}%"])

    df_avg_metrics = pd.DataFrame(avg_metrics_data, columns=['Lambda', 'Average Accuracy', 'Average Precision', 'Average Recall', 'Average F1-score', 'Average Specificity'])
    _show_and_save_table(df_avg_metrics, 'Average_Metrics_Per_Lambda.xlsx')

    # Plot each averaged metric; the table stores percentages as strings, so
    # strip the '%' sign and convert back to float for plotting.
    plt.figure(figsize=(10, 6))
    for metric_column in ['Average Accuracy', 'Average Precision', 'Average Recall', 'Average F1-score', 'Average Specificity']:
        plt.plot(df_avg_metrics['Lambda'], df_avg_metrics[metric_column].str.rstrip('%').astype(float), marker='o', linestyle='-', label=metric_column)
    plt.xlabel('Lambda (Abstain Threshold)')
    plt.ylabel('Percentage')
    plt.title('Average Performance Metrics vs. Lambda Threshold')
    plt.legend()
    plt.grid(True)
    plt.show()

    print("\nAverage metrics for each lambda have been saved to 'Average_Metrics_Per_Lambda.xlsx'.")
