In [1]:
# Set root directory of the project as the current working directory
import os
initial_dir = os.getcwd()  # Save initial directory (notebooks/)
os.chdir('..')  # Move to project/

from config.defaults import Config
from src.models import load_preprocessed_data, train_lstm_model
#from src.TimeSHAP import local_event_explainer
#from timeshap.explainer.kernel import TimeShapKernel
from tensorflow.keras.models import Model

# Load the default configuration
config = Config()

import plotly.graph_objects as go
import numpy as np
import shap
from scipy.stats import pearsonr
import pandas as pd

# set the random seed for reproducibility
np.random.seed(1)


In [2]:
import logging

# Suppress all INFO logs at the root level
logging.getLogger().setLevel(logging.WARNING)

In [3]:
# Load preprocessed data
model_task = "lstm_regression"

config = Config()

X_train, X_val, X_test, y_train_reg, y_val_reg, y_test_reg, metadata = load_preprocessed_data(model_task = model_task, eol_capacity=config.eol_capacity)

# Train the model
model, history = train_lstm_model(config, X_train, y_train_reg, X_val, y_val_reg)

Epoch 1/50
[1m141/142[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 8ms/step - loss: 0.0350 - mae: 0.1366
Epoch 1: val_loss improved from inf to 0.00520, saving model to experiments/models/lstm_regression_eol65_20250310_121848_best.keras
[1m142/142[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 0.0348 - mae: 0.1360 - val_loss: 0.0052 - val_mae: 0.0535
Epoch 2/50
[1m140/142[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 8ms/step - loss: 0.0074 - mae: 0.0645
Epoch 2: val_loss improved from 0.00520 to 0.00413, saving model to experiments/models/lstm_regression_eol65_20250310_121848_best.keras
[1m142/142[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 0.0074 - mae: 0.0645 - val_loss: 0.0041 - val_mae: 0.0524
Epoch 3/50
[1m141/142[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 9ms/step - loss: 0.0059 - mae: 0.0577
Epoch 3: val_loss did not improve from 0.00413
[1m142/142[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

In [5]:
# Force graph initialization
dummy_input = np.zeros((1, 120, 1))  # Matches expected shape (None, 120, 1)
model.predict(dummy_input)  # Builds the graph

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step


array([[0.12176113]], dtype=float32)

In [12]:
test = X_test[0]

pred, hs = f(test)
pred, hs

AttributeError: The layer sequential has never been called and thus has no defined input.

In [None]:
# Prediction function for TimeSHAP
def f(X, model):
    inputs = model.input
    outputs = model.output
    hidden_states = [layer.output for layer in model.layers if "lstm" in layer.name.lower()]
    model_with_hs = tf.keras.Model(inputs=inputs, outputs=[outputs] + hidden_states)
    
    X_reshaped = X.reshape(-1, 120, 1) if len(X.shape) == 2 else X
    predictions, *hs = model_with_hs.predict(X_reshaped, verbose=0)
    
    return predictions, tuple(hs)

# Prediction function for SHAP
def f_shap(X):
    X_reshaped = X.reshape(-1, 120, 1) if len(X.shape) == 2 else X
    return model.predict(X_reshaped, verbose=0)

# Prepare the baseline for SHAP
baseline_shap = X_train[:50].reshape(-1, 120) # Shape: (50, 120)

# Baseline for TimeSHAP (subset of training data)
baseline_timeshap = X_train[:50]  # Shape: (50, 120, 1)

In [9]:
# Local event explainer
def local_event_explainer(f, data, baseline, mode, varying=None, pruned_idx=0, reverse=False, random_seed=42, nsamples=3000):
    explainer = TimeShapKernel(f, baseline, random_seed, mode=mode, varying=varying)
    shap_values_arr = explainer.shap_values(data, pruning_idx=pruned_idx, nsamples=nsamples)
    if reverse:
        shap_values_arr = shap_values_arr[::-1]
    sequence_values = data[0, pruned_idx:, 0]
    return pd.DataFrame({"Shapley Value": shap_values_arr, "Sequence Value": sequence_values})

# Compute correlations
def compute_correlations(test_data, baseline_shap, baseline_timeshap, label, mode="event", varying=None, pruned_idx=0, reverse=False):
    correlations = []
    for idx in range(test_data.shape[0]):
        test_3d = test_data[idx:idx+1]
        test_2d = test_3d.reshape(1, -1)
        
        # SHAP
        explainer = shap.KernelExplainer(f_shap, baseline_shap)
        shap_values = explainer.shap_values(test_2d, silent=True, nsamples=3000)
        shap_first = shap_values[0].flatten()
        
        # TimeSHAP
        event_explanation = local_event_explainer(
            f, test_3d, baseline_timeshap, mode=mode, varying=varying, pruned_idx=pruned_idx, reverse=reverse, nsamples=3000
        )
        
        correlation, _ = pearsonr(shap_first, event_explanation["Shapley Value"])
        correlations.append(correlation)
        print(f"Correlation for sequence {idx} ({label}, mode={mode}, pruned_idx={pruned_idx}, reverse={reverse}): {correlation:.4f}")
        
        if idx < 3:
            fig = go.Figure()
            fig.add_trace(go.Scatter(x=np.arange(120), y=shap_first, mode='lines+markers', name='SHAP', line=dict(color='blue')))
            fig.add_trace(go.Scatter(x=event_explanation.index, y=event_explanation["Shapley Value"], mode='lines+markers', name='TimeSHAP', line=dict(color='red')))
            fig.update_layout(
                title=f"SHAP vs TimeSHAP for Sequence {idx} ({label}, mode={mode}, pruned_idx={pruned_idx}, reverse={reverse})",
                xaxis_title="Timestep",
                yaxis_title="Explanation Value",
                template="plotly_white"
            )
            fig.add_hline(y=0, line_dash="dash", line_color="gray")
            fig.show()

    print(f"\nResults for {label} (mode={mode}, pruned_idx={pruned_idx}, reverse={reverse}):")
    print(f"Mean Correlation: {np.mean(correlations):.4f}")
    print(f"Median Correlation: {np.median(correlations):.4f}")
    print(f"Min Correlation: {np.min(correlations):.4f}")
    print(f"Max Correlation: {np.max(correlations):.4f}")
    print(f"Standard Deviation: {np.std(correlations):.4f}")
    return correlations

In [10]:
# sample 5 sequences from the test data using numpy
np.random.seed(42)
sample_idx = np.random.choice(np.arange(X_test.shape[0]), 5, replace=False)
test_data = X_test[sample_idx]


correlations_event_hs = compute_correlations(test_data, baseline_shap, baseline_timeshap, "Synthetic Background", mode="event", pruned_idx=0, reverse=True)

NameError: name 'TimeShapKernel' is not defined

In [17]:
# Compute correlations for multiple sequences
n_sequences = 20  # Number of sequences to test
seq_indices = np.random.randint(0, len(X_test), size=n_sequences)
correlations = []

In [8]:
for idx in seq_indices:
    # Prepare test sequence
    test_3d = X_test[idx:idx+1]  # Shape: (1, 120, 1)
    test_2d = test_3d.reshape(1, -1)  # Shape: (1, 120)
    
    # SHAP Computation
    explainer = shap.KernelExplainer(predict_wrapper, baseline_shap)
    shap_values = explainer.shap_values(test_2d, silent=True, nsamples=3000)
    shap_first = shap_values[0].flatten()  # Shape: (120,)
    
    # TimeSHAP Computation
    event_explanation = local_event_explainer(
        f,
        test_3d,
        baseline_timeshap,
        pruned_idx=0,
        nsamples=3000
    )
    
    # Compute correlation
    correlation, _ = pearsonr(shap_first, event_explanation["Shapley Value"])
    correlations.append(correlation)
    print(f"Correlation for sequence {idx}: {correlation:.4f}")
    
    # Optional: Plot for each sequence
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=np.arange(120), y=shap_first, mode='lines+markers', name='SHAP',
                             line=dict(color='blue')))
    fig.add_trace(go.Scatter(x=event_explanation.index, y=event_explanation["Shapley Value"],
                             mode='lines+markers', name='TimeSHAP',
                             line=dict(color='red')))
    fig.update_layout(
        title=f"SHAP vs TimeSHAP for Test Sequence {idx}",
        xaxis_title="Timestep/Event Number",
        yaxis_title="Explanation Value",
        template="plotly_white"
    )
    fig.add_hline(y=0, line_dash="dash", line_color="gray")
    fig.show()

# Summarize correlations
print(f"\nMean Correlation: {np.mean(correlations):.4f}")
print(f"Median Correlation: {np.median(correlations):.4f}")
print(f"Min Correlation: {np.min(correlations):.4f}")
print(f"Max Correlation: {np.max(correlations):.4f}")

Correlation for sequence 37: 0.9733


Correlation for sequence 235: 0.9854


Correlation for sequence 72: 0.9662


Correlation for sequence 255: 0.9795


Correlation for sequence 203: 0.9719


Correlation for sequence 133: 0.9742


Correlation for sequence 335: 0.9828


Correlation for sequence 144: 0.9846


Correlation for sequence 129: 0.9777


Correlation for sequence 71: 0.9648


Correlation for sequence 237: 0.9866


Correlation for sequence 390: 0.9676


Correlation for sequence 281: 0.9856


Correlation for sequence 178: 0.9326


Correlation for sequence 276: 0.9859


Correlation for sequence 254: 0.9790


Correlation for sequence 357: 0.9852


Correlation for sequence 252: 0.9815


Correlation for sequence 156: 0.9826


Correlation for sequence 50: 0.9431



Mean Correlation: 0.8347
Median Correlation: 0.8571
Min Correlation: 0.4292
Max Correlation: 0.9866


## Sequences with knee point

In [None]:
end_capacities = []
for idx in range(X_test.shape[0]):  # Loop over all sequences
    last_value = X_test[idx, -1, 0]  # Capacity at the end: [idx, 119, 0]
    end_capacities.append((idx, last_value))

# Convert to numpy array for easier manipulation
end_capacities = np.array([(idx, val) for idx, val in end_capacities])

# Step 2: Get indices of the 20 sequences with the lowest end capacity
# Use argsort to get indices sorted by end capacity (ascending), then take the first 20 (lowest)
lowest_indices = np.argsort(end_capacities[:, 1])[:n_sequences]
lowest_indices = end_capacities[lowest_indices, 0].astype(int)  # Extract the sequence indices

# Step 3: Extract the top 20 sequences from X_test
lowest_20_sequences = X_test[lowest_indices]

# Verify the shape
print("Shape of lowest_20_sequences:", lowest_20_sequences.shape)

# Plot the sequences in a line plot
fig = go.Figure()
for idx, sequence in enumerate(lowest_20_sequences):
    fig.add_trace(go.Scatter
    (
        x=np.arange(120),
        y=sequence.flatten(),
        mode='lines',
        name=f"Sequence {idx}",
        line=dict(width=1)
    ))
fig.update_layout(
    title="Lowest Capacity at end Sequences",
    xaxis_title="Timestep",
    yaxis_title="Capacity",
    template="plotly_white"
)
fig.show()

Shape of lowest_20_sequences: (20, 120, 1)


In [32]:
# Initialize list to store correlations
correlations = []

# Loop over the 20 sequences in lowest_20_sequences
for idx in range(lowest_20_sequences.shape[0]):  # 0 to 19
    # Prepare test sequence
    test_3d = lowest_20_sequences[idx:idx+1]  # Shape: (1, 120, 1)
    test_2d = test_3d.reshape(1, -1)       # Shape: (1, 120)

    # SHAP Computation
    explainer = shap.KernelExplainer(predict_wrapper, baseline_shap)
    shap_values = explainer.shap_values(test_2d, silent=True, nsamples=3000)
    shap_first = shap_values[0].flatten()  # Shape: (120,)

    # TimeSHAP Computation
    event_explanation = local_event_explainer(
        f,
        test_3d,
        baseline_timeshap,
        pruned_idx=0,
        nsamples=3000
    )

    # Compute correlation
    correlation, _ = pearsonr(shap_first, event_explanation["Shapley Value"])
    correlations.append(correlation)
    print(f"Correlation for sequence {idx} in top 20: {correlation:.4f}")

    # Optional: Plot for each sequence
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=np.arange(120), y=shap_first, mode='lines+markers', name='SHAP',
                             line=dict(color='blue')))
    fig.add_trace(go.Scatter(x=event_explanation.index, y=event_explanation["Shapley Value"],
                             mode='lines+markers', name='TimeSHAP',
                             line=dict(color='red')))
    fig.update_layout(
        title=f"SHAP vs TimeSHAP for Top 20 Sequence {idx}",
        xaxis_title="Timestep/Event Number",
        yaxis_title="Explanation Value",
        template="plotly_white"
    )
    fig.add_hline(y=0, line_dash="dash", line_color="gray")
    fig.show()

# Summarize correlations
print(f"\nMean Correlation: {np.mean(correlations):.4f}")
print(f"Median Correlation: {np.median(correlations):.4f}")
print(f"Min Correlation: {np.min(correlations):.4f}")
print(f"Max Correlation: {np.max(correlations):.4f}")

Correlation for sequence 0 in top 20: 0.9585


Correlation for sequence 1 in top 20: 0.9600


Correlation for sequence 2 in top 20: 0.9582


Correlation for sequence 3 in top 20: 0.9627


Correlation for sequence 4 in top 20: 0.9596


Correlation for sequence 5 in top 20: 0.9648


Correlation for sequence 6 in top 20: 0.9614


Correlation for sequence 7 in top 20: 0.9609


Correlation for sequence 8 in top 20: 0.9655


Correlation for sequence 9 in top 20: 0.9619


Correlation for sequence 10 in top 20: 0.9668


Correlation for sequence 11 in top 20: 0.9676


Correlation for sequence 12 in top 20: 0.9631


Correlation for sequence 13 in top 20: 0.9651


Correlation for sequence 14 in top 20: 0.9702


Correlation for sequence 15 in top 20: 0.9690


Correlation for sequence 16 in top 20: 0.9686


Correlation for sequence 17 in top 20: 0.9706


Correlation for sequence 18 in top 20: 0.9731


Correlation for sequence 19 in top 20: 0.9694



Mean Correlation: 0.9649
Median Correlation: 0.9649
Min Correlation: 0.9582
Max Correlation: 0.9731


## With Hidden States

## Synthetic data

In [7]:
# Generate synthetic test data
def generate_synthetic_sequences(n_seq=3, length=120, noise_std=0.1):
    sequences = np.zeros((n_seq, length, 1))
    for i in range(n_seq):
        sequences[i, :60, 0] = 0.9 + np.random.normal(0, noise_std, 60)
        sequences[i, 60:, 0] = np.linspace(0.9, 0.1, 60) + np.random.normal(0, noise_std, 60)
    return sequences

# Generate synthetic background
def generate_synthetic_background(n_seq=50, length=120, drop_range=(20, 100), noise_std=0.1):
    sequences = np.zeros((n_seq, length, 1))
    drop_points = []
    for i in range(n_seq):
        drop_point = np.random.randint(drop_range[0], drop_range[1])
        drop_points.append(drop_point)
        sequences[i, :drop_point, 0] = 0.9 + np.random.normal(0, noise_std, drop_point)
        sequences[i, drop_point:, 0] = np.linspace(0.9, 0.1, length - drop_point) + np.random.normal(0, noise_std, length - drop_point)
    print("Background drop points:", drop_points)
    return sequences

# Generate constant background
def generate_constant_background(n_seq=50, length=120, value=0.9):
    sequences = np.ones((n_seq, length, 1)) * value
    return sequences

# Generate data
synthetic_test_data = generate_synthetic_sequences()
synthetic_background = generate_synthetic_background()
constant_background = generate_constant_background()

# Baselines
baseline_shap_synth = synthetic_background.reshape(50, 120)
baseline_timeshap_synth = synthetic_background
baseline_shap_const = constant_background.reshape(50, 120)
baseline_timeshap_const = constant_background

Background drop points: [20, 97, 79, 21, 86, 52, 73, 80, 71, 73, 59, 20, 32, 32, 35, 43, 29, 87, 58, 32, 28, 91, 73, 36, 72, 41, 90, 39, 45, 61, 80, 80, 93, 63, 28, 57, 52, 26, 24, 29, 45, 94, 97, 85, 73, 79, 60, 33, 81, 87]


In [11]:
correlations_event_hs = compute_correlations(synthetic_test_data, baseline_shap_synth, baseline_timeshap_synth, "Synthetic Background", mode="event", pruned_idx=0, reverse=True)

Provided model function fails when applied to the provided data set.


AttributeError: The layer sequential_1 has never been called and thus has no defined input.

In [25]:
# Mean sequence of training data with Shape: (1, 120)
baseline_seq_2d = np.mean(X_train, axis=0).reshape(1, 120)  # Shape: (1, 120)
baseline_seq_3d = np.mean(X_train, axis=0).reshape(1, 120, 1)  # Shape: (1, 120, 1)

# Compute correlations for multiple sequences
n_sequences = 20  # Number of sequences to test
seq_indices = np.random.randint(0, len(X_test), size=n_sequences)
correlations = []

for idx in seq_indices:
    # Prepare test sequence
    test_3d = X_test[idx:idx+1]  # Shape: (1, 120, 1)
    test_2d = test_3d.reshape(1, -1)  # Shape: (1, 120)
    
    # SHAP Computation
    explainer = shap.KernelExplainer(predict_wrapper, baseline_seq_2d)
    shap_values = explainer.shap_values(test_2d, silent=True)
    shap_first = shap_values[0].flatten()  # Shape: (120,)
    
    # TimeSHAP Computation
    event_explanation = local_event_explainer(
        f,
        test_3d,
        baseline_seq_3d,
        pruned_idx=0,
        nsamples=1000
    )
    
    # Compute correlation
    correlation, _ = pearsonr(shap_first, event_explanation["Shapley Value"])
    correlations.append(correlation)
    print(f"Correlation for sequence {idx}: {correlation:.4f}")
    
    # Optional: Plot for each sequence
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=np.arange(120), y=shap_first, mode='lines+markers', name='SHAP',
                             line=dict(color='blue')))
    fig.add_trace(go.Scatter(x=event_explanation.index, y=event_explanation["Shapley Value"],
                             mode='lines+markers', name='TimeSHAP',
                             line=dict(color='red')))
    fig.update_layout(
        title=f"SHAP vs TimeSHAP for Test Sequence {idx}",
        xaxis_title="Timestep/Event Number",
        yaxis_title="Explanation Value",
        template="plotly_white"
    )
    fig.add_hline(y=0, line_dash="dash", line_color="gray")
    fig.show()

# Summarize correlations
print(f"\nMean Correlation: {np.mean(correlations):.4f}")
print(f"Median Correlation: {np.median(correlations):.4f}")
print(f"Min Correlation: {np.min(correlations):.4f}")
print(f"Max Correlation: {np.max(correlations):.4f}")

Correlation for sequence 42: 0.9719


Correlation for sequence 77: 0.9789


Correlation for sequence 350: 0.9958


Correlation for sequence 322: 0.9923


Correlation for sequence 319: 0.9886


Correlation for sequence 348: 0.9962


Correlation for sequence 358: 0.9876


Correlation for sequence 51: 0.9895


Correlation for sequence 16: 0.9479


Correlation for sequence 210: 0.9940


Correlation for sequence 226: 0.9953


Correlation for sequence 32: 0.9628


Correlation for sequence 327: 0.9965


Correlation for sequence 336: 0.9999


Correlation for sequence 300: 0.9662


Correlation for sequence 76: 0.9880


Correlation for sequence 67: 0.9997


Correlation for sequence 49: 0.9864


Correlation for sequence 157: 0.9562


Correlation for sequence 197: 0.9999



Mean Correlation: 0.9847
Median Correlation: 0.9891
Min Correlation: 0.9479
Max Correlation: 0.9999


### Test with synthetic data

In [43]:
# Generate synthetic test data with noise in pre-drop period
def generate_synthetic_sequences(n_seq=20, length=120, noise_std=0.1):
    sequences = np.zeros((n_seq, length, 1))
    for i in range(n_seq):
        # Noisy constant capacity before drop
        sequences[i, :60, 0] = 0.9 + np.random.normal(0, noise_std, 60)
        # Linear decline after drop with noise
        sequences[i, 60:, 0] = np.linspace(0.9, 0.1, 60) + np.random.normal(0, noise_std, 60)
    return sequences

# Generate synthetic background with wider drop range
def generate_synthetic_background(n_seq=50, length=120, drop_range=(20, 100), noise_std=0.1):
    sequences = np.zeros((n_seq, length, 1))
    drop_points = []
    for i in range(n_seq):
        drop_point = np.random.randint(drop_range[0], drop_range[1])
        drop_points.append(drop_point)
        # Noisy constant capacity before drop
        sequences[i, :drop_point, 0] = 0.9 + np.random.normal(0, noise_std, drop_point)
        # Linear decline after drop with noise, ensuring continuity
        post_drop_length = length - drop_point
        linear_part = np.linspace(0.9, 0.1, post_drop_length)
        noise = np.random.normal(0, noise_std, post_drop_length)
        sequences[i, drop_point:, 0] = linear_part + noise
    return sequences

# Generate synthetic test and background data
synthetic_test_data = generate_synthetic_sequences()
synthetic_background = generate_synthetic_background()

# Update the baseline for SHAP and TimeSHAP
baseline_shap = synthetic_background.reshape(50, 120)  # Shape: (50, 120)
baseline_timeshap = synthetic_background  # Shape: (50, 120, 1)

In [None]:
correlations = []

for idx in range(synthetic_test_data.shape[0]):
    # Prepare test sequence
    test_3d = synthetic_test_data[idx:idx+1]  # Shape: (1, 120, 1)
    test_2d = test_3d.reshape(1, -1)  # Shape: (1, 120)
    
    # SHAP Computation
    explainer = shap.KernelExplainer(predict_wrapper, baseline_shap)
    shap_values = explainer.shap_values(test_2d, silent=True, n_samples=3000)
    shap_first = shap_values[0].flatten()  # Shape: (120,)
    
    # TimeSHAP Computation
    event_explanation = local_event_explainer(
        f,
        test_3d,
        baseline_timeshap,
        pruned_idx=0,
        nsamples=3000
    )
    
    # Compute correlation
    correlation, _ = pearsonr(shap_first, event_explanation["Shapley Value"])
    correlations.append(correlation)
    print(f"Correlation for sequence {idx}: {correlation:.4f}")
    
    # Optional: Plot for each sequence
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=np.arange(120), y=shap_first, mode='lines+markers', name='SHAP',
                             line=dict(color='blue')))
    fig.add_trace(go.Scatter(x=event_explanation.index, y=event_explanation["Shapley Value"],
                             mode='lines+markers', name='TimeSHAP',
                             line=dict(color='red')))
    fig.update_layout(
        title=f"SHAP vs TimeSHAP for Test Sequence {idx}",
        xaxis_title="Timestep/Event Number",
        yaxis_title="Explanation Value",
        template="plotly_white"
    )
    fig.add_hline(y=0, line_dash="dash", line_color="gray")
    fig.show()

# Summarize correlations
print(f"\nMean Correlation: {np.mean(correlations):.4f}")
print(f"Median Correlation: {np.median(correlations):.4f}")
print(f"Min Correlation: {np.min(correlations):.4f}")
print(f"Max Correlation: {np.max(correlations):.4f}")

Correlation for sequence 0: 0.9994


Correlation for sequence 1: 0.9985


Correlation for sequence 2: 0.9988


Correlation for sequence 3: 0.9935


Correlation for sequence 4: 0.9979


Correlation for sequence 5: 0.9986


Correlation for sequence 6: 0.9977


Correlation for sequence 7: 0.9994


KeyboardInterrupt: 

In [6]:
from timeshap.explainer.kernel import TimeShapKernel
import pandas as pd

# Generate synthetic test data
def generate_synthetic_sequences(n_seq=3, length=120, noise_std=0.1):
    sequences = np.zeros((n_seq, length, 1))
    for i in range(n_seq):
        sequences[i, :60, 0] = 0.9 + np.random.normal(0, noise_std, 60)
        sequences[i, 60:, 0] = np.linspace(0.9, 0.1, 60) + np.random.normal(0, noise_std, 60)
    return sequences

# Generate synthetic background
def generate_synthetic_background(n_seq=50, length=120, drop_range=(20, 100), noise_std=0.1):
    sequences = np.zeros((n_seq, length, 1))
    drop_points = []
    for i in range(n_seq):
        drop_point = np.random.randint(drop_range[0], drop_range[1])
        drop_points.append(drop_point)
        sequences[i, :drop_point, 0] = 0.9 + np.random.normal(0, noise_std, drop_point)
        sequences[i, drop_point:, 0] = np.linspace(0.9, 0.1, length - drop_point) + np.random.normal(0, noise_std, length - drop_point)
    print("Background drop points:", drop_points)
    return sequences

# Generate constant background
def generate_constant_background(n_seq=50, length=120, value=0.9):
    sequences = np.ones((n_seq, length, 1)) * value
    return sequences

# Generate data
synthetic_test_data = generate_synthetic_sequences()
synthetic_background = generate_synthetic_background()
constant_background = generate_constant_background()

# Baselines
baseline_shap_synth = synthetic_background.reshape(50, 120)
baseline_timeshap_synth = synthetic_background
baseline_shap_const = constant_background.reshape(50, 120)
baseline_timeshap_const = constant_background

# Local event explainer
def local_event_explainer(f, data, baseline, mode, varying=None, pruned_idx=0, reverse=False, random_seed=42, nsamples=3000):
    explainer = TimeShapKernel(f, baseline, random_seed, mode=mode, varying=varying)
    shap_values_arr = explainer.shap_values(data, pruning_idx=pruned_idx, nsamples=nsamples)
    if reverse:
        shap_values_arr = shap_values_arr[::-1]
    sequence_values = data[0, pruned_idx:, 0]
    return pd.DataFrame({"Shapley Value": shap_values_arr, "Sequence Value": sequence_values})

# Compute correlations
def compute_correlations(test_data, baseline_shap, baseline_timeshap, label, mode="event", varying=None, pruned_idx=0, reverse=False):
    correlations = []
    for idx in range(test_data.shape[0]):
        test_3d = test_data[idx:idx+1]
        test_2d = test_3d.reshape(1, -1)
        
        # SHAP
        explainer = shap.KernelExplainer(predict_wrapper, baseline_shap)
        shap_values = explainer.shap_values(test_2d, silent=True, nsamples=3000)
        shap_first = shap_values[0].flatten()
        
        # TimeSHAP
        event_explanation = local_event_explainer(
            predict_wrapper, test_3d, baseline_timeshap, mode=mode, varying=varying, pruned_idx=pruned_idx, reverse=reverse, nsamples=3000
        )
        
        correlation, _ = pearsonr(shap_first, event_explanation["Shapley Value"])
        correlations.append(correlation)
        print(f"Correlation for sequence {idx} ({label}, mode={mode}, pruned_idx={pruned_idx}, reverse={reverse}): {correlation:.4f}")
        
        if idx < 3:
            fig = go.Figure()
            fig.add_trace(go.Scatter(x=np.arange(120), y=shap_first, mode='lines+markers', name='SHAP', line=dict(color='blue')))
            fig.add_trace(go.Scatter(x=event_explanation.index, y=event_explanation["Shapley Value"], mode='lines+markers', name='TimeSHAP', line=dict(color='red')))
            fig.update_layout(
                title=f"SHAP vs TimeSHAP for Sequence {idx} ({label}, mode={mode}, pruned_idx={pruned_idx}, reverse={reverse})",
                xaxis_title="Timestep",
                yaxis_title="Explanation Value",
                template="plotly_white"
            )
            fig.add_hline(y=0, line_dash="dash", line_color="gray")
            fig.show()

    print(f"\nResults for {label} (mode={mode}, pruned_idx={pruned_idx}, reverse={reverse}):")
    print(f"Mean Correlation: {np.mean(correlations):.4f}")
    print(f"Median Correlation: {np.median(correlations):.4f}")
    print(f"Min Correlation: {np.min(correlations):.4f}")
    print(f"Max Correlation: {np.max(correlations):.4f}")
    print(f"Standard Deviation: {np.std(correlations):.4f}")
    return correlations

# Run tests
print("Testing with Synthetic Background (Event Mode, pruned_idx=0, reverse=True):")
correlations_event_rev = compute_correlations(synthetic_test_data, baseline_shap_synth, baseline_timeshap_synth, "Synthetic Background", mode="event", pruned_idx=0, reverse=True)

print("\nTesting with Synthetic Background (Event Mode, pruned_idx=0, reverse=False):")
correlations_event_no_rev = compute_correlations(synthetic_test_data, baseline_shap_synth, baseline_timeshap_synth, "Synthetic Background", mode="event", pruned_idx=0, reverse=False)

print("\nTesting with Synthetic Background (Cell Mode, pruned_idx=0, reverse=False):")
correlations_cell = compute_correlations(synthetic_test_data, baseline_shap_synth, baseline_timeshap_synth, "Synthetic Background", mode="cell", varying=(range(120), [0]), pruned_idx=0, reverse=False)

print("\nTesting with Constant Background (Cell Mode, pruned_idx=0, reverse=False):")
correlations_cell_const = compute_correlations(synthetic_test_data, baseline_shap_const, baseline_timeshap_const, "Constant Background", mode="cell", varying=(range(120), [0]), pruned_idx=0, reverse=False)

Background drop points: [20, 97, 79, 21, 86, 52, 73, 80, 71, 73, 59, 20, 32, 32, 35, 43, 29, 87, 58, 32, 28, 91, 73, 36, 72, 41, 90, 39, 45, 61, 80, 80, 93, 63, 28, 57, 52, 26, 24, 29, 45, 94, 97, 85, 73, 79, 60, 33, 81, 87]
Testing with Synthetic Background (Event Mode, pruned_idx=0, reverse=True):


NameError: name 'predict_wrapper' is not defined