# In [1]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, Dense, Dropout, Flatten, Add, Activation
from tensorflow.keras.utils import to_categorical
import shap
import tensorflow as tf
import matplotlib.pyplot as plt
from lime.lime_time_series import LimeTimeSeriesExplainer
from sklearn.inspection import permutation_importance

# Temporal Convolutional Network (TCN) Block
def TCNBlock(input_layer, filters, kernel_size, dilation_rate, dropout_rate):
    """Build one residual TCN block on top of ``input_layer``.

    The main path is two identical stages, each a causal, dilated ``Conv1D``
    with ReLU activation followed by ``Dropout``. The block input is then
    added to the main-path output (after a 1x1 convolution when its channel
    count differs from ``filters``) and a final ReLU is applied to the sum.

    Args:
        input_layer: Keras tensor feeding the block.
        filters: Number of output channels of every convolution in the block.
        kernel_size: Kernel width of the two main-path convolutions.
        dilation_rate: Dilation rate of the two main-path convolutions.
        dropout_rate: Rate passed to both ``Dropout`` layers.

    Returns:
        The Keras tensor produced by the final ReLU activation.
    """
    main_path = input_layer
    for _ in range(2):
        main_path = Conv1D(
            filters=filters,
            kernel_size=kernel_size,
            padding='causal',
            dilation_rate=dilation_rate,
            activation='relu',
        )(main_path)
        main_path = Dropout(dropout_rate)(main_path)

    # Residual connection: Add() needs matching channel counts, so project
    # the block input with a 1x1 convolution only when they differ.
    if input_layer.shape[-1] == filters:
        shortcut = input_layer
    else:
        shortcut = Conv1D(filters=filters, kernel_size=1, padding='same')(input_layer)

    merged = Add()([main_path, shortcut])
    return Activation('relu')(merged)

# Path to folder containing the Excel files
folder_path = "D:/OneDrive_2024-12-27/Segmented by seperating spindles/Balanced_segmented"

# Load and combine data from all Excel files.
# The directory listing is sorted because os.listdir() returns entries in
# arbitrary order; a fixed file order keeps the stacked rows (and therefore
# the seeded train/test split performed later) reproducible between runs.
all_data = []
all_labels = []
for file in sorted(os.listdir(folder_path)):
    # Skip non-Excel entries and the "~$name.xlsx" lock files Excel creates
    # while a workbook is open (they end in .xlsx but are not workbooks).
    if not file.endswith(".xlsx") or file.startswith("~$"):
        continue
    file_path = os.path.join(folder_path, file)
    df = pd.read_excel(file_path)
    data = df.iloc[:, :-1].values  # Features (all columns except last)
    labels = df.iloc[:, -1].values  # Labels (last column)
    all_data.append(data)
    all_labels.append(labels)

# Fail with a clear message instead of the opaque error np.vstack raises on
# an empty list.
if not all_data:
    raise ValueError(f"No .xlsx files found in {folder_path!r}")

# Combine all files into a single dataset: rows are stacked vertically and
# the per-file label vectors are concatenated in the same order.
all_data = np.vstack(all_data)
all_labels = np.hstack(all_labels)

# Split the data into training and testing sets BEFORE scaling, so that the
# test samples do not influence the normalisation statistics (no leakage).
X_train, X_test, y_train, y_test = train_test_split(
    all_data, all_labels, test_size=0.2, random_state=42, stratify=all_labels
)

# Normalize the data: min-max scaling with a single global minimum and range
# taken from the training set only, then applied unchanged to the test set.
# Test values may therefore fall slightly outside [0, 1].
train_min = np.min(X_train)
train_range = np.max(X_train) - train_min
if train_range == 0:
    # Constant data: avoid dividing by zero; every value maps to 0.
    train_range = 1.0
X_train = (X_train - train_min) / train_range
X_test = (X_test - train_min) / train_range

# Reshape data for TCN input: (samples, time steps) -> (samples, time steps, 1)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Convert labels to categorical (one-hot). The class count is fixed from the
# full label vector so both splits always get the same number of columns.
num_classes = int(np.max(all_labels)) + 1
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

# Build the TCN model: three residual TCN blocks with growing width
# (32 -> 64 -> 128 filters) and growing dilation (1 -> 2 -> 4).
input_layer = Input(shape=(X_train.shape[1], 1))
tcn1 = TCNBlock(input_layer, filters=32, kernel_size=3, dilation_rate=1, dropout_rate=0.2)
tcn2 = TCNBlock(tcn1, filters=64, kernel_size=3, dilation_rate=2, dropout_rate=0.2)
tcn3 = TCNBlock(tcn2, filters=128, kernel_size=3, dilation_rate=4, dropout_rate=0.2)

# Classification head on the flattened (time steps x channels) feature map.
flatten = Flatten()(tcn3)
dense = Dense(128, activation='relu')(flatten)
dropout = Dropout(0.5)(dense)
# One softmax unit per one-hot label column, so the output width always
# matches y_train instead of relying on a hard-coded class count.
n_output_units = y_train.shape[1]
output_layer = Dense(n_output_units, activation='softmax')(dropout)

model = Model(inputs=input_layer, outputs=output_layer)

# Compile the model (categorical cross-entropy pairs with one-hot labels)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model; 20% of the training data is held out for validation
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2, verbose=1)

# Evaluate the model on the held-out test set.
# eval_metrics is [loss, accuracy], following the metrics given to compile().
eval_metrics = model.evaluate(X_test, y_test, verbose=0)
print("\nModel Evaluation:")
print(f"Loss: {eval_metrics[0]:.4f}, Accuracy: {eval_metrics[1]:.4f}")

# Turn softmax outputs and one-hot targets back into integer class indices
test_probabilities = model.predict(X_test)
y_pred = test_probabilities.argmax(axis=1)
y_true = y_test.argmax(axis=1)

# Confusion matrix of true vs. predicted class indices
print("\nConfusion Matrix:")
test_confusion = confusion_matrix(y_true, y_pred)
print(test_confusion)

# Per-class precision / recall / F1 summary
print("\nClassification Report:")
test_report = classification_report(y_true, y_pred)
print(test_report)

# SHAP - Explain the model
# KernelExplainer works on 2-D (samples, features) arrays, while the model
# consumes (samples, time steps, 1). The data is therefore flattened for SHAP
# and restored to 3-D inside the prediction wrapper.
shap_time_steps = X_train.shape[1]


def _predict_from_flat(flat_batch):
    """Run the model on a 2-D (samples, time steps) batch supplied by SHAP."""
    return model.predict(flat_batch.reshape(-1, shap_time_steps, 1), verbose=0)


shap_background = X_train[:100].reshape(-1, shap_time_steps)  # Small background sample from training data
shap_samples = X_test[:10].reshape(-1, shap_time_steps)  # First 10 test samples to explain

explainer = shap.KernelExplainer(_predict_from_flat, shap_background)
shap_values = explainer.shap_values(shap_samples)

# Older SHAP releases return a list with one (samples, features) array per
# class; newer ones return a single (samples, features, classes) array.
# Select the class-0 attributions in either case.
if isinstance(shap_values, list):
    class0_shap_values = shap_values[0]
else:
    shap_array = np.asarray(shap_values)
    class0_shap_values = shap_array[..., 0] if shap_array.ndim == 3 else shap_array

shap.summary_plot(class0_shap_values, shap_samples)  # Plot the feature importance

# Grad-CAM - Visualize the importance of specific parts of the input
def compute_grad_cam(model, X_input, target_class, layer_name=None):
    """Compute a 1-D Grad-CAM importance profile for one input sample.

    The gradient of the ``target_class`` score with respect to a feature map
    is averaged over the batch and time axes to give one weight per channel.
    The feature map of the first sample is then combined channel-wise with
    those weights, rectified, and scaled to a maximum of 1.

    Args:
        model: Keras model to explain.
        X_input: Batch to explain; only the first sample's feature map is
            used for the returned profile.
        target_class: Index of the model output whose score is explained.
        layer_name: Name of the layer providing the feature map. When
            ``None`` (default), the last non-input layer whose output has
            three dimensions is used, so the function does not depend on
            Keras auto-generated layer names such as ``'conv1d_2'``.

    Returns:
        A 1-D NumPy array with one non-negative value per time step of the
        chosen feature map. Values are scaled to [0, 1] unless the rectified
        map is all zeros, in which case the zeros are returned unchanged.

    Raises:
        ValueError: If no suitable feature-map layer exists, or if the
            gradient of the class score w.r.t. the feature map is undefined.
    """
    if layer_name is not None:
        feature_layer = model.get_layer(layer_name)
    else:
        feature_layer = None
        for layer in reversed(model.layers):
            if isinstance(layer, tf.keras.layers.InputLayer):
                continue
            try:
                output_rank = len(layer.output.shape)
            except (AttributeError, ValueError):
                # Layer has no single, well-defined output; not usable here.
                continue
            if output_rank == 3:
                feature_layer = layer
                break
        if feature_layer is None:
            raise ValueError("No layer with a 3-D output was found for Grad-CAM.")

    # model.inputs is already a list; wrapping it again would nest the inputs.
    grad_model = tf.keras.models.Model(model.inputs, [feature_layer.output, model.output])

    with tf.GradientTape() as tape:
        feature_maps, predictions = grad_model(X_input, training=False)
        class_score = predictions[:, target_class]

    grads = tape.gradient(class_score, feature_maps)
    if grads is None:
        raise ValueError("The target class score does not depend on the selected layer.")

    # One importance weight per channel: shape (channels,)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1)).numpy()

    # Feature map of the first sample: shape (time, channels)
    first_sample_maps = feature_maps.numpy()[0]

    # Weighted sum over channels -> one value per time step
    cam = first_sample_maps @ pooled_grads

    cam = np.maximum(cam, 0)
    peak = np.max(cam)
    if peak > 0:  # avoid 0/0 -> NaN when nothing is positively attributed
        cam = cam / peak
    return cam


def plot_grad_cam(cam, time_steps):
    """Show a Grad-CAM map as a heat map.

    Args:
        cam: 1-D importance profile (drawn as a single-row heat strip) or a
            2-D array whose rows are drawn along the y axis.
        time_steps: Currently unused; kept so existing callers keep working.
    """
    # imshow needs at least 2-D data; a 1-D profile becomes one row.
    heatmap = np.atleast_2d(cam)

    plt.figure(figsize=(10, 5))
    plt.imshow(heatmap, aspect='auto', cmap='jet')
    plt.colorbar()
    plt.xlabel('Time steps')
    if heatmap.shape[0] > 1:
        plt.ylabel('Channel')
    else:
        # A single row carries no per-channel information.
        plt.yticks([])
    plt.title('Grad-CAM')
    plt.show()

# Example of usage: Grad-CAM for the first test sample
sample_position = 0
# Slice instead of indexing so the batch dimension is kept
X_sample = X_test[sample_position:sample_position + 1]
# Class index taken from the one-hot label of that sample (its true class)
target_class = y_test[sample_position].argmax()
cam = compute_grad_cam(model=model, X_input=X_sample, target_class=target_class)
plot_grad_cam(cam=cam, time_steps=X_sample.shape[1])

# LIME - Local Interpretable Model-agnostic Explanations
# Explains the model's prediction for a single test sample and shows the
# explanation as a matplotlib figure.
# NOTE(review): the import of LimeTimeSeriesExplainer from
# `lime.lime_time_series` is the statement behind the recorded
# "No module named 'lime'" error, so this section has not been run.
# NOTE(review): the PyPI `lime` package does not appear to ship a
# `lime_time_series` module -- confirm which package is meant to provide
# this class and check its explain_instance() signature (some time-series
# LIME implementations require a `num_slices` argument) -- TODO confirm.
# This rebinds `explainer`, which previously held the SHAP explainer.
explainer = LimeTimeSeriesExplainer()
idx = 10  # Index of the sample to explain
# X_test[idx] has shape (time steps, 1) after the earlier reshape.
# NOTE(review): presumably the explainer passes perturbed copies of this
# series to model.predict; verify the array shape it sends matches the
# model's (batch, time steps, 1) input.
explanation = explainer.explain_instance(X_test[idx], model.predict, num_features=10)
explanation.as_pyplot_figure()
plt.show()

# Permutation Importance - Assess feature importance
# scikit-learn's permutation_importance needs a 2-D feature matrix, 1-D class
# labels and a way to score the estimator. The Keras model has no `score`
# method and consumes 3-D input, so the test set is flattened, the one-hot
# labels are turned into class indices, and accuracy is computed by a custom
# scorer that restores the (samples, time steps, 1) shape before predicting.
perm_features = X_test.reshape(X_test.shape[0], -1)
perm_labels = np.argmax(y_test, axis=1)


def _keras_accuracy(estimator, X_flat, y_labels):
    """Return the accuracy of a Keras classifier on a flattened batch."""
    probabilities = estimator.predict(X_flat.reshape(X_flat.shape[0], -1, 1), verbose=0)
    return float(np.mean(np.argmax(probabilities, axis=1) == y_labels))


# Each time step is shuffled n_repeats times; importance is the mean drop in
# accuracy relative to the unshuffled test set.
result = permutation_importance(
    model,
    perm_features,
    perm_labels,
    scoring=_keras_accuracy,
    n_repeats=10,
    random_state=42,
)
plt.bar(range(perm_features.shape[1]), result.importances_mean)
plt.xlabel('Feature Index')
plt.ylabel('Permutation Importance')
plt.title('Feature Importance (Permutation)')
plt.show()


# Cell output: ModuleNotFoundError: No module named 'lime'