# Project ARI3205 Interpretable AI for Deep Learning Models *(Part 3)*
---

**Name:** Sean David Muscat 

**ID No:** 0172004L

---


# First install our libraries for Part 3:

In [44]:
# Install Alibi with its "tensorflow" extra into the kernel's environment; --quiet trims pip's log output.
%pip install alibi[tensorflow] --quiet

Note: you may need to restart the kernel to use updated packages.


In [45]:
# Check and install required libraries from the libraries.json file
import json

# Read the libraries from the text file
with open('../Libraries/Part1_Lib.json', 'r') as file:
    libraries = json.load(file)

# ANSI escape codes for colored output
GREEN = "\033[92m"  # Green text
RED = "\033[91m"    # Red text
RESET = "\033[0m"   # Reset to default color

# Function to check and install libraries
def check_and_install_libraries(libraries):
    for lib, import_name in libraries.items():
        try:
            # Attempt to import the library
            __import__(import_name)
            print(f"[{GREEN}✔{RESET}] Library '{lib}' is already installed.")
        except ImportError:
            # If import fails, try to install the library
            print(f"[{RED}✖{RESET}] Library '{lib}' is not installed. Installing...")
            %pip install {lib}

# Execute the function to check and install libraries
check_and_install_libraries(libraries)

# Import necessary libraries for data analysis and modeling
import warnings                                                                     # Disable warnings
import pandas as pd                                                                 # Data manipulation and analysis                #type: ignore
import numpy as np                                                                  # Numerical computations                        #type: ignore
import matplotlib.pyplot as plt                                                     # Data visualization                            #type: ignore
import seaborn as sns                                                               # Statistical data visualization                #type: ignore
import statsmodels.formula.api as smf                                               # Statistical models                            #type: ignore
# TensorFlow setup for the Alibi counterfactual explainers used in this notebook
import tensorflow as tf
tf.get_logger().setLevel(40)  # 40 == logging.ERROR: suppress deprecation messages
tf.compat.v1.disable_v2_behavior()  # disable TF2 behaviour as Alibi code still relies on TF1 constructs
# NOTE(review): the Keras training cell in this notebook currently ends with
# "NotImplementedError: numpy() is only available when eager execution is enabled".
# Presumably the switch above is what turns eager execution off - confirm that the
# installed TensorFlow/Keras versions are ones Alibi supports in TF1-compat mode.
from tensorflow.keras.layers import Conv2D, Dropout, Flatten, MaxPooling2D, UpSampling2D, Dense, Input  
from tensorflow.keras.models import Model, Sequential, load_model   
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split                                # Train-test split                              #type: ignore                                                              
from tensorflow.keras.optimizers import Adam                                        # Neural network optimizer                      #type: ignore
from sklearn.preprocessing import StandardScaler,  OneHotEncoder                    # Data scaling                                  #type: ignore
from sklearn.impute import SimpleImputer                                            # Missing value imputation                      #type: ignore
from sklearn.inspection import PartialDependenceDisplay, permutation_importance     # Feature importance                            #type: ignore
from alibi.explainers import ALE, plot_ale                                          # ALE plots                                     #type: ignore
from sklearn.neural_network import MLPClassifier                                    # Neural network classifier                     #type: ignore
from sklearn.metrics import accuracy_score                                          # Model evaluation                              #type: ignore
import statsmodels.api as sm 
from alibi.explainers import Counterfactual
from time import time                                                       # Statistical models                            #type: ignore








import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
import os
from time import time
from alibi.explainers import CounterfactualProto

# Suppress specific warnings.
# The first filter matches a single message by its text; the other two silence
# every RuntimeWarning and UserWarning raised after this point, so genuine
# problems reported through those categories will not be shown either.
warnings.filterwarnings("ignore", message="X does not have valid feature names")
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", category=UserWarning)

[[92m✔[0m] Library 'tensorflow' is already installed.
[[92m✔[0m] Library 'scikit-learn' is already installed.
[[92m✔[0m] Library 'matplotlib' is already installed.
[[92m✔[0m] Library 'seaborn' is already installed.
[[92m✔[0m] Library 'pandas' is already installed.
[[92m✔[0m] Library 'numpy' is already installed.
[[92m✔[0m] Library 'scipy' is already installed.
[[92m✔[0m] Library 'alibi' is already installed.


In [46]:
# Define the filenames
train_filename = '../Datasets/Titanic/train.csv'
test_filename = '../Datasets/Titanic/test.csv'
gender_submission_filename = '../Datasets/Titanic/gender_submission.csv'


def _load_csv(filename):
    """Read one CSV file into a DataFrame, naming the file in any error message.

    Parameters
    ----------
    filename : str
        Path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of ``filename``.

    Raises
    ------
    FileNotFoundError, pandas.errors.EmptyDataError, pandas.errors.ParserError
        Re-raised after a readable message naming ``filename`` has been printed.
    """
    try:
        data = pd.read_csv(filename)
    except FileNotFoundError:
        print(f"Error: {filename} was not found. Please ensure it is in the correct directory.")
        raise
    except pd.errors.EmptyDataError:
        # pandas' EmptyDataError/ParserError carry no `.filename` attribute, so the
        # path is taken from the argument instead of from the exception object.
        print(f"Error: {filename} is empty.")
        raise
    except pd.errors.ParserError:
        print(f"Error: There was a problem parsing {filename}. Please check the file format.")
        raise
    print(f"'{filename}' dataset loaded successfully.")
    return data


# Load the datasets. A failure is re-raised (rather than calling exit()) so the
# run stops at this cell with the original traceback still visible.
train_data = _load_csv(train_filename)
test_data = _load_csv(test_filename)
gender_submission_data = _load_csv(gender_submission_filename)

# Dataset insights
# DataFrame.info() writes its report itself and returns None, so it is called
# directly instead of being wrapped in print() (which appended a stray "None").
print("\nTrain Dataset Overview:")
train_data.info()
print("\nTrain Dataset Statistical Summary:")
print(train_data.describe())

print("\nTest Dataset Overview:")
test_data.info()
print("\nTest Dataset Statistical Summary:")
print(test_data.describe())

print("\nGender Submission Dataset Overview:")
gender_submission_data.info()

'../Datasets/Titanic/train.csv' dataset loaded successfully.
'../Datasets/Titanic/test.csv' dataset loaded successfully.
'../Datasets/Titanic/gender_submission.csv' dataset loaded successfully.

Train Dataset Overview:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB
None

Train Dataset Stati

## Feed-Forward Neural Network

In [47]:
# Read the Titanic training file again so this cell starts from the raw frame
train_data = pd.read_csv('../Datasets/Titanic/train.csv')

# --- Target / feature separation ---------------------------------------------
# 'Survived' is the label; identifier-like and free-text columns are discarded.
y = train_data['Survived']
X = train_data.drop(columns=['Survived', 'PassengerId', 'Name', 'Ticket', 'Cabin'])

# --- One-hot encode the categorical columns ----------------------------------
# Missing 'Embarked' values end up in their own indicator column (the recorded
# sample output shows an 'Embarked_nan' feature).
categorical_features = ['Sex', 'Embarked']
one_hot_encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
categorical_encoded = one_hot_encoder.fit_transform(X[categorical_features])
categorical_encoded_df = pd.DataFrame(
    categorical_encoded,
    columns=one_hot_encoder.get_feature_names_out(categorical_features),
)

# Swap the raw categorical columns for their encoded counterparts
X = pd.concat(
    [
        X.drop(columns=categorical_features).reset_index(drop=True),
        categorical_encoded_df.reset_index(drop=True),
    ],
    axis=1,
)

# --- Impute, then standardise -------------------------------------------------
# NOTE(review): both transformers are fitted on the full dataset before the
# train/test split below, so test rows contribute to the means and variances -
# confirm this is intended.
imputer = SimpleImputer(strategy='mean')
X_imputed = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)

scaler = StandardScaler()
X_scaled = pd.DataFrame(scaler.fit_transform(X_imputed), columns=X.columns)

# --- 80/20 hold-out split with a fixed seed -----------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=42,
)
print("Training data shape:", X_train.shape)
print("Test data shape:", X_test.shape)

Training data shape: (712, 11)
Test data shape: (179, 11)


In [48]:
# Assemble the feed-forward network: input -> 64 ReLU -> 32 ReLU -> 1 sigmoid
n_features = X_train.shape[1]
network_layers = [Input(shape=(n_features,))]  # explicit input shape
network_layers.extend(Dense(units, activation='relu') for units in (64, 32))
network_layers.append(Dense(1, activation='sigmoid'))  # binary-classification output
model = Sequential(network_layers)

# Binary cross-entropy loss, optimised with Adam
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Fit for 50 epochs, holding out 20% of the training rows for validation
# NOTE(review): the recorded output of this cell is
# "NotImplementedError: numpy() is only available when eager execution is enabled",
# so the model may never have been trained in the saved run - confirm the
# TensorFlow/Keras setup before relying on anything downstream of `model`.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    epochs=50,
    batch_size=32,
    verbose=1,
)

# Score the fitted network on the held-out test split
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")



NotImplementedError: numpy() is only available when eager execution is enabled.

## Surrogate Model - MLPClassifier

In [49]:
# Fit the MLPClassifier surrogate: one hidden layer of 32 logistic units
surrogate_model = MLPClassifier(
    hidden_layer_sizes=(32,),
    activation='logistic',
    random_state=1,
    max_iter=1000,
)
surrogate_model.fit(X_train, y_train)

# Note that this score is computed on the training split itself
train_accuracy = surrogate_model.score(X_train, y_train)
print('Accuracy (MLPClassifier): ' + str(train_accuracy))

Accuracy (MLPClassifier): 0.800561797752809


# Part 3.1

### Set up Counterfactuals

In [51]:
# 1. Make predictions on the test set
y_pred_probs = model.predict(X_test)
y_pred = (y_pred_probs > 0.5).astype(int).flatten()

# 2. Identify misclassified samples
incorrect_indices = np.where(y_pred != y_test.values)[0]
print(f"Number of incorrectly predicted samples: {len(incorrect_indices)}")


# 3. Black-box prediction function handed to Alibi.
#    Defined at cell level (not inside the branch below) so it exists whether
#    or not a misclassified sample is found.
def predict_fn(x: np.ndarray) -> np.ndarray:
    """Return class probabilities ``[p(died), p(survived)]`` for each row of ``x``.

    Parameters
    ----------
    x : np.ndarray
        Feature rows; a rank-1 array is treated as a single sample.

    Returns
    -------
    np.ndarray
        Two columns per input row: ``1 - p`` and ``p``, where ``p`` is the
        flattened output of ``model.predict``.
    """
    if x.ndim == 1:
        x = x.reshape(1, -1)
    p_survived = model.predict(x).flatten()
    p_died = 1.0 - p_survived
    return np.vstack([p_died, p_survived]).T


if len(incorrect_indices) < 1:
    print("No misclassified samples found. Cannot generate counterfactuals.")
else:
    # 4. Select one misclassified example (the first one, for demonstration)
    sample_idx = incorrect_indices[0]
    x_test_sample = X_test.iloc[[sample_idx]].values  # double brackets keep it 2-D: one row
    actual_label = y_test.values[sample_idx]
    print(f"Sample index: {sample_idx}, Actual label: {actual_label}, Predicted: {y_pred[sample_idx]}")
    print("\nSample features (scaled):")
    display(X_test.iloc[[sample_idx]])

    # 5. Feature-wise search bounds taken from the training data, reshaped to
    #    (1, n_features) as the Alibi documentation describes for array ranges.
    lower_bounds = X_train.min(axis=0).values.reshape(1, -1)
    upper_bounds = X_train.max(axis=0).values.reshape(1, -1)
    feature_range = (lower_bounds, upper_bounds)

    # 6. Target of the search.
    #    Alibi's `target_proba` is the probability the counterfactual must reach
    #    for the *target class*, and `target_class='other'` means "a class other
    #    than the one currently predicted". The value must therefore be above
    #    0.5 whichever label the sample has; the previous 0.2 branch asked for a
    #    point whose prediction was not flipped at all.
    #    For a misclassified sample in this binary task the "other" class is the
    #    sample's actual label.
    desired_proba = 0.8

    # 7. Instantiate the explainer for one sample with n_features columns.
    #    NOTE(review): the recorded error output of this cell mentions a target
    #    tensor of shape (11, 2), which presumably came from an earlier version
    #    that passed a shape without the leading batch dimension of 1 - re-run
    #    on a fresh kernel to confirm it is gone.
    cf_explainer = Counterfactual(
        predict_fn=predict_fn,
        shape=(1, X_train.shape[1]),
        target_proba=desired_proba,
        target_class='other',
        max_iter=1000,
        feature_range=feature_range,
        lam_init=1e-1,
        max_lam_steps=10,
        learning_rate_init=1e-2
    )

    # 8. Generate a counterfactual explanation for the selected row
    explanation = cf_explainer.explain(x_test_sample)

    # 9. Print results
    print("\n--- Counterfactual Explanation ---")
    print("Original 2-column probability:", predict_fn(x_test_sample))
    if explanation.cf is not None:
        cf_sample = explanation.cf['X']
        print("\nCounterfactual feature values (scaled):")
        # Show the array with its column names so it can be compared with the
        # original sample displayed above
        display(pd.DataFrame(cf_sample, columns=X_test.columns))

        print("Counterfactual 2-column probability:", predict_fn(cf_sample))

        # Per-feature change between the counterfactual and the original sample
        changes = cf_sample[0] - x_test_sample[0]
        print("\nDifference between CF and original sample:")
        for col, diff in zip(X_test.columns, changes):
            print(f"{col}: {diff:.3f}")
    else:
        print("No counterfactual found within the specified parameters.")




Number of incorrectly predicted samples: 120
Sample index: 1, Actual label: 0, Predicted: 1
Sample features (scaled):
       Pclass       Age     SibSp     Parch      Fare  Sex_female  Sex_male  \
439 -0.369365  0.100109 -0.474545 -0.473674 -0.437007   -0.737695  0.737695   

     Embarked_C  Embarked_Q  Embarked_S  Embarked_nan  
439   -0.482043   -0.307562    0.619306     -0.047431  


ValueError: Cannot feed value of shape (1, 2) for Tensor cf_search/assign_target:0, which has shape (11, 2)