# Part 1: Feature-Level Interpretability

## Imports and Loading Datasets

In [None]:
# mount drive - to access files from google drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [46]:
# import
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

# loading boston data - https://scikit-learn.org/1.0/modules/generated/sklearn.datasets.load_boston.html
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
target = raw_df.values[1::2, 2]

# loading titanic dataset
titanic_train_df = pd.read_csv("/content/drive/MyDrive/Third Year/ARI3205 - InterpretableAI_Project/Colab Notebooks/titanicDataset/train.csv")
print(f"Titanic Training Dataset: \n{titanic_train_df.head()}\n")

titanic_test_df = pd.read_csv("/content/drive/MyDrive/Third Year/ARI3205 - InterpretableAI_Project/Colab Notebooks/titanicDataset/test.csv")
print(f"Titanic Testing Dataset: \n{titanic_test_df.head()}\n")

Titanic Training Dataset: 
   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name     Sex   Age  SibSp  \
0                            Braund, Mr. Owen Harris    male  22.0      1   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                             Heikkinen, Miss. Laina  female  26.0      0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                           Allen, Mr. William Henry    male  35.0      0   

   Parch            Ticket     Fare Cabin Embarked  
0      0         A/5 21171   7.2500   NaN        S  
1      0          PC 17599  71.2833   C85        C  
2      0  STON/O2. 3101282   7.9250   NaN        S  
3      0            113803  53.1000  C123        S  
4      0            373450  

## Boston Feed Forward Neural Network

#### Handling Data

In [47]:
# Ensuring the data is numeric
data_features = data.astype(float)
data_target = target.astype(float)

# Splitting the dataset into training and testing sets with an 80/20 split
X_train_boston, X_test_boston, y_train_boston, y_test_boston = train_test_split(data_features, data_target, test_size=0.2, random_state=42)

# Standardize the features (important for neural networks)
scaler = StandardScaler()
X_train_boston_scaled = scaler.fit_transform(X_train_boston)
X_test_boston_scaled = scaler.transform(X_test_boston)  # Use transform, not fit_transform, on the test set

#### Training Model

In [49]:
# # define the Feed-Forward Neural Network model for Boston Housing (regression)
# model_boston = Sequential()
# model_boston.add(Dense(64, input_dim=X_train_boston.shape[1], activation='relu'))  # Input layer
# model_boston.add(Dense(32, activation='relu'))  # Hidden layer
# model_boston.add(Dense(1))  # Output layer (regression)

# # compile the model
# model_boston.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
# # model summary
# model_boston.summary()
# print("\n")

# # train the model
# history_boston = model_boston.fit(X_train_boston, y_train_boston, epochs=20, batch_size=32, validation_split=0.2)
# print("\n")

# # evaluate the model
# loss_boston = model_boston.evaluate(X_test_boston, y_test_boston)
# print(f"Boston Housing Test loss: {round(loss_boston, 4)}")

# Define the Feed-Forward Neural Network model for Boston Housing (regression)
model_boston_housing = Sequential()
model_boston_housing.add(Dense(64, input_dim=X_train_boston_scaled.shape[1], activation='relu'))  # Input layer
model_boston_housing.add(Dropout(0.3))  # Dropout for regularization
model_boston_housing.add(Dense(32, activation='relu'))  # Hidden layer
model_boston_housing.add(Dropout(0.3))  # Dropout for regularization
model_boston_housing.add(Dense(1))  # Output layer (regression)

# Compile the model with Adam optimizer and mean squared error loss function
model_boston_housing.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

# Model summary
model_boston_housing.summary()
print("\n")

# Implementing EarlyStopping and ReduceLROnPlateau callbacks
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
learning_rate_reduction_callback = ReduceLROnPlateau(monitor='val_loss', patience=3, factor=0.5, min_lr=0.00001)

# Train the model with early stopping and learning rate reduction
history_boston = model_boston_housing.fit(X_train_boston_scaled, y_train_boston, epochs=20, batch_size=32, validation_split=0.2, callbacks=[early_stopping_callback, learning_rate_reduction_callback])
print("\n")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)




Epoch 1/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 26ms/step - loss: 617.2901 - val_loss: 542.6339 - learning_rate: 0.0010
Epoch 2/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 607.4313 - val_loss: 518.9136 - learning_rate: 0.0010
Epoch 3/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 553.6509 - val_loss: 490.6814 - learning_rate: 0.0010
Epoch 4/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 527.2919 - val_loss: 455.0292 - learning_rate: 0.0010
Epoch 5/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 473.9139 - val_loss: 409.8420 - learning_rate: 0.0010
Epoch 6/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 443.0437 - val_loss: 353.9836 - learning_rate: 0.0010
Epoch 7/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 412.3700 - val_loss: 286.647

#### Evaluating Model

In [51]:
# Evaluate the model on the test set
test_loss_boston = model_boston_housing.evaluate(X_test_boston_scaled, y_test_boston)
print(f"Boston Housing Test Loss (Mean Squared Error): {round(test_loss_boston, 4)}\n")

# Additional performance metrics
predictions_boston = model_boston_housing.predict(X_test_boston_scaled)

# Calculate Mean Absolute Error (MAE) and Root Mean Squared Error (RMSE)
mean_absolute_error_boston = mean_absolute_error(y_test_boston, predictions_boston)
mean_squared_error_boston = mean_squared_error(y_test_boston, predictions_boston)
root_mean_squared_error_boston = np.sqrt(mean_squared_error_boston)

print(f"Boston Housing Test Mean Absolute Error: {round(mean_absolute_error_boston, 4)}")
print(f"Boston Housing Test Root Mean Squared Error: {round(root_mean_squared_error_boston, 4)}")

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 21.8374 
Boston Housing Test Loss (Mean Squared Error): 27.8152

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Boston Housing Test Mean Absolute Error: 3.5287
Boston Housing Test Root Mean Squared Error: 5.274


## Titanic Feed Forward Neural Network

#### Handling Data

In [34]:
# Handle missing values
titanic_train_df['Age'].fillna(titanic_train_df['Age'].mean(), inplace=True)
titanic_train_df['Embarked'].fillna(titanic_train_df['Embarked'].mode()[0], inplace=True)

# Drop columns that are not needed for prediction
titanic_train_df.drop(columns=['Name', 'Ticket', 'Cabin', 'PassengerId'], inplace=True)

# Convert categorical columns to numeric using one-hot encoding
titanic_train_df = pd.get_dummies(titanic_train_df, columns=['Sex', 'Embarked'], drop_first=True)

# Separate features and target
X_titanic = titanic_train_df.drop(columns=['Survived'])
y_titanic = titanic_train_df['Survived']

# Check for NaN or Infinite values in the dataset
print("Checking for NaN or Infinite values in the dataset...")
print(np.any(np.isnan(X_titanic)), np.any(np.isinf(X_titanic)))  # Should return False
print(np.any(np.isnan(y_titanic)), np.any(np.isinf(y_titanic)))  # Should return False

# Split the dataset into training and testing sets
X_train_titanic, X_test_titanic, y_train_titanic, y_test_titanic = train_test_split(X_titanic, y_titanic, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train_titanic = scaler.fit_transform(X_train_titanic)
X_test_titanic = scaler.transform(X_test_titanic)  # Use transform, not fit_transform, on the test set

Checking for NaN or Infinite values in the dataset...
False False
False False


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  titanic_train_df['Age'].fillna(titanic_train_df['Age'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  titanic_train_df['Embarked'].fillna(titanic_train_df['Embarked'].mode()[0], inplace=True)


#### Training Model

In [36]:
# Define the Feed-Forward Neural Network model for Titanic (classification)
model_titanic = Sequential()
model_titanic.add(Dense(64, input_dim=X_train_titanic.shape[1], activation='relu'))  # Input layer
model_titanic.add(Dropout(0.3))  # Dropout for regularization
model_titanic.add(Dense(32, activation='relu'))  # Hidden layer
model_titanic.add(Dropout(0.3))  # Dropout for regularization
model_titanic.add(Dense(1, activation='sigmoid'))  # Output layer (binary classification)

# Compile the model with a reduced learning rate and gradient clipping
optimizer = Adam(learning_rate=0.001, clipvalue=1.0)  # Slightly higher learning rate for faster convergence
model_titanic.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
model_titanic.summary()
print("\n")

# Implement EarlyStopping to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model
history_titanic = model_titanic.fit(X_train_titanic, y_train_titanic, epochs=20, batch_size=32, validation_split=0.2, callbacks=[early_stopping])
print("\n")

# Evaluate the model on the test set
loss_titanic, accuracy_titanic = model_titanic.evaluate(X_test_titanic, y_test_titanic)
print(f"Titanic Test Loss: {round(loss_titanic * 100, 4)}%")
print(f"Titanic Test Accuracy: {round(accuracy_titanic * 100, 4)}%")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)




Epoch 1/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - accuracy: 0.6747 - loss: 0.6503 - val_accuracy: 0.8042 - val_loss: 0.5868
Epoch 2/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6854 - loss: 0.6166 - val_accuracy: 0.8182 - val_loss: 0.5253
Epoch 3/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7820 - loss: 0.5407 - val_accuracy: 0.7972 - val_loss: 0.4790
Epoch 4/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7582 - loss: 0.5455 - val_accuracy: 0.7902 - val_loss: 0.4451
Epoch 5/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7612 - loss: 0.5235 - val_accuracy: 0.7902 - val_loss: 0.4282
Epoch 6/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7688 - loss: 0.5031 - val_accuracy: 0.8252 - val_loss: 0.4140
Epoch 7/20
[1m18/18[0m [32m━━━━━━━

## 1. Partial Dependence Plots (PDP) and Individual Conditional Expectation (ICE) plots

#### a. Use PDP to examine the average effect of at least two features

In [29]:
# history_boston
# history_titanic