# EXPLAINABLE AI: LIME ALGORITHM

In [None]:
# Prepare features/target, fit an XGBoost classifier and explain one test row with LIME.
# `df` must already exist in the kernel and contain a 'target' column.

y = df['target'].to_frame()                 # target kept as a one-column DataFrame
X = df[df.columns.difference(['target'])]   # every column except the target

# Hold out 20% of the rows for testing (fixed random_state for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the gradient-boosted classifier on the training split
model = XGBClassifier()
model.fit(X_train, y_train)

# Score the held-out rows and report accuracy as a percentage
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100.0))


def classifier_fn(x):
    """Return the fitted model's class probabilities for `x`, cast to float (the callable handed to LIME)."""
    return model.predict_proba(x).astype(float)


# Build the tabular explainer from the raw training matrix
explainer = lime.lime_tabular.LimeTabularExplainer(
    X_train.values,
    mode='classification',
    feature_names=X_train.columns,
    class_names=['negative', 'positive'],
)

# Explain the test row at position 5, asking for a weight on every feature
exp = explainer.explain_instance(
    X_test.values[5],
    classifier_fn,
    num_features=X_test.shape[1],
)

# Render the explanation inline in the notebook
exp.show_in_notebook(show_all=False)

# Same explanation as a matplotlib bar chart
exp.as_pyplot_figure()


# EXPLAINABLE AI: SHAP ALGORITHM

In [None]:
# Prepare features/target, fit an XGBoost classifier and explain it with SHAP.
# `df` must already exist in the kernel and contain a 'target' column.

y = df['target'].to_frame()                 # target kept as a one-column DataFrame
X = df[df.columns.difference(['target'])]   # every column except the target

# Train-test split (20% held out, fixed random_state for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Building model - Xgboost
model = XGBClassifier()
model.fit(X_train, y_train)

# Making prediction with test data
y_pred = model.predict(X_test)

# Performance measurement
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

# SHAP explainer for tree-based models
explainer = shap.TreeExplainer(model)

# Store SHAP values and expected values
shap_values = explainer.shap_values(X_test)
expected_values = explainer.expected_value

# Make a summary plot of feature importance
shap.summary_plot(shap_values, X_test, feature_names=X_test.columns)

# Pick the matrix of SHAP values to summarise in the bar chart.
# Depending on the model / shap version, `shap_values` may be:
#   * a list with one (n_samples, n_features) array per class -> take the positive class
#   * a 3-D array (n_samples, n_features, n_outputs)          -> take the positive class
#   * a single 2-D array (typical for binary XGBoost)         -> use it as is
# Indexing a 2-D array with [1] would select one sample's row, which the bar plot cannot use.
if isinstance(shap_values, list):
    positive_class_shap = shap_values[1]
elif getattr(shap_values, "ndim", 2) == 3:
    positive_class_shap = shap_values[:, :, 1]
else:
    positive_class_shap = shap_values

# Make a bar graph plot
shap.summary_plot(positive_class_shap, X_test, feature_names=X_test.columns, plot_type="bar")



# Anomaly Detection SOM Experiment

In [None]:
!pip install minisom

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from minisom import MiniSom

# Loading Data
# NOTE(review): absolute Google Drive path - resolves only where that Drive folder is mounted.
data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Credit_Card_Applications.csv')

# Shape of the data:
print("Shape of the data:", data.shape)

# Info of the data:
print("Info of the data:")
data.info()  # info() writes its report itself and returns None, so it is not wrapped in print()

# SOM input: columns at positions 1..13; y: the last column (used later to pick plot markers)
X = data.iloc[:, 1:14].values
y = data.iloc[:, -1].values

# Convert X variable into a pandas DataFrame
X = pd.DataFrame(X)

from sklearn.preprocessing import MinMaxScaler

# Rescale every feature into [0, 1]; `sc` is kept so the scaling can be inverted later
sc = MinMaxScaler(feature_range=(0, 1))
X = sc.fit_transform(X)
X = pd.DataFrame(X)

# Set the hyperparameters
som_grid_rows = 10
som_grid_columns = 10
iterations = 20000
sigma = 1
learning_rate = 0.5
som_random_seed = 42  # fixed seed so the trained map (and neuron coordinates) repeat across runs

# Create MiniSom Model; input_len follows the actual number of feature columns
som = MiniSom(x=som_grid_rows,
              y=som_grid_columns,
              input_len=X.shape[1],
              sigma=sigma,
              learning_rate=learning_rate,
              random_seed=som_random_seed)

# Initializing the weights from the data
som.random_weights_init(X.values)

# Training
som.train_random(data=X.values, num_iteration=iterations)

from pylab import plot, axis, show, pcolor, colorbar, bone

# Figure 1: the SOM distance map (transposed) on its own, with a colour legend
bone()
pcolor(som.distance_map().T)
colorbar()
show()

# Figure 2: the same distance map as background for the per-sample markers
bone()
pcolor(som.distance_map().T)
colorbar()  # gives legend

# Marker shape / edge colour are looked up by the sample's label:
#   label 0 -> red circle, label 1 -> green square
# NOTE(review): the label is the last CSV column; confirm what 0/1 mean for this dataset.
markers = ['o', 's']
colors = ['r', 'g']

for sample, label in zip(X.values, y):
    win_x, win_y = som.winner(sample)
    # +0.5 puts the marker in the centre of the winning cell
    plot(win_x + 0.5,
         win_y + 0.5,
         markers[label],
         markeredgecolor=colors[label],
         markerfacecolor='None',
         markersize=10,
         markeredgewidth=2)

show()

# Store som win_map into a variable named mappings (winning neuron -> samples mapped to it)
mappings = som.win_map(X.values)

print("Number of neurons in the winning mapping:", len(mappings.keys()))

# Grid coordinates of the neurons treated as outliers.
# NOTE(review): these are hand-picked; re-check them against the distance map after retraining.
fraud_neurons = [(0, 9), (8, 9)]

# Collect the samples of each selected neuron, skipping neurons that won no samples:
# concatenating an empty list with a 2-D array would raise.
fraud_groups = [np.asarray(mappings.get(neuron, [])) for neuron in fraud_neurons]
fraud_groups = [group for group in fraud_groups if len(group) > 0]

if fraud_groups:
    frauds = np.concatenate(fraud_groups, axis=0)
    # Convert the selected customers back to original values with the MinMaxScaler (sc)
    frauds1 = pd.DataFrame(sc.inverse_transform(frauds))
else:
    # None of the selected neurons holds any sample: return an empty result instead of failing
    frauds = np.empty((0, X.shape[1]))
    frauds1 = pd.DataFrame(frauds)

frauds1


# Auto Encoders

In [None]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.preprocessing import MinMaxScaler
from keras.layers import Input, Dense
from keras.models import Model
from sklearn.model_selection import train_test_split

# Iris data: feature matrix, integer class labels and the class names
iris = datasets.load_iris()
X, y = iris.data, iris.target
target_names = iris.target_names

# Fit a min-max scaler on X and apply it in one step
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Scatter-plot helper for 2-D projections of the three classes
def plot3clusters(X, title, vtitle):
    """Scatter the first two columns of `X`, one colour per class.

    Class membership comes from the module-level `y`, legend names from the
    module-level `target_names`.

    Parameters:
        X: 2-D array whose rows line up with `y`; columns 0 and 1 are plotted.
        title: figure title.
        vtitle: axis-label prefix; "1" / "2" is appended for the x / y axis.
    """
    palette = ('navy', 'turquoise', 'darkorange')
    plt.figure()
    for class_id, (shade, class_label) in enumerate(zip(palette, target_names)):
        members = y == class_id
        plt.scatter(X[members, 0], X[members, 1], color=shade, label=class_label)
    plt.legend(loc='upper left')
    plt.title(title)
    plt.xlabel(vtitle + "1")
    plt.ylabel(vtitle + "2")
    plt.show()

# Split the scaled data into training and validation sets (labels are not needed for training)
X_train, X_test, _, _ = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Define the Autoencoder model: input -> 2-unit bottleneck -> reconstruction of the input
input_layer = Input(shape=(X.shape[1],))
encoded_layer = Dense(2, activation='relu')(input_layer)
decoded_layer = Dense(X.shape[1], activation='sigmoid')(encoded_layer)

autoencoder = Model(input_layer, decoded_layer)

# Encoder-only model: reuses the same input and bottleneck layer as the autoencoder,
# so it exposes the 2-D code once the autoencoder has been trained.
encoder = Model(input_layer, encoded_layer)

# Compile the model
autoencoder.compile(optimizer='adam', loss='mean_squared_error')

# Train the model to reproduce its own input
history = autoencoder.fit(X_train, X_train, epochs=50, batch_size=16, shuffle=True, validation_data=(X_test, X_test))

# Plot the loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Train vs Validation Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper right')
plt.show()

# Encode the full scaled dataset with the bottleneck layer.
# (autoencoder.predict would return the reconstruction, not the 2-D code.)
encoded = encoder.predict(X_scaled)

# Plot the encoded representation
plot3clusters(encoded, "Autoencoder Encoded Representation", "Encoded Feature ")


# CNN (MNIST DIGIT DATASET)


In [None]:
import keras
import cv2
import numpy as np
from keras.datasets import mnist
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
import matplotlib.pyplot as plt
from keras.utils.vis_utils import plot_model

# Load the MNIST digit images and their integer labels
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Reshape to include the channel dimension; the sample count is taken from the data itself
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)

input_shape = (28, 28, 1)

# Convert the integer labels into 10-column categorical targets
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)

# Cast to float so the in-place division below is valid
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

# Preview the first ten training images on a 5x2 grid
for i in range(10):
    plt.subplot(5, 2, i + 1)
    plt.imshow(x_train[i].reshape(28, 28), cmap='gray')  # Reshape to display as 28x28 image

# Build CNN Model

# Scale the pixel values by 1/255
x_train /= 255
x_test /= 255
model = None  # Initialize model here

# Training configuration:
#   batch size of 64,
#   number of classes interpreted from the data (10),
#   train for 5 epochs
batch_size = 64
epochs = 5

def build_model(optimizer):
    """Build and compile the CNN classifier.

    Layer order: conv(32, 3x3, ReLU) -> max-pool(2x2) -> conv(64, 3x3, ReLU)
    -> max-pool(2x2) -> dropout(0.25) -> flatten -> dense(128, ReLU)
    -> dropout(0.5) -> dense(10, softmax).

    The input shape is read from the module-level `input_shape`.

    Parameters:
        optimizer: optimizer passed straight to `compile`.

    Returns:
        The compiled Sequential model (categorical cross-entropy loss,
        accuracy metric).
    """
    layer_stack = [
        Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(10, activation='softmax'),
    ]
    network = Sequential(layer_stack)

    network.compile(
        loss=keras.losses.categorical_crossentropy,
        optimizer=optimizer,
        metrics=['accuracy'],
    )
    return network

# One model per optimizer: Adam and Adagrad
model_adam = build_model(optimizer='adam')
model_adagrad = build_model(optimizer='adagrad')

# Write an architecture diagram for each model
plot_model(model_adam, to_file="mnist_model_adam.jpg", show_shapes=True)
plot_model(model_adagrad, to_file="mnist_model_adagrad.jpg", show_shapes=True)

# Both models are trained with identical settings, validating on the test split
fit_kwargs = dict(
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_test, y_test),
)
hist_adam = model_adam.fit(x_train, y_train, **fit_kwargs)
hist_adagrad = model_adagrad.fit(x_train, y_train, **fit_kwargs)

# Accuracy curves: training and validation for each optimizer
plt.figure(figsize=(10, 6))
for run_name, run_hist in (('Adam', hist_adam), ('Adagrad', hist_adagrad)):
    plt.plot(run_hist.history['accuracy'], label=f'{run_name} Training')
    plt.plot(run_hist.history['val_accuracy'], label=f'{run_name} Validation')
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

# Persist both trained models in HDF5 format
keras.models.save_model(model_adam, "mnist_adam.h5", save_format="h5")
keras.models.save_model(model_adagrad, "mnist_adagrad.h5", save_format="h5")

# Reload the Adam model from disk
model_loaded_adam = load_model('mnist_adam.h5')

# Helper that prints the predicted class of a single image
def predict_image(model, img):
    """Print the class index `model` predicts for one 28x28 image.

    Parameters:
        model: object with a `predict` method that accepts a (1, 28, 28, 1) batch.
        img: array with 784 elements; reshaped to a one-image batch before predicting.

    Prints "Predicted Label: <index>", where <index> is the argmax over the
    prediction. Returns nothing.
    """
    batch = img.reshape(1, 28, 28, 1)
    class_scores = model.predict(batch)
    print(f"Predicted Label: {np.argmax(class_scores)}")

# Print the prediction for test image 7, then show that image
sample_image = x_test[7]
predict_image(model_loaded_adam, sample_image)
plt.imshow(sample_image.reshape(28, 28), cmap='gray')
plt.show()


# CNN (FASHION MNIST DATASET)


In [None]:
import keras
import cv2
import numpy as np
from keras.datasets import fashion_mnist
from keras.models import Sequential , load_model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
import matplotlib.pyplot as plt
from keras.utils.vis_utils import plot_model

# Load the Fashion-MNIST images and their integer labels
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

# Reshape to include the channel dimension; the sample count is taken from the data itself
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)

input_shape = (28, 28, 1)

# Convert the integer labels into 10-column categorical targets
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)

# Cast to float so the in-place division below is valid
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

# Preview the first ten training images on a 5x2 grid
for i in range(10):
    plt.subplot(5, 2, i + 1)
    plt.imshow(x_train[i].reshape(28, 28), cmap='gray')  # Reshape to display as 28x28 image

# Scale the pixel values by 1/255
x_train /= 255
x_test /= 255

# Placeholder only: build_model is defined further down in this cell, so it cannot be
# called here on a fresh kernel. The models that are actually trained are created
# after that definition.
model = None

def build_model(optimizer):
    """Build and compile the CNN classifier.

    Layer order: conv(32, 3x3, ReLU) -> max-pool(2x2) -> conv(64, 3x3, ReLU)
    -> max-pool(2x2) -> dropout(0.25) -> flatten -> dense(128, ReLU)
    -> dropout(0.5) -> dense(10, softmax).

    The input shape is read from the module-level `input_shape`.

    Parameters:
        optimizer: optimizer passed straight to `compile`.

    Returns:
        The compiled Sequential model (categorical cross-entropy loss,
        accuracy metric).
    """
    feature_extractor = [
        Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
    ]
    classifier_head = [
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(10, activation='softmax'),
    ]
    network = Sequential(feature_extractor + classifier_head)

    network.compile(
        loss=keras.losses.categorical_crossentropy,
        optimizer=optimizer,
        metrics=['accuracy'],
    )
    return network

# One model per optimizer: RMSprop and SGD
model_rmsprop = build_model(optimizer='rmsprop')
model_sgd = build_model(optimizer='sgd')

# Write an architecture diagram for each model
plot_model(model_rmsprop, to_file="mnist_model_rmsprop.jpg", show_shapes=True)
plot_model(model_sgd, to_file="mnist_model_sgd.jpg", show_shapes=True)

# Both models are trained with identical settings, validating on the test split
batch_size = 64
epochs = 5
fit_kwargs = dict(
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_test, y_test),
)
hist_rmsprop = model_rmsprop.fit(x_train, y_train, **fit_kwargs)
hist_sgd = model_sgd.fit(x_train, y_train, **fit_kwargs)

# Persist both trained models in HDF5 format
keras.models.save_model(model_rmsprop, "mnist_rmsprop.h5", save_format="h5")
keras.models.save_model(model_sgd, "mnist_sgd.h5", save_format="h5")

# Accuracy curves: training and validation for each optimizer
plt.figure(figsize=(10, 6))
for run_name, run_hist in (('RMSprop', hist_rmsprop), ('SGD', hist_sgd)):
    plt.plot(run_hist.history['accuracy'], label=f'{run_name} Training')
    plt.plot(run_hist.history['val_accuracy'], label=f'{run_name} Validation')
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

# Reload the RMSprop model from disk
model_loaded_rmsprop = load_model('mnist_rmsprop.h5')

# Define a function that prints the predicted class (index and name) of a single image
def predict_image(model, img, labels=None):
    """Print the class index and class name `model` predicts for one 28x28 image.

    Parameters:
        model: object with a `predict` method that accepts a (1, 28, 28, 1) batch.
        img: array with 784 elements; reshaped to a one-image batch before predicting.
        labels: optional sequence mapping class index -> display name. When omitted,
            the ten Fashion-MNIST class names are used.

    Prints "Predicted Label: <index> - <name>". Returns nothing.
    """
    if labels is None:
        # Fashion-MNIST class names, ordered by class index 0..9
        labels = (
            "T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
            "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot",
        )
    img = img.reshape(1, 28, 28, 1)
    prediction = model.predict(img)
    predicted_label = np.argmax(prediction)
    print(f"Predicted Label: {predicted_label} - {labels[predicted_label]}")

# Print the prediction for test image 7, then show that image
sample_image = x_test[7]
predict_image(model_loaded_rmsprop, sample_image)
plt.imshow(sample_image.reshape(28, 28), cmap='gray')
plt.show()


# LSTM Experiment

In [None]:
import numpy as np
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense, LSTM, Embedding
from keras.preprocessing import sequence
from keras.preprocessing.sequence import pad_sequences

# Seed NumPy's global random generator
np.random.seed(7)

# Vocabulary size passed to the dataset loader (and later to the embedding layer)
top_words = 5000

# IMDB movie-review dataset, loaded with num_words=top_words
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)

# Truncate and/or pad every review to the same length
max_review_length = 400
X_train, X_test = (
    pad_sequences(split, maxlen=max_review_length) for split in (X_train, X_test)
)

# Quick look: overall shape and one padded review
print(X_train.shape)
print(X_train[1])

import tensorflow as tf

# Size of each word-embedding vector
embedding_vector_length = 32

# Embedding -> LSTM(10) -> single sigmoid unit
model = Sequential([
    Embedding(top_words, embedding_vector_length, input_length=max_review_length),
    LSTM(10),
    Dense(1, activation='sigmoid'),
])

# Binary cross-entropy loss with the Adam optimizer, tracking accuracy
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# Train for 10 epochs, validating on the test split after each epoch
hist = model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=256,
    verbose=1,
    validation_data=(X_test, y_test),
)

# Final evaluation on the test split; scores[1] is the accuracy metric
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1] * 100))

# matplotlib is not imported anywhere in this cell; import it here so the plots do not
# depend on an earlier cell having been run.
import matplotlib.pyplot as plt

# Plot graph between epoch vs Accuracy (training and validation curves)
plt.plot(hist.history['accuracy'])
plt.plot(hist.history['val_accuracy'])
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()

# Plot graph between epoch vs Loss (training and validation curves)
plt.plot(hist.history['loss'])
plt.plot(hist.history['val_loss'])
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()

# Make predictions on test data and print the raw model outputs
pred = model.predict(X_test)
print(pred)


# PCA

In [None]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.preprocessing import MinMaxScaler

# Iris data: feature matrix and integer class labels
iris = datasets.load_iris()
X, y = iris.data, iris.target
print("X:", X[0])  # first sample, unscaled
target_names = iris.target_names

# Fit a min-max scaler on X and apply it in one step
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Scatter-plot helper for 2-D projections
def plot3clusters(X, title, vtitle):
    """Scatter the first two columns of `X`, coloured by the module-level `y`.

    Parameters:
        X: 2-D array whose rows line up with `y`; columns 0 and 1 are plotted.
        title: figure title.
        vtitle: axis-label prefix; " 1" / " 2" is appended for the x / y axis.
    """
    first_axis, second_axis = X[:, 0], X[:, 1]
    plt.scatter(first_axis, second_axis, c=y)
    plt.title(title)
    for set_axis_label, suffix in ((plt.xlabel, " 1"), (plt.ylabel, " 2")):
        set_axis_label(vtitle + suffix)
    plt.show()

# Project the scaled data onto two principal components
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# Show the first rows of the reduced data
reduced_df = pd.DataFrame(X_pca, columns=['PC1', 'PC2'])
print("Reduced Dimension Values:", reduced_df.head(), sep="\n")

# Scatter plot of the two components
plot3clusters(X_pca, "PCA of Iris Dataset", "Principal Component")

# Explained variance ratio reported by the fitted PCA
explained_variance_ratio = pca.explained_variance_ratio_
print("Explained Variance Ratio:", explained_variance_ratio, sep="\n")
