# Implement Grad-CAM

In [1]:
import h5py
import os
import pickle
from tqdm import tqdm
from time import gmtime, strftime
import numpy as np
import math
from sklearn import metrics
from sklearn.metrics import roc_curve, confusion_matrix, roc_auc_score
import tensorflow as tf
from tensorflow.keras import layers,Model
from sklearn.model_selection import KFold
import gc
import time
from sklearn.model_selection import KFold
import import_test as data_load

In [17]:
# --- Input geometry -------------------------------------------------------
MAX_SEQ_LENGTH = 1100   # sequence positions per sample
NUM_FEATURE = 1024      # embedding width per position (original note: "esm1 & 2 a")

# --- Network hyper-parameters ---------------------------------------------
NUM_FILTER = 64             # filters per convolution branch
NUM_HIDDEN = 512            # units in the hidden dense layer (100 was tried earlier)
WINDOW_SIZES = [4, 8, 16]   # one convolution branch per window width
NUM_CLASSES = 2

# --- Training / evaluation ------------------------------------------------
BATCH_SIZE = 16
EPOCHS = 15
K_Fold = 5
VALIDATION_MODE = "independent"  # cross or independent

# --- Class labels ---------------------------------------------------------
# NOTE(review): two label lists coexist; confirm which index order matches the
# one-hot columns of the loaded labels before using either for reporting.
CLASS_NAMES = ['1', '0']
class_names = ["Sodium", "Membrane"]

In [3]:
# Load the train/test arrays and their labels through the project loader;
# the loader prints the .npy paths it reads.
x_train, y_train, x_test, y_test = data_load.MCNN_data_load(NUM_CLASSES)

C:/jupyter/Malik/SodiumTransporters/ProtTrans/All_Train_data.npy
C:/jupyter/Malik/SodiumTransporters/ProtTrans/All_Train_labels.npy
C:/jupyter/Malik/SodiumTransporters/ProtTrans/All_Test_data.npy
C:/jupyter/Malik/SodiumTransporters/ProtTrans/All_Test_labels.npy


In [4]:
# Sanity check: one shape per line, in the order train data, train labels,
# test data, test labels.
print(*(split.shape for split in (x_train, y_train, x_test, y_test)), sep="\n")

(4638, 1, 1100, 1024)
(4638, 2)
(1161, 1, 1100, 1024)
(1161, 2)


In [14]:
from tensorflow.keras import layers, Model
import tensorflow as tf
import numpy as np

class DeepScan(Model):
    """Multi-window CNN classifier with a Grad-CAM helper.

    The wrapped functional model (``self.model``) applies one ``Conv2D`` branch
    per entry of ``window_sizes`` to the same input, pools each branch, then
    concatenates the pooled vectors and feeds them through dropout, a hidden
    dense layer and a softmax output layer.

    Args:
        window_sizes: Iterable of kernel widths; branch ``i`` (1-based) uses a
            ``(1, window_size)`` kernel and is named ``conv2d_{i}``.
        num_filters: Number of filters in every convolution branch.
        num_hidden: Units in the hidden dense layer (``dense_1``).
        num_classes: Units in the softmax output layer (``dense_2``).
        model_input_shape: Per-sample input shape passed to ``layers.Input``.
    """

    def __init__(self, window_sizes, num_filters, num_hidden, num_classes, model_input_shape=(None, None, 1024)):
        super(DeepScan, self).__init__()
        self.window_sizes = window_sizes
        self.num_filters = num_filters
        self.num_hidden = num_hidden
        self.num_classes = num_classes
        self._model_input_shape = model_input_shape
        self.build_model()

    def build_model(self):
        """Build the functional graph and store it as ``self.model``.

        Also records the raw (pre-pooling) output tensor of every convolution
        branch in ``self.conv_outputs``.
        """
        inputs = layers.Input(shape=self._model_input_shape, name='input_layer')
        self.conv_outputs = []

        pooled_branches = []
        for i, window_size in enumerate(self.window_sizes, start=1):
            x = layers.Conv2D(
                filters=self.num_filters,
                kernel_size=(1, window_size),
                activation='relu',
                padding='valid',
                kernel_initializer='glorot_uniform',
                bias_initializer=tf.constant_initializer(0.1),
                name=f'conv2d_{i}'
            )(inputs)

            # Keep the feature map itself; Grad-CAM targets these layers by name.
            self.conv_outputs.append(x)

            x = layers.MaxPooling2D(
                pool_size=(1, 2),
                strides=(1, 1),
                padding='valid',
                name=f'max_pooling2d_{i}'
            )(x)

            # Collapse the spatial axes so every branch yields one vector of
            # length num_filters regardless of its window size.
            x = layers.GlobalAveragePooling2D(name=f'global_pool_{i}')(x)
            pooled_branches.append(x)

        x = layers.Concatenate(name="concatenate")(pooled_branches)
        x = layers.Dropout(rate=0.7)(x)

        x = layers.Dense(
            self.num_hidden,
            activation='relu',
            kernel_initializer='glorot_uniform',
            bias_initializer=tf.constant_initializer(0.1),
            name='dense_1'
        )(x)

        outputs = layers.Dense(
            self.num_classes,
            activation='softmax',
            kernel_regularizer=tf.keras.regularizers.l2(1e-3),
            name='dense_2'
        )(x)

        self.model = Model(inputs=inputs, outputs=outputs, name='deepscan')

    def call(self, inputs):
        """Forward pass: delegate to the wrapped functional model."""
        return self.model(inputs)

    def get_grad_cam_heatmap(self, x, class_index=None, layer_name='conv2d_1'):
        """Compute a Grad-CAM heatmap for the first sample in ``x``.

        The heatmap is ``ReLU(sum_k w_k * A_k)`` where ``A_k`` is channel ``k``
        of the target layer's output and ``w_k`` is the gradient of the selected
        class score with respect to ``A_k``, averaged over the spatial axes.

        Args:
            x: Batch of inputs (array-like or ``tf.Tensor``). Only sample 0 is
                explained.
            class_index: Index of the output unit to explain. Defaults to the
                arg-max of the prediction for sample 0.
            layer_name: Name of the layer in ``self.model`` whose output is used
                as the feature map.

        Returns:
            numpy.ndarray with the spatial shape of the target layer's output
            (height, width), non-negative and scaled so its maximum is 1
            (left as all zeros when every value is zero).

        Raises:
            ValueError: If no layer named ``layer_name`` exists in the model.
        """
        target_layer = next(
            (layer for layer in self.model.layers if layer.name == layer_name),
            None,
        )
        if target_layer is None:
            raise ValueError(f"Layer {layer_name} not found in the model.")

        # Auxiliary model exposing both the feature map and the prediction.
        grad_model = Model(
            inputs=self.model.inputs,
            outputs=[target_layer.output, self.model.output]
        )

        if not isinstance(x, tf.Tensor):
            x = tf.convert_to_tensor(x)

        with tf.GradientTape() as tape:
            # Inference mode so dropout does not perturb the explanation.
            conv_output, predictions = grad_model(x, training=False)
            if class_index is None:
                class_index = tf.argmax(predictions[0])
            class_channel = predictions[:, class_index]

        grads = tape.gradient(class_channel, conv_output)

        # One importance weight per channel: shape (batch, channels).
        pooled_grads = tf.reduce_mean(grads, axis=(1, 2))

        # Weight each channel of the (H, W, C) feature map by its own weight and
        # sum over channels. The (C,) weight vector broadcasts along the last
        # axis; reshaping it to (C, 1, 1) instead would multiply every channel
        # by every weight and yield a (C, W) outer product, not Grad-CAM.
        weighted_conv_output = conv_output[0] * pooled_grads[0]
        heatmap = tf.reduce_sum(weighted_conv_output, axis=-1)

        # Keep positive evidence only, then scale to [0, 1].
        heatmap = tf.maximum(heatmap, 0)
        max_val = tf.reduce_max(heatmap)
        if max_val != 0:
            heatmap = heatmap / max_val

        return heatmap.numpy()

    def summary(self, *args, **kwargs):
        """Print the summary of the wrapped functional model.

        Any arguments are forwarded to ``self.model.summary``.
        """
        return self.model.summary(*args, **kwargs)

In [None]:
import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import ModelCheckpoint

# Output folders for the Grad-CAM images, one per class.
heatmap_save_dir = "C:/jupyter/Malik/SodiumTransporters/Code/HeatMaps_All"
os.makedirs(f"{heatmap_save_dir}/Positives", exist_ok=True)
os.makedirs(f"{heatmap_save_dir}/Negatives", exist_ok=True)

# x_train / y_train / x_test / y_test are the arrays produced by the
# data-loading cell (data_load.MCNN_data_load) and are used unchanged.

# NOTE(review): DataGenerator is neither defined nor imported in the visible
# cells of this notebook, so this cell depends on hidden kernel state or an
# unseen cell -- confirm and import/define it explicitly.
generator = DataGenerator(
    data=x_train,
    labels=y_train,
    batch_size=BATCH_SIZE
)

# Build the model from the configuration constants so the input shape cannot
# drift from MAX_SEQ_LENGTH / NUM_FEATURE.
model = DeepScan(
    window_sizes=WINDOW_SIZES,
    num_filters=NUM_FILTER,
    num_hidden=NUM_HIDDEN,
    num_classes=NUM_CLASSES,
    model_input_shape=(1, MAX_SEQ_LENGTH, NUM_FEATURE)
)

# Labels are one-hot with NUM_CLASSES columns, hence categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Print model summary
model.summary()

# Train the model
history = model.fit(
    generator,
    epochs=EPOCHS,
    shuffle=True
)

# Function to generate Grad-CAM heatmap and save as PNG
def generate_grad_cam_heatmaps(model, data, labels, save_dir, num_samples=3):
    """
    Collect Grad-CAM heatmaps for the first ``num_samples`` samples of each
    class, display them, and save them as PNG files in separate folders for
    positives and negatives.

    Samples are visited in order and the scan stops as soon as both classes
    have ``num_samples`` heatmaps. Grad-CAM is only computed for samples whose
    class still needs heatmaps.

    Args:
        model (DeepScan): Trained model exposing ``get_grad_cam_heatmap``.
        data (numpy array): Testing data, one sample per leading index.
        labels (numpy array): Corresponding one-hot labels.
        save_dir (str): Directory to save heatmaps; the ``Positives`` and
            ``Negatives`` sub-folders are created if missing.
        num_samples (int): Number of heatmaps to keep per class.

    NOTE(review): arg-max index 1 is treated as the positive class here, while
    the configuration cell lists ``class_names = ["Sodium", "Membrane"]``
    (Sodium at index 0) -- confirm the one-hot column order of the labels.
    """
    pos_samples = []  # heatmaps for class index 1
    neg_samples = []  # heatmaps for class index 0
    buckets = {1: pos_samples, 0: neg_samples}

    for i, (x, label) in enumerate(zip(data, labels)):
        class_label = np.argmax(label)
        bucket = buckets.get(int(class_label))

        # Skip the (expensive) gradient pass when this class is already full.
        if bucket is not None and len(bucket) < num_samples:
            # Add the batch axis expected by the model.
            x = np.expand_dims(x, axis=0)
            heatmap = model.get_grad_cam_heatmap(x, class_index=class_label, layer_name='conv2d_1')

            # Clip to non-negative values and scale to [0, 1]; an all-zero map
            # is kept as zeros instead of being divided by zero into NaNs.
            heatmap = np.maximum(heatmap, 0)
            peak = np.max(heatmap) if heatmap.size else 0
            if peak > 0:
                heatmap = heatmap / peak

            bucket.append(heatmap)

        # Optionally, log progress
        if i % 100 == 0:
            print(f"Processed {i}/{len(data)} samples")

        # Stop if we have enough positive and negative samples
        if len(pos_samples) >= num_samples and len(neg_samples) >= num_samples:
            break

    groups = (
        ("Positive", "Positives", "positive", pos_samples),
        ("Negative", "Negatives", "negative", neg_samples),
    )

    # Display heatmaps
    for title_prefix, _, _, samples in groups:
        for idx, sample in enumerate(samples):
            plt.figure(figsize=(8, 6))
            # aspect='auto' keeps very wide, short maps readable.
            plt.imshow(sample, cmap='jet', interpolation='nearest', aspect='auto')
            plt.colorbar()
            plt.title(f"{title_prefix} Sample {idx + 1} - Grad-CAM Heatmap")
            plt.show()

    # Save the heatmaps to disk
    for _, folder, file_prefix, samples in groups:
        os.makedirs(os.path.join(save_dir, folder), exist_ok=True)
        for idx, sample in enumerate(samples):
            save_path = os.path.join(save_dir, folder, f"{file_prefix}_sample_{idx + 1}_heatmap.png")
            plt.imsave(save_path, sample, cmap='jet')

# Grad-CAM heatmaps are only produced for the independent test split; report
# completion only when they were actually generated.
if VALIDATION_MODE == "independent":
    print("Generating Grad-CAM heatmaps for testing data...")
    generate_grad_cam_heatmaps(model, x_test, y_test, heatmap_save_dir)
    print("Heatmap generation complete!")
else:
    print(f"Skipping Grad-CAM heatmaps: VALIDATION_MODE is {VALIDATION_MODE!r}, not 'independent'.")

In [19]:
import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import ModelCheckpoint

# Output folders for the Grad-CAM images, one per class.
heatmap_save_dir = "C:/jupyter/Malik/SodiumTransporters/Code/HeatMaps_All"
os.makedirs(f"{heatmap_save_dir}/Positives", exist_ok=True)
os.makedirs(f"{heatmap_save_dir}/Negatives", exist_ok=True)

# x_train / y_train / x_test / y_test are the arrays produced by the
# data-loading cell (data_load.MCNN_data_load) and are used unchanged.

# NOTE(review): DataGenerator is neither defined nor imported in the visible
# cells of this notebook, so this cell depends on hidden kernel state or an
# unseen cell -- confirm and import/define it explicitly.
generator = DataGenerator(
    data=x_train,
    labels=y_train,
    batch_size=BATCH_SIZE
)

# Build the model from the configuration constants so the input shape cannot
# drift from MAX_SEQ_LENGTH / NUM_FEATURE.
model = DeepScan(
    window_sizes=WINDOW_SIZES,
    num_filters=NUM_FILTER,
    num_hidden=NUM_HIDDEN,
    num_classes=NUM_CLASSES,
    model_input_shape=(1, MAX_SEQ_LENGTH, NUM_FEATURE)
)

# Labels are one-hot with NUM_CLASSES columns, hence categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Print model summary
model.summary()

# Train the model
history = model.fit(
    generator,
    epochs=EPOCHS,
    shuffle=True
)

# Function to generate Grad-CAM heatmap and save as PNG
def generate_grad_cam_heatmaps(model, data, labels, save_dir):
    """
    Generate a Grad-CAM heatmap for every sample and save each one as a PNG
    file in the ``Positives`` or ``Negatives`` sub-folder of ``save_dir``.

    Args:
        model (DeepScan): Trained model exposing ``get_grad_cam_heatmap``.
        data (numpy array): Testing data, one sample per leading index.
        labels (numpy array): Corresponding one-hot labels.
        save_dir (str): Directory to save heatmaps; the two class sub-folders
            are created if missing.

    NOTE(review): arg-max index 1 is treated as the positive class here, while
    the configuration cell lists ``class_names = ["Sodium", "Membrane"]``
    (Sodium at index 0) -- confirm the one-hot column order of the labels.
    """
    for folder in ("Positives", "Negatives"):
        os.makedirs(os.path.join(save_dir, folder), exist_ok=True)

    for i, (x, label) in enumerate(zip(data, labels)):
        # Every label other than class index 1 is filed under "Negatives".
        class_label = np.argmax(label)
        label_class = "Positives" if class_label == 1 else "Negatives"

        # Add the batch axis expected by the model.
        x = np.expand_dims(x, axis=0)
        heatmap = model.get_grad_cam_heatmap(x, class_index=class_label, layer_name='conv2d_1')

        # Clip to non-negative values and scale to [0, 1]; an all-zero map is
        # kept as zeros instead of being divided by zero into NaNs.
        heatmap = np.maximum(heatmap, 0)
        peak = np.max(heatmap) if heatmap.size else 0
        if peak > 0:
            heatmap = heatmap / peak

        fig, ax = plt.subplots()
        try:
            # aspect='auto' keeps very wide, short maps readable.
            image = ax.imshow(heatmap, cmap='jet', interpolation='nearest', aspect='auto')
            fig.colorbar(image, ax=ax)

            save_path = os.path.join(save_dir, label_class, f"sample_{i}_heatmap.png")
            fig.savefig(save_path, bbox_inches='tight', pad_inches=0)
        finally:
            # Always release the figure, even if saving fails, so a long run
            # does not accumulate open figures.
            plt.close(fig)

        # Optionally, log progress
        if i % 100 == 0:
            print(f"Processed {i}/{len(data)} samples for {label_class}")

# Grad-CAM heatmaps are only produced for the independent test split; report
# completion only when they were actually generated.
if VALIDATION_MODE == "independent":
    print("Generating Grad-CAM heatmaps for testing data...")
    generate_grad_cam_heatmaps(model, x_test, y_test, heatmap_save_dir)
    print("Heatmap generation complete!")
else:
    print(f"Skipping Grad-CAM heatmaps: VALIDATION_MODE is {VALIDATION_MODE!r}, not 'independent'.")

Model: "deepscan"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_layer (InputLayer)       [(None, 1, 1100, 10  0           []                               
                                24)]                                                              
                                                                                                  
 conv2d_1 (Conv2D)              (None, 1, 1097, 64)  262208      ['input_layer[0][0]']            
                                                                                                  
 conv2d_2 (Conv2D)              (None, 1, 1093, 64)  524352      ['input_layer[0][0]']            
                                                                                                  
 conv2d_3 (Conv2D)              (None, 1, 1085, 64)  1048640     ['input_layer[0][0]']     

## Select a few images to display

In [25]:
import random

# Root folder holding the saved heatmaps, plus its two per-class sub-folders.
heatmap_save_dir = "C:/jupyter/Malik/SodiumTransporters/Code/HeatMaps_All"
positives_dir, negatives_dir = (
    os.path.join(heatmap_save_dir, subfolder)
    for subfolder in ("Positives", "Negatives")
)

# Function to randomly select and load heatmaps from the directory
def load_random_heatmaps(directory, num_samples=3):
    """
    Randomly select heatmap PNG files from a directory and load them.

    Args:
        directory (str): The directory containing the heatmap files.
        num_samples (int): The number of heatmap files to select.

    Returns:
        list: The loaded images (as returned by ``plt.imread``), one per
        selected file, without repetition.

    Raises:
        ValueError: If ``num_samples`` is negative or larger than the number
            of ``.png`` files found in ``directory``.
    """
    # Sort so the candidate order does not depend on the file system; with a
    # seeded ``random`` the same files are then picked on every run.
    heatmap_files = sorted(f for f in os.listdir(directory) if f.endswith('.png'))

    # random.sample would raise a generic ValueError here; say which directory
    # is short and by how much instead.
    if num_samples < 0 or num_samples > len(heatmap_files):
        raise ValueError(
            f"Cannot sample {num_samples} heatmaps from {directory!r}: "
            f"{len(heatmap_files)} PNG file(s) available."
        )

    selected_files = random.sample(heatmap_files, num_samples)
    return [plt.imread(os.path.join(directory, name)) for name in selected_files]

# Function to plot and save the heatmaps in a single figure
def plot_and_save_heatmaps(positives, negatives, save_dir):
    """
    Plot positive and negative heatmaps in one two-row figure and save it as
    ``combined_heatmaps.png`` inside ``save_dir``.

    Row 0 shows ``positives`` under the heading "Sodium Transporters", row 1
    shows ``negatives`` under "Membrane Proteins". The grid has at least three
    columns and grows to fit the longer list; unused cells are left blank.

    Args:
        positives (list): List of positive heatmaps to plot.
        negatives (list): List of negative heatmaps to plot.
        save_dir (str): Directory to save the figure.
    """
    rows = (
        ("Sodium Transporters", list(positives)),
        ("Membrane Proteins", list(negatives)),
    )

    # Keep the original 2x3 layout (12x8 in) for up to three images per row and
    # widen it proportionally when more are supplied.
    n_cols = max(3, *(len(samples) for _, samples in rows))
    fig, axes = plt.subplots(2, n_cols, figsize=(4 * n_cols, 8), squeeze=False)

    for row_axes, (heading, samples) in zip(axes, rows):
        # Hide ticks and frames everywhere, including cells left empty.
        for ax in row_axes:
            ax.axis('off')

        for ax, sample in zip(row_axes, samples):
            ax.imshow(sample, cmap='jet', interpolation='nearest')

        # A title is positioned relative to the axes and is included in
        # tight_layout; plain ax.text() would use image data coordinates and
        # end up inside the picture.
        row_axes[0].set_title(heading, fontsize=14, fontweight='bold')

    # Save the figure at high resolution (300 DPI)
    save_path = os.path.join(save_dir, "combined_heatmaps.png")
    fig.tight_layout()
    try:
        fig.savefig(save_path, dpi=300)
    finally:
        plt.close(fig)  # release the figure even if saving fails

    print(f"Heatmap figure saved at {save_path}")

# Draw three random heatmaps per class (positives first, then negatives).
positives, negatives = (
    load_random_heatmaps(class_dir, num_samples=3)
    for class_dir in (positives_dir, negatives_dir)
)

# Combine both rows into one figure stored next to the class folders.
plot_and_save_heatmaps(positives, negatives, heatmap_save_dir)

print("Heatmap generation complete!")

Heatmap figure saved at C:/jupyter/Malik/SodiumTransporters/Code/HeatMaps_All\combined_heatmaps.png
Heatmap generation complete!
