In [None]:
# import necessary libraries
from PIL import Image
import numpy as np
np.random.seed(42)
import os
import pydot
import pandas as pd
import matplotlib.pyplot as plt
import time
from tqdm import tqdm
import tensorflow as tf
tf.random.set_seed(42)
from tensorflow import keras
from keras import backend as K
from keras.layers import Lambda
from keras.regularizers import l2
from keras.initializers import HeNormal, GlorotUniform
from tensorflow.keras import layers, models
from tensorflow.keras.models import Model
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, LeakyReLU, GRU, Embedding, Attention, Concatenate
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from sklearn.utils import class_weight
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import KFold, train_test_split, StratifiedKFold
from sklearn.metrics import confusion_matrix, roc_auc_score, auc, average_precision_score, roc_curve, precision_recall_curve, recall_score, precision_score, matthews_corrcoef,accuracy_score, f1_score
import seaborn as sns
# from sklearn.metrics import r2_score, mean_squared_error
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle
from rdkit import Chem
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem import Descriptors
from rdkit.ML.Descriptors import MoleculeDescriptors
from mordred import Calculator, descriptors
from rdkit.Chem.Fingerprints import FingerprintMols
from rdkit.Chem import AllChem

In [None]:
# --- Dataset locations --------------------------------------------------------
# Replace the six values below with the paths to your own data.

# Spreadsheets (read later with pandas.read_excel)
train_set = "Train Cleaned Data.xlsx"      # train set
test_set = "Test Cleaned Data.xlsx"        # test set
external_set = "Cleaned External.xlsx"     # external validation set

# Image folders, one per spreadsheet above
train_images_path = "TrainCan Smiles images new"        # train images
test_images_path = "TestCan Smiles images new"          # test images
external_images_path = "External Smiles images new"     # external validation images

In [None]:
def _numeric_sort_key(filename):
    """Sort key that orders file names by the integer formed from all their digits.

    All digit characters of the name are concatenated (so "mol_12_v3.png" -> 123).
    Names without any digit sort last instead of raising ValueError on int('').
    The file name is used as a tie-breaker so the order never depends on the
    arbitrary order returned by os.listdir.
    """
    digits = ''.join(ch for ch in filename if ch.isdigit())
    if digits:
        return (0, int(digits), filename)
    return (1, 0, filename)


def collect_image_files(folder, extensions=('.png', '.jpg', '.jpeg'), verbose=True):
    """List the image files of ``folder`` in numeric file-name order.

    Parameters
    ----------
    folder : str
        Directory to scan (not recursive).
    extensions : tuple of str
        File-name suffixes (case-sensitive) that count as images.
    verbose : bool
        When True, print every sorted file name with its index.

    Returns
    -------
    (file_list, image_files, sorted_image_files) : tuple of lists
        All directory entries, the entries matching ``extensions``, and those
        matches sorted with ``_numeric_sort_key``.

    Raises
    ------
    FileNotFoundError
        If ``folder`` is not an existing directory. Failing here is clearer than
        the NameError that the later cells would raise for a skipped folder.
    """
    if not os.path.isdir(folder):
        raise FileNotFoundError(f"Image folder not found: {folder!r}")

    file_list = os.listdir(folder)
    image_files = [f for f in file_list if f.endswith(tuple(extensions))]
    print(f"Found {len(image_files)} image files in the folder.")

    sorted_image_files = sorted(image_files, key=_numeric_sort_key)
    if verbose:
        for index, image_file in enumerate(sorted_image_files):
            print(f"Index: {index}, Image: {image_file}")
    return file_list, image_files, sorted_image_files


# The sorted order is what the later cells iterate over when loading images.
train_file_list, train_image_files, sortedtrain_image_files = collect_image_files(train_images_path)

test_file_list, test_image_files, sortedtest_image_files = collect_image_files(test_images_path)

external_file_list, external_image_files, sortedexternal_image_files = collect_image_files(external_images_path)

In [None]:
# Each spreadsheet is read in full; only its label column is kept as the target.

# Train labels (column 'Labels')
Train_Toxicity = pd.read_excel(train_set)
Train_Eff_Toxicity = Train_Toxicity['Labels']
print(Train_Eff_Toxicity.shape)

# Test labels (column 'Labels')
Test_Toxicity = pd.read_excel(test_set)
Test_Eff_Toxicity = Test_Toxicity['Labels']
print(Test_Eff_Toxicity.shape)

# External labels (note: this file names the column 'label')
External_Toxicity = pd.read_excel(external_set)
External_Eff_Toxicity = External_Toxicity['label']
print(External_Eff_Toxicity.shape)

In [None]:
def load_image_arrays(image_dir, filenames, target_size):
    """Load, resize and normalise a list of images into one NumPy array.

    Parameters
    ----------
    image_dir : str
        Folder containing the images.
    filenames : iterable of str
        File names inside ``image_dir``; the output keeps this order.
    target_size : tuple of int
        ``(width, height)`` passed to PIL's ``Image.resize``.

    Returns
    -------
    numpy.ndarray
        Pixel values divided by 255.0, so in [0, 1]. One RGB image per file name,
        each of shape ``(height, width, 3)``. An empty ``filenames`` yields an
        empty array.
    """
    image_arrays = []
    for filename in filenames:
        # The context manager closes the file handle that Image.open keeps open.
        with Image.open(os.path.join(image_dir, filename)) as image:
            # Same order as before: resize first, then force three RGB channels.
            rgb_image = image.resize(target_size).convert("RGB")
        # After convert("RGB") the array is always (height, width, 3), so no
        # reshaping fallback is needed (np.resize would only scramble pixels).
        image_arrays.append(np.array(rgb_image) / 255.0)
    return np.array(image_arrays)


# Train image
Train_target_Size = (128,128)
Train_image_Arrays = load_image_arrays(train_images_path, sortedtrain_image_files, Train_target_Size)

# Test image
Test_target_Size = (128,128)
Test_image_Arrays = load_image_arrays(test_images_path, sortedtest_image_files, Test_target_Size)

# External image
External_target_Size = (128,128)
External_image_Arrays = load_image_arrays(external_images_path, sortedexternal_image_files, External_target_Size)

In [None]:
# Define the CNN model for 2D molecular images
def create_cnn_image_model(input_shape):
    """Build the CNN branch that encodes a molecule image into 16 features.

    Parameters
    ----------
    input_shape : tuple of int
        Shape of one image, e.g. ``(128, 128, 3)``. It is now actually used for
        the input layer instead of the global ``Train_target_Size``.

    Returns
    -------
    (input_images, x1)
        The Keras input tensor and the output tensor of the branch
        (three Conv2D/MaxPooling2D stages, Flatten, Dense(16), Dropout(0.5)).
    """
    input_images = Input(shape=input_shape)
    x1 = Conv2D(64, (3, 3), activation='relu', padding='same')(input_images)
    x1 = MaxPooling2D(pool_size=(2, 2))(x1)
    x1 = Conv2D(32, (3, 3), activation='relu', padding='same')(x1)
    x1 = MaxPooling2D(pool_size=(2, 2))(x1)
    x1 = Conv2D(16, (3, 3), activation='relu', padding='same')(x1)
    x1 = MaxPooling2D(pool_size=(2, 2))(x1)
    x1 = Flatten()(x1)
    x1 = Dense(16, activation='relu')(x1)
    x1 = Dropout(0.5)(x1)
    # Shows the symbolic output shape of the branch when the model is built
    print(x1.shape)
    return input_images, x1

In [None]:
# Train
Data = pd.read_excel(train_set)
canon_SMILES = Data.SMILES

def calculate_ECFPs_from_SMILES(canon_SMILES, radius=2, n_bits=1024):
    """Compute one Morgan (ECFP) bit-vector fingerprint per SMILES string.

    Parameters
    ----------
    canon_SMILES : iterable of str
        SMILES strings to convert.
    radius : int
        Morgan radius passed to RDKit.
    n_bits : int
        Length of each fingerprint.

    Returns
    -------
    list
        One entry per input, in input order. Each entry is either the RDKit bit
        vector or, when the SMILES cannot be parsed or RDKit raises, a list of
        ``n_bits`` zeros. Both kinds are reported on stdout so placeholder rows
        do not enter the feature matrix unnoticed.
    """
    ecfp_list = []
    for smiles in canon_SMILES:
        try:
            mol = Chem.MolFromSmiles(smiles)
            if mol is not None:
                ecfp = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits)
                ecfp_list.append(ecfp)
            else:
                # MolFromSmiles returned None: keep row alignment with a zero vector
                print(f"Could not parse SMILES: {smiles}; using a zero fingerprint")
                ecfp_list.append([0] * n_bits)
        except Exception as e:
            # Handle any errors during fingerprint calculation by appending a zero vector
            print(f"Error calculating fingerprint for SMILES: {smiles}, Error: {e}")
            ecfp_list.append([0] * n_bits)
    return ecfp_list

# Calculate the training fingerprints
ecfp_fingerprints = calculate_ECFPs_from_SMILES(canon_SMILES)

# Convert the fingerprint bit vectors to a DataFrame (columns FP_1 .. FP_1024)
ecfp_data = [list(ecfp) for ecfp in ecfp_fingerprints]  # Converting fingerprints to lists
Morgan_Fps = pd.DataFrame(ecfp_data, columns=[f"FP_{i+1}" for i in range(1024)])

# Ensure fingerprint_input is a NumPy array
fingerprint_input = np.array(Morgan_Fps)
print(f"fingerprint_input shape: {fingerprint_input.shape}")

# Test
TestData = pd.read_excel(test_set)
testcanon_SMILES = TestData.SMILES

# calculate_ECFPs_from_SMILES is already defined earlier in this cell; it is
# called here rather than redefined so one name never has competing definitions.
testecfp_fingerprints = calculate_ECFPs_from_SMILES(testcanon_SMILES)

# Convert the fingerprint bit vectors to a DataFrame (columns FP_1 .. FP_1024)
testecfp_data = [list(ecfp) for ecfp in testecfp_fingerprints]  # Converting fingerprints to lists
testMorgan_Fps = pd.DataFrame(testecfp_data, columns=[f"FP_{i+1}" for i in range(1024)])

# NumPy view of the same data
testfingerprint_input = np.array(testMorgan_Fps)
print(f"Test fingerprint_input shape: {testfingerprint_input.shape}")


# External
ExternalData = pd.read_excel(external_set)
externalcanon_SMILES = ExternalData.Canonical_Smiles  # this file uses a different column name

# calculate_ECFPs_from_SMILES is already defined earlier in this cell; it is
# called here rather than redefined so one name never has competing definitions.
externalecfp_fingerprints = calculate_ECFPs_from_SMILES(externalcanon_SMILES)

# Convert the fingerprint bit vectors to a DataFrame (columns FP_1 .. FP_1024)
externalecfp_data = [list(ecfp) for ecfp in externalecfp_fingerprints]  # Converting fingerprints to lists
externalMorgan_Fps = pd.DataFrame(externalecfp_data, columns=[f"FP_{i+1}" for i in range(1024)])

In [None]:
# Rebinds fingerprint_input to the training fingerprint DataFrame.
fingerprint_input = Morgan_Fps


def create_fingerprint_model(fingerprint_shape):
    """Build the dense branch that encodes a fingerprint vector into 32 features.

    Parameters
    ----------
    fingerprint_shape : int
        Number of fingerprint bits per sample.

    Returns
    -------
    (input_fingerprint, features)
        The Keras input tensor (named "fingerprint_input") and the output tensor
        of Dense(64) -> Dropout(0.5) -> Dense(32).
    """
    input_fingerprint = Input(shape=(fingerprint_shape,), name="fingerprint_input")

    hidden = Dense(64, activation='relu')(input_fingerprint)
    hidden = Dropout(0.5)(hidden)
    features = Dense(32, activation='relu')(hidden)

    # Shows the symbolic output shape of the branch when the model is built
    print(features.shape)
    return input_fingerprint, features

In [None]:
# Input shapes and parameters. Train, test and external data all use the same
# image shape and the same fingerprint length, so each value is assigned once.
image_shape = testimage_shape = externalimage_shape = (128, 128, 3)
fingerprint_shape = testfingerprint_shape = externalfingerprint_shape = 1024

In [None]:
# Train
# Create the individual models (feature branches). Each factory returns a pair:
# (Keras input tensor, output feature tensor of that branch).
image_input, cnn_image_features = create_cnn_image_model(image_shape)
# Note: from here on `fingerprint_input` names the Keras input tensor, no longer
# the fingerprint data assigned to the same name in earlier cells.
fingerprint_input, fingerprint_features = create_fingerprint_model(fingerprint_shape)

In [None]:
# Train
# Combine features from both branches (image CNN + fingerprint dense network)
# by concatenating them along the last axis (Concatenate's default).
combined_features = Concatenate()([cnn_image_features, fingerprint_features])
print("Combined features shape:", combined_features.shape)

In [None]:
# Fully connected layers and binary classification output.
# Head: Dense(64) -> Dropout(0.5) -> Dense(32) -> Dropout(0.5) -> Dense(1, sigmoid).
x = Dense(64, activation='relu')(combined_features) # Add regularizer, Initializer
x = Dropout(0.5)(x)
x = Dense(32, activation='relu')(x)
x = Dropout(0.5)(x)

# (disabled experiment) batch normalisation before the output layer:
#btch_norm2 = BatchNormalization()(x)
# Single sigmoid unit: one probability per sample
output = Dense(1, activation='sigmoid')(x)# Binary classification
# (disabled experiment) flattening the output:
#output = Flatten()(output)

In [None]:
def get_learning_rate_metric(optimizer):
    """Return a metric function that reports ``optimizer.learning_rate``.

    The returned function takes the usual ``(y_true, y_pred)`` metric arguments
    but ignores them. The attribute is read on every call, so later changes to
    the optimizer's learning rate are reflected. The inner function is named
    ``learning_rate``.
    """
    def learning_rate(y_true, y_pred):
        # Both arguments are deliberately unused; only the optimizer is consulted.
        current_rate = optimizer.learning_rate
        return current_rate

    return learning_rate

In [None]:
# Create the two-input model (image + fingerprint) used during cross-validation
model = Model(inputs=[image_input, fingerprint_input], outputs=output)

# Compile with an explicit Adam instance. Passing the string 'adam' would build a
# fresh optimizer with Keras' default learning rate and ignore the 0.0001 below,
# and the learning-rate metric would then report an optimizer that is not in use.
cv_optimizer = Adam(learning_rate=0.0001)
model.compile(optimizer=cv_optimizer, loss='binary_crossentropy',
              metrics=['accuracy', get_learning_rate_metric(cv_optimizer)])

# A later cell compiles a second model with `optimizer` / `learning_rate_metric`.
# A Keras optimizer is tied to the variables of the first model it trains, so
# these stay a separate, unused instance rather than sharing `cv_optimizer`.
optimizer = Adam(learning_rate=0.0001)
learning_rate_metric = get_learning_rate_metric(optimizer)

# Summary of the model
model.summary()

In [None]:
# Render a layered diagram of the model architecture (needs the optional
# visualkeras package, which is imported only in this cell).
import visualkeras
visualkeras.layered_view(model)

In [None]:
# If Train_image_Arrays and Train_Eff_Toxicity are pandas Series or DataFrames,
# convert them to NumPy arrays so they can be indexed by position with the fold
# index arrays in the cross-validation cell.
Train_image_Arrays = np.array(Train_image_Arrays)
Train_Eff_Toxicity = np.array(Train_Eff_Toxicity)

In [None]:
# Initialize Stratified K-Fold
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Best (lowest val_loss) weights of every fold, averaged in a later cell
best_weights = []

# Lists to store performance metrics for each fold
train_accuracy = []
best_train_accs = []
accuracy_scores = []
auc_scores = []
precision_scores = []
recall_scores = []
specificity_scores = []
sensitivity_scores = []
f1_scores = []
mcc_scores = []
aurocs = []
auprcs = []
tps = []
tns = []
fps = []
fns = []
tpr_scores = []
fpr_scores = []

# Compute class weights
class_weights = class_weight.compute_class_weight('balanced', classes=np.unique(Train_Eff_Toxicity), y=Train_Eff_Toxicity)
# NOTE(review): enumerate() keys the weights 0, 1, ... - this assumes the labels
# themselves are 0..k-1; confirm against the 'Labels' column.
class_weights_dict = dict(enumerate(class_weights))

# Snapshot of the untrained weights. Every fold restarts from it; without the
# reset a fold would continue from weights already fitted on samples that are
# now in its validation split, inflating the validation metrics.
# NOTE(review): run this cell on a freshly built model - on a re-run the snapshot
# would capture trained weights. Optimizer state still carries over between
# folds; rebuild and recompile the model per fold for full independence.
initial_weights = model.get_weights()

# Initialize a fold counter
fold = 1

# Loop over the 10 folds (only the labels are needed to build stratified splits)
for train_index, val_index in skf.split(Train_Eff_Toxicity, Train_Eff_Toxicity):
    model.set_weights(initial_weights)

    # Split the data into train and validation sets.
    # The model has exactly two inputs: images and fingerprints.
    X_train_images, X_val_images = Train_image_Arrays[train_index], Train_image_Arrays[val_index]
    X_train_fingerprints, X_val_fingerprints = Morgan_Fps.iloc[train_index].values, Morgan_Fps.iloc[val_index].values
    y_train, y_val = Train_Eff_Toxicity[train_index], Train_Eff_Toxicity[val_index]

    callbacks_list = [
        ModelCheckpoint(filepath=f'Multimodal_2_{fold}.keras', monitor='val_loss', save_best_only=True, verbose=1, mode='auto'),
        EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=8)]
    start_time = time.time()
    history = model.fit([X_train_images, X_train_fingerprints], y_train, epochs=30, batch_size=32, verbose=1,
                        validation_data=([X_val_images, X_val_fingerprints], y_val),
                        class_weight=class_weights_dict, shuffle=True, callbacks=callbacks_list)
    end_time = time.time()

    # Calculate training time for the current fold
    training_time = end_time - start_time
    print(f"Fold Training Time: {training_time:.2f} seconds")

    train_accuracy.append(history.history['accuracy'])

    best_train_acc = max(history.history['accuracy'])
    print(best_train_acc)
    best_train_accs.append(best_train_acc)

    # Load the best weights for this fold (the checkpoint with the lowest val_loss)
    model.load_weights(f'Multimodal_2_{fold}.keras')
    # Append the best weights to the list
    best_weights.append(model.get_weights())

    # Predict on the validation set; probabilities above 0.5 become class 1
    y_pred = model.predict([X_val_images, X_val_fingerprints])
    y_pred_labels = (y_pred > 0.5).astype(int)

    # Confusion-matrix counts
    cm = confusion_matrix(y_val, y_pred_labels)
    tn, fp, fn, tp = cm.ravel()

    # Compute metrics for this fold. The AUROC is stored as `fold_auc` so the
    # `auc` function imported from sklearn.metrics is not overwritten.
    accuracy = accuracy_score(y_val, y_pred_labels)
    fold_auc = roc_auc_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred_labels)
    recall = recall_score(y_val, y_pred_labels)
    f1 = f1_score(y_val, y_pred_labels)
    mcc = matthews_corrcoef(y_val, y_pred_labels)
    auprc = average_precision_score(y_val, y_pred)

    # Sensitivity = Recall
    sensitivity = recall
    specificity = tn / (tn + fp)

    # Calculate TPR and FPR
    tpr = tp / (tp + fn)  # True Positive Rate (Sensitivity/Recall)
    fpr = fp / (fp + tn)  # False Positive Rate

    # Append metrics to lists
    accuracy_scores.append(accuracy)
    auc_scores.append(fold_auc)
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)
    mcc_scores.append(mcc)
    sensitivity_scores.append(sensitivity)
    specificity_scores.append(specificity)
    auprcs.append(auprc)
    tps.append(tp)
    tns.append(tn)
    fps.append(fp)
    fns.append(fn)
    tpr_scores.append(tpr)
    fpr_scores.append(fpr)

    print(f"TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}")
    print(f"Recall: {recall:.4f}, Precision: {precision:.4f}, F1: {f1:.4f}")
    print(f"Specificity:{specificity:.4f}, Sensitivity:{sensitivity:.4f}")
    print(f"MCC: {mcc:.4f}")
    print(f"AUC:{fold_auc:.4f}, AUPRC:{auprc:.4f}")
    print(f"TPR: {tpr:.4f}")
    print(f"FPR: {fpr:.4f}")

    # Increment the fold counter
    fold += 1


print("******************************************************")
# Per-fold values (one entry per fold); some are also saved as .npy files
Train_acc = best_train_accs  # all folds, like the Val_* lists (was: last fold only)
Val_acc = accuracy_scores
Val_auroc = auc_scores
np.save('Val AUROC_CNN+FCNN_rat',Val_auroc)
Val_auprc = auprcs
np.save('Val AUPRC_CNN+FCNN_rat',Val_auprc)
Val_mcc = mcc_scores
Val_f1 = f1_scores
Val_sp = specificity_scores
Val_se = sensitivity_scores
Val_pr = precision_scores
np.save('Val PR_CNN+FCNN_rat',Val_pr)
Val_re = recall_scores
np.save('Val RE_CNN+FCNN_rat',Val_re)
Val_tp = tps
Val_fp = fps
Val_tn = tns
Val_fn = fns
Val_tpr = tpr_scores
Val_fpr = fpr_scores
np.save('Val TPR_CNN+FCNN_rat', Val_tpr)
np.save('Val FPR_CNN+FCNN_rat', Val_fpr)

# Mean of each metric across all folds
Mean_Train_acc = np.mean(best_train_accs)
Mean_Val_acc = np.mean(accuracy_scores)
Mean_Val_auroc = np.mean(auc_scores)
Mean_Val_auprc = np.mean(auprcs)
Mean_Val_mcc = np.mean(mcc_scores)
Mean_Val_f1 = np.mean(f1_scores)
Mean_Val_sp = np.mean(specificity_scores)
Mean_Val_se = np.mean(sensitivity_scores)
Mean_Val_pr = np.mean(precision_scores)
Mean_Val_re = np.mean(recall_scores)
Mean_Val_tp = np.mean(tps)
Mean_Val_tn = np.mean(tns)
Mean_Val_fp = np.mean(fps)
Mean_Val_fn = np.mean(fns)
Mean_Val_tpr = np.mean(tpr_scores)
Mean_Val_fpr = np.mean(fpr_scores)

# Report mean ± standard deviation and save each mean to its own .npy file
print(f"Train Accuracy: {np.mean(best_train_accs):.4f} ± {np.std(best_train_accs):.4f}")
np.save('Mean_Train_acc_CNN+FCNN_rat',Mean_Train_acc)
print(f"Accuracy: {np.mean(accuracy_scores):.4f} ± {np.std(accuracy_scores):.4f}")
np.save('Mean_Val_acc_CNN+FCNN_rat',Mean_Val_acc)
print(f"AUROC: {np.mean(auc_scores):.4f} ± {np.std(auc_scores):.4f}")
np.save('Mean_AUROC_CNN+FCNN_rat',Mean_Val_auroc)
print(f"Precision: {np.mean(precision_scores):.4f} ± {np.std(precision_scores):.4f}")
np.save('Mean_Pr_CNN+FCNN_rat',Mean_Val_pr)
print(f"Recall: {np.mean(recall_scores):.4f} ± {np.std(recall_scores):.4f}")
np.save('Mean_Re_CNN+FCNN_rat',Mean_Val_re)
print(f"Specificity: {np.mean(specificity_scores):.4f} ± {np.std(specificity_scores):.4f}")
np.save('Mean_SP_CNN+FCNN_rat',Mean_Val_sp)
print(f"Sensitivity: {np.mean(sensitivity_scores):.4f} ± {np.std(sensitivity_scores):.4f}")
np.save('Mean_SE_CNN+FCNN_rat',Mean_Val_se)
print(f"MCC: {np.mean(mcc_scores):.4f} ± {np.std(mcc_scores):.4f}")
np.save('Mean_MCC_CNN+FCNN_rat',Mean_Val_mcc)
print(f"F1 Score: {np.mean(f1_scores):.4f} ± {np.std(f1_scores):.4f}")
np.save('Mean_F1_CNN+FCNN_rat',Mean_Val_f1)  # was saving the MCC mean under the F1 name
print(f"AUPRC: {np.mean(auprcs):.4f} ± {np.std(auprcs):.4f}")
np.save('Mean_AUPRC_CNN+FCNN_rat',Mean_Val_auprc)
print(f"TP:{np.mean(tps)} ± {np.std(tps)}")
np.save('Mean_TP_CNN+FCNN_rat',Mean_Val_tp)
print(f"TN:{np.mean(tns)} ± {np.std(tns)}")
np.save('Mean_TN_CNN+FCNN_rat',Mean_Val_tn)
print(f"FP:{np.mean(fps)} ± {np.std(fps)}")
np.save('Mean_FP_CNN+FCNN_rat',Mean_Val_fp)
print(f"FN:{np.mean(fns)} ± {np.std(fns)}")
np.save('Mean_FN_CNN+FCNN_rat',Mean_Val_fn)
print(f"TPR:{np.mean(tpr_scores)} ± {np.std(tpr_scores)}")
np.save('Mean_TPR_CNN+FCNN_rat',Mean_Val_tpr)
print(f"FPR:{np.mean(fpr_scores)} ± {np.std(fpr_scores)}")
np.save('Mean_FPR_CNN+FCNN_rat',Mean_Val_fpr)

# Next: build a new model with the same architecture as above and train it
# starting from the best per-fold weights collected in `best_weights`.

In [None]:
# Define the final multimodal model
def create_Final_model(image_shape,fingerprint_shape):
    """Build and compile the final two-branch (image + fingerprint) classifier.

    The layers are created in the same order and with the same sizes as the
    cross-validation model, so a weight list taken from that model with
    ``get_weights()`` can be loaded here with ``set_weights()``.

    Parameters
    ----------
    image_shape : tuple of int
        Shape of one input image, e.g. ``(128, 128, 3)``. It is now actually used
        for the image input instead of the global ``Train_target_Size``.
    fingerprint_shape : int
        Number of fingerprint bits per sample.

    Returns
    -------
    Model
        Keras model with inputs ``[images, fingerprints]`` and one sigmoid output,
        compiled with binary cross-entropy, Adam(learning_rate=0.0001) and the
        'accuracy' metric.
    """
    # CNN branch for image input
    input_images = Input(shape=image_shape)
    x1 = Conv2D(64, (3, 3), activation='relu', padding='same')(input_images)
    x1 = MaxPooling2D(pool_size=(2, 2))(x1)
    x1 = Conv2D(32, (3, 3), activation='relu', padding='same')(x1)
    x1 = MaxPooling2D(pool_size=(2, 2))(x1)
    x1 = Conv2D(16, (3, 3), activation='relu', padding='same')(x1)
    x1 = MaxPooling2D(pool_size=(2, 2))(x1)
    x1 = Flatten()(x1)
    x1 = Dense(16, activation='relu')(x1)
    x1 = Dropout(0.5)(x1)

    # Dense layers for fingerprint input
    input_fingerprint = Input(shape=(fingerprint_shape,), name="fingerprint_input")
    x3 = Dense(64, activation='relu')(input_fingerprint)
    x3 = Dropout(0.5)(x3)
    x3 = Dense(32, activation='relu')(x3)

    # Combine the two branches
    combined_features = Concatenate()([x1, x3])

    # Fully connected layers after merging the inputs
    x = Dense(64, activation='relu')(combined_features)
    x = Dropout(0.5)(x)
    x = Dense(32, activation='relu')(x)
    x = Dropout(0.5)(x)

    output = Dense(1, activation='sigmoid')(x)  # Binary classification

    # Create the model
    model = Model(inputs=[input_images, input_fingerprint], outputs=output)

    # Compile the model
    optimizer = Adam(learning_rate=0.0001)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

In [None]:
# Define input shapes and parameters
image_shape = (128, 128, 3)  # Example image shape
fingerprint_shape = 1024  # Example molecular fingerprint size

# Initialize reference shapes for consistent model comparison
ref_shape = None
consistent_best_weights = []

# Keep only the weight sets whose per-array shapes match the first one, so the
# element-wise average below is well defined.
for i, weights in enumerate(best_weights):
    current_shape = [w.shape for w in weights]
    if ref_shape is None:
        # Set the reference shape from the first model
        ref_shape = current_shape
        consistent_best_weights.append(weights)
    elif current_shape == ref_shape:
        # Only add models with the same architecture
        consistent_best_weights.append(weights)
    else:
        print(f"Inconsistent model found at index {i+1} with shape: {current_shape}, skipping this model.")

# Fail with a clear message instead of an IndexError when no fold weights exist
if not consistent_best_weights:
    raise ValueError("best_weights is empty: run the cross-validation cell before averaging fold weights.")

# Element-wise mean of each weight array across folds (with a single weight set
# the mean equals that set, so no special case is needed).
averaged_weights = [
    np.mean(np.array(weights_list), axis=0) for weights_list in zip(*consistent_best_weights)
]

# Create a new model and set the averaged weights
model = create_Final_model(image_shape, fingerprint_shape)  # Pass required arguments
model.set_weights(averaged_weights)

# Compile with a fresh optimizer that belongs to this model only, and a
# learning-rate metric bound to that optimizer. This cell then no longer depends
# on an optimizer instance left over from an earlier cell.
optimizer = Adam(learning_rate=0.0001)
learning_rate_metric = get_learning_rate_metric(optimizer)
model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=['accuracy', learning_rate_metric])

# Compute class weights
class_weights = class_weight.compute_class_weight('balanced', classes=np.unique(Train_Eff_Toxicity), y=Train_Eff_Toxicity)
class_weights_dict = dict(enumerate(class_weights))

# Callbacks
callbacks_list_1 = [
    EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=8)
]

# Train the model on the full training set (fingerprints passed as a NumPy array,
# the same way as the validation fingerprints).
# NOTE(review): validation_data is the test set, so the early-stopping epoch is
# chosen on the same data that the following code evaluates; consider a separate
# validation split.
history = model.fit([Train_image_Arrays, Morgan_Fps.values], Train_Eff_Toxicity,
                    batch_size=32, epochs=30, verbose=1,
                    class_weight=class_weights_dict,
                    shuffle=True,
                    validation_data=([Test_image_Arrays, testMorgan_Fps.values], Test_Eff_Toxicity),
                    callbacks=callbacks_list_1)

# ---- Evaluation on the test set ----
# Prints each metric and saves it to its own .npy file via np.save.
print(f" Results for Test set:")

# Evaluate the model; it was compiled with two metrics, so evaluate returns
# loss, accuracy and the learning-rate metric.
loss,accuracy,learning_rate = model.evaluate([Test_image_Arrays, testMorgan_Fps.values], Test_Eff_Toxicity)
# Loss and accuracy on the test set as reported by Keras
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

# Predict on the test set; probabilities above 0.5 become class 1
y_test_pred = model.predict([Test_image_Arrays, testMorgan_Fps.values])
y_test_pred_labels = (y_test_pred > 0.5).astype(int)

# Calculate metrics using the actual test labels and the predicted labels
cm_test = confusion_matrix(Test_Eff_Toxicity, y_test_pred_labels)
# ravel() of a 2x2 confusion matrix yields TN, FP, FN, TP in that order
tnt, fpt, fnt, tpt = cm_test.ravel()

# Accuracy, Precision, Recall, F1, MCC, and other metrics
accuracy_test = accuracy_score(Test_Eff_Toxicity, y_test_pred_labels)
# Same accuracy recomputed from the confusion-matrix counts (printed as "Accuracyf")
accuracy_testf = (tpt + tnt) / (tpt + fpt + fnt + tnt)
# AUROC and AUPRC use the predicted probabilities, not the thresholded labels
auc_test = roc_auc_score(Test_Eff_Toxicity, y_test_pred)
precision_test = precision_score(Test_Eff_Toxicity, y_test_pred_labels)
recall_test = recall_score(Test_Eff_Toxicity, y_test_pred_labels)
f1_test = f1_score(Test_Eff_Toxicity, y_test_pred_labels)
mcc_test = matthews_corrcoef(Test_Eff_Toxicity, y_test_pred_labels)
auprc_test = average_precision_score(Test_Eff_Toxicity, y_test_pred)
# Sensitivity = Recall
sensitivity_test = recall_test
# Specificity calculation
specificity_test = tnt / (tnt + fpt)

# Calculate TPR and FPR for test set
tpr_test = tpt / (tpt + fnt)
fpr_test = fpt / (fpt + tnt)

# Print out the metrics and save each one
print(f"Final Evaluation on Test Set:")
print(f"True Positive (TP): {tpt}")
np.save('Test TP_CNN+FCNN_rat',tpt)
print(f"True Negative (TN): {tnt}")
np.save('Test TN_CNN+FCNN_rat',tnt)
print(f"False Positive (FP): {fpt}")
np.save('Test FP_CNN+FCNN_rat',fpt)
print(f"False Negative (FN): {fnt}")
np.save('Test FN_CNN+FCNN_rat',fnt)
print(f"Test Accuracy: {accuracy_test:.4f}")
print(f"Test Accuracyf: {accuracy_testf:.4f}")
np.save('Test ACC_CNN+FCNN_rat',accuracy_test)
print(f"Test Precision: {precision_test:.4f}")
np.save('Test PR_CNN+FCNN_rat',precision_test)
print(f"Test Recall: {recall_test:.4f}")
np.save('Test RE_CNN+FCNN_rat',recall_test)
print(f"Test F1 Score: {f1_test:.4f}")
np.save('Test F1_CNN+FCNN_rat',f1_test)
print(f"Test MCC: {mcc_test:.4f}")
np.save('Test MCC_CNN+FCNN_rat',mcc_test)
print(f"Test Specificity: {specificity_test:.4f}")
np.save('Test SP_CNN+FCNN_rat',specificity_test)
print(f"Test Sensitivity: {sensitivity_test:.4f}")
np.save('Test SE_CNN+FCNN_rat',sensitivity_test)
print(f"Test AUROC: {auc_test:.4f}")
np.save('Test AUROC_CNN+FCNN_rat',auc_test)
print(f"Test AUPRC: {auprc_test:.4f}")
np.save('Test AUPRC_CNN+FCNN_rat',auprc_test)
print(f"Test TPR: {tpr_test:.4f}")
np.save('Test TPR_CNN+FCNN_rat',tpr_test)
print(f"Test FPR: {fpr_test:.4f}")
np.save('Test FPR_CNN+FCNN_rat',fpr_test)

# ---- Evaluation on the external validation set ----
# Prints each metric and saves it to its own .npy file via np.save.
print(f" Results for External set:")
# Evaluate the model; it was compiled with two metrics, so evaluate returns
# loss, accuracy and the learning-rate metric.
loss,accuracy,learning_rate = model.evaluate([External_image_Arrays, externalMorgan_Fps.values], External_Eff_Toxicity)
# Loss and accuracy on the external set as reported by Keras
print("External External Loss:", loss)
print("External External Accuracy:", accuracy)

# Predict on the external set; probabilities above 0.5 become class 1
y_external_pred = model.predict([External_image_Arrays, externalMorgan_Fps.values])
y_external_pred_labels = (y_external_pred > 0.5).astype(int)

# Calculate metrics using the actual external labels and the predicted labels
cm_external = confusion_matrix(External_Eff_Toxicity, y_external_pred_labels)
# ravel() of a 2x2 confusion matrix yields TN, FP, FN, TP in that order
tn_e, fp_e, fn_e, tp_e = cm_external.ravel()

# Accuracy, Precision, Recall, F1, MCC, and other metrics
accuracy_external = accuracy_score(External_Eff_Toxicity, y_external_pred_labels)
# AUROC and AUPRC use the predicted probabilities, not the thresholded labels
auc_external = roc_auc_score(External_Eff_Toxicity, y_external_pred)
precision_external = precision_score(External_Eff_Toxicity, y_external_pred_labels)
recall_external = recall_score(External_Eff_Toxicity, y_external_pred_labels)
f1_external = f1_score(External_Eff_Toxicity, y_external_pred_labels)
mcc_external = matthews_corrcoef(External_Eff_Toxicity, y_external_pred_labels)
auprc_external = average_precision_score(External_Eff_Toxicity, y_external_pred)
# Sensitivity = Recall
sensitivity_external = recall_external
# Specificity calculation
specificity_external = tn_e / (tn_e + fp_e)

# Calculate TPR and FPR for external set
tpr_external = tp_e / (tp_e + fn_e)
fpr_external = fp_e / (fp_e + tn_e)

# Print out the metrics and save each one
print(f"Final Evaluation on External Set:")
print(f"True Positive (TP): {tp_e}")
np.save('External TP_CNN+FCNN_external',tp_e)
print(f"True Negative (TN): {tn_e}")
np.save('External TN_CNN+FCNN_external',tn_e)
print(f"False Positive (FP): {fp_e}")
np.save('External FP_CNN+FCNN_external',fp_e)
print(f"False Negative (FN): {fn_e}")
np.save('External FN_CNN+FCNN_external',fn_e)
print(f"External Accuracy: {accuracy_external:.4f}")
np.save('External ACC_CNN+FCNN_external',accuracy_external)
print(f"External Precision: {precision_external:.4f}")
np.save('External PR_CNN+FCNN_external',precision_external)
print(f"External Recall: {recall_external:.4f}")
np.save('External RE_CNN+FCNN_external',recall_external)
print(f"External F1 Score: {f1_external:.4f}")
np.save('External F1_CNN+FCNN_external',f1_external)
print(f"External MCC: {mcc_external:.4f}")
np.save('External MCC_CNN+FCNN_external',mcc_external)
print(f"External Specificity: {specificity_external:.4f}")
np.save('External SP_CNN+FCNN_external',specificity_external)
print(f"External Sensitivity: {sensitivity_external:.4f}")
np.save('External SE_CNN+FCNN_external',sensitivity_external)
print(f"External AUROC: {auc_external:.4f}")
np.save('External AUROC_CNN+FCNN_external',auc_external)
print(f"External AUPRC: {auprc_external:.4f}")
np.save('External AUPRC_CNN+FCNN_external',auprc_external)
print(f"External TPR: {tpr_external:.4f}")
np.save('External TPR_CNN+FCNN_external',tpr_external)
print(f"External FPR: {fpr_external:.4f}")
np.save('External FPR_CNN+FCNN_external',fpr_external)