In [2]:
import torch 
import pandas as pd 
from data_preparation import *
from tensorconversion import *
from dataloader import *
from torch.utils.data import DataLoader
from GAT_MLP_Network import *
import torch.optim as optim
from train_evaluate import *
from hyperparameters import *

In [3]:
train_df = pd.read_csv('Data/train.csv')
test_df = pd.read_csv('Data/test.csv')

In [4]:
print(len(train_df),len(test_df))

6644 977


In [5]:
def preprocess(smiles,labels,batch_size=None,shuffle=None,generator=None):
    processor = MoleculeProcessor(smiles,labels)
    full_graph_batch = processor.node_edge_feature_extraction()
    fps = np.array(processor.calculate_fingerprint(radius=3,fpsize=4096))
    tensor_converter = TensorConversion()
    fps_data = tensor_converter.convert_to_tensor(fps)
    combined_dataset = SynchronizedDataset(full_graph_batch,fps_data)
    combined_dataloader = DataLoader(
    combined_dataset, 
    batch_size = batch_size,
    shuffle = shuffle,
    generator = generator, 
    collate_fn=collate_fn
    # generator=torch.Generator().manual_seed(42)
)
    return combined_dataloader

In [6]:
train_dataloader = preprocess(train_df['Smiles'],train_df['Label'],batch_size=64, shuffle=True, generator=torch.Generator().manual_seed(42))

2026-01-23 06:20:08 - data_preparation.py:45 - _generate_molecules - Total valid SMILES found: 6644
2026-01-23 06:20:12 - tensorconversion.py:24 - convert_to_tensor - Converting fingerprint array to tensor. Shape: (6644, 4096)
2026-01-23 06:20:12 - tensorconversion.py:26 - convert_to_tensor - Converted tensor shape: torch.Size([6644, 4096])


In [7]:
test_dataloader = preprocess(test_df['Smiles'],test_df['Label'],batch_size=64)

2026-01-23 06:20:12 - data_preparation.py:45 - _generate_molecules - Total valid SMILES found: 977
2026-01-23 06:20:13 - tensorconversion.py:24 - convert_to_tensor - Converting fingerprint array to tensor. Shape: (977, 4096)
2026-01-23 06:20:13 - tensorconversion.py:26 - convert_to_tensor - Converted tensor shape: torch.Size([977, 4096])


In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CombinedModelGATMLP(config.gnn_in_channels, config.gnn_hidden_channels, config.gnn_out_channels,
                      config.mlp_input_size, config.mlp_hidden_sizes, config.mlp_output_size, config.dense_size_1, config.dense_size_2).to(device)
criterion = torch.nn.BCEWithLogitsLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=config.lr,weight_decay= config.weight_decay)

In [9]:
print(model)

CombinedModelGATMLP(
  (gnn): GATModel(
    (gat_conv): GATConv(9, 57, heads=1)
    (lin): Linear(in_features=57, out_features=34, bias=True)
  )
  (mlp): MLPModel(
    (mlp): Sequential(
      (0): Linear(in_features=4096, out_features=2469, bias=True)
      (1): BatchNorm1d(2469, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): Dropout(p=0.17988426707538552, inplace=False)
      (4): Linear(in_features=2469, out_features=185, bias=True)
    )
  )
  (fc1): Linear(in_features=219, out_features=112, bias=True)
  (batchnorm1): BatchNorm1d(112, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout2): Dropout(p=0.3, inplace=False)
  (linear): Linear(in_features=112, out_features=1, bias=True)
)


In [10]:
trainer = ModelTrainer(model,criterion,optimizer,device)

2026-01-23 06:20:20 - train_evaluate.py:23 - __init__ - 
Model Architecture:
####################################################################################################
# CombinedModelGATMLP(                                                                             #
#   (gnn): GATModel(                                                                               #
#     (gat_conv): GATConv(9, 57, heads=1)                                                          #
#     (lin): Linear(in_features=57, out_features=34, bias=True)                                    #
#   )                                                                                              #
#   (mlp): MLPModel(                                                                               #
#     (mlp): Sequential(                                                                           #
#       (0): Linear(in_features=4096, out_features=2469, bias=True)                                #
#       (1): B

In [11]:
best_model, train_losses, train_accuracies, final_true_labels, final_predicted_labels = trainer.train(train_dataloader)

Epoch 1, Loss: 0.5329, Accuracy: 0.7401
Epoch 2, Loss: 0.4520, Accuracy: 0.7977
Epoch 3, Loss: 0.4436, Accuracy: 0.8006
Epoch 4, Loss: 0.4362, Accuracy: 0.8025
Epoch 5, Loss: 0.4307, Accuracy: 0.8070
Epoch 6, Loss: 0.4219, Accuracy: 0.8137
Epoch 7, Loss: 0.4130, Accuracy: 0.8171
Epoch 8, Loss: 0.4069, Accuracy: 0.8233
Epoch 9, Loss: 0.4035, Accuracy: 0.8200
Epoch 10, Loss: 0.3792, Accuracy: 0.8365
Epoch 11, Loss: 0.3727, Accuracy: 0.8373
Epoch 12, Loss: 0.3477, Accuracy: 0.8566
Epoch 13, Loss: 0.3387, Accuracy: 0.8564
Epoch 14, Loss: 0.3079, Accuracy: 0.8743
Epoch 15, Loss: 0.2872, Accuracy: 0.8787
Epoch 16, Loss: 0.2674, Accuracy: 0.8927
Epoch 17, Loss: 0.2389, Accuracy: 0.9053
Epoch 18, Loss: 0.2293, Accuracy: 0.9138
Epoch 19, Loss: 0.2106, Accuracy: 0.9210
Epoch 20, Loss: 0.1846, Accuracy: 0.9321
Epoch 21, Loss: 0.1780, Accuracy: 0.9351
Epoch 22, Loss: 0.1790, Accuracy: 0.9335
Epoch 23, Loss: 0.1650, Accuracy: 0.9377
Epoch 24, Loss: 0.1586, Accuracy: 0.9433
Epoch 25, Loss: 0.1538, A

2026-01-23 06:21:59 - train_evaluate.py:106 - train - Training Completed.
#####################################
# Loss: 0.1070 and Accuracy: 0.9646 #
#####################################


Epoch 56, Loss: 0.1131, Accuracy: 0.9598

Early stopping triggered. Best loss: 0.1070

Final Results:
Accuracy: 0.9646
Precision: 0.9674
Recall: 0.9593
Specificity for class_0: 0.9593
Specificity for class_1: 0.9697
F1 Score: 0.9633
Confusion Matrix:
[[3323  104]
 [ 131 3086]]

Classification Report:
              precision    recall  f1-score   support

     class_0       0.96      0.97      0.97      3427
     class_1       0.97      0.96      0.96      3217

    accuracy                           0.96      6644
   macro avg       0.96      0.96      0.96      6644
weighted avg       0.96      0.96      0.96      6644



In [12]:
probablity,predictions, actual_label,avg_loss, accuracy, conf_matrix, class_report, roc_auc, fpr, tpr = trainer.evaluate(best_model, test_dataloader)

2026-01-23 06:21:59 - train_evaluate.py:152 - evaluate - Evaluation Completed.
###################################
# Loss:0.5137 and Accuracy:0.8280 #
###################################
2026-01-23 06:21:59 - train_evaluate.py:153 - evaluate - 
Classification Report:
#########################################################
#               precision    recall  f1-score   support #
#                                                       #
#          0.0       0.84      0.75      0.79       426 #
#          1.0       0.82      0.89      0.85       551 #
#                                                       #
#     accuracy                           0.83       977 #
#    macro avg       0.83      0.82      0.82       977 #
# weighted avg       0.83      0.83      0.83       977 #
#                                                       #
#########################################################


Loss: 0.5137
Accuracy: 0.8280
[[319 107]
 [ 61 490]]
Specificity class_0 : 0.8893
Specificity class_1 : 0.7488
              precision    recall  f1-score   support

         0.0       0.84      0.75      0.79       426
         1.0       0.82      0.89      0.85       551

    accuracy                           0.83       977
   macro avg       0.83      0.82      0.82       977
weighted avg       0.83      0.83      0.83       977



ValueError: not enough values to unpack (expected 10, got 7)

In [12]:
# torch.save(best_model, 'Muta_MLP_best_model1.pth')