<a href="https://colab.research.google.com/github/Swayamprakashpatel/Sol_ME/blob/main/GNN_Solubility.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

SMILES to Graph Neural Network for Solubility Prediction

In [None]:
!pip install rdkit-pypi tensorflow tensorflow-addons


In [None]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow_addons.layers import GCNConv
from rdkit import Chem
from rdkit.Chem import AllChem
import numpy as np

# Read the solubility table into a DataFrame. The path is a placeholder to be
# replaced with a real CSV; later code reads the 'drug_smiles',
# 'solvent_smiles' and 'solubility' columns from it.
df = pd.read_csv(filepath_or_buffer='path_to_your_dataset.csv')

# Function to convert SMILES to a graph
def smiles_to_graph(smiles):
    """Convert a SMILES string into node features and a directed edge list.

    Args:
        smiles: SMILES string describing one molecule.

    Returns:
        ``None`` when ``smiles`` is not a string (for example a missing CSV
        cell that pandas read as NaN) or when RDKit cannot parse it.
        Otherwise a tuple ``(atom_features, edge_index)``:

        * ``atom_features`` -- float32 array of shape ``(num_atoms, 1)``
          holding the atomic number of each atom.
        * ``edge_index`` -- int32 array of shape ``(2 * num_bonds, 2)``;
          each bond contributes both ``(begin, end)`` and ``(end, begin)``
          so the graph is symmetric.
    """
    # Treat non-string input the same way as an unparsable SMILES instead of
    # handing it to the RDKit parser, which only accepts strings.
    if not isinstance(smiles, str):
        return None

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return None

    # No 2D coordinates are generated: only atom identities and bond
    # connectivity are read from the molecule.
    atomic_numbers = [atom.GetAtomicNum() for atom in mol.GetAtoms()]

    edges = []
    for bond in mol.GetBonds():
        begin = bond.GetBeginAtomIdx()
        end = bond.GetEndAtomIdx()
        # Store both directions of every bond.
        edges.extend(((begin, end), (end, begin)))

    atom_features = np.array(atomic_numbers, dtype=np.float32).reshape(-1, 1)
    # reshape keeps the array two-dimensional, (0, 2), for molecules without
    # bonds; np.array([]) alone would be one-dimensional with shape (0,).
    edge_index = np.array(edges, dtype=np.int32).reshape(-1, 2)
    return atom_features, edge_index

# Process dataset into graph objects.
# Rows are kept only when both the drug and the solvent SMILES convert to a
# graph. Columns are iterated directly instead of using df.iterrows(), which
# builds a Series per row and does not preserve column dtypes.
# NOTE(review): the solvent graph is used only as a validity filter; just the
# drug graph is stored, so the model never sees the solvent -- confirm that
# this is intended.
data_list = []
solubilities = []
for drug_smiles, solvent_smiles, solubility in zip(
    df['drug_smiles'], df['solvent_smiles'], df['solubility']
):
    drug_graph = smiles_to_graph(drug_smiles)
    solvent_graph = smiles_to_graph(solvent_smiles)
    if drug_graph is None or solvent_graph is None:
        continue
    data_list.append(drug_graph)
    solubilities.append(solubility)

# Use the first 80% of the samples for training and the remainder for
# testing. The cut follows the order in which samples were collected; nothing
# is shuffled here.
split_index = int(0.8 * len(data_list))
train_data, test_data = data_list[:split_index], data_list[split_index:]
train_labels, test_labels = solubilities[:split_index], solubilities[split_index:]

# Define the GNN model using TensorFlow Sequential API
def create_gnn_model():
    """Build and compile the solubility regression model.

    The network stacks three ``GCNConv`` layers (16 and 32 units with ReLU,
    then 1 unit without activation), flattens the result and ends in a
    single-unit ``Dense`` layer. It is compiled with the Adam optimizer and
    mean-squared-error loss.

    Returns:
        The compiled ``Sequential`` model.
    """
    # NOTE(review): tensorflow_addons.layers does not appear to provide a
    # GCNConv layer -- confirm the import at the top of the file resolves.
    # NOTE(review): graph convolutions normally take node features together
    # with an adjacency input, which a single-input Sequential stack cannot
    # pass along -- verify this architecture actually runs.
    layer_stack = [
        GCNConv(16, activation='relu'),
        GCNConv(32, activation='relu'),
        GCNConv(1),
        Flatten(),
        Dense(1),
    ]
    network = Sequential(layer_stack)
    network.compile(optimizer='adam', loss='mse')
    return network

# Build the compiled model once at module level; the training loop further
# down fits, evaluates and finally saves this instance.
model = create_gnn_model()

# Prepare data for training
def prepare_data(data_list, labels):
    """Pack per-sample graphs and their targets into tensors.

    Args:
        data_list: Sequence of samples; element 0 of each sample is taken as
            its atom features and element 1 as its edge indices.
        labels: Target values, one per sample.

    Returns:
        A tuple ``(atom_features, edge_indices, labels)`` where the first two
        are ragged tensors (float32 and int32 respectively) and ``labels`` is
        a float32 NumPy array.
    """
    per_sample_atoms = []
    per_sample_edges = []
    for sample in data_list:
        per_sample_atoms.append(sample[0])
        per_sample_edges.append(sample[1])

    # Samples differ in size, so they are stored as ragged tensors rather
    # than padded dense ones.
    atom_features = tf.ragged.constant(per_sample_atoms, dtype=tf.float32)
    edge_indices = tf.ragged.constant(per_sample_edges, dtype=tf.int32)
    label_array = np.array(labels, dtype=np.float32)
    return atom_features, edge_indices, label_array

# Convert both partitions into tensors. Note that train_labels / test_labels
# are rebound here from Python lists to float32 arrays.
(train_atom_features,
 train_edge_indices,
 train_labels) = prepare_data(train_data, train_labels)
(test_atom_features,
 test_edge_indices,
 test_labels) = prepare_data(test_data, test_labels)

# Training loop
batch_size = 32
epochs = 100

train_inputs = [train_atom_features, train_edge_indices]
test_inputs = [test_atom_features, test_edge_indices]

# Fit one epoch at a time so the held-out loss can be reported after every
# pass over the training data.
for epoch in range(epochs):
    model.fit(
        train_inputs,
        train_labels,
        batch_size=batch_size,
        epochs=1,
        verbose=1,
    )
    test_loss = model.evaluate(test_inputs, test_labels, verbose=0)
    print(f'Epoch {epoch+1}, Test Loss: {test_loss:.4f}')

# Save the trained model
model.save('gnn_model_tf.h5')
