In [3]:
pip install rdkit

Collecting rdkitNote: you may need to restart the kernel to use updated packages.

  Downloading rdkit-2023.9.5-cp310-cp310-win_amd64.whl (21.1 MB)
     ---------------------------------------- 21.1/21.1 MB 9.0 MB/s eta 0:00:00
Installing collected packages: rdkit
Successfully installed rdkit-2023.9.5


In [4]:
import torch
from torch.nn.utils.rnn import pad_sequence
from rdkit import Chem
from rdkit.Chem import AllChem

# Define a function to convert SMILES strings to numerical representations
def smi_to_tensor(smi, max_length=100):
    mol = Chem.MolFromSmiles(smi)
    if mol is not None:
        mol = Chem.AddHs(mol)
        AllChem.EmbedMolecule(mol)
        conformer = mol.GetConformer()
        coordinates = torch.tensor([conformer.GetAtomPosition(i) for i in range(mol.GetNumAtoms())], dtype=torch.float)
        return coordinates
    else:
        return None

# Example SMILES strings
smiles_list = ['CCO', 'CC[NH2+]C', 'CC(=O)O']

# Convert SMILES strings to numerical representations
numerical_data = [smi_to_tensor(smi) for smi in smiles_list]

# Padding sequences to ensure uniform input size
padded_data = pad_sequence(numerical_data, batch_first=True, padding_value=0)

print("Padded Data Shape:", padded_data.shape)
print("Padded Data:", padded_data)

Padded Data Shape: torch.Size([3, 14, 3])
Padded Data: tensor([[[-9.5346e-01,  4.7804e-02,  4.2499e-02],
         [ 4.8907e-01, -3.2036e-01, -1.8855e-01],
         [ 1.2770e+00,  3.2478e-01,  7.3767e-01],
         [-1.3570e+00,  7.3398e-01, -7.3322e-01],
         [-1.5933e+00, -8.6257e-01,  2.1759e-02],
         [-1.0710e+00,  5.9644e-01,  1.0112e+00],
         [ 7.8785e-01, -1.3771e-01, -1.2385e+00],
         [ 5.8958e-01, -1.4290e+00, -4.1342e-02],
         [ 1.8313e+00,  1.0466e+00,  3.8849e-01],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00]],

        [[ 1.8303e+00,  1.2132e-01,  2.6065e-01],
         [ 4.1150e-01,  5.7438e-01,  4.2736e-02],
         [-4.4954e-01, -4.6014e-01, -4.4537e-01],
         [-1.8135e+00, -2.7435e-01,  3.6312e-02],
         [ 2.4024e+00,  1.0310e+00,  5.8996

In [5]:
import torch
from torch.utils.data import Dataset, DataLoader

# Custom Dataset Class
class DrugTargetDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

# Example data and labels (replace with your actual dataset)
data = [torch.randn(100) for _ in range(1000)]
labels = [0 if i < 500 else 1 for i in range(1000)]

# Create an instance of the custom dataset
custom_dataset = DrugTargetDataset(data, labels)

# Define DataLoader to batch and shuffle the data
batch_size = 32
shuffle = True

data_loader = DataLoader(dataset=custom_dataset, batch_size=batch_size, shuffle=shuffle)

# Iterate over the DataLoader
for batch_data, batch_labels in data_loader:
    # Access batched data and labels for training
    print("Batch Data Shape:", batch_data.shape)
    print("Batch Labels:", batch_labels)

Batch Data Shape: torch.Size([32, 100])
Batch Labels: tensor([0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0,
        0, 0, 1, 0, 0, 0, 1, 0])
Batch Data Shape: torch.Size([32, 100])
Batch Labels: tensor([1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1,
        1, 0, 1, 1, 0, 1, 1, 1])
Batch Data Shape: torch.Size([32, 100])
Batch Labels: tensor([1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1,
        0, 1, 1, 0, 1, 0, 0, 0])
Batch Data Shape: torch.Size([32, 100])
Batch Labels: tensor([1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1,
        0, 0, 1, 1, 0, 0, 1, 0])
Batch Data Shape: torch.Size([32, 100])
Batch Labels: tensor([1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
        1, 1, 0, 1, 1, 0, 0, 1])
Batch Data Shape: torch.Size([32, 100])
Batch Labels: tensor([0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0]

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Define the neural network architecture
class DrugTargetInteractionModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(DrugTargetInteractionModel, self).__init__()
        
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Initialize the model with input size, hidden size, and number of classes
input_size = 100  # Update with the actual input size
hidden_size = 128
num_classes = 2  # Binary classification (interaction or non-interaction)
model = DrugTargetInteractionModel(input_size, hidden_size, num_classes)

# Define example input data dimension
batch_size = 32
input_data = torch.randn(batch_size, input_size)

# Forward pass through the model to make predictions
output = model(input_data)

print("Model Output Shape:", output.shape)

Model Output Shape: torch.Size([32, 2])


In [9]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define the loss function
criterion = nn.CrossEntropyLoss()

# Define the optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Define the training loop
def train_model(model, data_loader, optimizer, criterion, num_epochs):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        
        for inputs, labels in data_loader:
            optimizer.zero_grad()
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
        
        epoch_loss = running_loss / len(data_loader)
        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss}')

# Example usage:
num_epochs = 20
train_model(model, data_loader, optimizer, criterion, num_epochs)

Epoch 1/20, Loss: 0.2656508693471551
Epoch 2/20, Loss: 0.20754584623500705
Epoch 3/20, Loss: 0.17147657135501504
Epoch 4/20, Loss: 0.1387675020378083
Epoch 5/20, Loss: 0.11361520574428141
Epoch 6/20, Loss: 0.09385392058175057
Epoch 7/20, Loss: 0.07588602718897164
Epoch 8/20, Loss: 0.06356503802817315
Epoch 9/20, Loss: 0.05204855487681925
Epoch 10/20, Loss: 0.04412087064702064
Epoch 11/20, Loss: 0.03744601545622572
Epoch 12/20, Loss: 0.03183996491134167
Epoch 13/20, Loss: 0.027193024812731892
Epoch 14/20, Loss: 0.0241530904895626
Epoch 15/20, Loss: 0.021061697509139776
Epoch 16/20, Loss: 0.018770317517919466
Epoch 17/20, Loss: 0.016778930963482708
Epoch 18/20, Loss: 0.014967426774092019
Epoch 19/20, Loss: 0.013377536204643548
Epoch 20/20, Loss: 0.012081046792445704


In [35]:
# Update the model architecture with the correct input size
class DrugTargetInteractionModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(DrugTargetInteractionModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return x

# Assuming your input data is of shape (batch_size, input_size)
# Update the input_size to match the actual input data size
input_size = 100  # Update with the correct input size

# Create example input data with the correct dimensions
input_data = torch.randn(batch_size, input_size)

# Initialize the model with the adjusted input size
model = DrugTargetInteractionModel(input_size, hidden_size, num_classes)
output = model(input_data)
print("Model Output Shape:", output.shape)

Model Output Shape: torch.Size([32, 2])


In [36]:
import torch
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Define a function to evaluate the model and calculate metrics
def evaluate_model(model, data_loader):
    model.eval()
    all_predictions = []
    all_labels = []
    
    with torch.no_grad():
        for inputs, labels in data_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            all_predictions.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    accuracy = accuracy_score(all_labels, all_predictions)
    precision = precision_score(all_labels, all_predictions)
    recall = recall_score(all_labels, all_predictions)
    f1 = f1_score(all_labels, all_predictions)
    
    print(f'Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}')

# Create a validation DataLoader (similar to training DataLoader)
# Define a validation dataset and DataLoader
# Assuming you have `validation_data` and `validation_labels` ready
validation_dataset = DrugTargetDataset(validation_data, validation_labels)
validation_data_loader = DataLoader(dataset=validation_dataset, batch_size=batch_size, shuffle=False)

# Evaluate the model on the validation set and calculate metrics
evaluate_model(model, validation_data_loader)

Accuracy: 0.5400, Precision: 0.5282, Recall: 0.7500, F1-score: 0.6198


In [None]:
####################################################################################################################################################

In [51]:
import numpy as np
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc

def evaluate_model_advanced(model, data_loader):
    model.eval()
    all_predictions = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in data_loader:
            outputs = model(inputs)
            all_predictions.extend(outputs.cpu().numpy().flatten())  # Flatten predictions
            all_labels.extend(labels.cpu().numpy().flatten())  # Flatten labels as well

    all_predictions = torch.sigmoid(torch.tensor(all_predictions)).numpy()
    
    # Ensure the number of samples is consistent between predictions and labels
    if len(all_predictions) != len(all_labels):
        min_len = min(len(all_predictions), len(all_labels))
        all_predictions = all_predictions[:min_len]
        all_labels = all_labels[:min_len]

    # Ensure at least two unique labels for AUC-ROC calculation
    if len(set(all_labels)) < 2:
        raise ValueError("At least two unique labels are required for AUC-ROC calculation.")
    
    # Convert labels to binary for AUC-ROC calculation
    if len(set(all_labels)) > 2:
        all_labels = np.array([1 if label == positive_label else 0 for label in all_labels])

    # Calculate AUC-ROC
    auc_roc = roc_auc_score(all_labels, all_predictions)

    # Calculate precision-recall curve
    precision, recall, _ = precision_recall_curve(all_labels, all_predictions)
    auc_pr = auc(recall, precision)

    print(f'AUC-ROC: {auc_roc:.4f}, AUC-PR: {auc_pr:.4f}')

# Assuming you have a validation DataLoader ready
# Evaluate the model using advanced metrics
evaluate_model_advanced(model, validation_data_loader)

AUC-ROC: 0.5563, AUC-PR: 0.5358


In [59]:
import torch

# Placeholder values for model weights and biases
fc1_weight = torch.tensor([[0.1, -0.2], [0.3, -0.4]])  # Example weights for fc1
fc1_bias = torch.tensor([0.5, -0.6])  # Example bias for fc1
fc2_weight = torch.tensor([[0.7, -0.8], [0.9, -1.0]])  # Example weights for fc2
fc2_bias = torch.tensor([0.11, -0.12])  # Example bias for fc2

# Sample representation of model weights and biases
model_weights_biases = {
    'fc1.weight': fc1_weight,  # Weights for the first linear layer (fc1)
    'fc1.bias': fc1_bias,  # Bias for the first linear layer (fc1)
    'fc2.weight': fc2_weight,  # Weights for the second linear layer (fc2)
    'fc2.bias': fc2_bias,  # Bias for the second linear layer (fc2)
}

In [60]:
torch.save(model.state_dict(), 'model.pth')

In [61]:
model_file_path = 'model.pth'

In [62]:
import torch

# Define the model file path
model_file_path = 'model.pth'  # Update with the actual path

# Load the model from the specified file path
model = DrugTargetInteractionModel(input_size, hidden_size, num_classes)
model.load_state_dict(torch.load(model_file_path))
model.eval()

DrugTargetInteractionModel(
  (fc1): Linear(in_features=100, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=2, bias=True)
)

In [66]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Define the model architecture
class DrugTargetInteractionModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(DrugTargetInteractionModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return x

# Load the trained model
model = DrugTargetInteractionModel(input_size=100, hidden_size=128, num_classes=2)
model.load_state_dict(torch.load('model.pth'))
model.eval()

# Prepare new data for inference (replace this with your actual new data)
new_data = torch.randn(batch_size, 100)  # Adjust input size and batch size as needed

# Perform inference
with torch.no_grad():
    predictions = model(new_data)

# Convert predictions to probabilities using sigmoid
predicted_probabilities = torch.sigmoid(predictions)

# Print the predicted probabilities
print("Predicted Probabilities:", predicted_probabilities)

Predicted Probabilities: tensor([[0.5000, 0.5165],
        [0.5000, 0.5000],
        [0.5000, 0.5000],
        [0.5345, 0.5000],
        [0.5474, 0.5016],
        [0.5519, 0.5009],
        [0.5378, 0.5000],
        [0.5000, 0.5000],
        [0.5799, 0.5249],
        [0.6140, 0.5119],
        [0.5000, 0.5000],
        [0.5360, 0.5000],
        [0.5684, 0.5000],
        [0.5150, 0.5655],
        [0.5000, 0.5175],
        [0.5311, 0.5000],
        [0.5348, 0.5000],
        [0.5957, 0.5102],
        [0.5969, 0.5000],
        [0.6417, 0.5293],
        [0.5000, 0.5000],
        [0.5000, 0.5000],
        [0.5721, 0.5242],
        [0.5504, 0.5000],
        [0.5000, 0.5000],
        [0.6027, 0.5000],
        [0.5568, 0.5355],
        [0.5176, 0.5000],
        [0.5616, 0.5000],
        [0.5300, 0.5411],
        [0.5131, 0.5000],
        [0.5444, 0.5000]])


In [67]:
import torch
# Assuming predicted_probabilities is the tensor of predicted probabilities
predicted_probabilities = torch.tensor([[0.5000, 0.5165],
                                        [0.5000, 0.5000],
                                        [0.5378, 0.5000],
                                        [0.5799, 0.5249]])

# Define a threshold for classification
threshold = 0.5

# Classify predictions based on the threshold
predicted_classes = (predicted_probabilities[:, 1] > threshold).long()

# Use predicted probabilities as scores for ranking predictions
prediction_scores = predicted_probabilities[:, 1]  # Using the probability for the positive class

# Print the predicted classes and scores
print("Predicted Classes:", predicted_classes)
print("Prediction Scores:", prediction_scores)

Predicted Classes: tensor([1, 0, 0, 1])
Prediction Scores: tensor([0.5165, 0.5000, 0.5000, 0.5249])
