In [2]:
# import libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import matplotlib_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')
import sys
sys.path.append("/home/mylab-pharma/Code/tuele/pan_HDAC/mylab_panHDAC-master/src/common")
from pharmacy_common import PharmacyCommon

#class to encode smiles
common = PharmacyCommon()

In [3]:
train_dataset = pd.read_excel('../../data/train_test_data/NoCL/20240321_pan_HDAC_train_test_data.xlsx',sheet_name='train_dataset')
validation_dataset = pd.read_excel("../../data/train_test_data/NoCL/20240321_pan_HDAC_train_test_data.xlsx", sheet_name='validation_dataset')
test_dataset = pd.read_excel("../../data/train_test_data/NoCL/20240321_pan_HDAC_train_test_data.xlsx", sheet_name='test_dataset')
print(len(train_dataset),len(validation_dataset), len(test_dataset))

1528 328 327


In [4]:
train_dataset.head(10)

Unnamed: 0,Code,SMILES,Bioactivity
0,415897,CC(Nc1ccc(CN(CCC=C2CCC(NO)=O)C2=O)cc1)=O,inactive
1,1589183,COc(cc(/C=C/C(Nc(cccc1)c1N)=O)cc1)c1OCC(Nc(cc1...,inactive
2,1161066,CC[C@H](C)[C@@H](C(N(Cc1c(C2)ccc(OCC(NO)=O)c1)...,active
3,2100074,CC(c1ccccc1)Nc1ncnc2c1cc(-c1ccc(CN3CCN(CCOCCCC...,inactive
4,386804,CC(C)SC(SCC(c1ccc(C)cc1)=O)=S,inactive
5,467667,CCOC(SCC(c(cc1)ccc1NC(C)=O)=O)=S,inactive
6,390739,ONC(/C=C/c1cccc(C(c2cc(cccc3)c3[nH]2)=O)c1)=O,active
7,440474,ONC(CCCCCNC(Nc1cc(-c2nc(cccc3)c3[o]2)ccc1)=O)=O,active
8,2460036,CN1C(SCc(cc2)ccc2C(Nc(cccc2)c2N)=O)=Nc(cc(cc2)...,inactive
9,440227,CCC(c1nc(cccc2)c2[n]1CC)c1ccc(/C=C/C(NO)=O)cc1,active


In [5]:
#encoding smiles 
fpt_bits = 1024
X_train = common.gen_ecfp4_fpts(train_dataset['SMILES'],bits = 1024)
X_validation = common.gen_ecfp4_fpts(validation_dataset['SMILES'], bits = 1024)
X_test = common.gen_ecfp4_fpts(test_dataset['SMILES'], bits = 1024)

Progress:   0%|          | 0/1528 [00:00<?, ?it/s][18:12:51] Conflicting single bond directions around double bond at index 7.
[18:12:51]   BondStereo set to STEREONONE and single bond directions set to NONE.
[18:12:51] Conflicting single bond directions around double bond at index 16.
[18:12:51]   BondStereo set to STEREONONE and single bond directions set to NONE.
[18:12:52] Conflicting single bond directions around double bond at index 33.
[18:12:52]   BondStereo set to STEREONONE and single bond directions set to NONE.
[18:12:52] Conflicting single bond directions around double bond at index 18.
[18:12:52]   BondStereo set to STEREONONE and single bond directions set to NONE.
[18:12:52] Conflicting single bond directions around double bond at index 27.
[18:12:52]   BondStereo set to STEREONONE and single bond directions set to NONE.
[18:12:52] Conflicting single bond directions around double bond at index 9.
[18:12:52]   BondStereo set to STEREONONE and single bond directions set t

In [6]:
# encoding the label
import sklearn.preprocessing as preprocessing
y_train = np.array(train_dataset['Bioactivity'])
y_validation = np.array(validation_dataset['Bioactivity'])
y_test = np.array(test_dataset['Bioactivity'])

#Original data
print("Original data:")
print(y_train[0:5])
print(y_test[0:5])
print(y_validation[0:5])

#label encoding
label_encoder = preprocessing.LabelEncoder()
y_train  = label_encoder.fit_transform(y_train)
y_validation = label_encoder.transform(y_validation)
y_test = label_encoder.transform(y_test)
#Class encoded
print("Class encoded:")
print(list(label_encoder.classes_))
print(label_encoder.transform(label_encoder.classes_))
print("Encoded data:")
print(y_train[0:5])
print(y_test[0:5])
print(y_validation[0:5])


Original data:
['inactive' 'inactive' 'active' 'inactive' 'inactive']
['active' 'active' 'active' 'inactive' 'active']
['active' 'active' 'inactive' 'inactive' 'inactive']
Class encoded:
['active', 'inactive']
[0 1]
Encoded data:
[1 1 0 1 1]
[0 0 0 1 0]
[0 0 1 1 1]


In [19]:
x_train, y_train = np.array(X_train), np.array(y_train)
x_train = x_train.astype(np.float32)
y_train = y_train.astype(np.int64)

In [23]:
y_train=y_train.reshape(-1,1)
y_train = y_train.squeeze()
y_train.shape

(1528,)

In [9]:
# model
class ann_try(nn.Module):
    def __init__(self, input_dim=1024,layer1=512,layer2=256,drp=.2,output=1):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim,layer1),
            nn.ReLU(),
            nn.Dropout(drp),
            nn.Linear(layer1,layer2),
            nn.ReLU(),
            nn.Dropout(drp),
            nn.Linear(layer2,output),
            nn.Sigmoid(),
        )
    def forward(self,input):
        return self.net(input)

In [18]:
y_train

array([1., 1., 0., ..., 0., 1., 1.], dtype=float32)

In [16]:
from skorch import NeuralNetClassifier
from sklearn.model_selection import GridSearchCV
import time

def gs_tune(x_train,y_train):
    x_train, y_train = np.array(x_train), np.array(y_train)
    x_train = x_train.astype(np.float32)
    y_train = y_train.astype(np.int64)
    # y_train=y_train.reshape(-1,1)

    net = NeuralNetClassifier(ann_try(), 
                             optimizer=torch.optim.Adam
                             , max_epochs=30
                             , lr=0.001
                             , verbose=1
                             , batch_size=64
                             , optimizer__weight_decay=.001)
    
    params = {
        'lr': np.arange(1e-4,1e-2,.001),
        'optimizer__weight_decay':np.arange(1e-4,1e-2,.001)
    }

    gs = GridSearchCV(net, params, refit=True, cv=3,scoring='roc_auc')
    gs.fit(x_train, y_train)
    return gs

y_train = y_train.flatten()
y_validation = y_validation.flatten()
start = time.time()
grid_model=gs_tune(X_train,y_train)
end = time.time()
print('Grid search takes {:.2f} seconds to tune'.format(end - start))

y_pred_train = grid_model(X_train)>.5
acc_train = 100*torch.mean((y_pred_train == y_train).float())
y_pred_val = grid_model(X_validation)>.5
acc_val = 100*torch.mean((y_pred_val == y_validation).float())

print('Accuracy on trainset: ', acc_train)
print("Accuracy on validationset: ", acc_val)

ValueError: 
All the 300 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
300 fits failed with the following error:
Traceback (most recent call last):
  File "/home/mylab-pharma/.conda/envs/tuele-pharma-env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 732, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/mylab-pharma/.conda/envs/tuele-pharma-env/lib/python3.12/site-packages/skorch/classifier.py", line 165, in fit
    return super(NeuralNetClassifier, self).fit(X, y, **fit_params)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mylab-pharma/.conda/envs/tuele-pharma-env/lib/python3.12/site-packages/skorch/net.py", line 1319, in fit
    self.partial_fit(X, y, **fit_params)
  File "/home/mylab-pharma/.conda/envs/tuele-pharma-env/lib/python3.12/site-packages/skorch/net.py", line 1278, in partial_fit
    self.fit_loop(X, y, **fit_params)
  File "/home/mylab-pharma/.conda/envs/tuele-pharma-env/lib/python3.12/site-packages/skorch/net.py", line 1190, in fit_loop
    self.run_single_epoch(iterator_train, training=True, prefix="train",
  File "/home/mylab-pharma/.conda/envs/tuele-pharma-env/lib/python3.12/site-packages/skorch/net.py", line 1226, in run_single_epoch
    step = step_fn(batch, **fit_params)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mylab-pharma/.conda/envs/tuele-pharma-env/lib/python3.12/site-packages/skorch/net.py", line 1105, in train_step
    self._step_optimizer(step_fn)
  File "/home/mylab-pharma/.conda/envs/tuele-pharma-env/lib/python3.12/site-packages/skorch/net.py", line 1060, in _step_optimizer
    optimizer.step(step_fn)
  File "/home/mylab-pharma/.conda/envs/tuele-pharma-env/lib/python3.12/site-packages/torch/optim/optimizer.py", line 385, in wrapper
    out = func(*args, **kwargs)
          ^^^^^^^^^^^^^^^^^^^^^
  File "/home/mylab-pharma/.conda/envs/tuele-pharma-env/lib/python3.12/site-packages/torch/optim/optimizer.py", line 76, in _use_grad
    ret = func(self, *args, **kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mylab-pharma/.conda/envs/tuele-pharma-env/lib/python3.12/site-packages/torch/optim/adam.py", line 146, in step
    loss = closure()
           ^^^^^^^^^
  File "/home/mylab-pharma/.conda/envs/tuele-pharma-env/lib/python3.12/site-packages/skorch/net.py", line 1094, in step_fn
    step = self.train_step_single(batch, **fit_params)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mylab-pharma/.conda/envs/tuele-pharma-env/lib/python3.12/site-packages/skorch/net.py", line 994, in train_step_single
    loss = self.get_loss(y_pred, yi, X=Xi, training=True)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mylab-pharma/.conda/envs/tuele-pharma-env/lib/python3.12/site-packages/skorch/classifier.py", line 150, in get_loss
    return super().get_loss(y_pred, y_true, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mylab-pharma/.conda/envs/tuele-pharma-env/lib/python3.12/site-packages/skorch/net.py", line 1665, in get_loss
    return self.criterion_(y_pred, y_true)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mylab-pharma/.conda/envs/tuele-pharma-env/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mylab-pharma/.conda/envs/tuele-pharma-env/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mylab-pharma/.conda/envs/tuele-pharma-env/lib/python3.12/site-packages/torch/nn/modules/loss.py", line 216, in forward
    return F.nll_loss(input, target, weight=self.weight, ignore_index=self.ignore_index, reduction=self.reduction)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/mylab-pharma/.conda/envs/tuele-pharma-env/lib/python3.12/site-packages/torch/nn/functional.py", line 2733, in nll_loss
    return torch._C._nn.nll_loss_nd(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
IndexError: Target 1 is out of bounds.
