In [1]:
!pip install deepchem

Collecting deepchem
  Downloading deepchem-2.8.0-py3-none-any.whl.metadata (2.0 kB)
Collecting rdkit (from deepchem)
  Downloading rdkit-2024.9.5-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.0 kB)
Downloading deepchem-2.8.0-py3-none-any.whl (1.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading rdkit-2024.9.5-cp311-cp311-manylinux_2_28_x86_64.whl (34.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.3/34.3 MB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rdkit, deepchem
Successfully installed deepchem-2.8.0 rdkit-2024.9.5


In [2]:
!pip install hyperopt



In [3]:
import deepchem as dc
# Load the MoleculeNet HIV dataset with ECFP features and a scaffold split.
tasks, datasets, transformers = dc.molnet.load_hiv(
    featurizer='ECFP',
    splitter='scaffold',
)

# The loader returns the three splits together; unpack them for later cells.
train_dataset, valid_dataset, test_dataset = datasets

[1;30;43mStreaming output truncated to the last 5000 lines.[0m


In [4]:
from hyperopt import Trials, fmin, hp, space_eval, tpe

In [5]:
# Hyperparameter search space handed to hyperopt's fmin.
# Dict keys match the keyword names read by fm(); the first argument of each
# hp.* call is the hyperopt label (note the dropout label is 'dropout', while
# the dict key is 'dropouts').
search_space = {
    # Candidate hidden-layer layouts.
    'layer_sizes': hp.choice(
        'layer_sizes',
        [[500], [1000], [2000], [1000, 1000]],
    ),
    # Dropout drawn uniformly from [0.2, 0.5].
    'dropouts': hp.uniform('dropout', low=0.2, high=0.5),
    # Learning rate drawn uniformly from [0.0001, 0.001].
    'learning_rate': hp.uniform('learning_rate', low=0.0001, high=0.001),
}

In [6]:
# Inspect the space: each entry is an unevaluated hyperopt expression
# (see the Apply reprs in the cell output), not a sampled value.
search_space

{'layer_sizes': <hyperopt.pyll.base.Apply at 0x7fe98c21dad0>,
 'dropouts': <hyperopt.pyll.base.Apply at 0x7fe98bbee190>,
 'learning_rate': <hyperopt.pyll.base.Apply at 0x7fe98ba33610>}

In [7]:
import tempfile

In [8]:
# Validation metric: wraps dc.metrics.roc_auc_score in a DeepChem Metric.
# fm() passes it to the ValidationCallback and to model.evaluate, and reads
# the resulting score back under the key 'roc_auc_score'.
metric = dc.metrics.Metric(dc.metrics.roc_auc_score)

In [9]:
def fm(args):
  """Hyperopt objective: train one MultitaskClassifier and return its loss.

  Args:
    args: dict sampled from the search space, read under the keys
      'layer_sizes', 'dropouts' and 'learning_rate'.

  Returns:
    The negated validation ROC-AUC of the best checkpoint seen during
    training. It is negated because fmin minimizes the objective.
  """
  # The checkpoint directory is only needed for the duration of this trial.
  # TemporaryDirectory removes it on exit (even if training raises), so the
  # search does not leave one checkpoint folder behind per trial.
  with tempfile.TemporaryDirectory() as save_dir:
    model = dc.models.MultitaskClassifier(
        n_tasks=len(tasks),
        # NOTE(review): presumably the ECFP fingerprint length of the
        # featurized datasets -- confirm if the featurizer settings change.
        n_features=1024,
        layer_sizes=args['layer_sizes'],
        dropouts=args['dropouts'],
        learning_rate=args['learning_rate'])

    # Validation callback that evaluates every 1000 steps and saves the best
    # checkpoint, i.e. the one with the maximum score (save_on_minimum=False).
    validation = dc.models.ValidationCallback(
        valid_dataset,
        1000,
        [metric],
        save_dir=save_dir,
        transformers=transformers,
        save_on_minimum=False)

    model.fit(train_dataset, nb_epoch=25, callbacks=validation)

    # Restore the best checkpoint and score it on the validation set while
    # the checkpoint directory still exists.
    model.restore(model_dir=save_dir)
    valid_score = model.evaluate(valid_dataset, [metric], transformers)

  # Negate the score so that a higher ROC-AUC means a lower loss.
  return -1*valid_score['roc_auc_score']

In [None]:
# Run the TPE search: 15 evaluations of fm over search_space.
# `trials` records every evaluation; `best` is what fmin returns.
trials = Trials()
best = fmin(
    fn=fm,
    space=search_space,
    algo=tpe.suggest,
    max_evals=15,
    trials=trials,
)




  0%|          | 0/15 [00:00<?, ?trial/s, best loss=?]Step 1000 validation: roc_auc_score=0.737523
Step 2000 validation: roc_auc_score=0.769501
Step 3000 validation: roc_auc_score=0.774502
Step 4000 validation: roc_auc_score=0.784007
Step 5000 validation: roc_auc_score=0.777175
Step 6000 validation: roc_auc_score=0.767411
Step 7000 validation: roc_auc_score=0.774088
Step 8000 validation: roc_auc_score=0.769087


  data = torch.load(checkpoint, map_location=self.device)



  7%|▋         | 1/15 [04:26<1:02:07, 266.25s/trial, best loss: -0.7840072769370711]Step 1000 validation: roc_auc_score=0.742712
Step 2000 validation: roc_auc_score=0.776217
Step 3000 validation: roc_auc_score=0.782814
Step 4000 validation: roc_auc_score=0.788803
Step 5000 validation: roc_auc_score=0.777553
Step 6000 validation: roc_auc_score=0.778372
Step 7000 validation: roc_auc_score=0.780282
Step 8000 validation: roc_auc_score=0.773897


  data = torch.load(checkpoint, map_location=self.device)



 13%|█▎        | 2/15 [06:48<41:49, 193.07s/trial, best loss: -0.7888026743356273]  Step 1000 validation: roc_auc_score=0.756867
Step 2000 validation: roc_auc_score=0.776867
Step 3000 validation: roc_auc_score=0.751691
Step 4000 validation: roc_auc_score=0.765111
Step 5000 validation: roc_auc_score=0.747709
Step 6000 validation: roc_auc_score=0.746002
Step 7000 validation: roc_auc_score=0.744184
Step 8000 validation: roc_auc_score=0.739837


  data = torch.load(checkpoint, map_location=self.device)



 20%|██        | 3/15 [11:42<47:54, 239.52s/trial, best loss: -0.7888026743356273]Step 1000 validation: roc_auc_score=0.747495
Step 2000 validation: roc_auc_score=0.750025
Step 3000 validation: roc_auc_score=0.738114
Step 4000 validation: roc_auc_score=0.741487
Step 5000 validation: roc_auc_score=0.744398
Step 6000 validation: roc_auc_score=0.750502
Step 7000 validation: roc_auc_score=0.770584
Step 8000 validation: roc_auc_score=0.755308


  data = torch.load(checkpoint, map_location=self.device)



 27%|██▋       | 4/15 [16:29<47:18, 258.06s/trial, best loss: -0.7888026743356273]Step 1000 validation: roc_auc_score=0.734948
Step 2000 validation: roc_auc_score=0.755694
Step 3000 validation: roc_auc_score=0.748292
Step 4000 validation: roc_auc_score=0.768665
Step 5000 validation: roc_auc_score=0.768074
Step 6000 validation: roc_auc_score=0.771525
Step 7000 validation: roc_auc_score=0.768714
Step 8000 validation: roc_auc_score=0.77396
 33%|███▎      | 5/15 [17:52<32:28, 194.83s/trial, best loss: -0.7888026743356273]

  data = torch.load(checkpoint, map_location=self.device)



Step 1000 validation: roc_auc_score=0.734161
Step 2000 validation: roc_auc_score=0.762702
Step 3000 validation: roc_auc_score=0.775274
Step 4000 validation: roc_auc_score=0.775498
Step 5000 validation: roc_auc_score=0.773782
Step 6000 validation: roc_auc_score=0.76886
Step 7000 validation: roc_auc_score=0.770986
Step 8000 validation: roc_auc_score=0.775461
 40%|████      | 6/15 [19:18<23:43, 158.12s/trial, best loss: -0.7888026743356273]

  data = torch.load(checkpoint, map_location=self.device)



Step 1000 validation: roc_auc_score=0.735882
Step 2000 validation: roc_auc_score=0.777308
Step 3000 validation: roc_auc_score=0.784349
Step 4000 validation: roc_auc_score=0.7769
Step 5000 validation: roc_auc_score=0.774508
Step 6000 validation: roc_auc_score=0.788064
Step 7000 validation: roc_auc_score=0.769136
Step 8000 validation: roc_auc_score=0.776654


  data = torch.load(checkpoint, map_location=self.device)



 47%|████▋     | 7/15 [23:37<25:27, 190.93s/trial, best loss: -0.7888026743356273]Step 1000 validation: roc_auc_score=0.744984
Step 2000 validation: roc_auc_score=0.771476
Step 3000 validation: roc_auc_score=0.780795
Step 4000 validation: roc_auc_score=0.785576
Step 5000 validation: roc_auc_score=0.776507
Step 6000 validation: roc_auc_score=0.765943
Step 7000 validation: roc_auc_score=0.766441
Step 8000 validation: roc_auc_score=0.763327


  data = torch.load(checkpoint, map_location=self.device)



 53%|█████▎    | 8/15 [27:55<24:46, 212.38s/trial, best loss: -0.7888026743356273]Step 1000 validation: roc_auc_score=0.74462
Step 2000 validation: roc_auc_score=0.731276
Step 3000 validation: roc_auc_score=0.73851
Step 4000 validation: roc_auc_score=0.744033
Step 5000 validation: roc_auc_score=0.730811
Step 6000 validation: roc_auc_score=0.718023
Step 7000 validation: roc_auc_score=0.762668
Step 8000 validation: roc_auc_score=0.749278


  data = torch.load(checkpoint, map_location=self.device)



 60%|██████    | 9/15 [32:45<23:39, 236.54s/trial, best loss: -0.7888026743356273]Step 1000 validation: roc_auc_score=0.729021
Step 2000 validation: roc_auc_score=0.757572
Step 3000 validation: roc_auc_score=0.770548
Step 4000 validation: roc_auc_score=0.772036
Step 5000 validation: roc_auc_score=0.760431
Step 6000 validation: roc_auc_score=0.754723
Step 7000 validation: roc_auc_score=0.749689
Step 8000 validation: roc_auc_score=0.765353


  data = torch.load(checkpoint, map_location=self.device)



 67%|██████▋   | 10/15 [35:06<17:14, 206.97s/trial, best loss: -0.7888026743356273]Step 1000 validation: roc_auc_score=0.757633
Step 2000 validation: roc_auc_score=0.765643
Step 3000 validation: roc_auc_score=0.771825
Step 4000 validation: roc_auc_score=0.771044
Step 5000 validation: roc_auc_score=0.774984
Step 6000 validation: roc_auc_score=0.777321
Step 7000 validation: roc_auc_score=0.754926
Step 8000 validation: roc_auc_score=0.758331


  data = torch.load(checkpoint, map_location=self.device)



 73%|███████▎  | 11/15 [39:36<15:05, 226.49s/trial, best loss: -0.7888026743356273]Step 1000 validation: roc_auc_score=0.72813
Step 2000 validation: roc_auc_score=0.738788
Step 3000 validation: roc_auc_score=0.750183
Step 4000 validation: roc_auc_score=0.770563
Step 5000 validation: roc_auc_score=0.745791
Step 6000 validation: roc_auc_score=0.730866
Step 7000 validation: roc_auc_score=0.746629
Step 8000 validation: roc_auc_score=0.724606


  data = torch.load(checkpoint, map_location=self.device)



 80%|████████  | 12/15 [44:29<12:20, 246.73s/trial, best loss: -0.7888026743356273]Step 1000 validation: roc_auc_score=0.750114
Step 2000 validation: roc_auc_score=0.761314
Step 3000 validation: roc_auc_score=0.784373
Step 4000 validation: roc_auc_score=0.773773
Step 5000 validation: roc_auc_score=0.766891
Step 6000 validation: roc_auc_score=0.76812
Step 7000 validation: roc_auc_score=0.764584
Step 8000 validation: roc_auc_score=0.760843


  data = torch.load(checkpoint, map_location=self.device)



 87%|████████▋ | 13/15 [48:46<08:19, 249.67s/trial, best loss: -0.7888026743356273]Step 1000 validation: roc_auc_score=0.720734
Step 2000 validation: roc_auc_score=0.753348
Step 3000 validation: roc_auc_score=0.766806
Step 4000 validation: roc_auc_score=0.769152
Step 5000 validation: roc_auc_score=0.736942
Step 6000 validation: roc_auc_score=0.72992
Step 7000 validation: roc_auc_score=0.742834
Step 8000 validation: roc_auc_score=0.735178


  data = torch.load(checkpoint, map_location=self.device)



 93%|█████████▎| 14/15 [53:58<04:28, 268.50s/trial, best loss: -0.7888026743356273]Step 1000 validation: roc_auc_score=0.732556
Step 2000 validation: roc_auc_score=0.755722
Step 3000 validation: roc_auc_score=0.768785
Step 4000 validation: roc_auc_score=0.767571
Step 5000 validation: roc_auc_score=0.7832
Step 6000 validation: roc_auc_score=0.773129
Step 7000 validation: roc_auc_score=0.771819
Step 8000 validation: roc_auc_score=0.766118


  data = torch.load(checkpoint, map_location=self.device)



100%|██████████| 15/15 [56:17<00:00, 225.20s/trial, best loss: -0.7888026743356273]


In [None]:
# fmin reports hp.choice parameters as the index of the chosen option and keys
# everything by hyperopt label; space_eval maps that back onto search_space so
# the printed values are the actual hyperparameters (e.g. the layer sizes).
print("Best: {}".format(space_eval(search_space, best)))