## CSC413 - research project
---

In [1]:
# Mount Google Drive inside the Colab VM so the project files stored there are reachable.
from google.colab import drive
drive.mount('/content/drive')
# Make the project folder the working directory: later cells use paths relative to
# it (e.g. "small_dataset/data", "model/...") and presumably import the project's
# local modules from it as well.
%cd /content/drive/MyDrive/DeepPROTACs

Mounted at /content/drive
/content/drive/.shortcut-targets-by-id/1_2em3v0TLxpcrRipRJlWxC6PKaqk5biD/DeepPROTACs


In [2]:
! pip install torch
! pip install torch_geometric
! pip install rdkit

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torch_geometric
  Downloading torch_geometric-2.3.0.tar.gz (616 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 616.2/616.2 kB 38.2 MB/s eta 0:00:00
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
Building wheels for collected packages: torch_geometric
  Building wheel for torch_geometric (pyproject.toml) ... done
  Created wheel for torch_geometric: filename=torch_geometric-2.3.0-py3-none-any.whl size=909897 sha256=6a7ca0ff02a4bfb3414975fe364b396b6d99acc37eeeb5b383893fc909e5fd74
  Stored in directory: /root/.cache/pip/wheels/cd/7d/6b/17150450b80b4a3656a84330e22709ccd8dc0f8f4773ba4133
Successfully built

In [3]:
import sys
import numpy as np
import torch
import os
import pickle
import logging
from pathlib import  Path
from torch.utils.data import DataLoader, WeightedRandomSampler
from torch.utils.tensorboard import SummaryWriter
from protacloader import PROTACSet, collater
from model import GraphConv, SmilesNet, ProtacModel
from train_and_test import train
from prepare_data import GraphData

In [4]:
# --- Training configuration ---------------------------------------------------
BATCH_SIZE = 1
EPOCH = 30
TRAIN_RATE = 0.8  # fraction of the dataset used for the training split
LEARNING_RATE = 0.0001
TRAIN_NAME = "Original-model_weighted"  # names the log file and the TensorBoard run
root = "small_dataset/data"  # dataset root handed to GraphData

# --- Logging ------------------------------------------------------------------
# The log directory has to exist before basicConfig opens its FileHandler;
# without this a fresh "Restart & Run All" fails with FileNotFoundError because
# the directory is only created by a later cell.
Path("/content/drive/MyDrive/DeepPROTACs/log").mkdir(exist_ok=True)
# force=True removes *and closes* any handlers already attached to the root
# logger, so re-running this cell neither duplicates log lines nor leaks the
# previously opened log file.
logging.basicConfig(filename="/content/drive/MyDrive/DeepPROTACs/log/"+TRAIN_NAME+".log", filemode="a", level=logging.DEBUG, force=True)
# NOTE(review): this configures a logger literally named 'RootLogger', not the
# root logger (whose level is already set by basicConfig above). Kept in case a
# project module looks this name up -- confirm and drop if unused.
logging.getLogger('RootLogger').setLevel(logging.DEBUG)

In [5]:
# Ensure the output folders for log files and saved models exist
# (already-existing folders are left untouched).
log_dir = Path('/content/drive/MyDrive/DeepPROTACs/log')
model_dir = Path('/content/drive/MyDrive/DeepPROTACs/model')
for output_dir in (log_dir, model_dir):
    output_dir.mkdir(exist_ok=True)

In [6]:
def _balanced_sampler(dataset, num_samples):
    """Return a WeightedRandomSampler that gives every class equal total weight.

    Args:
        dataset: iterable of samples; each sample must expose an integer class
            label under the "label" key.
        num_samples: number of indices the sampler yields per pass.

    Returns:
        A WeightedRandomSampler (drawing with replacement) in which a sample's
        weight is len(dataset) / (number of samples sharing its class).

    Raises:
        ValueError: if the dataset yields no samples.
    """
    labels = [int(sample["label"]) for sample in dataset]
    if not labels:
        raise ValueError("cannot build a class-balanced sampler from an empty dataset")
    classes, counts = np.unique(labels, return_counts=True)
    total = int(counts.sum())
    # Key the weights by label *value*. Indexing a positional list with the label
    # breaks (IndexError or wrong weight) whenever a split lacks a class or the
    # labels are not exactly 0..k-1.
    weight_of = {int(cls): total / int(cnt) for cls, cnt in zip(classes, counts)}
    return WeightedRandomSampler(
        weights=[weight_of[y] for y in labels],
        num_samples=num_samples,
        replacement=True,
    )


def main(train_num_samples=1800, test_num_samples=500):
    """Load the dataset under `root`, split it, and train the PROTAC model.

    Reads the module-level configuration (BATCH_SIZE, EPOCH, TRAIN_RATE,
    LEARNING_RATE, TRAIN_NAME, root). The first TRAIN_RATE fraction of the
    dataset (in stored order) is used for training and the remainder for
    validation; both loaders draw through a class-balancing sampler.

    Args:
        train_num_samples: indices drawn per epoch by the training sampler.
        test_num_samples: indices drawn per pass by the validation sampler.

    Returns:
        None. The value returned by train() is not used here; train() is
        presumably responsible for saving the model -- confirm in
        train_and_test.
    """
    ligase_ligand = GraphData("ligase_ligand", root)
    ligase_pocket = GraphData("ligase_pocket", root)
    target_ligand = GraphData("target_ligand", root)
    target_pocket = GraphData("target_pocket", root)
    # NOTE: pickle.load executes arbitrary code from the file; only point this
    # at dataset artifacts you produced or trust.
    with open(os.path.join(target_pocket.processed_dir, "smiles.pkl"), "rb") as f:
        smiles = pickle.load(f)
    with open('small_dataset/name.pkl', 'rb') as f:
        name_list = pickle.load(f)
    label = torch.load(os.path.join(target_pocket.processed_dir, "label.pt"))

    protac_set = PROTACSet(
        name_list,
        ligase_ligand,
        ligase_pocket,
        target_ligand,
        target_pocket,
        smiles,
        label,
    )
    data_size = len(protac_set)
    train_size = int(data_size * TRAIN_RATE)
    test_size = data_size - train_size
    logging.info(f"all data: {data_size}")
    logging.info(f"train data: {train_size}")
    logging.info(f"test data: {test_size}")
    # Sequential (unshuffled) split: the first train_size items train, the rest validate.
    train_dataset = torch.utils.data.Subset(protac_set, range(train_size))
    test_dataset = torch.utils.data.Subset(protac_set, range(train_size, data_size))

    sampler_train = _balanced_sampler(train_dataset, train_num_samples)
    # NOTE(review): the validation loader is also resampled with replacement, so
    # validation metrics are computed on a random class-balanced resample rather
    # than on each held-out item exactly once -- confirm this is intended.
    sampler_test = _balanced_sampler(test_dataset, test_num_samples)

    trainloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler_train, collate_fn=collater, drop_last=False)
    testloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, sampler=sampler_test, collate_fn=collater, drop_last=False)

    ligase_ligand_model = GraphConv(num_embeddings=10)
    ligase_pocket_model = GraphConv(num_embeddings=5)
    target_ligand_model = GraphConv(num_embeddings=10)
    target_pocket_model = GraphConv(num_embeddings=5)
    smiles_model = SmilesNet(batch_size=BATCH_SIZE)
    model = ProtacModel(
        ligase_ligand_model,
        ligase_pocket_model,
        target_ligand_model,
        target_pocket_model,
        smiles_model,
    )
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    writer = SummaryWriter(f'runs/{TRAIN_NAME}')
    try:
        train(
            model,
            train_loader=trainloader,
            valid_loader=testloader,
            device=device,
            writer=writer,
            LOSS_NAME=TRAIN_NAME,
            batch_size=BATCH_SIZE,
            epoch=EPOCH,
            lr=LEARNING_RATE
        )
    finally:
        # Flush pending TensorBoard events even if training aborts; a second
        # close() is ignored should train() have closed the writer already.
        writer.close()
    # Detach the root log handlers once training is done and close them so the
    # log file handle is released instead of leaked.
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
        handler.close()

In [7]:
# Run the training pipeline using the configuration constants defined in the config cell.
main()



In [8]:
from train_and_test import test

# Evaluate the saved model on the separate test dataset.
BATCH_SIZE = 1
# Use a dedicated name for the test data root: main() reads the global `root`,
# so overwriting it here would make a later re-run of training silently use the
# test dataset.
test_root = "test_dataset/data"
ligase_ligand = GraphData("ligase_ligand", test_root)
ligase_pocket = GraphData("ligase_pocket", test_root)
target_ligand = GraphData("target_ligand", test_root)
target_pocket = GraphData("target_pocket", test_root)
# NOTE: pickle.load executes arbitrary code from the file; only load dataset
# artifacts you produced or trust.
with open(os.path.join(target_pocket.processed_dir, "smiles.pkl"), "rb") as f:
    smiles = pickle.load(f)
with open('test_dataset/name.pkl', 'rb') as f:
    name_list = pickle.load(f)
label = torch.load(os.path.join(target_pocket.processed_dir, "label.pt"))
test_set = PROTACSet(
    name_list,
    ligase_ligand,
    ligase_pocket,
    target_ligand,
    target_pocket,
    smiles,
    label,
)
# No sampler and no shuffling: every test item is evaluated once, in stored order.
testloader = DataLoader(test_set, batch_size=BATCH_SIZE, collate_fn=collater, drop_last=False)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# The checkpoint holds the complete pickled model object, so no freshly
# initialised ProtacModel needs to be built first (it would be discarded).
# map_location lets a model saved on GPU load on a CPU-only runtime.
# weights_only=False is required to unpickle a full model on newer torch
# releases; unpickling runs arbitrary code, so only load checkpoints you trust.
model = torch.load("model/Original-model_weighted.pt", map_location=device, weights_only=False)
val_loss, val_acc, auroc, y_pred, y_true = test(model, testloader, device=device)
# Show predictions next to the true labels as well: accuracy alone can hide a
# model that predicts a single class for every sample.
print(val_acc)
print(auroc)
print(y_pred)
print(y_true)

0.6875
0.6181818181818182
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
[1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0]
