In [None]:
!apt install swig cmake

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
cmake is already the newest version (3.22.1-1ubuntu1.22.04.2).
Suggested packages:
  swig-doc swig-examples swig4.0-examples swig4.0-doc
The following NEW packages will be installed:
  swig swig4.0
0 upgraded, 2 newly installed, 0 to remove and 29 not upgraded.
Need to get 1,116 kB of archives.
After this operation, 5,542 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 swig4.0 amd64 4.0.2-1ubuntu1 [1,110 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 swig all 4.0.2-1ubuntu1 [5,632 B]
Fetched 1,116 kB in 1s (1,504 kB/s)
Selecting previously unselected package swig4.0.
(Reading database ... 125044 files and directories currently installed.)
Preparing to unpack .../swig4.0_4.0.2-1ubuntu1_amd64.deb ...
Unpacking swig4.0 (4.0.2-1ubuntu1) ...
Selecting previously unselected package swig.
Preparing to unpack .../swig_4.0.2-1ubu

In [None]:
!pip install -r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt

Collecting stable-baselines3==2.0.0a5 (from -r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1))
  Downloading stable_baselines3-2.0.0a5-py3-none-any.whl.metadata (5.3 kB)
Collecting swig (from -r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 2))
  Downloading swig-4.3.0-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl.metadata (3.5 kB)
Collecting huggingface_sb3 (from -r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 4))
  Downloading huggingface_sb3-3.0-py3-none-any.whl.metadata (6.3 kB)
Collecting gymnasium==0.28.1 (from stable-baselines3==2.0.0a5->-r https://raw.githubusercontent.com/huggingface/deep-rl-class/main/notebooks/unit1/requirements-unit1.txt (line 1))
  Downloading gymnasium-0.28.1-py3-none-any.whl.metadata (9.2 kB)
Collecting jax-jumpy>=1.0.0 (from gymnasium==0.28.1->stable-b

In [None]:
pip install PyTDC

Collecting PyTDC
  Downloading pytdc-1.1.14.tar.gz (151 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/151.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m151.3/151.3 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting accelerate==0.33.0 (from PyTDC)
  Downloading accelerate-0.33.0-py3-none-any.whl.metadata (18 kB)
Collecting dataclasses<1.0,>=0.6 (from PyTDC)
  Downloading dataclasses-0.6-py3-none-any.whl.metadata (3.0 kB)
Collecting datasets<2.20.0 (from PyTDC)
  Downloading datasets-2.19.2-py3-none-any.whl.metadata (19 kB)
Collecting evaluate==0.4.2 (from PyTDC)
  Downloading evaluate-0.4.2-py3-none-any.whl.metadata (9.3 kB)
Collecting fuzzywuzzy<1.0,>=0.18.0 (from PyTDC)
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl.metadata (4.9 kB)
Collecting scikit-learn==1.2.2 (from PyTDC)
  Downloading scikit_learn-1.2.2-cp311-cp311-manylinu

In [None]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import torch
from transformers import BertTokenizer, BertModel
from torchvision import models, transforms
from PIL import Image
import re
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
from tdc.single_pred import ADME
import os

# Run on the GPU when PyTorch can see one, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

class SimulatedPatientData:
    """Randomly generated stand-in for a patient cohort.

    All values are drawn from NumPy's global random state, so call
    ``np.random.seed(...)`` beforehand for a reproducible cohort.

    Attributes:
        num_patients: Number of simulated patients.
        num_drugs: Number of candidate drugs (columns of ``drug_responses``).
        blood_reports: One free-text report per patient containing a
            hemoglobin and a glucose reading.
        protein_scans: One image file name per patient. The files are not
            created here; they are only names.
        drug_responses: Array of shape ``(num_patients, num_drugs)`` with
            uniform values in ``[0, 1)``.
    """

    def __init__(self, num_patients=10, num_drugs=10):
        """Generate the cohort.

        Args:
            num_patients: How many patients to simulate.
            num_drugs: How many drug-response values to draw per patient.
                Defaults to 10, the size that used to be hard-coded.
        """
        self.num_patients = num_patients
        self.num_drugs = num_drugs
        self.blood_reports = [f"Patient {i}: Hemoglobin: {np.random.uniform(10, 15):.1f} g/dL, Glucose: {np.random.uniform(70, 120):.1f} mg/dL" for i in range(num_patients)]
        self.protein_scans = [f"protein_scan_{i}.jpg" for i in range(num_patients)]
        self.drug_responses = np.random.rand(num_patients, num_drugs)

    def get_patient_data(self, idx):
        """Return ``(blood_report, protein_scan, drug_responses_row)`` for patient ``idx``."""
        return self.blood_reports[idx], self.protein_scans[idx], self.drug_responses[idx]

class DataPipeline:
    """Turns raw patient inputs into numeric feature vectors.

    Blood reports are embedded with a frozen ``bert-base-uncased`` model and
    protein scans with a frozen ImageNet-pretrained ResNet-18. Both networks
    are put in eval mode and moved to the module-level ``device``.
    """

    def __init__(self):
        """Load the tokenizer, both pretrained networks and the image preprocessing."""
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.bert_model = BertModel.from_pretrained('bert-base-uncased').to(device)
        self.bert_model.eval()
        # `pretrained=True` is deprecated in torchvision; IMAGENET1K_V1 is the
        # checkpoint that flag used to select.
        self.resnet = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1).to(device)
        self.resnet.eval()
        self.image_transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

    @staticmethod
    def _extract_lab_value(report, label):
        """Return the number that follows ``"<label>:"`` in ``report`` as a float.

        Accepts both integer ("14") and decimal ("14.2") readings.

        Raises:
            ValueError: If ``report`` has no numeric value for ``label``.
        """
        match = re.search(rf"{re.escape(label)}:\s*(\d+(?:\.\d+)?)", report)
        if match is None:
            raise ValueError(f"Blood report has no numeric '{label}' value: {report!r}")
        return float(match.group(1))

    def process_blood_report(self, report):
        """Embed one blood report and append its parsed lab values.

        Args:
            report: Report text containing ``Hemoglobin: <number>`` and
                ``Glucose: <number>``.

        Returns:
            1-D NumPy array: the BERT pooled output followed by
            ``[hemoglobin, glucose]``.

        Raises:
            ValueError: If either lab value cannot be found in ``report``.
        """
        # Parse first so a malformed report fails before the model forward pass.
        hemoglobin = self._extract_lab_value(report, "Hemoglobin")
        glucose = self._extract_lab_value(report, "Glucose")

        inputs = self.tokenizer(report, return_tensors='pt', truncation=True, padding=True, max_length=128)
        inputs = {k: v.to(device) for k, v in inputs.items()}
        with torch.no_grad():
            outputs = self.bert_model(**inputs)
        # Drop only the batch axis (a single report is encoded at a time).
        bert_features = outputs.pooler_output.squeeze(0).cpu().numpy()
        return np.concatenate([bert_features, [hemoglobin, glucose]])

    def process_protein_scan(self, image_path):
        """Run an image through ResNet-18 and return its output vector.

        Args:
            image_path: Path to the scan. If the file does not exist, a random
                224x224 RGB image drawn from NumPy's global random state is
                used instead, so simulated patients still get features.

        Returns:
            1-D NumPy array with the network's output for the image.
        """
        if os.path.exists(image_path):
            # `convert` returns an independent copy, so the file handle can be
            # released as soon as the block exits.
            with Image.open(image_path) as source:
                img = source.convert('RGB')
        else:
            # NOTE(review): silent placeholder; features are noise when the file is missing.
            # randint's upper bound is exclusive: 256 covers the full uint8 range.
            img = Image.fromarray(np.random.randint(0, 256, (224, 224, 3), dtype=np.uint8))
        input_tensor = self.image_transform(img).unsqueeze(0).to(device)
        with torch.no_grad():
            features = self.resnet(input_tensor)
        return features.squeeze(0).cpu().numpy()


class PatientDrugEnv(gym.Env):
    """Single-step drug-selection environment.

    Each episode presents one randomly chosen patient. The observation is that
    patient's feature vector, the action is the index of a drug, and the
    reward is the patient's stored response to that drug. Every episode
    terminates after one step.
    """

    def __init__(self, patient_data):
        """Build the spaces and the feature pipeline.

        Args:
            patient_data: Object exposing ``num_patients`` and
                ``get_patient_data(idx)`` returning
                ``(blood_report, protein_scan, drug_responses)``.
        """
        super().__init__()
        self.patient_data = patient_data
        self.pipeline = DataPipeline()
        # One action per column of the response matrix; fall back to the
        # historical value of 10 when the data source does not expose one.
        responses = getattr(patient_data, "drug_responses", None)
        self.num_drugs = int(np.shape(responses)[1]) if np.ndim(responses) == 2 else 10
        self.state_dim = 772  # 768 (BERT) + 2 (blood params) + 2 (ResNet last 2)
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.state_dim,), dtype=np.float32)
        self.action_space = spaces.Discrete(self.num_drugs)
        self.current_patient_idx = None
        self.state = None
        # Report text -> blood features. The BERT forward pass is deterministic
        # (eval mode, no grad), so each distinct report is embedded only once.
        self._blood_features = {}

    def reset(self, seed=None, options=None):
        """Pick a patient and return ``(observation, info)``.

        Args:
            seed: Optional seed. Seeds the environment's own ``np_random`` and,
                as before, NumPy's global state, which the pipeline's
                placeholder image is drawn from.
            options: Unused.
        """
        # Required by the Gymnasium API so that `self.np_random` is seeded.
        super().reset(seed=seed)
        if seed is not None:
            np.random.seed(seed)
        self.current_patient_idx = int(self.np_random.integers(self.patient_data.num_patients))
        blood_report, protein_scan, _ = self.patient_data.get_patient_data(self.current_patient_idx)
        blood_features = self._blood_features.get(blood_report)
        if blood_features is None:
            blood_features = self.pipeline.process_blood_report(blood_report)
            self._blood_features[blood_report] = blood_features
        protein_features = self.pipeline.process_protein_scan(protein_scan)
        self.state = np.concatenate([blood_features, protein_features[-2:]]).astype(np.float32)
        info = {"patient_idx": self.current_patient_idx}
        return self.state, info

    def step(self, action):
        """Score the chosen drug for the current patient and end the episode.

        Returns:
            ``(observation, reward, terminated, truncated, info)`` where the
            observation is unchanged, ``terminated`` is always True and
            ``truncated`` is always False.

        Raises:
            RuntimeError: If called before ``reset()``.
        """
        if self.current_patient_idx is None:
            raise RuntimeError("step() called before reset()")
        _, _, drug_responses = self.patient_data.get_patient_data(self.current_patient_idx)
        # Return a plain Python float rather than a NumPy scalar.
        reward = float(drug_responses[action])
        next_state = self.state
        terminated = True
        truncated = False
        info = {}
        return next_state, reward, terminated, truncated, info

    def render(self, mode='human'):
        """Print the current patient index and the first five state values."""
        print(f"Patient {self.current_patient_idx}: State {self.state[:5]}...")

def load_tdc_data(num_drugs=10):
    """Load candidate drugs from the TDC ``Caco2_Wang`` ADME dataset.

    Args:
        num_drugs: How many entries to keep from the start of the dataset.
            Defaults to 10, the size that used to be hard-coded.

    Returns:
        List with the first ``num_drugs`` values of the dataset's ``Drug``
        column (SMILES strings).
    """
    data = ADME(name='Caco2_Wang')
    drug_smiles = data.get_data()['Drug'].tolist()
    return drug_smiles[:num_drugs]

if __name__ == "__main__":
    # Seed every random source used below so a fresh run reproduces the same
    # simulated cohort and training trajectory.
    seed = 42
    np.random.seed(seed)
    torch.manual_seed(seed)

    patient_data = SimulatedPatientData(num_patients=10)
    drug_list = load_tdc_data()
    print(f"Loaded {len(drug_list)} drugs from TDC: {drug_list[:2]}...")
    env = PatientDrugEnv(patient_data)
    check_env(env)

    # Actions are later used as indices into `drug_list`; fail here rather
    # than with an IndexError at recommendation time.
    if env.action_space.n != len(drug_list):
        raise ValueError(
            f"Action space has {env.action_space.n} drugs but {len(drug_list)} were loaded"
        )

    model = PPO("MlpPolicy", env, verbose=1, learning_rate=0.0003, n_steps=2048, seed=seed, device=device)
    # Report the hardware actually in use instead of always claiming GPU.
    print(f"Training RL model on {'GPU' if device.type == 'cuda' else 'CPU'}...")
    model.learn(total_timesteps=10000)
    model.save("ppo_drug_discovery")


Downloading...


Using device: cuda


100%|██████████| 82.5k/82.5k [00:00<00:00, 688kiB/s]
Loading...
Done!


Loaded 10 drugs from TDC: ['Oc1cc(O)c2c(c1)OC(c1ccc(O)c(O)c1)C(O)C2', 'C/C=C\\C#CCC/C=C\\C=C\\C(=O)NCC(C)C']...


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 153MB/s]


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Training RL model on GPU...
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 0.406    |
| time/              |          |
|    fps             | 59       |
|    iterations      | 1        |
|    time_elapsed    | 34       |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1           |
|    ep_rew_mean          | 0.52        |
| time/                   |             |
|    fps                  | 60          |
|    iterations           | 2           |
|    time_elapsed         | 68          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.051488392 |
|    clip_fraction        | 0.474       |
|    clip_range           | 0.2  

In [None]:
# Build the observation for an unseen patient exactly as the environment does:
# blood-report features followed by the last two ResNet outputs.
new_blood_report = "Patient Test: Hemoglobin: 14.2 g/dL, Glucose: 95.3 mg/dL"
# If this file is missing, the pipeline substitutes a random placeholder image.
new_protein_scan = "images.jpeg"
pipeline = DataPipeline()
blood_features = pipeline.process_blood_report(new_blood_report)
protein_features = pipeline.process_protein_scan(new_protein_scan)
new_state = np.concatenate([blood_features, protein_features[-2:]]).astype(np.float32)
model = PPO.load("ppo_drug_discovery", device=device)
action, _ = model.predict(new_state, deterministic=True)
# `predict` returns a NumPy value; convert it before indexing the Python list.
recommended_drug = drug_list[int(action)]
print(f"Recommended drug for new patient: {recommended_drug}")

# Sanity check: average reward over a few fresh single-step episodes.
# (This loop used to be disabled inside a string literal although its result
# is part of the recorded output.)
num_eval_episodes = 5
total_reward = 0.0
for _ in range(num_eval_episodes):
    obs, info = env.reset()
    eval_action, _ = model.predict(obs)
    _, reward, _, _, _ = env.step(eval_action)
    total_reward += reward
print(f"Average reward over {num_eval_episodes} test episodes: {total_reward / num_eval_episodes:.2f}")



Recommended drug for new patient: COc1ccc2c3c1O[C@H]1[C@@H](O)C=C[C@H]4[C@@H](C2)N(C)CC[C@]314
Average reward over 5 test episodes: 0.77


In [None]:
pip install pubchempy

Collecting pubchempy
  Downloading PubChemPy-1.0.4.tar.gz (29 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pubchempy
  Building wheel for pubchempy (setup.py) ... [?25l[?25hdone
  Created wheel for pubchempy: filename=PubChemPy-1.0.4-py3-none-any.whl size=13819 sha256=064fec16ee9af88db76492f980e37618ee45e392ba8cc8ae16f05a1beb9e97b4
  Stored in directory: /root/.cache/pip/wheels/8b/e3/6c/3385b2db08b0985a87f5b117f98d0cb61a3ae3ca3bcbbd8307
Successfully built pubchempy
Installing collected packages: pubchempy
Successfully installed pubchempy-1.0.4


In [None]:
import pubchempy as pcp
from rdkit import Chem
from rdkit.Chem import rdMolDescriptors

def decode_smiles(smiles):
    """Look up a readable name and the molecular formula for a SMILES string.

    The formula is computed locally with RDKit; the name is the IUPAC name of
    the first PubChem match for the same SMILES (this performs a network
    request).

    Args:
        smiles: SMILES string of the molecule.

    Returns:
        ``(drug_name, mol_formula)``. ``drug_name`` is ``"Unknown"`` when
        PubChem returns no match or the match has no IUPAC name.

    Raises:
        ValueError: If RDKit cannot parse ``smiles``.
    """
    mol = Chem.MolFromSmiles(smiles)
    # MolFromSmiles signals a parse failure by returning None, not by raising.
    if mol is None:
        raise ValueError(f"Invalid SMILES string: {smiles!r}")
    mol_formula = rdMolDescriptors.CalcMolFormula(mol)
    compounds = pcp.get_compounds(smiles, 'smiles')
    # A record can exist without an IUPAC name; treat that like "no match".
    drug_name = (compounds[0].iupac_name if compounds else None) or "Unknown"
    return drug_name, mol_formula

# Resolve the recommended SMILES to a name and formula, then report both.
drug_name, mol_formula = decode_smiles(recommended_drug)
print(f"Drug Name: {drug_name}", f"Molecular Formula: {mol_formula}", sep="\n")

Drug Name: (4R,4aR,7S,7aR,12bS)-9-methoxy-3-methyl-2,4,4a,7,7a,13-hexahydro-1H-4,12-methanobenzofuro[3,2-e]isoquinolin-7-ol
Molecular Formula: C18H21NO3
