In [None]:
# Small Molecule Modeling for Hypertrophic Cardiomyopathy (HCM) Therapeutics

## Graph Neural Networks and Molecular Property Prediction
### Using QM9 dataset and DeepChem for therapeutic discovery

**Objectives:**
1. Load and analyze the QM9 molecular dataset
2. Train Graph Neural Networks for molecular property prediction
3. Identify drug-like molecules
4. Connect the results to MYH7 variant-targeting strategies


In [None]:
import deepchem as dc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from rdkit import Chem
from rdkit.Chem import Descriptors, Crippen
import seaborn as sns

# --- Configuration: everything a reader might want to tune -------------------
SEED = 42                           # seed for NumPy's global RNG (see note below)
QM9_DATA_DIR = '../data/molecules'  # directory passed to the QM9 loader as data_dir
RELOAD_FEATURIZED = False           # False = re-featurize on every run (original behaviour);
                                    # NOTE(review): True should reuse cached features — confirm
DROPOUT_RATE = 0.2                  # dropout passed to GraphConvModel
N_EPOCHS = 10                       # number of training epochs

# Seed before any stochastic step so a Restart & Run All is repeatable.
# NOTE(review): this only seeds NumPy (presumably covering DeepChem's random
# split and batch shuffling — verify); the deep-learning backend is not seeded
# here, so weight initialisation and dropout masks may still vary between runs.
np.random.seed(SEED)

print("🧪 Setting up molecular modeling pipeline...")

# Load QM9 featurized for graph convolutions. The loader returns the task
# names, a (train, valid, test) tuple of datasets, and the transformers that
# were applied to the data.
qm9_tasks, qm9_datasets, qm9_transformers = dc.molnet.load_qm9(
    featurizer='GraphConv',
    reload=RELOAD_FEATURIZED,
    data_dir=QM9_DATA_DIR,
)

train_dataset, valid_dataset, test_dataset = qm9_datasets

# Fail fast with a clear message instead of training on an empty split.
for split_name, split in (
    ("training", train_dataset),
    ("validation", valid_dataset),
    ("test", test_dataset),
):
    assert len(split) > 0, f"QM9 {split_name} split is empty"

print("✅ QM9 dataset loaded:")
print(f"  Training: {len(train_dataset)} molecules")
print(f"  Validation: {len(valid_dataset)} molecules")
print(f"  Test: {len(test_dataset)} molecules")

# Train a simple Graph Neural Network: one regression output per QM9 task.
model = dc.models.GraphConvModel(
    n_tasks=len(qm9_tasks),
    mode='regression',
    dropout=DROPOUT_RATE,
)

print("🤖 Training Graph Neural Network...")
# fit() returns the average training loss; keep it so the run leaves a
# visible record of how training ended rather than discarding it.
final_train_loss = model.fit(train_dataset, nb_epoch=N_EPOCHS)
print(f"  Final average training loss: {final_train_loss:.4f}")
