In [20]:
# %% [markdown]
# # 💊 6. ADMET & Safety Validation
# **Goal:** Ensure our top drugs are "drug-like" (safe and absorbable).

import pandas as pd
from rdkit import Chem
from rdkit.Chem import Descriptors, QED



In [21]:
# Read the candidate molecules written out by the earlier notebooks.
try:
    top_candidates = pd.read_csv('../artifacts/top_candidates_for_docking.csv')
except FileNotFoundError:
    print("‚ùå Error: Could not find 'top_candidates_for_docking.csv'. Did you run Notebook 3?")
    # Fall back to a single-row placeholder so the cells below still execute
    # when the artifact is missing.
    top_candidates = pd.DataFrame({'clean_smiles': ['C']})
else:
    # Only reached when the CSV was read successfully.
    print(f"‚úÖ Loaded {len(top_candidates)} candidates for safety screening.")

‚úÖ Loaded 10 candidates for safety screening.


In [22]:
# Function to calculate Safety Metrics
def check_safety(smiles):
    """Compute drug-likeness metrics for a single SMILES string.

    Parameters
    ----------
    smiles : str
        SMILES representation of the molecule. Non-string values (such as a
        NaN read from an empty CSV cell), blank strings, and strings RDKit
        cannot parse are all treated as invalid input.

    Returns
    -------
    pd.Series
        Entries 'MW', 'LogP', 'QED_Score' and 'Lipinski_Pass'. For invalid
        input the three numeric entries are 0 and 'Lipinski_Pass' holds the
        failure marker.
    """
    invalid_row = {'MW': 0, 'LogP': 0, 'QED_Score': 0, 'Lipinski_Pass': "‚ùå"}

    # Guard before touching RDKit: MolFromSmiles raises on non-string
    # arguments, and a blank string does not describe a molecule.
    if not isinstance(smiles, str) or not smiles.strip():
        return pd.Series(invalid_row)

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # RDKit could not parse the SMILES.
        return pd.Series(invalid_row)

    # 1. Lipinski's Rule of 5 (Oral Absorption)
    mw = Descriptors.MolWt(mol)
    logp = Descriptors.MolLogP(mol)
    hbd = Descriptors.NumHDonors(mol)
    hba = Descriptors.NumHAcceptors(mol)

    # 2. QED (Quantitative Estimate of Drug-likeness)
    qed_score = QED.qed(mol)

    # 3. Safety Decision
    # The rule flags a property only when it EXCEEDS its limit (MW > 500,
    # LogP > 5, donors > 5, acceptors > 10), so the limits themselves pass.
    passed = (mw <= 500) and (logp <= 5) and (hbd <= 5) and (hba <= 10)

    return pd.Series({
        'MW': mw,
        'LogP': logp,
        'QED_Score': qed_score,
        'Lipinski_Pass': "‚úÖ" if passed else "‚ùå"
    })

# Score every candidate, then show the combined table ranked by QED (best first).
if not top_candidates.empty:
    smiles_column = top_candidates[['clean_smiles']]
    safety_results = top_candidates['clean_smiles'].apply(check_safety)

    # Side-by-side join: SMILES on the left, computed metrics on the right.
    final_report = pd.concat([smiles_column, safety_results], axis=1)

    print("\nüõ°Ô∏è FINAL SAFETY REPORT")
    ranked_report = final_report.sort_values(by='QED_Score', ascending=False)
    display(ranked_report)


üõ°Ô∏è FINAL SAFETY REPORT


Unnamed: 0,clean_smiles,MW,LogP,QED_Score,Lipinski_Pass
6,CC1(C)C2CC3OB(C(CCCCN)NC(=O)C4CCCN4C(=O)c4cccc...,616.568,3.887,0.304541,‚ùå
5,CC1(C)C2CC3OB(C(CCCCN)NC(=O)C4CCCN4C(=O)c4cccc...,640.556,5.1771,0.274164,‚ùå
4,CC1(C)C2CC3OB(C(CCCCN)NC(=O)C4CCCN4C(=O)c4cncc...,640.556,5.1771,0.274164,‚ùå
1,CC1(N)C2CC3OB(C(CCCCN)NC(=O)C4CCCN4C(=O)c4cccc...,640.556,4.4733,0.260595,‚ùå
9,Cc1ccc(NS(=O)(=O)Nc2ccccc2)c(=O)n1CC(=O)NCc1cc...,474.568,1.58749,0.234833,‚ùå
2,CC1(F)C2CC3OB(C(CCCCN)NC(=O)C4CCCN4C(=O)c4cccc...,643.531,5.4841,0.203266,‚ùå
0,N=C(N)NCSCC(NC(=O)CN(CC1(c2ccccc2)CC1)C(=O)CCc...,511.457,0.85017,0.072341,‚ùå
3,CC(C)(CN(CC(=O)NC(CCNN=C(N)N)B(O)O)C(=O)CCc1cc...,496.421,0.0905,0.070886,‚ùå
7,CC1=CC(C(=O)NC(CCCCN)C(=O)C(=O)NSCc2ccc(C(N)=O...,711.845,2.391,0.058905,‚ùå
8,CC1=CC(C(=O)NC(CCCCN)C(=O)C(=O)NOCc2ccc(C(N)=O...,695.777,1.6744,0.058655,‚ùå
