<a href="https://colab.research.google.com/github/BNdiege/Tianic-flask-app/blob/main/ADME__analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
# Install RDKit in Colab
!pip install rdkit



In [15]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Descriptors, Crippen, rdMolDescriptors

In [19]:
# Read the GC-MS compound table. The processing cell below reads its
# "PubChem CID" and "SMILES" columns. The file is decoded as latin-1.
df = pd.read_csv(
    "/content/PE_trial conversion.csv",
    encoding="latin-1",
)
# Preview the first five rows (the default for head()).
df.head()

Unnamed: 0,PubChem CID,SMILES
0,779,C(CO)C(C(=O)O)N
1,8051,CCCCCC(=O)C
2,7720,CCCCC(CC)CO
3,8175,CCCCCCCCCC=O
4,8186,CCCCCCCCCCC=O


In [20]:
# Define functions for ADME-related rules
def lipinski(mol):
    """Count how many of Lipinski's Rule of 5 criteria a molecule breaks.

    Args:
        mol: RDKit molecule to evaluate.

    Returns:
        int: Number of violated criteria (0-4). A criterion is violated when
        molecular weight > 500, Crippen LogP > 5, H-bond donors > 5, or
        H-bond acceptors > 10.
    """
    # One boolean per criterion; True means the threshold is exceeded.
    exceeded = (
        Descriptors.MolWt(mol) > 500,
        Crippen.MolLogP(mol) > 5,
        rdMolDescriptors.CalcNumHBD(mol) > 5,
        rdMolDescriptors.CalcNumHBA(mol) > 10,
    )
    # Booleans add up as 0/1, giving the violation count.
    return sum(exceeded)

def veber(mol):
    """Evaluate Veber's rule (oral bioavailability) for a molecule.

    Args:
        mol: RDKit molecule to evaluate.

    Returns:
        bool: True when the molecule has at most 10 rotatable bonds and a
        TPSA of at most 140; False otherwise.
    """
    flexible_enough = Descriptors.NumRotatableBonds(mol) <= 10
    polar_enough = rdMolDescriptors.CalcTPSA(mol) <= 140
    return flexible_enough and polar_enough

# Process compounds: compute descriptors and rule-based ADME flags for each row of `df`.
results = []
skipped = []  # (PubChem CID, SMILES) pairs that could not be turned into a molecule
for cid, smiles in zip(df["PubChem CID"], df["SMILES"]):
    # Empty CSV cells arrive from pandas as NaN (a float), which MolFromSmiles
    # cannot accept, so only strings are handed to RDKit.
    mol = Chem.MolFromSmiles(smiles) if isinstance(smiles, str) else None
    if mol is None:
        # Record the failure instead of silently dropping the compound.
        skipped.append((cid, smiles))
        continue

    mw = Descriptors.MolWt(mol)
    logp = Crippen.MolLogP(mol)
    hbd = rdMolDescriptors.CalcNumHBD(mol)
    hba = rdMolDescriptors.CalcNumHBA(mol)
    tpsa = rdMolDescriptors.CalcTPSA(mol)
    rot_bonds = Descriptors.NumRotatableBonds(mol)
    lipinski_violations = lipinski(mol)
    veber_ok = veber(mol)

    # Assign GI absorption (simple proxy based on TPSA and LogP cut-offs,
    # not a fitted absorption model)
    gi_absorption = "High" if (tpsa <= 140 and logp <= 5) else "Low"
    # Drug-like = at most one Lipinski violation and Veber's rule satisfied
    druglikeness = "Yes" if (lipinski_violations <= 1 and veber_ok) else "No"

    results.append({
        "PubChem CID": cid,
        "SMILES": smiles,
        "MW": round(mw, 2),
        "LogP": round(logp, 2),
        "HBD": hbd,
        "HBA": hba,
        "TPSA": round(tpsa, 2),
        "Rotatable Bonds": rot_bonds,
        "Lipinski Violations": lipinski_violations,
        "GI Absorption": gi_absorption,
        "Druglikeness": druglikeness
    })

# Make any data loss visible: these compounds are absent from the output table.
if skipped:
    print(f"Skipped {len(skipped)} compound(s) with missing or unparseable SMILES: {skipped}")

# Save results
adme_df = pd.DataFrame(results)
adme_df.to_csv("ADME_results.csv", index=False)

# Preview the first rows of the results table
adme_df.head()


Unnamed: 0,PubChem CID,SMILES,MW,LogP,HBD,HBA,TPSA,Rotatable Bonds,Lipinski Violations,GI Absorption,Druglikeness
0,779,C(CO)C(C(=O)O)N,119.12,-1.22,3,3,83.55,3,0,High,Yes
1,8051,CCCCCC(=O)C,114.19,2.16,0,1,17.07,4,0,High,Yes
2,7720,CCCCC(CC)CO,130.23,2.2,1,1,20.23,5,0,High,Yes
3,8175,CCCCCCCCCC=O,156.27,3.33,0,1,17.07,8,0,High,Yes
4,8186,CCCCCCCCCCC=O,170.3,3.72,0,1,17.07,9,0,High,Yes


In [21]:
from google.colab import files

# Hand the results CSV written by the processing cell to the Colab download helper.
files.download("ADME_results.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>