# SyGMa processing for metabolites

Info: Jupyter notebook originally created by Louis-Felix Nothias and Anupriya Tripathi (UC San Diego) in April 2018.

Description: Process structures given as SMILES and generate their predicted Phase I and Phase II metabolites (this notebook runs a single parent structure).

In [1]:
import pandas as pd
import numpy as np
import sygma
from rdkit import Chem

## Running SyGMa

Each step in a scenario lists a ruleset and the number of reaction cycles
to apply with that ruleset.

In [2]:
# Metabolic scenario: every step pairs a ruleset with the number of
# reaction cycles to run with it; steps are passed in the order listed.
phase1_step = [sygma.ruleset['phase1'], 3]
phase2_step = [sygma.ruleset['phase2'], 2]
scenario = sygma.Scenario([phase1_step, phase2_step])

In [3]:
# SMILES of the parent compound to metabolise
# (presumably sildenafil, going by the output file name used when saving).
PARENT_SMILES = "CCCc1nn(C)c2c1nc([nH]c2=O)-c1cc(ccc1OCC)S(=O)(=O)N1CCN(C)CC1"
parent = Chem.MolFromSmiles(PARENT_SMILES)

In [4]:
# Apply the scenario to the parent molecule and keep the result.
metabolic_tree = scenario.run(parent)
# Score the result; the return value is discarded, so this presumably
# annotates the tree in place — confirm against the SyGMa documentation.
metabolic_tree.calc_scores()

In [5]:
# Export the scored tree via to_list().
# NOTE(review): entry format is not visible here — see the SyGMa docs.
# NOTE(review): no later cell visible here reads `metabolite_list`.
metabolite_list = metabolic_tree.to_list()

In [6]:
# Build a tidy table of predicted metabolites: one row per metabolite with
# its SMILES, its score, and the SMILES of the parent it was derived from.
#
# The tree was already generated and scored in the earlier cell, so it is
# reused here rather than running the scenario a second time.
metabolites = metabolic_tree.to_smiles()

# Each entry is a [smiles, score] pair. The first entry is taken as the
# parent structure and everything after it as its metabolites.
# NOTE(review): assumes to_smiles() always lists the parent first — confirm
# against the SyGMa documentation.
parent_smiles = metabolites[0][0]

# Name the columns explicitly at construction time. Index objects are meant
# to be immutable; patching `df.columns.values` in place can leave stale
# cached label lookups behind.
df = pd.DataFrame(metabolites[1:], columns=['metabolite', 'score'])
df['parent'] = parent_smiles

In [9]:
# Display the metabolite table; pandas abbreviates a long frame to its
# first and last rows.
df

Unnamed: 0,metabolite,score,parent
0,CCCc1nn(C)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCNCC4...,4.180000e-01,CCCc1nn(C)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)...
1,CCCc1n[nH]c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)...,2.530000e-01,CCCc1nn(C)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)...
2,CCCc1nn(C)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CC[N+]...,1.900000e-01,CCCc1nn(C)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)...
3,CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(CC(C)...,1.060000e-01,CCCc1nn(C)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)...
4,CCCc1n[nH]c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCNCC4...,1.057540e-01,CCCc1nn(C)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)...
...,...,...,...
32386,CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1n(C2OC(C(=...,2.318400e-09,CCCc1nn(C)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)...
32387,CCOc1c(O)cc(S(=O)(=O)N2CCN(C)CC2)cc1-c1[nH]c(=...,2.257920e-09,CCCc1nn(C)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)...
32388,CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(CCOS(...,2.086560e-09,CCCc1nn(C)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)...
32389,CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1[nH]c(=O)c...,2.086560e-09,CCCc1nn(C)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)...


In [8]:
# Save the metabolite table as a tab-separated file, without the row index.
output_path = 'Sildenafil_Sygma_3_2_metabolites.tsv'
df.to_csv(output_path, sep='\t', index=False)