In [2]:
import pandas as pd
from ClickReaction import CuAAC
import rdkit as rk
from PIL import Image  # Import PIL for visualization
from rdkit import Chem
from IPython.display import display
from rdkit.Chem import AllChem, Draw
from rdkit.Chem.rdMolDescriptors import CalcNumHeteroatoms
import random #library for the generation of DNA Tags
from openpyxl import Workbook
from rdkit.Chem import SDMolSupplier, SDWriter
from rdkit.Chem import rdChemReactions
from rdkit.Chem.Draw import IPythonConsole  # Ensures images render properly
from ClickReaction.BaseReaction import BaseReaction, Reactant, Reactants
from rdkit.Chem import PandasTools
from rdkit import DataStructs
import numpy as np

In [3]:
# Function to classify compounds based on functional groups
def classify_compound(mol):
    if mol.HasSubstructMatch(Chem.MolFromSmarts("[CX3](=O)[OX2H]")):
        return "Carboxylic Acid"
    elif mol.HasSubstructMatch(Chem.MolFromSmarts("[OX2H]")):
        return "Alcohol"
    elif mol.HasSubstructMatch(Chem.MolFromSmarts("[NX3;H2,H1]")):
        return "Amine"
    elif mol.HasSubstructMatch(Chem.MolFromSmarts("[Cl,Br,I]")):
        return "Halogen"
    elif mol.HasSubstructMatch(Chem.MolFromSmarts("[CX3](=O)[OX2][CX4]")):
        return "Ester"
    else:
        return "Other"

In [4]:
def classify_amine(mol):
    if mol.HasSubstructMatch(Chem.MolFromSmarts("[NX3;H2]")):
        return "Primary Amine"
    elif mol.HasSubstructMatch(Chem.MolFromSmarts("[NX3;H1]")):
        return "Secondary Amine"
    else:
        return "Other"

In [5]:
# Load SDF file
sdf_file = "AsinexDNA-encoded libraries.sdf"  # Change this to your SDF file path
supplier = Chem.SDMolSupplier(sdf_file)

In [56]:
# Create lists to store results
alcohols = []
carboxylic_acids = []
amines = []
halogens = []
esters = []
primary_amines = []
secondary_amines = []
others = []

for mol in supplier:
    if mol is not None:
        smiles = Chem.MolToSmiles(mol)
        classification = classify_compound(mol)
        
        if classification == "Halogen":
            halogens.append(Chem.MolFromSmiles(smiles)) 
        elif classification == "Carboxylic Acid":
            carboxylic_acids.append(Chem.MolFromSmiles(smiles))
        elif classification == "Amine":
            amines.append(Chem.MolFromSmiles(smiles))
        elif classification == "Alcohol":
            alcohols.append(Chem.MolFromSmiles(smiles))
        elif classification == "Ester":
            esters.append(Chem.MolFromSmiles(smiles))
        else:
            others.append(Chem.MolFromSmiles(smiles))
            
for amine in amines:
    if amine is not None:
        amine_classification = classify_amine(amine)
        if amine_classification == "Primary Amine":
                primary_amines.append(amine)
        elif amine_classification == "Secondary Amine":
                secondary_amines.append(amine)
        else:
                others.append(amine)

In [57]:
print("OH:",len(alcohols))
print("R-NH2:", len(primary_NH2))
print("R-NH-R:", len(secondary_NH))
print("COOH:",len(carboxylic_acids))
print("Halogens",len(halogens))
print("Esters",len(esters))

OH: 262
R-NH2: 102
R-NH-R: 158
COOH: 795
Halogens 46
Esters 14


In [76]:
pd.DataFrame(alcohols, columns=["SMILES"]).to_csv("alcohols.csv", index=False)

In [78]:
alcohol_smiles = []

for i in alcohols:
    alcohol_smiles.append(Chem.MolToSmiles(i))

pd.DataFrame(alcohol_smiles, columns=["SMILES"]).to_csv("alcohols.csv", index=False)

In [8]:
def alcohol_classifier(mol):
    if mol.HasSubstructMatch(Chem.MolFromSmarts("[NX3;H2,H1]")):
        return "Amine" 
    elif mol.HasSubstructMatch(Chem.MolFromSmarts("[Cl,Br,I]")):
        return "Halogen_alcohol"

In [9]:
OH_halogen = []
OH_NH = []

for alcohol in alcohols:
    classification = alcohol_classifier(alcohol)
    
    if classification == "Halogen_alcohol":
        OH_halogen.append(alcohol)
    elif classification == "amine":
        OH_NH.append(alcohol)
        
        
print("OH_halogen:", len(OH_halogen), "alcohols:", len(alcohols))

OH_halogen: 4 alcohols: 262


Reaccionar amina secondaria de alcoholes con halogenos

In [48]:
# Define the amine coupling reaction (carboxylic acid + amine → amide)
reaction_smarts = '[C:1](=[O:2])[OH:3].[N:4]>>[C:1](=[O:2])[N:4]'

# Load the reaction into RDKit
rxn = AllChem.ReactionFromSmarts(reaction_smarts)

carboxylic_acids = ['CC(=O)O', 'CCC(=O)O', 'C1=CC=C(C=C1)C(=O)O']  # Acetic, Propionic, Benzoic acid
amines = ['NCC', 'NC(C)C', 'NC1CCCCC1']  # Ethylamine, Isopropylamine, Cyclohexylamine

amide_products = []
for carboxylic_acid_smiles in carboxylic_acids:
    for amine_smiles in amines:
        carboxylic_acid = Chem.MolFromSmiles(carboxylic_acid_smiles)
        amine = Chem.MolFromSmiles(amine_smiles)

        if carboxylic_acid and amine:  # Ensure valid molecules
            products = rxn.RunReactants((carboxylic_acid, amine))

            # Store the product SMILES
            for prod_set in products:
                for product in prod_set:
                    product_smiles = Chem.MolToSmiles(product)
                    amide_products.append(product_smiles)

# Print all amide products
#print("\nGenerated Amides:")
print(amide_products)

['CCNC(C)=O', 'CC(=O)NC(C)C', 'CC(=O)NC1CCCCC1', 'CCNC(=O)CC', 'CCC(=O)NC(C)C', 'CCC(=O)NC1CCCCC1', 'CCNC(=O)c1ccccc1', 'CC(C)NC(=O)c1ccccc1', 'O=C(NC1CCCCC1)c1ccccc1']


[15:40:35] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 3 


In [67]:
# Define the amine coupling reaction (carboxylic acid + amine → amide)
reaction_smarts = '[C:1](=[O:2])[OH:3].[N:4]>>[C:1](=[O:2])[N:4]'

# Load the reaction into RDKit
rxn = AllChem.ReactionFromSmarts(reaction_smarts)

amide_products = []
for carboxylic_acid_smiles in carboxylic_acids:
    for amine_smiles in primary_amines:

        if carboxylic_acid and amine:  # Ensure valid molecules
            products = rxn.RunReactants((carboxylic_acid, amine))

            # Store the product SMILES
            for prod_set in products:
                for product in prod_set:
                    #product_smiles = Chem.MolToSmiles(product)
                    amide_products.append(product)

# Print all amide products
#print("\nGenerated Amides:")
print(amide_products)

[15:54:06] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 3 
IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [76]:
Chem.MolToSmiles(carboxylic_acids[27])

'Cc1nc(N)nc(C)c1CC(=O)O'

In [79]:
Chem.MolToSmiles(primary_amines[58])

'CC(C)(C)OC(=O)N1CCCCC1c1cc(N)[nH]n1'

In [90]:
# Define the amine coupling reaction (carboxylic acid + amine → amide)
reaction_smarts = '[C:1](=[O:2])[OH:3].[N:4]>>[C:1](=[O:2])[N:4]'

# Load the reaction into RDKit
rxn = AllChem.ReactionFromSmarts(reaction_smarts)

carboxylic_acids = ['CC(C)Cn1c(SCC(=O)O)nc2c1c(=O)[nH]c(=O)n2C', 'CCOC(=O)c1[nH]c(C)c(C(=O)O)c1C', 'Cc1nc(N)nc(C)c1CC(=O)O']  # Acetic, Propionic, Benzoic acid
amines = ['CC(C)(C)OC(=O)N1CCC(CC2(C(N)=O)CCCN2)CC1', 'Nc1nccc(C2CCCNC2)n1', 'CC(C)(C)OC(=O)N1CCCCC1c1cc(N)[nH]n1']  # Ethylamine, Isopropylamine, Cyclohexylamine

amide_products = []
for carboxylic_acid_smiles in carboxylic_acids:
    for amine_smiles in amines:
        carboxylic_acid = Chem.MolFromSmiles(carboxylic_acid_smiles)
        amine = Chem.MolFromSmiles(amine_smiles)

        if carboxylic_acid and amine:  # Ensure valid molecules
            products = rxn.RunReactants((carboxylic_acid, amine))

            # Store the product SMILES
            for prod_set in products:
                for product in prod_set:
                    amide_products.append(Chem.MolToSmiles(product))

# Print all amide products
#print("\nGenerated Amides:")
print(amide_products)

['CC(C)Cn1c(SCC(=O)N2(C(=O)OC(C)(C)C)CCC(CC3(C(N)=O)CCCN3)CC2)nc2c1c(=O)[nH]c(=O)n2C', 'CC(C)Cn1c(SCC(=O)NC(=O)C2(CC3CCN(C(=O)OC(C)(C)C)CC3)CCCN2)nc2c1c(=O)[nH]c(=O)n2C', 'CC(C)Cn1c(SCC(=O)N2CCCC2(CC2CCN(C(=O)OC(C)(C)C)CC2)C(N)=O)nc2c1c(=O)[nH]c(=O)n2C', 'CC(C)Cn1c(SCC(=O)Nc2nccc(C3CCCNC3)n2)nc2c1c(=O)[nH]c(=O)n2C', 'CC(C)Cn1c(SCC(=O)N2CCCC(c3ccnc(N)n3)C2)nc2c1c(=O)[nH]c(=O)n2C', 'CC(C)Cn1c(SCC(=O)N2(C(=O)OC(C)(C)C)CCCCC2c2cc(N)[nH]n2)nc2c1c(=O)[nH]c(=O)n2C', 'CC(C)Cn1c(SCC(=O)Nc2cc(C3CCCCN3C(=O)OC(C)(C)C)n[nH]2)nc2c1c(=O)[nH]c(=O)n2C', 'CCOC(=O)c1[nH]c(C)c(C(=O)N2(C(=O)OC(C)(C)C)CCC(CC3(C(N)=O)CCCN3)CC2)c1C', 'CCOC(=O)c1[nH]c(C)c(C(=O)NC(=O)C2(CC3CCN(C(=O)OC(C)(C)C)CC3)CCCN2)c1C', 'CCOC(=O)c1[nH]c(C)c(C(=O)N2CCCC2(CC2CCN(C(=O)OC(C)(C)C)CC2)C(N)=O)c1C', 'CCOC(=O)c1[nH]c(C)c(C(=O)Nc2nccc(C3CCCNC3)n2)c1C', 'CCOC(=O)c1[nH]c(C)c(C(=O)N2CCCC(c3ccnc(N)n3)C2)c1C', 'CCOC(=O)c1[nH]c(C)c(C(=O)N2(C(=O)OC(C)(C)C)CCCCC2c2cc(N)[nH]n2)c1C', 'CCOC(=O)c1[nH]c(C)c(C(=O)Nc2cc(C3CCCCN3C(=O)OC(C)(C)C)n[nH]

[16:05:45] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 3 


In [91]:
pd.DataFrame(amide_products, columns=["amides"]).to_csv("amide_products.csv", index=False)