In [1]:
import pandas as pd
from ClickReaction import CuAAC
import rdkit as rk
from PIL import Image  # Import PIL for visualization
from rdkit import Chem
from IPython.display import display
from rdkit.Chem import AllChem, Draw
from rdkit.Chem.rdMolDescriptors import CalcNumHeteroatoms
from rdkit.Chem import Descriptors 
import random #library for the generation of DNA Tags
from openpyxl import Workbook
from rdkit.Chem import SDMolSupplier, SDWriter
from rdkit.Chem import rdChemReactions
from rdkit.Chem.Draw import IPythonConsole  # Ensures images render properly
from ClickReaction.BaseReaction import BaseReaction, Reactant, Reactants
from rdkit.Chem import PandasTools
from rdkit import DataStructs
import numpy as np

In [2]:
df = pd.read_excel('BB_Tag.xlsx', sheet_name = 'Hoja1')

In [3]:
df

Unnamed: 0,DNA-Tag,Unnamed: 1,Secondary amines
0,GTTGGGGGTTG,,O[C@@H](CNCC1)[C@H]1c1ccccc1
1,CATACGCATAC,,O=C1CNCC(CO)C1
2,ATTAGGGATTA,,CNC2CC(CO)CC(c1ccccc1)C2
3,GTGCCCCCGTG,,OCC2C=C(N1CCCC1)CC3=C2CCNCC3
4,GACCTGTCCAG,,CC(=O)CC2CCC1NCC=CCC1C2
5,TAAAACAAAAT,,OCCCC2CNC(c1ccccc1)C2
6,TTATGAGTATT,,Oc3ccc(C1NCC(O)C1c2ccccc2)cc3
7,ACTCCTCCTCA,,CNc1cccc2cc(CCO)c(Cl)cc12
8,ATCGTACGTCA,,OC(Oc1ccccc1)C2CCNCC2
9,GCTAGTTAGCT,,CNc2cccc(N1CCC(O)CC1)c2


In [4]:
df.columns

Index(['DNA-Tag', 'Unnamed: 1', 'Secondary amines'], dtype='object')

In [4]:
new_df = df.drop('Unnamed: 1', axis = 1)

In [5]:
new_df

Unnamed: 0,DNA-Tag,Secondary amines
0,GTTGGGGGTTG,O[C@@H](CNCC1)[C@H]1c1ccccc1
1,CATACGCATAC,O=C1CNCC(CO)C1
2,ATTAGGGATTA,CNC2CC(CO)CC(c1ccccc1)C2
3,GTGCCCCCGTG,OCC2C=C(N1CCCC1)CC3=C2CCNCC3
4,GACCTGTCCAG,CC(=O)CC2CCC1NCC=CCC1C2
5,TAAAACAAAAT,OCCCC2CNC(c1ccccc1)C2
6,TTATGAGTATT,Oc3ccc(C1NCC(O)C1c2ccccc2)cc3
7,ACTCCTCCTCA,CNc1cccc2cc(CCO)c(Cl)cc12
8,ATCGTACGTCA,OC(Oc1ccccc1)C2CCNCC2
9,GCTAGTTAGCT,CNc2cccc(N1CCC(O)CC1)c2


In [18]:
Amine_Tags = list(new_df["DNA-Tag"][0:9])
Halogen_Tags = list(new_df["DNA-Tag"][10:19])
amines = []
halides = []

for i in new_df['Secondary amines'][0:9]:
    amines.append(Chem.MolFromSmiles(i))
    
for j in new_df['Secondary amines'][10:19]:
    halides.append(Chem.MolFromSmiles(j))

In [35]:
sn2_reaction = AllChem.ReactionFromSmarts("[N:1]>>[N:1]")  # Placeholder, will define dynamically

amines = [
    "O[C@@H](CNCC1)[C@H]1c1ccccc1",
    "O=C1CNCC(CO)C1",
    "CNC2CC(CO)CC(c1ccccc1)C2",
    "OCC2C=C(N1CCCC1)CC3=C2CCNCC3",
    "CC(=O)CC2CCC1NCC=CCC1C2",
    "OCCCC2CNC(c1ccccc1)C2",
    "Oc3ccc(C1NCC(O)C1c2ccccc2)cc3",
    "CNc1cccc2cc(CCO)c(Cl)cc12",
    "OC(Oc1ccccc1)C2CCNCC2",
    "CNc2cccc(N1CCC(O)CC1)c2"
]

halides = [
    "O=C2c1ccccc1C(CBr)c3ccccc23",
    "C1COC(C1)Br",
    "C1=CSC=C1Br",
    "C1=CC=C(C=C1)CCl",
    "FC(F)(F)Cc2cccc1CC(Cl)Cc12",
    "CC(Br)Cc2cccc3c1ccccc1oc23",
    "BrCCc2ccc1nonc1c2",
    "COc1ccc(CBr)cc1C2CCCC2",
    "CCN(CC)C(Br)Cc1ccccc1",
    "Cc2cc(N1CCCC1)cc(C)c2CBr"
]

# Function to perform SN2 reactions
def perform_sn2_reactions(nucleophiles, electrophiles):
    product_mols = []
    
    for amine in nucleophiles:
        amine_mol = Chem.MolFromSmiles(amine)
        if amine_mol:
            for halide in electrophiles:
                halide_mol = Chem.MolFromSmiles(halide)
                if halide_mol:
                    # Define SN2 reaction SMARTS dynamically
                    reaction_smarts = f"[C:1][Br,Cl:2].[N:3]>>[C:1][N:3]"
                    sn2_reaction = AllChem.ReactionFromSmarts(reaction_smarts)
                    
                    # Apply the reaction
                    products = sn2_reaction.RunReactants((halide_mol, amine_mol))
                    
                    if products:
                        for product in products:
                            product_mols.append(product[0])  # Store RDKit Mol objects

    return product_mols

# Run the reactions
sn2_products = perform_sn2_reactions(amines, halides)

# Print the number of products
print(f"Total products generated: {len(sn2_products)}")

# Optional: Show SMILES representation of the first few products
for mol in sn2_products[:5]:
    print(Chem.MolToSmiles(mol))

Total products generated: 108
O=C1c2ccccc2C(CN2CC[C@H](c3ccccc3)[C@@H](O)C2)c2ccccc21
O[C@H]1CN(C2CCCO2)CC[C@@H]1c1ccccc1
O[C@H]1CN(Cc2ccccc2)CC[C@@H]1c1ccccc1
O[C@H]1CN(C2Cc3cccc(CC(F)(F)F)c3C2)CC[C@@H]1c1ccccc1
CC(Cc1cccc2c1oc1ccccc12)N1CC[C@H](c2ccccc2)[C@@H](O)C1


[13:15:31] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 2 
[13:15:31] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 2 
[13:15:31] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 2 
[13:15:31] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 2 
[13:15:31] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 2 
[13:15:31] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 2 
[13:15:31] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 2 
[13:15:31] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 2 
[13:15:31] mapped atoms in the reactants were not mapped in the products.
  unmapped numbers are: 2 
[13:15:31] mapped atoms in the reactants were not mapped in the products.
  unmapped number

In [50]:
def sanitize_molecules(mol_list):
    sanitized = []
    for mol in mol_list:
        try:
            Chem.SanitizeMol(mol)
            sanitized.append(mol)
        except:
            pass  # Ignore molecules that cannot be sanitized
    return sanitized

# Example usage
sanitized_sn2_products = sanitize_molecules(sn2_products)

[13:36:20] Explicit valence for atom # 1 N, 4, is greater than permitted
[13:36:20] Explicit valence for atom # 1 N, 4, is greater than permitted
[13:36:20] Explicit valence for atom # 1 N, 4, is greater than permitted
[13:36:20] Explicit valence for atom # 1 N, 4, is greater than permitted
[13:36:20] Explicit valence for atom # 1 N, 4, is greater than permitted
[13:36:20] Explicit valence for atom # 1 N, 4, is greater than permitted
[13:36:20] Explicit valence for atom # 1 N, 4, is greater than permitted
[13:36:20] Explicit valence for atom # 1 N, 4, is greater than permitted
[13:36:20] Explicit valence for atom # 1 N, 4, is greater than permitted
[13:36:20] Explicit valence for atom # 1 N, 4, is greater than permitted
[13:36:20] Explicit valence for atom # 1 N, 4, is greater than permitted
[13:36:20] Explicit valence for atom # 1 N, 4, is greater than permitted
[13:36:20] Explicit valence for atom # 1 N, 4, is greater than permitted
[13:36:20] Explicit valence for atom # 1 N, 4, is g

In [54]:
mol_wt = []
for i in sanitized_sn2_products:
    mol_wt.append(Descriptors.MolWt(i))
    
sum(mol_wt)/len(sanitized_sn2_products)

372.45227777777797

In [42]:
DNA_tag = new_df['DNA-Tag']
SMILES = new_df['SMILES']

data_base = {}

for key, value in zip(DNA_tag, SMILES):
    data_base[key] = value
    
data_base

{'GTTGGGGGTTG': 'C#CC(c1ccccc1)N',
 'CATACGCATAC': 'C#CCC(c1ccccc1)N',
 'ATTAGGGATTA': 'OC(c1cc(Oc2ccccc2)ccc1)=O',
 'GTGCCCCCGTG': 'OC(CC(CC1)CCC1C(F)(F)F)=O',
 'GACCTGTCCAG': 'OC(C(CC1)CN1c1ccccc1)=O',
 'TAAAACAAAAT': 'C1CCCC1C[N-]-[N+]#N',
 'TTATGAGTATT': 'c1ccccc1C[N-]-[N+]#N',
 'ACTCCTCCTCA': 'C1CCCCCC1C[N-]-[N+]#N',
 nan: 'CC(=O)C3Nc1c(C)c(C)ccc1C2C=C(N=[N+]=N)CC23'}

In [45]:
def classify_molecules(df):
    azide_smarts = '[N-][N+]#[N]'
    carboxylic_acid_smarts = 'C(=O)[OH]'
    alkyne_smarts = 'C#C'
    
    azide_pat = Chem.MolFromSmarts(azide_smarts)
    carboxylic_acid_pat = Chem.MolFromSmarts(carboxylic_acid_smarts)
    alkyne_pat = Chem.MolFromSmarts(alkyne_smarts)
    
    azides = []
    carboxylic_acids = []
    alkynes = []
    
    for smiles in new_df['SMILES']:
        mol = Chem.MolFromSmiles(smiles)
        if mol:
            if mol.HasSubstructMatch(azide_pat):
                azides.append(Chem.MolFromSmiles(smiles))
            if mol.HasSubstructMatch(carboxylic_acid_pat):
                carboxylic_acids.append(Chem.MolFromSmiles(smiles))
            if mol.HasSubstructMatch(alkyne_pat):
                alkynes.append(Chem.MolFromSmiles(smiles))
    
    return azides, carboxylic_acids, alkynes

In [46]:
azides, carboxylic_acids, alkynes = classify_molecules(suppl)

print("Azides:", azides)
print("Carboxylic Acids:", carboxylic_acids)
print("Alkynes:", alkynes)

Azides: [<rdkit.Chem.rdchem.Mol object at 0x1321aa120>, <rdkit.Chem.rdchem.Mol object at 0x1321aa270>, <rdkit.Chem.rdchem.Mol object at 0x1321aa040>]
Carboxylic Acids: [<rdkit.Chem.rdchem.Mol object at 0x1300cc900>, <rdkit.Chem.rdchem.Mol object at 0x1300cc820>, <rdkit.Chem.rdchem.Mol object at 0x1321aa190>, <rdkit.Chem.rdchem.Mol object at 0x1321aa200>]
Alkynes: [<rdkit.Chem.rdchem.Mol object at 0x1300ccac0>, <rdkit.Chem.rdchem.Mol object at 0x1300cca50>]


In [47]:
#DNA_tag = new_df['DNA-Tag']
#SMILES = new_df['SMILES']

azides_dict = {}
alkynes_dict ={}
Carboxylic_dict = {}

for key, value in zip(Azide_Tags, azides):
    azides_dict[key] = value
    
for key, value in zip(Alkyne_Tags, alkynes):
    alkynes_dict[key] = value
    
for key, value in zip(Carboxy_Tags, carboxylic_acids):
    Carboxylic_dict[key] = value
    
print("azides", azides_dict)
print("Alkynes", alkynes_dict)
print("Carbox", Carboxylic_dict)

azides {'GTTGGGGGTTG': <rdkit.Chem.rdchem.Mol object at 0x1321aa120>, 'CATACGCATAC': <rdkit.Chem.rdchem.Mol object at 0x1321aa270>, 'ATTAGGGATTA': <rdkit.Chem.rdchem.Mol object at 0x1321aa040>}
Alkynes {'GTGCCCCCGTG': <rdkit.Chem.rdchem.Mol object at 0x1300ccac0>, 'GACCTGTCCAG': <rdkit.Chem.rdchem.Mol object at 0x1300cca50>}
Carbox {'TTATGAGTATT': <rdkit.Chem.rdchem.Mol object at 0x1300cc900>}


In [None]:
# Define the SN2 reaction SMARTS pattern
reaction_smarts = '[C:1]-Br.[N:2]>>[C:1]-[N:2]'

# Create the reaction object from SMARTS
reaction = AllChem.ReactionFromSmarts(reaction_smarts)

# Perform the reaction
reactants = [electrophile, nucleophile]
product_sets = reaction.RunReactants(reactants)

In [48]:
first_rxn_prods = [] 

for azide in azides_dict.values():
    for alkyne in alkynes_dict.values():
        reaction = CuAAC(alkyne, azide)
        product = reaction.get_product()
        first_rxn_prods.append(product)

In [49]:
first_rxn_tags = []

for i in azides_dict.keys():
    for j in alkynes_dict.keys():
        first_rxn_tags.append(i + j)


first_rxn_tags

['GTTGGGGGTTGGTGCCCCCGTG',
 'GTTGGGGGTTGGACCTGTCCAG',
 'CATACGCATACGTGCCCCCGTG',
 'CATACGCATACGACCTGTCCAG',
 'ATTAGGGATTAGTGCCCCCGTG',
 'ATTAGGGATTAGACCTGTCCAG']

In [50]:
first_rxn_products = {}

for key, value in zip(first_rxn_tags, first_rxn_prods):
    first_rxn_products[key] = value
    
first_rxn_products

{'GTTGGGGGTTGGTGCCCCCGTG': <rdkit.Chem.rdchem.Mol at 0x1300cc970>,
 'GTTGGGGGTTGGACCTGTCCAG': <rdkit.Chem.rdchem.Mol at 0x1321aa350>,
 'CATACGCATACGTGCCCCCGTG': <rdkit.Chem.rdchem.Mol at 0x1321aa3c0>,
 'CATACGCATACGACCTGTCCAG': <rdkit.Chem.rdchem.Mol at 0x1321aaba0>,
 'ATTAGGGATTAGTGCCCCCGTG': <rdkit.Chem.rdchem.Mol at 0x1321aaa50>,
 'ATTAGGGATTAGACCTGTCCAG': <rdkit.Chem.rdchem.Mol at 0x1321aac10>}

In [22]:
comps = list(first_rxn_products.values())

df = pd.DataFrame(comps)