In [120]:
import pandas as pd
from ClickReaction import CuAAC
import rdkit as rk
from PIL import Image  # Import PIL for visualization
from rdkit import Chem
from IPython.display import display
from rdkit.Chem import AllChem, Draw
from rdkit.Chem.rdMolDescriptors import CalcNumHeteroatoms
import random #library for the generation of DNA Tags
from openpyxl import Workbook
from rdkit.Chem import SDMolSupplier, SDWriter
from rdkit.Chem import rdChemReactions
from rdkit.Chem.Draw import IPythonConsole  # Ensures images render properly
from ClickReaction.BaseReaction import BaseReaction, Reactant, Reactants
from rdkit.Chem import PandasTools
from rdkit import DataStructs
import numpy as np

In [66]:
# Ordered (label, SMARTS) rules for classify_compound; the first matching rule wins.
_COMPOUND_RULES = (
    ("Carboxylic Acid", "[CX3](=O)[OX2H]"),
    ("Alcohol", "[OX2H]"),
    ("Amine", "[NX3;H2,H1]"),
    ("Halogen", "[Cl,Br,I]"),
    ("Ester", "[CX3](=O)[OX2][CX4]"),
)
# SMARTS string -> compiled query molecule, filled on first use so that each
# pattern is parsed once instead of once per classified molecule.
_COMPOUND_QUERIES = {}


def classify_compound(mol):
    """Return the functional-group label of the first rule that matches ``mol``.

    The rules are tested in a fixed priority order: carboxylic acid, alcohol,
    amine (N-H), halogen (Cl/Br/I only, fluorine is not included), ester.
    A molecule carrying several of these groups therefore only receives the
    label of the highest-priority one.

    Parameters
    ----------
    mol : object exposing ``HasSubstructMatch`` (an RDKit molecule).

    Returns
    -------
    str
        One of "Carboxylic Acid", "Alcohol", "Amine", "Halogen", "Ester",
        or "Other" when no rule matches.
    """
    # NOTE(review): "[NX3;H2,H1]" does not exclude amide-type N-H; confirm that
    # labelling those as "Amine" is intended.
    for label, smarts in _COMPOUND_RULES:
        query = _COMPOUND_QUERIES.get(smarts)
        if query is None:
            query = _COMPOUND_QUERIES[smarts] = Chem.MolFromSmarts(smarts)
        if mol.HasSubstructMatch(query):
            return label
    return "Other"

In [53]:
# Ordered (label, SMARTS) rules for classify_amine; NH2 is tested before NH.
_AMINE_RULES = (
    ("Primary Amine", "[NX3;H2]"),
    ("Secondary Amine", "[NX3;H1]"),
)
# SMARTS string -> compiled query molecule, filled on first use so that each
# pattern is parsed once instead of once per classified molecule.
_AMINE_QUERIES = {}


def classify_amine(mol):
    """Label ``mol`` by the most hydrogen-rich three-connected nitrogen it contains.

    A molecule with any NH2 nitrogen is reported as "Primary Amine" even if it
    also contains an NH nitrogen, because the NH2 rule is tested first.

    Parameters
    ----------
    mol : object exposing ``HasSubstructMatch`` (an RDKit molecule).

    Returns
    -------
    str
        "Primary Amine", "Secondary Amine", or "Other" when neither pattern
        matches.
    """
    for label, smarts in _AMINE_RULES:
        query = _AMINE_QUERIES.get(smarts)
        if query is None:
            query = _AMINE_QUERIES[smarts] = Chem.MolFromSmarts(smarts)
        if mol.HasSubstructMatch(query):
            return label
    return "Other"

In [3]:
# Path of the input SDF and the supplier object the later cells iterate over.
sdf_file = "AsinexDNA-encoded libraries.sdf"  # Change this to your SDF file path
supplier = SDMolSupplier(sdf_file)

Classification completed. Results saved in 'classified_compounds.csv'


In [81]:
# One list per functional-group class; re-created on every run so the cell is
# idempotent.
alcohols = []
carboxylic_acids = []
amines = []
halogens = []
esters = []
primary_NH2 = []
secondary_NH = []
others = []

# classify_compound label -> destination list. Any label that is not listed
# here (i.e. "Other") falls back to `others`.
buckets_by_label = {
    "Halogen": halogens,
    "Carboxylic Acid": carboxylic_acids,
    "Amine": amines,
    "Alcohol": alcohols,
    "Ester": esters,
}

n_skipped = 0  # records that could not be turned into a usable molecule
for mol in supplier:
    if mol is None:
        n_skipped += 1
        continue
    smiles = Chem.MolToSmiles(mol)
    classification = classify_compound(mol)
    # The stored molecule is rebuilt from SMILES rather than taken from the SDF
    # record. The rebuild can return None; such entries must not reach the
    # buckets because later cells call RDKit functions on every element.
    rebuilt = Chem.MolFromSmiles(smiles)
    if rebuilt is None:
        n_skipped += 1
        continue
    buckets_by_label.get(classification, others).append(rebuilt)

if n_skipped:
    print("Skipped unreadable molecules:", n_skipped)

# Second pass: split the amine bucket into primary / secondary amines.
# Amines that classify_amine reports as neither are moved to `others`.
amine_buckets = {
    "Primary Amine": primary_NH2,
    "Secondary Amine": secondary_NH,
}
for amine in amines:
    amine_buckets.get(classify_amine(amine), others).append(amine)

In [82]:
# Report how many molecules ended up in each functional-group bucket.
for label, bucket in (
    ("OH:", alcohols),
    ("R-NH2:", primary_NH2),
    ("R-NH-R:", secondary_NH),
    ("COOH:", carboxylic_acids),
    ("Halogens", halogens),
    ("Esters", esters),
):
    print(label, len(bucket))

OH: 262
R-NH2: 102
R-NH-R: 158
COOH: 795
Halogens 46
Esters 14


In [76]:
# Export the alcohol bucket as a one-column SMILES table.
# `alcohols` holds RDKit molecule objects, so each one is converted to its
# SMILES string first; passing the objects straight to the DataFrame would not
# write SMILES text into the "SMILES" column.
pd.DataFrame(
    {"SMILES": [Chem.MolToSmiles(mol) for mol in alcohols]}
).to_csv("alcohols.csv", index=False)

In [78]:
# SMILES string for every molecule in the alcohol bucket, in bucket order.
alcohol_smiles = [Chem.MolToSmiles(alcohol_mol) for alcohol_mol in alcohols]

# Persist the strings as a single-column CSV (header "SMILES", no index column).
alcohol_table = pd.DataFrame(alcohol_smiles, columns=["SMILES"])
alcohol_table.to_csv("alcohols.csv", index=False)

In [108]:
# Ordered (label, SMARTS) rules for alcohol_classifier; halogen is tested first.
_ALCOHOL_RULES = (
    ("Halogen_alcohol", "[Cl,Br,I]"),
    ("Amine", "[NX3;H2,H1]"),
)
# SMARTS string -> compiled query molecule, filled on first use so that each
# pattern is parsed once instead of once per classified molecule.
_ALCOHOL_QUERIES = {}


def alcohol_classifier(mol):
    """Report which additional functional group ``mol`` carries.

    The function does not itself check for a hydroxyl group; it only tests for
    a Cl/Br/I atom and then for an N-H nitrogen. Because the halogen rule is
    tested first, a molecule with both groups is labelled "Halogen_alcohol".

    Parameters
    ----------
    mol : object exposing ``HasSubstructMatch`` (an RDKit molecule).

    Returns
    -------
    str or None
        "Halogen_alcohol", "Amine" (capital "A"), or ``None`` when neither
        pattern matches.
    """
    for label, smarts in _ALCOHOL_RULES:
        query = _ALCOHOL_QUERIES.get(smarts)
        if query is None:
            query = _ALCOHOL_QUERIES[smarts] = Chem.MolFromSmarts(smarts)
        if mol.HasSubstructMatch(query):
            return label
    # Kept as None (not "Other") so existing callers see the same value as before.
    return None

In [109]:
# Split the alcohol bucket by the extra functional group alcohol_classifier reports.
OH_halogen = []
OH_NH = []

for alcohol in alcohols:
    classification = alcohol_classifier(alcohol)

    if classification == "Halogen_alcohol":
        OH_halogen.append(alcohol)
    # alcohol_classifier returns "Amine" with a capital "A"; comparing against
    # the lowercase spelling never matched and left OH_NH permanently empty.
    elif classification == "Amine":
        OH_NH.append(alcohol)

print("OH_halogen:", len(OH_halogen), "alcohols:", len(alcohols))
print("OH_NH:", len(OH_NH))

OH_halogen: 33 alcohols: 262


In [129]:
# Reaction template: an NH2 nitrogen attacks an sp3 carbon bearing Cl or Br.
_SN2_SMARTS = "[NX3;H2:1].[CX4:2][Cl,Br]>>[NX3:1][CX4:2]"
# Holds the parsed reaction once it has been built, so the template is not
# re-parsed for every nucleophile/electrophile pair.
_SN2_REACTION = []


def perform_sn2(nucleophile, electrophile):
    """Enumerate N-alkylation products of ``nucleophile`` with ``electrophile``.

    The template only fires when the nucleophile has an NH2 nitrogen and the
    electrophile has a four-connected carbon bonded to Cl or Br; any other
    pair produces an empty list.

    Parameters
    ----------
    nucleophile, electrophile : RDKit molecules, passed to the reaction in
        that order.

    Returns
    -------
    list of str
        SMILES of the distinct products, in the order they are first produced.
        Products that fail sanitization are dropped.
    """
    if not _SN2_REACTION:
        _SN2_REACTION.append(AllChem.ReactionFromSmarts(_SN2_SMARTS))
    rxn = _SN2_REACTION[0]

    # dict used as an insertion-ordered set: several atom mappings of the same
    # pair can yield the same product.
    seen = {}
    for product_set in rxn.RunReactants((nucleophile, electrophile)):
        product = product_set[0]  # the template defines a single product
        # Reaction products come back unsanitized; sanitize before writing
        # SMILES and skip anything chemically invalid.
        failed = Chem.SanitizeMol(product, catchErrors=True)
        if failed != Chem.SanitizeFlags.SANITIZE_NONE:
            continue
        seen.setdefault(Chem.MolToSmiles(product), None)
    return list(seen)

In [132]:
# Run perform_sn2 on every (amine, halide) pair and flatten all returned
# product SMILES into one list.
products = [
    product_smiles
    for nu in amines
    for el in halogens
    for product_smiles in perform_sn2(nu, el)
]

In [133]:
# Show the product SMILES collected by the SN2 enumeration cell above.
products

[]