In [7]:
import pandas as pd
import rdkit as rk
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.rdMolDescriptors import CalcNumHeteroatoms
import random #library for the generation of DNA Tags
from openpyxl import Workbook

In [8]:
def generate_random_sequences(num_sequences=10, length=5, seed=None):
    """Generate random DNA sequences over the alphabet A, C, G, T.

    Each sequence is drawn independently, so duplicates are possible.

    Parameters
    ----------
    num_sequences : int
        Number of sequences to return (default 10).
    length : int
        Number of bases in every sequence (default 5).
    seed : optional
        When given, a private ``random.Random(seed)`` generator is used so the
        same seed always yields the same sequences. When ``None`` (default) the
        module-level ``random`` generator is used, exactly as before.

    Returns
    -------
    list of str
        ``num_sequences`` strings, each ``length`` characters long.
    """
    # Fall back to the shared module-level generator unless a seed is requested
    rng = random if seed is None else random.Random(seed)
    return [''.join(rng.choices("ACGT", k=length)) for _ in range(num_sequences)]

# Draw ten sequences of five bases each and show them one per line
random_sequences = generate_random_sequences(num_sequences=10, length=5)
print(*random_sequences, sep="\n")

ATGCC
CAGAA
CTCTA
CCTGA
CATCC
TAGCG
GCCAA
TGTGT
ATAAT
CCTTA


In [None]:
# Open the SD file of building blocks and walk through every record once
supp = Chem.SDMolSupplier('Click_BB.sdf')
for mol in supp:
    if mol is None:
        # skip entries the supplier returned as None
        continue
    mol.GetNumAtoms()

In [None]:
# Create a list of molecules, dropping entries the supplier returned as None
# so that later method calls (e.g. HasSubstructMatch) do not fail on them
mols = [mol for mol in supp if mol is not None]
len(mols)  # Number of usable building blocks

In [None]:
# Match a substructure with a SMARTS query
# SMARTS: a nucleophile (primary non-amide amine, thiol or hydroxyl) bonded to
# an aromatic carbon of a 5-membered ring that contains N, O or S ring atoms.
# The pattern must not contain whitespace, so it is split across two adjacent
# string literals instead of being broken with a space.
patt1 = Chem.MolFromSmarts(
    '[$([NX3;H2;!$(NC=O)]),$([#16X2H]),$([OX2H])]-'
    '[cr5;$([cr5]:1:[nr5,or5,sr5]:[cr5]:[cr5]:[nr5,or5,sr5]:1),$([cr5]:1:[cr5]:[nr5,or5,sr5]:[cr5]:[cr5]:1)]'
)
het5 = [x for x in mols if x.HasSubstructMatch(patt1)]

In [None]:
# SMARTS: Br or Cl on the carbon next to a terminal alkyne (C#CH)
patt2 = Chem.MolFromSmarts('[Br,Cl][#6]C#[CH1]')
# Keep only the building blocks that contain this halo-alkyne motif
alkynes = list(filter(lambda bb: bb.HasSubstructMatch(patt2), mols))

In [None]:
# SMARTS: azide group, i.e. a carbon-bound N (no H, two neighbours) double-bonded
# to a two-neighbour N that is double-bonded to a terminal N
patt3 = Chem.MolFromSmarts('[N;H0;$(N-[#6]);D2]=[N;D2]=[N;D1]')
# Keep only the building blocks that carry an azide
azide = list(filter(lambda bb: bb.HasSubstructMatch(patt3), mols))

In [None]:
# Nucleophilic substitution: the amine/thiol/hydroxyl on the aromatic 5-ring
# replaces the halogen (Br or Cl) on the carbon next to the alkyne.
# Reaction SMARTS must not contain whitespace, so the template is assembled
# from adjacent string literals: reactant 1, reactant 2, then the product.
rxn = AllChem.ReactionFromSmarts(
    '[#6;a;r5:1]-[$([NX3;H2;!$(NC=O)]),$([#16X2H]),$([OX2H]):2]'
    '.[#35,#17]-[#6:3][C:4]#[C:5]'
    '>>[#6;a;r5:1]-[$([NX3;H]),$([#16X2]),$([OX2]):2]-[#6:3][C:4]#[C:5]'
)

In [None]:
# Run the substitution reaction over every het5 x alkynes combination
prods1 = AllChem.EnumerateLibraryFromReaction(rxn, [het5, alkynes])
# Take the first product of each result as SMILES (the original referenced an
# undefined name `prod` here). Deduplicate, then sort so the list order is
# the same on every run.
smis = sorted({Chem.MolToSmiles(x[0], isomericSmiles=True) for x in prods1})

In [None]:
# Click reaction: terminal alkyne + azide -> 1,2,3-triazole ring.
# The template must not contain whitespace. The two product nitrogens that
# come from the charged azide atoms are written with an explicit +0 so the
# triazole is neutral rather than inheriting the azide's [N+]/[N-] charges.
rxn2 = AllChem.ReactionFromSmarts(
    '[#6:7][C:6]#[CH1:5].[#6:4]-[#7:3]=[N+:2]=[#7-:1]'
    '>>[#6:4]-[#7:3]-1-[#6:5]=[#6:6](-[#6:7])-[#7+0:1]=[#7+0:2]-1'
)
# Re-parse the alkyne intermediates from SMILES; MolFromSmiles returns None
# for a string it cannot turn into a molecule, so drop those entries.
alkyne_intermediates = [m for m in map(Chem.MolFromSmiles, smis) if m is not None]
prods2 = AllChem.EnumerateLibraryFromReaction(rxn2, [alkyne_intermediates, azide])
# First product of each result as SMILES, deduplicated and sorted for a stable order
smis2 = sorted({Chem.MolToSmiles(x[0], isomericSmiles=True) for x in prods2})
len(smis2)

In [None]:
# Export results as .CSV file: one product SMILES per row, no index column.
# The column header string is kept exactly as it was so existing consumers of
# the CSV are unaffected.
df = pd.DataFrame(smis2, columns=["colummn"])
df.to_csv('bis_heterocycles.csv', index=False)