In [1]:
import stk
import rdkit, rdkit.Chem as rdkit
import itertools

In [2]:
def enumerate_skeleton(skeleton, substituents):
    """
    Get all possible unique structures formed from the skeleton &
    each of the substituents, retaining two Br functional groups
    to be used later when forming supramolecular structure.

    Every ``(Br)`` in ``skeleton`` marks a vacant site. The sites are
    filled with each permutation returned by
    ``get_substituent_permutations``; the resulting SMILES strings are
    canonicalised with RDKit and de-duplicated. A short summary is
    printed for the skeleton.

    Parameters
    ----------

    skeleton : `str` SMILES string describing skeleton onto which
        substituents will be placed. See README.md for how
        skeleton SMILES should be specified.

    substituents : `list` SMILES fragments (e.g. ``'(N)'``) that may be
        placed on the vacant sites of the skeleton.

    Returns
    -------

    canonical_smiles : `list` All unique SMILES obtained
         by placing maximum of two substituents on a molecular
         skeleton.

    Raises
    ------

    ValueError : if RDKit cannot parse one of the generated SMILES
        strings. The message names the offending string.

    """

    template = skeleton.replace('(Br)', '{}')
    vacant_sites = template.count('{}')

    perms = get_substituent_permutations(substituents, vacant_sites)

    canonical_smiles = []
    for perm in perms:
        smi = template.format(*perm)
        # `rdkit` is bound to the `rdkit.Chem` module by the import cell.
        mol = rdkit.MolFromSmiles(smi)
        if mol is None:
            # MolFromSmiles signals failure by returning None; passing that
            # on to MolToSmiles fails without saying which SMILES was bad.
            raise ValueError(
                'Could not parse SMILES {!r} built from skeleton {!r}'.format(smi, skeleton)
            )
        canonical_smiles.append(rdkit.MolToSmiles(mol, canonical=True))

    # Different site assignments can describe the same molecule; they collapse
    # to the same canonical SMILES and are dropped here.
    canonical_smiles = remove_duplicates(canonical_smiles)

    print('Skeleton:', skeleton)
    print('Number of vacant sites:', vacant_sites)
    print('Numer of unique substituent permutations:', len(canonical_smiles), '\n')

    return canonical_smiles


def get_substituent_permutations(substituents, vacant_sites):
    """
    Finds all distinct ways of assigning substituents and the two
    obligatory bromines to the vacant sites of a skeleton. A maximum
    of two substituents may be selected at once.

    The number of substituents placed depends on ``vacant_sites``:
    two (different, or the same one twice) for four or more sites, one
    for exactly three sites, none otherwise. Two ``'(Br)'`` entries are
    always added and any remaining sites are padded with ``''``.

    Parameters
    ----------

    substituents : `list` user-specified SMILES strings representing substituents
        to be combined with molecular skeleton.

    vacant_sites : `int` number of sites that can be substituted onto for a given
        molecular skeleton.

    Returns
    -------

    permutations : `list` All unique permutations (each a `list` of length
        ``vacant_sites``) of the selected substituents plus two bromine
        functional groups to be used to build supramolecules. Order is
        that of first occurrence. When ``vacant_sites`` is below two the
        entries are correspondingly shorter and hold fewer bromines.

    """

    if vacant_sites >= 4:
        sub_combinations = list(itertools.combinations(substituents, 2))
        # combinations() never pairs an element with itself, so add those.
        sub_combinations.extend((sub, sub) for sub in substituents)

    elif vacant_sites == 3:
        sub_combinations = list(itertools.combinations(substituents, 1))

    else:
        sub_combinations = list(itertools.combinations(substituents, 0))

    # itertools.permutations treats equal entries (the two bromines, repeated
    # substituents, the '' padding) as distinct, so it yields many repeats.
    # Track the tuples already emitted in a set to drop them in O(1) each
    # while keeping first-occurrence order.
    seen = set()
    permutations = []
    for combination in sub_combinations:
        occupants = list(combination) + ['(Br)', '(Br)']       # adds oblig. bromines
        occupants += [''] * (vacant_sites - len(occupants))    # no-op if negative
        for permutation in itertools.permutations(occupants, vacant_sites):
            if permutation not in seen:
                seen.add(permutation)
                permutations.append(list(permutation))

    return permutations


def get_embedded_structures(permutations):
    """
    Placeholder for embedding structures and writing them in *.mol format.

    Not implemented yet: ``permutations`` is ignored and ``None`` is
    returned.
    """
    return None
    

def remove_duplicates(x):
    '''
    Simple tool for removing duplicates from an iterable.

    Returns a new list holding the first occurrence of every distinct
    item of ``x``, in the original order. Items are compared by equality.
    Hashable items (e.g. SMILES strings) are tracked in a set so each
    lookup is O(1); unhashable items (e.g. lists) fall back to a linear
    equality scan.
    '''
    seen = set()         # hashable items already kept
    unhashable = []      # unhashable items already kept
    x_unique = []
    for item in x:
        try:
            hash(item)
        except TypeError:
            # Unhashable: compare against everything kept so far.
            if item in x_unique:
                continue
            unhashable.append(item)
        else:
            # A hashable item can still equal an unhashable one
            # (frozenset vs set), so check both collections.
            if item in seen or item in unhashable:
                continue
            seen.add(item)
        x_unique.append(item)
    return x_unique

In [3]:
# Each non-blank line of the input file contributes one skeleton: its first
# whitespace-separated token. Blank lines are skipped, because indexing the
# empty result of ''.split() would raise IndexError.
with open('skeletons-list.txt') as f:
    skeletons = [line.split()[0] for line in f if line.strip()]

# SMILES fragments that may fill a vacant site; '' fills the site with nothing.
substituents = ['', '(N(C)C)', '(N)', '(OC)', '(O)', '(S)']
# Further candidates, currently disabled:
#   '(C)', '(F)', '(Cl)', '(CC)', '(C=O)', '(C(=O)OC)', '(C(F)(F)(F))',
#   '(C#N)', '(N(=O)(=O))', '(C(=O)O))'
# NOTE(review): the last disabled entry has an unbalanced ')'.

for skeleton in skeletons:
    enumerate_skeleton(skeleton, substituents)

Skeleton: c1(Br)c(Br)c(Br)c(Br)c(Br)c1(Br)
Number of vacant sites: 6
Numer of unique substituent permutations: 248 

Skeleton: c1(Br)nc(Br)c(Br)c(Br)c1(Br)
Number of vacant sites: 5
Numer of unique substituent permutations: 466 

Skeleton: c1(Br)nc(Br)nc(Br)c1(Br)
Number of vacant sites: 4
Numer of unique substituent permutations: 129 

Skeleton: c1(Br)nc(Br)c(Br)nc1(Br)
Number of vacant sites: 4
Numer of unique substituent permutations: 63 

Skeleton: c1(Br)nc(Br)nc(Br)n1
Number of vacant sites: 3
Numer of unique substituent permutations: 6 

Skeleton: c1(Br)c(Br)c(Br)[s]c1(Br)
Number of vacant sites: 4
Numer of unique substituent permutations: 114 

Skeleton: n1c(Br)c(Br)[s]c1(Br)
Number of vacant sites: 3
Numer of unique substituent permutations: 18 

Skeleton: c1(Br)c(Br)n[s]c1(Br)
Number of vacant sites: 3
Numer of unique substituent permutations: 18 

Skeleton: n1nc(Br)[s]c1(Br)
Number of vacant sites: 2
Numer of unique substituent permutations: 1 

Skeleton: c1(Br)c(Br)c(Br)[o]c

#### Notes

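* Known issue: Si-containing substituents and the carboxylic acid substituent cannot be canonicalized. (The disabled carboxylic acid entry in the substituent list, `'(C(=O)O))'`, has an unbalanced closing parenthesis, which may be the cause for that one.)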