In [1]:
import stk
import rdkit, rdkit.Chem as rdkit
import itertools

In [6]:
def remove_duplicates(x):
    """Return the items of ``x`` as a list with repeated items dropped.

    The first occurrence of each item is kept and the original order is
    preserved.  Items are compared with ``in`` on a list, so they do not
    need to be hashable (lists of lists are fine).
    """
    kept = []
    for candidate in x:
        if candidate in kept:
            # already recorded by an earlier, equal item
            continue
        kept.append(candidate)
    return kept


def combine_substituents(substituents, vacant_sites):
    """Build the substituent sets to place on a skeleton, each with two bromines.

    The number of non-bromine substituents chosen depends on ``vacant_sites``:

    * 4 or more sites: every unordered pair of distinct entries of
      ``substituents``, followed by every entry paired with itself;
    * exactly 3 sites: every single entry;
    * otherwise: no extra substituent.

    Returns a list of lists; each inner list ends with the two obligatory
    ``'(Br)'`` groups.
    """
    bromines = ['(Br)', '(Br)']

    # how many non-bromine substituents there is room for
    if vacant_sites >= 4:
        n_chosen = 2
    elif vacant_sites == 3:
        n_chosen = 1
    else:
        n_chosen = 0

    chosen = [list(subset) for subset in itertools.combinations(substituents, n_chosen)]
    if n_chosen == 2:
        # combinations() never pairs an entry with itself, so add those explicitly
        chosen.extend([sub, sub] for sub in substituents)

    return [subset + bromines for subset in chosen]


def get_positional_permutations(sub_combinations, vacant_sites):
    """Return every distinct arrangement of each combination over the vacant sites.

    Each combination is padded with empty strings up to ``vacant_sites``
    entries (an empty string leaves that site without a substituent), and all
    orderings of ``vacant_sites`` entries are enumerated.  Arrangements that
    come out identical -- because a combination holds repeated entries such
    as the two ``'(Br)'`` groups, or because two combinations overlap -- are
    returned only once, in the order they are first generated.

    Parameters:
        sub_combinations: iterable of sequences (lists or tuples) of hashable
            entries, e.g. SMILES fragment strings.
        vacant_sites: number of positions to fill.

    Returns:
        A list of lists, one per distinct arrangement.
    """
    seen = set()      # arrangements already emitted; O(1) duplicate check
    perms = []
    for combination in sub_combinations:
        padded = list(combination) + [''] * (vacant_sites - len(combination))
        # iterate lazily: no need to materialise all permutations first
        for permutation in itertools.permutations(padded, vacant_sites):
            if permutation not in seen:
                seen.add(permutation)
                perms.append(list(permutation))

    return perms

In [7]:
# Read the skeleton SMILES: the first whitespace-separated token of each line
# is the skeleton, anything after it on the line is ignored.  Blank lines
# (e.g. a trailing newline at the end of the file) are skipped, since
# ``line.split()[0]`` would raise IndexError on them.
with open('skeletons-list.txt') as f:
    skeletons = [line.split()[0] for line in f if line.strip()]

# Substituent fragments to place on the vacant sites; '' leaves a site bare.
# Further candidates, currently disabled:
#   '(C)', '(Si)', '(F)', '(Cl)', '(CC)', '(C=O)', '(C(=O)OC)', '(C(F)(F)(F))', '(C#N)', '(C(=O)O))', '(N(=O)(=O))'
# NOTE: '(C(=O)O))' above has an unbalanced closing parenthesis -- fix it before enabling.
substituents = ['', '(N(C)C)', '(N)', '(OC)', '(O)', '(S)']

In [8]:
# Print every substituted SMILES for every skeleton and the overall total.
permcount = 0
for skeleton in skeletons:

    # each '(Br)' in the skeleton becomes a '{}' slot for str.format
    template = '{}'.join(skeleton.split('(Br)'))
    vacant_sites = template.count('{}')
    print('number of vacant sites :', vacant_sites)

    sub_combinations = combine_substituents(substituents, vacant_sites)
    perms = get_positional_permutations(sub_combinations, vacant_sites)
    print('number of permutations for skelelton', skeleton, ':', len(perms))

    # fill the slots with each arrangement in turn
    for perm in perms:
        print(template.format(*perm))

    permcount += len(perms)

print(permcount)

number of vacant sites : 6
number of permutations for skelelton c1(Br)c(Br)c(Br)c(Br)c(Br)c1(Br) : 2565
c1c(N(C)C)c(Br)c(Br)cc1
c1c(N(C)C)c(Br)cc(Br)c1
c1c(N(C)C)c(Br)ccc1(Br)
c1c(N(C)C)cc(Br)c(Br)c1
c1c(N(C)C)cc(Br)cc1(Br)
c1c(N(C)C)ccc(Br)c1(Br)
c1c(Br)c(N(C)C)c(Br)cc1
c1c(Br)c(N(C)C)cc(Br)c1
c1c(Br)c(N(C)C)ccc1(Br)
c1c(Br)c(Br)c(N(C)C)cc1
c1c(Br)c(Br)cc(N(C)C)c1
c1c(Br)c(Br)ccc1(N(C)C)
c1c(Br)cc(N(C)C)c(Br)c1
c1c(Br)cc(N(C)C)cc1(Br)
c1c(Br)cc(Br)c(N(C)C)c1
c1c(Br)cc(Br)cc1(N(C)C)
c1c(Br)ccc(N(C)C)c1(Br)
c1c(Br)ccc(Br)c1(N(C)C)
c1cc(N(C)C)c(Br)c(Br)c1
c1cc(N(C)C)c(Br)cc1(Br)
c1cc(N(C)C)cc(Br)c1(Br)
c1cc(Br)c(N(C)C)c(Br)c1
c1cc(Br)c(N(C)C)cc1(Br)
c1cc(Br)c(Br)c(N(C)C)c1
c1cc(Br)c(Br)cc1(N(C)C)
c1cc(Br)cc(N(C)C)c1(Br)
c1cc(Br)cc(Br)c1(N(C)C)
c1ccc(N(C)C)c(Br)c1(Br)
c1ccc(Br)c(N(C)C)c1(Br)
c1ccc(Br)c(Br)c1(N(C)C)
c1(N(C)C)cc(Br)c(Br)cc1
c1(N(C)C)cc(Br)cc(Br)c1
c1(N(C)C)cc(Br)ccc1(Br)
c1(N(C)C)ccc(Br)c(Br)c1
c1(N(C)C)ccc(Br)cc1(Br)
c1(N(C)C)cccc(Br)c1(Br)
c1(N(C)C)c(Br)cc(Br)cc1
c1(N(C)C

c1(N)c(Br)cc(Br)c(OC)c1
c1(N)c(Br)cc(Br)cc1(OC)
c1(N)c(Br)ccc(OC)c1(Br)
c1(N)c(Br)ccc(Br)c1(OC)
c1(N)cc(OC)c(Br)c(Br)c1
c1(N)cc(OC)c(Br)cc1(Br)
c1(N)cc(OC)cc(Br)c1(Br)
c1(N)cc(Br)c(OC)c(Br)c1
c1(N)cc(Br)c(OC)cc1(Br)
c1(N)cc(Br)c(Br)c(OC)c1
c1(N)cc(Br)c(Br)cc1(OC)
c1(N)cc(Br)cc(OC)c1(Br)
c1(N)cc(Br)cc(Br)c1(OC)
c1(N)ccc(OC)c(Br)c1(Br)
c1(N)ccc(Br)c(OC)c1(Br)
c1(N)ccc(Br)c(Br)c1(OC)
c1(OC)c(N)c(Br)c(Br)cc1
c1(OC)c(N)c(Br)cc(Br)c1
c1(OC)c(N)c(Br)ccc1(Br)
c1(OC)c(N)cc(Br)c(Br)c1
c1(OC)c(N)cc(Br)cc1(Br)
c1(OC)c(N)ccc(Br)c1(Br)
c1(OC)c(Br)c(N)c(Br)cc1
c1(OC)c(Br)c(N)cc(Br)c1
c1(OC)c(Br)c(N)ccc1(Br)
c1(OC)c(Br)c(Br)c(N)cc1
c1(OC)c(Br)c(Br)cc(N)c1
c1(OC)c(Br)c(Br)ccc1(N)
c1(OC)c(Br)cc(N)c(Br)c1
c1(OC)c(Br)cc(N)cc1(Br)
c1(OC)c(Br)cc(Br)c(N)c1
c1(OC)c(Br)cc(Br)cc1(N)
c1(OC)c(Br)ccc(N)c1(Br)
c1(OC)c(Br)ccc(Br)c1(N)
c1(OC)cc(N)c(Br)c(Br)c1
c1(OC)cc(N)c(Br)cc1(Br)
c1(OC)cc(N)cc(Br)c1(Br)
c1(OC)cc(Br)c(N)c(Br)c1
c1(OC)cc(Br)c(N)cc1(Br)
c1(OC)cc(Br)c(Br)c(N)c1
c1(OC)cc(Br)c(Br)cc1(N)
c1(OC)cc(Br)cc(N

c1c(Br)c(N(C)C)c(Br)cc1(N(C)C)
c1c(Br)c(N(C)C)cc(N(C)C)c1(Br)
c1c(Br)c(N(C)C)cc(Br)c1(N(C)C)
c1c(Br)c(Br)c(N(C)C)c(N(C)C)c1
c1c(Br)c(Br)c(N(C)C)cc1(N(C)C)
c1c(Br)c(Br)cc(N(C)C)c1(N(C)C)
c1c(Br)cc(N(C)C)c(N(C)C)c1(Br)
c1c(Br)cc(N(C)C)c(Br)c1(N(C)C)
c1c(Br)cc(Br)c(N(C)C)c1(N(C)C)
c1cc(N(C)C)c(N(C)C)c(Br)c1(Br)
c1cc(N(C)C)c(Br)c(N(C)C)c1(Br)
c1cc(N(C)C)c(Br)c(Br)c1(N(C)C)
c1cc(Br)c(N(C)C)c(N(C)C)c1(Br)
c1cc(Br)c(N(C)C)c(Br)c1(N(C)C)
c1cc(Br)c(Br)c(N(C)C)c1(N(C)C)
c1(N)c(N)c(Br)c(Br)cc1
c1(N)c(N)c(Br)cc(Br)c1
c1(N)c(N)c(Br)ccc1(Br)
c1(N)c(N)cc(Br)c(Br)c1
c1(N)c(N)cc(Br)cc1(Br)
c1(N)c(N)ccc(Br)c1(Br)
c1(N)c(Br)c(N)c(Br)cc1
c1(N)c(Br)c(N)cc(Br)c1
c1(N)c(Br)c(N)ccc1(Br)
c1(N)c(Br)c(Br)c(N)cc1
c1(N)c(Br)c(Br)cc(N)c1
c1(N)c(Br)c(Br)ccc1(N)
c1(N)c(Br)cc(N)c(Br)c1
c1(N)c(Br)cc(N)cc1(Br)
c1(N)c(Br)cc(Br)c(N)c1
c1(N)c(Br)cc(Br)cc1(N)
c1(N)c(Br)ccc(N)c1(Br)
c1(N)c(Br)ccc(Br)c1(N)
c1(N)cc(N)c(Br)c(Br)c1
c1(N)cc(N)c(Br)cc1(Br)
c1(N)cc(N)cc(Br)c1(Br)
c1(N)cc(Br)c(N)c(Br)c1
c1(N)cc(Br)c(N)cc1(Br)
c1(N)c

c1(O)nc(N)c(Br)c(Br)c1
c1(O)nc(N)c(Br)cc1(Br)
c1(O)nc(N)cc(Br)c1(Br)
c1(O)nc(Br)c(N)c(Br)c1
c1(O)nc(Br)c(N)cc1(Br)
c1(O)nc(Br)c(Br)c(N)c1
c1(O)nc(Br)c(Br)cc1(N)
c1(O)nc(Br)cc(N)c1(Br)
c1(O)nc(Br)cc(Br)c1(N)
c1(O)ncc(N)c(Br)c1(Br)
c1(O)ncc(Br)c(N)c1(Br)
c1(O)ncc(Br)c(Br)c1(N)
c1(Br)nc(N)c(O)c(Br)c1
c1(Br)nc(N)c(O)cc1(Br)
c1(Br)nc(N)c(Br)c(O)c1
c1(Br)nc(N)c(Br)cc1(O)
c1(Br)nc(N)cc(O)c1(Br)
c1(Br)nc(N)cc(Br)c1(O)
c1(Br)nc(O)c(N)c(Br)c1
c1(Br)nc(O)c(N)cc1(Br)
c1(Br)nc(O)c(Br)c(N)c1
c1(Br)nc(O)c(Br)cc1(N)
c1(Br)nc(O)cc(N)c1(Br)
c1(Br)nc(O)cc(Br)c1(N)
c1(Br)nc(Br)c(N)c(O)c1
c1(Br)nc(Br)c(N)cc1(O)
c1(Br)nc(Br)c(O)c(N)c1
c1(Br)nc(Br)c(O)cc1(N)
c1(Br)nc(Br)cc(N)c1(O)
c1(Br)nc(Br)cc(O)c1(N)
c1(Br)ncc(N)c(O)c1(Br)
c1(Br)ncc(N)c(Br)c1(O)
c1(Br)ncc(O)c(N)c1(Br)
c1(Br)ncc(O)c(Br)c1(N)
c1(Br)ncc(Br)c(N)c1(O)
c1(Br)ncc(Br)c(O)c1(N)
c1nc(N)c(O)c(Br)c1(Br)
c1nc(N)c(Br)c(O)c1(Br)
c1nc(N)c(Br)c(Br)c1(O)
c1nc(O)c(N)c(Br)c1(Br)
c1nc(O)c(Br)c(N)c1(Br)
c1nc(O)c(Br)c(Br)c1(N)
c1nc(Br)c(N)c(O)c1(Br)
c1nc(Br)c(N

c1c(Br)c(O)[o]c1(Br)
c1c(Br)c(Br)[o]c1(O)
c1(O)cc(Br)[o]c1(Br)
c1(O)c(Br)c[o]c1(Br)
c1(O)c(Br)c(Br)[o]c1
c1(Br)cc(O)[o]c1(Br)
c1(Br)cc(Br)[o]c1(O)
c1(Br)c(O)c[o]c1(Br)
c1(Br)c(O)c(Br)[o]c1
c1(Br)c(Br)c[o]c1(O)
c1(Br)c(Br)c(O)[o]c1
c1c(S)c(Br)[o]c1(Br)
c1c(Br)c(S)[o]c1(Br)
c1c(Br)c(Br)[o]c1(S)
c1(S)cc(Br)[o]c1(Br)
c1(S)c(Br)c[o]c1(Br)
c1(S)c(Br)c(Br)[o]c1
c1(Br)cc(S)[o]c1(Br)
c1(Br)cc(Br)[o]c1(S)
c1(Br)c(S)c[o]c1(Br)
c1(Br)c(S)c(Br)[o]c1
c1(Br)c(Br)c[o]c1(S)
c1(Br)c(Br)c(S)[o]c1
c1(N(C)C)c(N)c(Br)[o]c1(Br)
c1(N(C)C)c(Br)c(N)[o]c1(Br)
c1(N(C)C)c(Br)c(Br)[o]c1(N)
c1(N)c(N(C)C)c(Br)[o]c1(Br)
c1(N)c(Br)c(N(C)C)[o]c1(Br)
c1(N)c(Br)c(Br)[o]c1(N(C)C)
c1(Br)c(N(C)C)c(N)[o]c1(Br)
c1(Br)c(N(C)C)c(Br)[o]c1(N)
c1(Br)c(N)c(N(C)C)[o]c1(Br)
c1(Br)c(N)c(Br)[o]c1(N(C)C)
c1(Br)c(Br)c(N(C)C)[o]c1(N)
c1(Br)c(Br)c(N)[o]c1(N(C)C)
c1(N(C)C)c(OC)c(Br)[o]c1(Br)
c1(N(C)C)c(Br)c(OC)[o]c1(Br)
c1(N(C)C)c(Br)c(Br)[o]c1(OC)
c1(OC)c(N(C)C)c(Br)[o]c1(Br)
c1(OC)c(Br)c(N(C)C)[o]c1(Br)
c1(OC)c(Br)c(Br)[o]c1(N(C)C)
c1(Br)c