In [1]:
import pandas as pd
import numpy as np
from itertools import product, permutations, combinations
import pickle

In [2]:
# Translates a 0-based design-matrix column index (0-23) into the position
# number printed in mutation names. Each value is one more than the entry of
# `strc_positions_0` at the same index.
dct_tr_pos = dict(enumerate([
    4, 6, 11, 12, 14, 15, 16, 17,
    22, 23, 26, 27, 28, 29, 30, 34,
    37, 40, 42, 43, 44, 45, 46, 47,
]))

In [3]:
# Nucleotide symbol -> list of the concrete bases it can stand for.
# There is no entry for 'U' (the original table had it commented out), and
# 'Z' maps to an empty list, so it expands to no base at all.
degenerate_symbols = {
    symbol: list(bases)
    for symbol, bases in [
        # plain bases
        ('A', 'A'), ('C', 'C'), ('G', 'G'), ('T', 'T'),
        # two-base symbols
        ('W', 'AT'), ('S', 'CG'), ('M', 'AC'), ('K', 'GT'), ('R', 'AG'), ('Y', 'CT'),
        # three-base symbols
        ('B', 'CGT'), ('D', 'AGT'), ('H', 'ACT'), ('V', 'ACG'),
        # any base / no base
        ('N', 'ACGT'), ('Z', ''),
    ]
}

In [4]:
# Bases listed for each of the 24 design-matrix columns (0-23), in the order
# given by the author. NOTE(review): not referenced by any other visible cell.
position_base_options = {
    column: list(bases)
    for column, bases in enumerate([
        'GA', 'GA', 'GA', 'CA', 'GA', 'CT', 'GA', 'CT',        # columns 0-7
        'GT', 'CAT', 'GCAT', 'GT', 'AT', 'AT', 'GCAT', 'GAT',  # columns 8-15
        'CAT', 'GA', 'CT', 'GA', 'CT', 'AT', 'GT', 'CT',       # columns 16-23
    ])
}

In [5]:
# Library designs: eight 47-character strings written with the symbols of
# `degenerate_symbols` (R, Y, K, W, D, H each stand for several bases).
lib_design = [
    'GGAGGRGATGRATRYRYCCGGKHCCRKWWYGGCDTGHGGRGYRYATY',
    'GGAAGRGATGRATRYRYCCGGKHCCRKWWYGGCDTGHGGRGYRYTTY',
    'GGAGGRGATGRCTRYRYCCGGKHCCRKWWYGGCDTGHGGRGYRYAGY',
    'GGAGGRGATGRATRYRYCCGGKHCCYKWWRGGCDTGHGGRGYRYATY',
    'GGAAGRGATGRCTRYRYCCGGKHCCRKWWYGGCDTGHGGRGYRYTGY',
    'GGAAGRGATGRATRYRYCCGGKHCCYKWWRGGCDTGHGGRGYRYTTY',
    'GGAGGRGATGRCTRYRYCCGGKHCCYKWWRGGCDTGHGGRGYRYAGY',
    'GGAAGRGATGRCTRYRYCCGGKHCCYKWWRGGCDTGHGGRGYRYTGY'
]

# 24 zero-based indices into each design string; they select the characters
# that become the columns of the design matrix. Each entry is one less than
# the `dct_tr_pos` value at the same index.
strc_positions_0 = [3,5,10,11,13,14,15,16,21,22,25,26,27,28,29,33,36,39,41,42,43,44,45,46]
# 24-character sequence indexed by design-matrix column when mutations are
# filtered (presumably the Aurora2 variant's bases at those columns — confirm).
aurora2 = 'AGACATGTTTTGTAAATATGTTGT'

In [6]:
def unDegenerate(S, symbols=None):
    """Expand a degenerate sequence into every concrete sequence it encodes.

    Parameters
    ----------
    S : iterable of str
        Sequence of symbols; each one must be a key of the symbol table.
    symbols : mapping, optional
        Symbol -> list of concrete bases. When omitted, the notebook-level
        ``degenerate_symbols`` table is used, which is the original behaviour.

    Returns
    -------
    list of str
        All expansions, in ``itertools.product`` order (the last position
        varies fastest). An empty ``S`` yields ``['']``; a symbol mapped to an
        empty list (such as ``'Z'``) yields ``[]``.

    Raises
    ------
    KeyError
        If ``S`` contains a symbol that is missing from the table.
    """
    if symbols is None:
        symbols = degenerate_symbols
    # One list of candidate bases per position of the input.
    choices = [symbols[symbol] for symbol in S]
    return [''.join(combo) for combo in product(*choices)]

---
### Assemble the mutations list

In [7]:
# Put the lib design into a matrix format: one row per design, one column per
# selected position.
# The matrix is built under a new name instead of rebinding `lib_design`, so
# this cell can be re-run safely. Overwriting `lib_design` with 24-symbol rows
# made a second run fail, because those rows were then indexed with positions
# up to 46.
lib_design_strc = [[design[pos] for pos in strc_positions_0] for design in lib_design]
df_lib_design = pd.DataFrame(lib_design_strc)
df_lib_design

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
0,G,R,R,A,R,Y,R,Y,K,H,...,Y,D,H,R,Y,R,Y,A,T,Y
1,A,R,R,A,R,Y,R,Y,K,H,...,Y,D,H,R,Y,R,Y,T,T,Y
2,G,R,R,C,R,Y,R,Y,K,H,...,Y,D,H,R,Y,R,Y,A,G,Y
3,G,R,R,A,R,Y,R,Y,K,H,...,R,D,H,R,Y,R,Y,A,T,Y
4,A,R,R,C,R,Y,R,Y,K,H,...,Y,D,H,R,Y,R,Y,T,G,Y
5,A,R,R,A,R,Y,R,Y,K,H,...,R,D,H,R,Y,R,Y,T,T,Y
6,G,R,R,C,R,Y,R,Y,K,H,...,R,D,H,R,Y,R,Y,A,G,Y
7,A,R,R,C,R,Y,R,Y,K,H,...,R,D,H,R,Y,R,Y,T,G,Y


In [8]:
# Define position groups: design-matrix columns whose bases are varied together.
# Two-column groups are treated as one unit (presumably paired positions — confirm);
# the remaining columns each form a group of their own.
_grouped_pairs = [(0, 21), (2, 23), (3, 22), (4, 20), (5, 19), (6, 18), (7, 17), (10, 14)]
_single_columns = [(1,), (8,), (9,), (11,), (12,), (13,), (15,), (16,)]
# Every group starts with a distinct column, so sorting orders them by first column.
position_groups = sorted(_grouped_pairs + _single_columns)

---
### Permutations

In [15]:
# Build every ordered (from, to) pair of concrete base strings that can occur
# within each position group defined above.
mutations_permutations = []
for group in position_groups:
    # Distinct degenerate strings the designs carry at this group's columns.
    group_codes = df_lib_design[list(group)].agg(''.join, axis=1).unique().tolist()
    # Expand each degenerate string into the concrete sequences it encodes.
    group_seqs = [seq for code in group_codes for seq in unDegenerate(code)]
    # Permutations are ordered: both (a, b) and (b, a) are emitted.
    mutations_permutations += [[group, pair] for pair in permutations(group_seqs, r=2)]

In [17]:
# One label per mutation, formatted as "<pos>[-<pos>]|<from>><to>", where the
# positions are the group's columns translated through `dct_tr_pos`.
# The variable name keeps its existing spelling because the dump cell refers to it.
mutations_permuatations_names = [
    '-'.join(str(dct_tr_pos[col]) for col in group) + '|' + '>'.join(pair)
    for group, pair in mutations_permutations
]

In [19]:
# Store the mutation list together with its same-order list of labels.
with open('mutations_permutations.pkl', 'wb') as pkl_file:
    pickle.dump([mutations_permutations, mutations_permuatations_names], pkl_file)

---
### Permutations -> AU2 Filter

In [9]:
# Build every ordered (from, to) pair of concrete base strings that can occur
# within each position group defined above.
# NOTE(review): this repeats the computation of the "Permutations" section, so
# the Aurora2 filter below also works when this section is run on its own.
mutations_permutations = []
for group in position_groups:
    # Distinct degenerate strings the designs carry at this group's columns.
    group_codes = df_lib_design[list(group)].agg(''.join, axis=1).unique().tolist()
    # Expand each degenerate string into the concrete sequences it encodes.
    group_seqs = [seq for code in group_codes for seq in unDegenerate(code)]
    # Permutations are ordered: both (a, b) and (b, a) are emitted.
    mutations_permutations += [[group, pair] for pair in permutations(group_seqs, r=2)]

In [10]:
# Filter so that only those mutations relevant to Aurora2 are left: a mutation
# is kept when its "from" string equals the Aurora2 bases at the group's columns.
mutations_in_aurora = []
for mutation in mutations_permutations:
    group, pair = mutation
    aurora_bases = ''.join(aurora2[col] for col in group)
    if aurora_bases == pair[0]:
        mutations_in_aurora.append(mutation)

In [11]:
# One label per Aurora2-relevant mutation: "<pos>[-<pos>]|<from>><to>", with
# positions translated through `dct_tr_pos`.
mutations_in_aurora_names = [
    '-'.join(str(dct_tr_pos[col]) for col in group) + '|' + '>'.join(pair)
    for group, pair in mutations_in_aurora
]

In [12]:
# Store the Aurora2-filtered mutations together with their same-order labels.
with open('mutations_permutations_aurora2.pkl', 'wb') as pkl_file:
    pickle.dump([mutations_in_aurora, mutations_in_aurora_names], pkl_file)

---
### Combinations

In [13]:
# Build every unordered pair of concrete base strings that can occur within
# each position group defined above.
mutations_combinations = []
for group in position_groups:
    # Distinct degenerate strings the designs carry at this group's columns.
    group_codes = df_lib_design[list(group)].agg(''.join, axis=1).unique().tolist()
    # Expand each degenerate string into the concrete sequences it encodes.
    group_seqs = [seq for code in group_codes for seq in unDegenerate(code)]
    # Combinations are unordered: each pair appears once, in list order.
    mutations_combinations += [[group, pair] for pair in combinations(group_seqs, r=2)]

In [14]:
# One label per combination: "<pos>[-<pos>]|<first>><second>", with positions
# translated through `dct_tr_pos`.
mutations_combinations_names = [
    '-'.join(str(dct_tr_pos[col]) for col in group) + '|' + '>'.join(pair)
    for group, pair in mutations_combinations
]

In [15]:
# Store the combinations together with their same-order list of labels.
with open('mutations_combinations.pkl', 'wb') as pkl_file:
    pickle.dump([mutations_combinations, mutations_combinations_names], pkl_file)