### Analysis of Strong -10/-35 Mutants

Here we examine the sequences, out of all possible random -10/-35 motifs, that returned a binding energy stronger than wild type (WT), to see how their motifs differ from the WT consensus motifs.

In [4]:
import numpy as np
import pandas as pd
import holoviews as hv

import bokeh_catplot

import bokeh.io
bokeh.io.output_notebook()

hv.extension('bokeh')

In [5]:
# read in Suzy's csv as a pandas DataFrame
strong_seqs = pd.read_csv("10_and_35_mutants_stronger_than_wt.csv")

# name each sequence "1", "2", ... in row order; the count is taken from the
# loaded table rather than hard-coded, so there is always exactly one name
# per row even if the csv changes
counter = np.arange(1, len(strong_seqs) + 1, 1)
seq_names = [str(i) for i in counter]

In [9]:
# give each sequence a name: attach the `seq_names` list as the
# 'sequence name' column (the list must have one entry per row, otherwise
# pandas raises a length-mismatch error)
strong_seqs['sequence name'] = seq_names
# preview the first rows to confirm the column is present
strong_seqs.head()

Unnamed: 0,sequence,energy,sequence name,mutations
0,AATATACTTTATGCTTCCGGCTCGTAAAAT,-3.712116,1,6
1,AATATACTTTATGCTTCCGGCTCGTAAACT,-3.410839,2,6
2,AATATACTTTATGCTTCCGGCTCGTATAAT,-3.902571,3,5
3,AATATACTTTATGCTTCCGGCTCGTATACT,-3.601294,4,5
4,AATATACTTTATGCTTCCGGCTCGTATCAT,-3.298632,5,5


In [7]:
# define a function to calculate how many mutations each sequence is from WT

# remember the wt promoter motifs: two 6-bp motifs concatenated
# (presumably the -35 hexamer followed by the -10 hexamer -- confirm order)
wt_motifs = "TTTACATATGTT"


def count_motif_mutations(seq, wt=wt_motifs):
    """Return the number of motif positions where `seq` differs from `wt`.

    Parameters
    ----------
    seq : str
        Full sequence; only `seq[0:6]` and `seq[24:30]` are compared.
    wt : str
        Reference motifs, laid out as the two 6-bp windows back to back.

    Returns
    -------
    int
        Count of mismatching positions between the two windows and `wt`.
    """
    # keep only the two motif windows; everything in between is ignored
    seq_fixed = seq[0:6] + seq[24:30]
    # each mismatch contributes True (== 1) to the sum
    return sum(bp != wt_bp for bp, wt_bp in zip(seq_fixed, wt))


# one mutation count per row, in row order
number_muts = [count_motif_mutations(seq) for seq in strong_seqs['sequence']]

strong_seqs['mutations'] = number_muts
strong_seqs.head()

Unnamed: 0,sequence,energy,sequence name,mutations
0,AATATACTTTATGCTTCCGGCTCGTAAAAT,-3.712116,1,6
1,AATATACTTTATGCTTCCGGCTCGTAAACT,-3.410839,2,6
2,AATATACTTTATGCTTCCGGCTCGTATAAT,-3.902571,3,5
3,AATATACTTTATGCTTCCGGCTCGTATACT,-3.601294,4,5
4,AATATACTTTATGCTTCCGGCTCGTATCAT,-3.298632,5,5


In [8]:
# plot number of mutations in each sequence
p = bokeh_catplot.ecdf(
    data=strong_seqs['mutations'],
    val='mutations',
)

bokeh.io.show(p)