# Exploring sequence of Tn10 

In [1]:
import wgregseq
%load_ext autoreload
%autoreload 2

import pandas as pd

First we read the FASTA file obtained from Genebank.

In [2]:
with open ("tn10.fasta", "r") as file:
    data = file.read().split('\n')[1:]
    sequence = "".join(data)

Organization of tetR/tetA regulation:
- two operators that can be bound independently by TetR
- tetA is repressed by both tetO1 and tetO2
- tetR is repressed only by tetO1
- Affinity of tetO2 to TetR is about twice as high as tetO1

![](tn10_tet.png)

From Genebank, we can find the positions for *tetA* and *tetR*. The repressor gene is reversed, so we will have to obtain the complementary sequence in case we are interested in the actual sequences.

In [3]:
# Exact positions from Genebank
tetR_pos = [4702, 5328]
tetA_pos = [5407, 6612]

tetA = sequence[tetA_pos[0]-1:tetA_pos[1]]
tetR = wgregseq.complement_seq(sequence[tetA_pos[0]-1:tetA_pos[1]])

In [4]:
# Positions taken from Bertram 2008
tetO2_pos = [tetA_pos[0] - 28, tetA_pos[0] - 10]
tetO1_pos = [tetA_pos[0] - 58, tetA_pos[0] - 40]

tetO2 = sequence[tetO2_pos[0]:tetO2_pos[1]+1]
tetO1 = sequence[tetO1_pos[0]:tetO1_pos[1]+1]

In [5]:
tetO2

'TCCCTATCAGTGATAGAGA'

In [6]:
tetO1

'ACTCTATCATTGATAGAGT'

In [7]:
rev_tetO1 = wgregseq.complement_seq(tetO1, rev=True)
rev_tetO1

'ACTCTATCAATGATAGAGT'

In [8]:
rev_tetO2 = wgregseq.complement_seq(tetO2, rev=True)
rev_tetO2

'TCTCTATCACTGATAGGGA'

In [9]:
# TSS estimated from -10 region
tetA_TSS = tetO2_pos[0]+1
tetR_TSS1 = tetO2_pos[0] - 19
tetR_TSS2 = tetO1_pos[0] - 8

In [10]:
P_tetA = sequence[tetA_TSS-36:tetA_TSS+1]
P_tetA

'TTGACACTCTATCATTGATAGAGTTATTTTACCACTC'

In [11]:
P_tetR1 = wgregseq.complement_seq(sequence[tetO1_pos[1]-6:tetA_pos[0] - 8], rev=True)
P_tetR1

'TTCTCTATCACTGATAGGGAGTGGTAAAATAACTCTAT'

In [12]:
P_tetR2 = wgregseq.complement_seq(sequence[tetO1_pos[0]-8:tetO2_pos[0]-1], rev=True)
P_tetR2

'TGGTAAAATAACTCTATCAATGATAGAGTGTCAACAA'

In [13]:
len(P_tetA)

37

In [14]:
lavUV5 = 'TCGAGTTTACACTTTATGCTTCCGGCTCGTATAATGTGTGG'

## Constructs

All constructs which are include a tet operator need to be integrated into a cell which expresses the tet repressor. However, the inserts which only have the promoter should be observed in the absence of the repressor to identify the binding energy matrix for the -10/-35 regions.

### RegSeq tetA

This construct should be straight forward. One promoter, two operator binding sites. 

In [15]:
regseq_tetA = sequence[tetA_TSS-115:tetA_TSS+45]
regseq_tetA

'GACCTCATTAAGCAGCTCTAATGCGCTGTTAATCACTTTACTTTTATCTAATCTAGACATCATTAATTCCTAATTTTTGTTGACACTCTATCATTGATAGAGTTATTTTACCACTCCCTATCAGTGATAGAGAAAAGTGAAATGAATAGTTCGACAAAGA'

### RegSeq tetR

In [16]:
regseq_tetR1 = wgregseq.complement_seq(sequence[tetR_TSS1-45:tetR_TSS1+115])
regseq_tetR1

'TAGATCTGTAGTAATTAAGGATTAAAAACAACTGTGAGATAGTAACTATCTCAATAAAATGGTGAGGGATAGTCACTATCTCTTTTCACTTTACTTATCAAGCTGTTTCTAGCGTAACCATTAATGCAATGAGCTACGGTACCCCTAACCGGAATAGTAC'

In [17]:
regseq_tetR2 = wgregseq.complement_seq(sequence[tetR_TSS2-45:tetR_TSS2+115])
regseq_tetR2

'TAGTGAAATGAAAATAGATTAGATCTGTAGTAATTAAGGATTAAAAACAACTGTGAGATAGTAACTATCTCAATAAAATGGTGAGGGATAGTCACTATCTCTTTTCACTTTACTTATCAAGCTGTTTCTAGCGTAACCATTAATGCAATGAGCTACGGTA'

### LacUV5 + individual operators downstream

In [18]:
lacUV5_tetO1 = lavUV5 + tetO1
lacUV5_tetO1

'TCGAGTTTACACTTTATGCTTCCGGCTCGTATAATGTGTGGACTCTATCATTGATAGAGT'

In [19]:
mutants = wgregseq.mutations_det(lacUV5_tetO1, mut_per_seq=1, site_start=-20)
tet_df1 = pd.DataFrame({"seq":mutants})
tet_df1["description"] = "lacUV5_tetO1 single mutant"
tet_df1.head()

Unnamed: 0,seq,description
0,TCGAGTTTACACTTTATGCTTCCGGCTCGTATAATGTGTGGACTCT...,lacUV5_tetO1 single mutant
1,TCGAGTTTACACTTTATGCTTCCGGCTCGTATAATGTGTGaACTCT...,lacUV5_tetO1 single mutant
2,TCGAGTTTACACTTTATGCTTCCGGCTCGTATAATGTGTGGcCTCT...,lacUV5_tetO1 single mutant
3,TCGAGTTTACACTTTATGCTTCCGGCTCGTATAATGTGTGGAaTCT...,lacUV5_tetO1 single mutant
4,TCGAGTTTACACTTTATGCTTCCGGCTCGTATAATGTGTGGACaCT...,lacUV5_tetO1 single mutant


In [20]:
lacUV5_tetO2 = lavUV5 + tetO2
lacUV5_tetO2

'TCGAGTTTACACTTTATGCTTCCGGCTCGTATAATGTGTGGTCCCTATCAGTGATAGAGA'

In [21]:
mutants = wgregseq.mutations_det(lacUV5_tetO2, mut_per_seq=1, site_start=-20)
tet_df2 = pd.DataFrame({"seq":mutants})
tet_df2["description"] = "lacUV5_tetO2 single mutant"
tet_df = pd.concat([tet_df1, tet_df2]).reset_index()
tet_df

Unnamed: 0,index,seq,description
0,0,TCGAGTTTACACTTTATGCTTCCGGCTCGTATAATGTGTGGACTCT...,lacUV5_tetO1 single mutant
1,1,TCGAGTTTACACTTTATGCTTCCGGCTCGTATAATGTGTGaACTCT...,lacUV5_tetO1 single mutant
2,2,TCGAGTTTACACTTTATGCTTCCGGCTCGTATAATGTGTGGcCTCT...,lacUV5_tetO1 single mutant
3,3,TCGAGTTTACACTTTATGCTTCCGGCTCGTATAATGTGTGGAaTCT...,lacUV5_tetO1 single mutant
4,4,TCGAGTTTACACTTTATGCTTCCGGCTCGTATAATGTGTGGACaCT...,lacUV5_tetO1 single mutant
...,...,...,...
111,53,TCGAGTTTACACTTTATGCTTCCGGCTCGTATAATGTGTGGTCCCT...,lacUV5_tetO2 single mutant
112,54,TCGAGTTTACACTTTATGCTTCCGGCTCGTATAATGTGTGGTCCCT...,lacUV5_tetO2 single mutant
113,55,TCGAGTTTACACTTTATGCTTCCGGCTCGTATAATGTGTGGTCCCT...,lacUV5_tetO2 single mutant
114,56,TCGAGTTTACACTTTATGCTTCCGGCTCGTATAATGTGTGGTCCCT...,lacUV5_tetO2 single mutant


## -10/-35 without operators

This one we could right away without quantifying how many repressors are around.

In [22]:
PR1 = wgregseq.complement_seq(sequence[tetR_TSS1:tetR_TSS1+45], rev=True)
PR1

'TCACTTTTCTCTATCACTGATAGGGAGTGGTAAAATAACTCTATC'

In [23]:
PR2 = wgregseq.complement_seq(sequence[tetR_TSS2:tetR_TSS2+45], rev=True)
PR2

'ATAGGGAGTGGTAAAATAACTCTATCAATGATAGAGTGTCAACAA'

In [24]:
PA = sequence[tetA_TSS-45:tetA_TSS+1]
PA

'TAATTTTTGTTGACACTCTATCATTGATAGAGTTATTTTACCACTC'

In [40]:
wgregseq.mutations_rand(PR2, 1500, 0.1)


array(['ATAGGGAGTGGTAAAATAACTCTATCAATGATAGAGTGTCAACAA',
       'ATAcGGAGTcGTAAAAcAACTCTATCAAaGATAGAGTGTCAACAA',
       'ATAGGGAGTGGTAAAATAACTCTcTCAATGATAGAGTGTCAACAA', ...,
       'ATAGGGAGTGGTgAAATAACTCTATCAATGATAGAGTGTCAACAA',
       'ATAGGGAGTGaTAAAATAcCTaTATCAATGAaAGAGTGTCAACAA',
       'ATAGGtAGTcGTAAAATAgCcCTATtAAgGcTcGgGTGTCAACAA'], dtype='<U45')

In [34]:
import numpy as np
np.empty(10, dtype=object)#[np.array([1,2])])

array([None, None, None, None, None, None, None, None, None, None],
      dtype=object)