# Motifs


In [3]:
from Bio import motifs
from Bio.Seq import Seq

# Six aligned 5-mers used as the known instances of the motif.
instances = [
    Seq(site)
    for site in ("CAGTT", "CATTT", "ATTTA", "CAGTA", "CAGTT", "CAGTA")
]




# Create motifs

In [4]:
# Build a motif object from the list of aligned Seq instances.
motif = motifs.create(instances)

In [5]:
# Print the degenerate consensus; positions where more than one base is common
# are written with IUPAC ambiguity codes (K = G or T, W = A or T).
print(motif.degenerate_consensus)

CAKTW


In [7]:
# Print the count matrix: one row per nucleotide, one column per motif position.
print(motif.counts)

        0      1      2      3      4
A:   1.00   5.00   0.00   0.00   3.00
C:   5.00   0.00   0.00   0.00   0.00
G:   0.00   0.00   4.00   0.00   0.00
T:   0.00   1.00   2.00   6.00   3.00



# Use sites file from JASPAR

In [8]:
# Source of the MA0447.1 sites file (JASPAR): http://jaspar.genereg.net/cgi-bin/jaspar_db.pl?ID=MA0447.1&rm=present&collection=CORE

In [10]:
# Parse the JASPAR "sites" file into a motif. The context manager closes the
# file handle as soon as parsing finishes instead of leaving it open for the
# lifetime of the kernel.
with open('MA0447.1.sites') as handle:
    motif = motifs.read(handle, "sites")

In [11]:
# Print the count matrix of the motif read from the sites file
# (rows: nucleotides, columns: motif positions).
print(motif.counts)

        0      1      2      3      4      5      6      7      8      9
A:   9.00   0.00   1.00  29.00   0.00   5.00   0.00  30.00  33.00   0.00
C:   4.00   1.00   0.00   0.00  29.00   0.00   3.00   2.00   1.00   8.00
G:  18.00   1.00   2.00   4.00   0.00  30.00   1.00   1.00   0.00   4.00
T:   4.00  33.00  32.00   2.00   6.00   0.00  31.00   2.00   1.00  23.00



# Gibbs sampling 

In [15]:
from Bio.Seq import Seq
import random
# Input DNA sequences for the Gibbs sampling example.
seqs = [
    Seq(dna)
    for dna in (
        'GTCGATCGATCGTACGTACGTACGTACGATGCTAGCTACGTACC',
        'GGTTCGAGTCGAGCAAGAGCTAGCTAGCGACGTACTAC',
        'GCTGATCATGCTAGCGCGTAGCTACGATCGTACGTACGATGAGCTAGCTACGTCTACGTACGTGCACA',
    )
]


In [21]:
# Motif width: every candidate instance is a 7-mer.
K = 7

# Fix the seed so the sampled start offset is the same on every run.
random.seed(1)

# Draw a single start offset from 0 to 20, both ends inclusive.
j = random.randint(0, 20)
j

4

In [22]:
# Every sequence is cut at the same offset, so the K-wide window is built once.
window = slice(j, j + K)

instances = []
for seq in seqs:
    # Take the K-mer starting at position j of this sequence.
    instances.append(seq[window])

* From each sequence we extract one 7-mer, starting at the same offset `j`.

In [23]:
# Show the extracted 7-mers, one per input sequence.
print(instances)

[Seq('ATCGATC'), Seq('CGAGTCG'), Seq('ATCATGC')]


* Now we create a motif from these three 7-mers.

In [24]:
# Build a motif from the extracted 7-mers. This rebinds `motif`, so the
# motif read from the sites file is no longer reachable under that name.
motif = motifs.create(instances)

In [25]:
# Print the degenerate consensus of the sampled 7-mers (IUPAC ambiguity codes).
print(motif.degenerate_consensus)

MKMRWBS


In [27]:
# Print the count matrix of the sampled 7-mers
# (rows: nucleotides, columns: motif positions).
print(motif.counts)

        0      1      2      3      4      5      6
A:   2.00   0.00   1.00   1.00   1.00   0.00   0.00
C:   1.00   0.00   2.00   0.00   0.00   1.00   2.00
G:   0.00   1.00   0.00   2.00   0.00   1.00   1.00
T:   0.00   2.00   0.00   0.00   2.00   1.00   0.00



* We can also compute the position-specific scoring matrix (PSSM) of the motif.

In [31]:
# Position-specific scoring matrix of the motif. With the three 7-mers used
# here the entries are log2-odds against a uniform 0.25 background
# (e.g. log2((2/3) / 0.25) ~= 1.42). Bases that never occur at a position
# have a zero count and therefore score -inf.
# NOTE(review): setting pseudocounts on the motif should avoid the -inf
# entries; confirm against the Biopython documentation before relying on it.
weight_matrix = motif.pssm
print(weight_matrix)

        0      1      2      3      4      5      6
A:   1.42   -inf   0.42   0.42   0.42   -inf   -inf
C:   0.42   -inf   1.42   -inf   -inf   0.42   1.42
G:   -inf   0.42   -inf   1.42   -inf   0.42   0.42
T:   -inf   1.42   -inf   -inf   1.42   0.42   -inf

