In [1]:
from pkg_resources import resource_filename

In [14]:
import numpy as np
import motifdata as md

In [20]:
def describe_motif_set(motif_set):
    """Print a short, line-per-field summary of a motif set.

    The summary covers the number of motifs (``len(motif_set)``) followed by
    the ``alphabet``, ``version``, ``strands``, ``background`` and
    ``background_source`` attributes, in that order. Nothing is returned; the
    report goes to stdout.
    """
    report_lines = (
        f"# motifs: {len(motif_set)}",
        f"Alphabet: {motif_set.alphabet}",
        f"Version: {motif_set.version}",
        f"Strands: {motif_set.strands}",
        f"Background: {motif_set.background}",
        f"Background source: {motif_set.background_source}",
    )
    for report_line in report_lines:
        print(report_line)

In [21]:
def print_all_motifs(motif_set):
    """Print the identifier, name, consensus and PFM of every motif in a set.

    Parameters
    ----------
    motif_set : iterable
        Iterated directly. Each item must expose ``identifier``, ``name``,
        ``consensus`` and ``pfm`` attributes, and ``pfm`` must itself have a
        ``shape`` attribute.

    Returns
    -------
    None
        All information is written to stdout, with one blank line after each
        motif. An empty ``motif_set`` prints nothing.
    """
    for motif in motif_set:
        # The label used to be misspelled "Identifer".
        print(f"Identifier: {motif.identifier}")
        print(f"Name: {motif.name}")
        print(f"Consensus: {motif.consensus}")
        print(f"PFM: {motif.pfm} with shape {motif.pfm.shape}")
        print()

# Convert a MotifSet to Biopython motifs and a kernel

In [6]:
# Resolve the path of the sample MEME file shipped under the motifdata
# package's "resources" directory.
# NOTE(review): pkg_resources is deprecated by setuptools; consider
# importlib.resources once the minimum supported Python version allows it.
meme_file = resource_filename("motifdata", "resources/sample.meme")

In [7]:
# Read the sample MEME file with motifdata; the result is displayed in the
# next cell.
motif_set = md.read_meme(meme_file)

In [8]:
# Bare expression so the notebook shows the object's repr.
motif_set

MotifSet with 6 motifs

In [9]:
# Convert the motif set for use with Biopython; the recorded output is a list
# of Bio.motifs.jaspar.Motif objects.
md.to_biopython(motif_set)

[<Bio.motifs.jaspar.Motif at 0x15552a838490>,
 <Bio.motifs.jaspar.Motif at 0x15552bc95670>,
 <Bio.motifs.jaspar.Motif at 0x15552a838520>,
 <Bio.motifs.jaspar.Motif at 0x15552a838310>,
 <Bio.motifs.jaspar.Motif at 0x15552a838220>,
 <Bio.motifs.jaspar.Motif at 0x15552bf2a3d0>]

In [13]:
# Convert the motif set into a kernel of the requested size; the recorded
# output is a tensor.
# NOTE(review): size is presumably (n_filters, alphabet_size, kernel_length);
# confirm against the motifdata documentation.
md.to_kernel(motif_set, size=(256, 4, 13))

tensor([[[ 5.1000e-01,  6.3000e-01,  9.9600e-01,  ...,  1.3640e+00,
           2.7820e+00,  5.0200e-01],
         [ 1.5060e+00,  1.5940e+00,  1.2120e+00,  ...,  1.1200e-01,
           3.2600e-01,  1.7280e+00],
         [ 4.7800e-01,  7.9600e-01,  7.8800e-01,  ...,  1.4400e-01,
           4.7800e-01,  1.2660e+00],
         [ 1.5060e+00,  9.8200e-01,  1.0040e+00,  ...,  2.3820e+00,
           4.1400e-01,  5.0200e-01]],

        [[ 5.2400e-01,  6.7200e-01,  1.6400e+00,  ..., -3.6482e-02,
          -1.0093e-02,  4.1355e-02],
         [ 1.8680e+00,  1.3840e+00,  9.2000e-01,  ...,  2.4633e-02,
           2.8663e-02, -1.9624e-02],
         [ 5.6000e-01,  3.7200e-01,  3.0000e-01,  ...,  6.5665e-03,
           2.7774e-02, -3.8307e-02],
         [ 1.0480e+00,  1.5720e+00,  1.1400e+00,  ..., -3.1951e-02,
           2.7186e-02,  3.9633e-02]],

        [[ 2.1818e+00,  5.4546e-01,  1.8182e-01,  ...,  1.7756e-02,
          -3.1422e-02, -3.3219e-02],
         [ 1.2727e+00,  1.4545e+00,  2.5455e+00,  .

# Turn a kernel into a MotifSet

In [15]:
# Create a batch of dummy random position frequency matrices (PFMs) with shape
# (10, 4, 13), normalized along axis 1 (the size-4 axis) so that the four
# values at every position sum to 1.
# A seeded generator is used so that re-running the notebook reproduces the
# same matrices (and therefore the same outputs below).
SEED = 0
rng = np.random.default_rng(SEED)
pfms = rng.random((10, 4, 13))
pfms = pfms / pfms.sum(axis=1, keepdims=True)

In [22]:
# Build a motif set from the normalized array, then print its summary.
motif_set = md.from_kernel(pfms)
describe_motif_set(motif_set)

# motifs: 10
Alphabet: ACGT
Version: 5
Strands: + -
Background: None
Background source: None


In [23]:
# Print the details of every motif in the set built from the random array.
print_all_motifs(motif_set)

Identifer: filter_0
Name: filter_0
Consensus: TATTGTGGGGCCT
PFM: [[0.22610029 0.27962686 0.21137976 0.28289309]
 [0.40195002 0.30503644 0.108327   0.18468653]
 [0.40408357 0.05074357 0.06754875 0.47762411]
 [0.23895361 0.29700062 0.10161464 0.36243113]
 [0.21949819 0.27022422 0.28440474 0.22587285]
 [0.34930191 0.16656206 0.12790778 0.35622825]
 [0.31203591 0.19334074 0.33238604 0.1622373 ]
 [0.38090744 0.01062224 0.47884038 0.12962994]
 [0.20905434 0.2090581  0.44762271 0.13426486]
 [0.32923509 0.25417944 0.345152   0.07143346]
 [0.21707847 0.40479263 0.04253592 0.33559298]
 [0.15210358 0.36653929 0.25732524 0.22403189]
 [0.12806442 0.29583317 0.26924801 0.3068544 ]] with shape (13, 4)

Identifer: filter_1
Name: filter_1
Consensus: GCAGGTCACGGAA
PFM: [[0.08021398 0.1915247  0.42100144 0.30725988]
 [0.14771469 0.35009898 0.29466972 0.20751661]
 [0.31765414 0.28341791 0.21929399 0.17963396]
 [0.1834894  0.0572398  0.51603988 0.24323092]
 [0.27828029 0.28374887 0.37807133 0.05989951]
 [0

# DONE!

---

# Scratch