In [1]:
from pkg_resources import resource_filename

In [39]:
def describe_motif_set(motif_set):
    """Print a one-line-per-field summary of a motif set.

    The summary starts with the number of motifs (``len(motif_set)``) and is
    followed by the set's alphabet, version, strands, background and
    background source, each on its own labelled line.

    Parameters
    ----------
    motif_set
        Object that supports ``len()`` and exposes the attributes
        ``alphabet``, ``version``, ``strands``, ``background`` and
        ``background_source``.
    """
    print(f"# motifs: {len(motif_set)}")

    # (label shown to the reader, attribute read from the motif set)
    labelled_fields = (
        ("Alphabet", "alphabet"),
        ("Version", "version"),
        ("Strands", "strands"),
        ("Background", "background"),
        ("Background source", "background_source"),
    )
    for label, attribute in labelled_fields:
        print(f"{label}: {getattr(motif_set, attribute)}")

In [3]:
def print_all_motifs(motif_set):
    """Print identifier, name, consensus and PFM for every motif in a set.

    Each motif produces four labelled lines followed by one blank line.

    Parameters
    ----------
    motif_set
        Iterable of motif objects exposing ``identifier``, ``name``,
        ``consensus`` and ``pfm`` (the latter must have a ``shape``).
    """
    # NOTE: the "Identifer" label spelling is kept as-is so the printed text
    # stays identical to the outputs already recorded in this notebook.
    labelled_fields = (
        ("Identifer", "identifier"),
        ("Name", "name"),
        ("Consensus", "consensus"),
    )
    for entry in motif_set:
        for label, attribute in labelled_fields:
            print(f"{label}: {getattr(entry, attribute)}")
        matrix = entry.pfm
        print(f"PFM: {matrix} with shape {matrix.shape}")
        print()

# Read/write MEME

In [4]:
from motifdata import read_meme, write_meme

In [5]:
# Path to the sample MEME file stored under the motifdata package's
# resources directory.
meme_file = resource_filename('motifdata', 'resources/sample.meme')

In [6]:
# Parse the sample MEME file into a motif set and print its summary metadata.
# NOTE(review): the recorded output for this cell has no "# motifs" line even
# though describe_motif_set prints one; the helper's cell carries a later
# execution count (In [39]), so this output looks stale -- re-run to refresh.
motif_set = read_meme(meme_file)
describe_motif_set(motif_set)

Alphabet: ACGT
Version: 5
Strands: + -
Background: {'A': 0.25, 'C': 0.25, 'G': 0.25, 'T': 0.25}
Background source: None


In [7]:
# Print every motif of the set read from the MEME file.
print_all_motifs(motif_set)

Identifer: TATA
Name: TATA
Consensus: CCCCTATAAATACCCC
PFM: [[0.1275 0.3765 0.1195 0.3765]
 [0.1575 0.3985 0.199  0.2455]
 [0.249  0.303  0.197  0.251 ]
 [0.1235 0.655  0.0755 0.1455]
 [0.01   0.002  0.002  0.986 ]
 [0.968  0.     0.     0.032 ]
 [0.002  0.014  0.006  0.978 ]
 [0.992  0.     0.002  0.006 ]
 [0.653  0.012  0.002  0.333 ]
 [0.974  0.     0.008  0.018 ]
 [0.341  0.028  0.036  0.5955]
 [0.6955 0.0815 0.1195 0.1035]
 [0.1255 0.432  0.3165 0.1255]
 [0.291  0.418  0.175  0.1155]
 [0.263  0.3445 0.1755 0.2175]
 [0.307  0.3085 0.2365 0.1475]] with shape (16, 4)

Identifer: Inr
Name: Inr
Consensus: CTATTCATCA
PFM: [[0.131  0.467  0.14   0.262 ]
 [0.168  0.346  0.093  0.393 ]
 [0.41   0.23   0.075  0.285 ]
 [0.238  0.2895 0.1265 0.3455]
 [0.1355 0.2805 0.125  0.4585]
 [0.059  0.815  0.054  0.0725]
 [0.919  0.029  0.0145 0.038 ]
 [0.2015 0.2815 0.1345 0.382 ]
 [0.277  0.36   0.114  0.249 ]
 [0.417  0.183  0.1    0.3   ]] with shape (10, 4)

Identifer: POL006.1
Name: BREu
Consensus

In [8]:
# Write the motif set back out in MEME format. The destination resolves to a
# file inside the motifdata package's own resources directory.
meme_out_path = resource_filename('motifdata', 'resources/sample_out.meme')
write_meme(motif_set=motif_set, filename=meme_out_path)

Saved pfm in MEME format as: /cellar/users/aklie/projects/ML4GLand/MotifData/motifdata/resources/sample_out.meme


# Read/write HOMER

In [9]:
from motifdata import read_homer, write_homer

In [10]:
# Path to the sample HOMER .motifs file stored under the motifdata package's
# resources directory.
homer_file = resource_filename('motifdata', 'resources/sample.motifs')

In [11]:
# Parse the sample HOMER file into a motif set (rebinding motif_set) and
# print its summary metadata.
# NOTE(review): the recorded output for this cell has no "# motifs" line even
# though describe_motif_set prints one; the helper's cell carries a later
# execution count (In [39]), so this output looks stale -- re-run to refresh.
motif_set = read_homer(homer_file)
describe_motif_set(motif_set)

Alphabet: ACGT
Version: None
Strands: None
Background: {'A': 0.25, 'C': 0.25, 'G': 0.25, 'T': 0.25}
Background source: None


In [12]:
# Print every motif of the set read from the HOMER file.
print_all_motifs(motif_set)

Identifer: CGTRNAAARTGA
Name: ABF1/SacCer-Promoters/Homer
Consensus: CGTGCAAAATGA
PFM: [[0.001 0.962 0.036 0.001]
 [0.001 0.001 0.997 0.001]
 [0.036 0.001 0.001 0.962]
 [0.326 0.15  0.429 0.095]
 [0.188 0.294 0.28  0.239]
 [0.408 0.181 0.187 0.224]
 [0.42  0.126 0.243 0.212]
 [0.426 0.274 0.187 0.112]
 [0.535 0.001 0.463 0.001]
 [0.037 0.331 0.001 0.631]
 [0.001 0.001 0.905 0.093]
 [0.98  0.018 0.001 0.001]] with shape (12, 4)

Identifer: TCACGTGAYH
Name: Cbf1(bHLH)/Yeast-Cbf1-ChIP-Seq(GSE29506)/Homer
Consensus: TCACGTGACC
PFM: [[0.067 0.105 0.22  0.608]
 [0.004 0.989 0.006 0.001]
 [0.978 0.009 0.007 0.006]
 [0.007 0.931 0.001 0.061]
 [0.032 0.01  0.957 0.001]
 [0.001 0.005 0.005 0.989]
 [0.001 0.003 0.979 0.017]
 [0.79  0.116 0.062 0.032]
 [0.146 0.438 0.033 0.383]
 [0.231 0.34  0.15  0.279]] with shape (10, 4)

Identifer: CTYTCTYTCTCTCTC
Name: GAGA-repeat/SacCer-Promoters/Homer
Consensus: CTCTCTCTCTCTCTC
PFM: [[0.091 0.563 0.083 0.263]
 [0.083 0.075 0.064 0.778]
 [0.075 0.495 0.075 0

In [13]:
# Write the motif set back out in HOMER .motifs format. The destination
# resolves to a file inside the motifdata package's own resources directory.
homer_out_path = resource_filename('motifdata', 'resources/sample_out.motifs')
write_homer(motif_set=motif_set, filename=homer_out_path)

Saved pfms in .motifs format as: /cellar/users/aklie/projects/ML4GLand/MotifData/motifdata/resources/sample_out.motifs


# Read PFM files

In [14]:
# Path to the sample PFM file stored under the motifdata package's
# resources directory.
pfm_file = resource_filename('motifdata', 'resources/sample.pfm')

In [15]:
from motifdata import read_motifs

In [16]:
# Parse the sample PFM file into a motif set (rebinding motif_set) and print
# its summary metadata.
# transpose=True is passed through to read_motifs; presumably the file stores
# one row per nucleotide rather than per position -- TODO confirm.
# NOTE(review): the recorded output for this cell has no "# motifs" line even
# though describe_motif_set prints one, so it looks stale -- re-run to refresh.
motif_set = read_motifs(filename=pfm_file, transpose=True)
describe_motif_set(motif_set)

Alphabet: None
Version: None
Strands: None
Background: None
Background source: None


In [17]:
# Print every motif of the set read from the PFM file.
print_all_motifs(motif_set)

Identifer: >MA0037.3
Name: GATA3
Consensus: AGATAAGA
PFM: [[0.502      0.226      0.001      0.271     ]
 [0.         0.         1.         0.        ]
 [1.         0.         0.         0.        ]
 [0.         0.         0.         1.        ]
 [0.91491491 0.002002   0.         0.08308308]
 [0.862      0.015      0.076      0.047     ]
 [0.114      0.301      0.476      0.109     ]
 [0.423      0.188      0.321      0.068     ]] with shape (8, 4)

Identifer: >MA0098.1
Name: ETS1
Consensus: CTTCCG
PFM: [[0.1   0.4   0.1   0.4  ]
 [0.425 0.    0.    0.575]
 [0.    0.025 0.    0.975]
 [0.    0.975 0.025 0.   ]
 [0.    0.975 0.    0.025]
 [0.125 0.075 0.425 0.375]] with shape (6, 4)



# Load from JASPAR

In [18]:
from motifdata import load_jaspar

In [40]:
# Load the CORE collection of the JASPAR2022 release as a motif set
# (rebinding motif_set).
motif_set = load_jaspar(collection="CORE", release="JASPAR2022")

In [41]:
# Attach a DNA alphabet and a uniform background (0.25 per nucleotide) to the
# motif set; both are derived from the same alphabet string.
DNA_ALPHABET = "ACGT"
motif_set.alphabet = DNA_ALPHABET
motif_set.background = dict.fromkeys(DNA_ALPHABET, 0.25)

In [42]:
# Summarise the JASPAR motif set after the alphabet/background assignment.
describe_motif_set(motif_set)

# motifs: 1956
Alphabet: ACGT
Version: None
Strands: None
Background: {'A': 0.25, 'C': 0.25, 'G': 0.25, 'T': 0.25}
Background source: None


In [43]:
# Preview only the first motif of the set instead of printing all of them.
# This reuses print_all_motifs rather than repeating its body, so the preview
# format cannot drift from the helper. `motif` stays bound to the first motif,
# as it was after the original loop-and-break.
motif = next(iter(motif_set), None)
if motif is not None:  # an empty motif set prints nothing
    print_all_motifs([motif])

Identifer: MA0004.1
Name: Arnt
Consensus: CACGTG
PFM: [[0.2  0.8  0.   0.  ]
 [0.95 0.   0.05 0.  ]
 [0.   1.   0.   0.  ]
 [0.   0.   1.   0.  ]
 [0.   0.   0.   1.  ]
 [0.   0.   1.   0.  ]] with shape (6, 4)



In [44]:
# Export the JASPAR motif set in MEME format. The destination resolves to a
# file inside the motifdata package's own resources directory.
jaspar_meme_out_path = resource_filename('motifdata', 'resources/jaspar_out.meme')
write_meme(motif_set=motif_set, filename=jaspar_meme_out_path)

Saved pfm in MEME format as: /cellar/users/aklie/projects/ML4GLand/MotifData/motifdata/resources/jaspar_out.meme


# DONE!

---

# Scratch