In [7]:
from pyscenic.utils import load_motifs
import operator as op
from IPython.display import HTML, display
import pandas as pd
import scanpy as sc
import loompy as lp

BASE_URL = "http://motifcollections.aertslab.org/v9/logos/"
COLUMN_NAME_LOGO = "MotifLogo"
COLUMN_NAME_MOTIF_ID = "MotifID"
# helper functions (not yet integrated into pySCENIC):

COLUMN_NAME_TARGETS = "TargetGenes"

In [2]:
def display_logos(df: pd.DataFrame, top_target_genes: int = 3, base_url: str = BASE_URL):
    """
    :param df:
    :param base_url:
    """
    # Make sure the original dataframe is not altered.
    df = df.copy()
    
    # Add column with URLs to sequence logo.
    def create_url(motif_id):
        return '<img src="{}{}.png" style="max-height:124px;"></img>'.format(base_url, motif_id)
    df[("Enrichment", COLUMN_NAME_LOGO)] = list(map(create_url, df.index.get_level_values(COLUMN_NAME_MOTIF_ID)))
    
    # Truncate TargetGenes.
    def truncate(col_val):
        return sorted(col_val, key=op.itemgetter(1))[:top_target_genes]
    df[("Enrichment", COLUMN_NAME_TARGETS)] = list(map(truncate, df[("Enrichment", COLUMN_NAME_TARGETS)]))
    
    MAX_COL_WIDTH = pd.get_option('display.max_colwidth')
    pd.set_option('display.max_colwidth', 200)
    display(HTML(df.head().to_html(escape=False)))
    pd.set_option('display.max_colwidth', MAX_COL_WIDTH)

In [3]:
df_motifs = load_motifs('loom/aging-11/BC/reg.csv')

In [4]:
selected_motifs = ['PAX5','TCF3','EBF1']
df_motifs_sel = df_motifs.iloc[ [ True if x in selected_motifs else False for x in df_motifs.index.get_level_values('TF') ] ,:]

In [5]:
df_motifs_sel

Unnamed: 0_level_0,Unnamed: 1_level_0,Enrichment,Enrichment,Enrichment,Enrichment,Enrichment,Enrichment,Enrichment,Enrichment
Unnamed: 0_level_1,Unnamed: 1_level_1,AUC,Annotation,Context,MotifSimilarityQvalue,NES,OrthologousIdentity,RankAtMax,TargetGenes
TF,MotifID,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
PAX5,cisbp__M4475,0.106258,gene is annotated for similar motif dbcorrdb__...,"(weight>75.0%, hg38__refseq-r80__10kb_up_and_d...",0.000003,4.825522,1.0,540,"[(GPR18, 1.6735250260866281), (SMIM14, 0.88342..."
PAX5,cisbp__M4489,0.111169,gene is annotated for similar motif dbcorrdb__...,"(weight>75.0%, hg38__refseq-r80__10kb_up_and_d...",0.000002,5.189401,1.0,2681,"[(GPR18, 1.6735250260866281), (SMIM14, 0.88342..."
PAX5,dbcorrdb__IKZF1__ENCSR000EUJ_1__m1,0.092705,motif similar to dbcorrdb__PAX5__ENCSR000BHJ_1...,"(weight>75.0%, hg38__refseq-r80__10kb_up_and_d...",0.000000,3.821436,1.0,1568,"[(GPR18, 1.6735250260866281), (LIMD2, 0.883426..."
PAX5,homer__AAAGRGGAAGTG_SpiB,0.091839,gene is annotated for similar motif dbcorrdb__...,"(weight>75.0%, hg38__refseq-r80__10kb_up_and_d...",0.000071,3.757266,1.0,1155,"[(LYN, 1.6735250260866281), (SYK, 0.8834268482..."
PAX5,dbcorrdb__SPI1__ENCSR000BGW_1__m1,0.089499,gene is annotated for similar motif dbcorrdb__...,"(weight>75.0%, hg38__refseq-r80__10kb_up_and_d...",0.000405,3.583907,1.0,2033,"[(LIMD2, 1.6735250260866281), (MTPN, 0.8834268..."
PAX5,...,...,...,...,...,...,...,...,...
PAX5,dbcorrdb__SPI1__ENCSR000BGQ_1__m1,0.088096,motif similar to dbcorrdb__PAX5__ENCSR000BHJ_1...,"(hg38__refseq-r80__10kb_up_and_down_tss.mc9nr,...",0.000012,3.914346,1.0,1560,"[(ALOX5AP, 1.6735250260866281), (GPR18, 0.8834..."
PAX5,hocomoco__IRF8_HUMAN.H11MO.0.B,0.097240,gene is annotated for similar motif dbcorrdb__...,"(hg38__refseq-r80__10kb_up_and_down_tss.mc9nr,...",0.000003,4.657880,1.0,710,"[(ZEB2, 1.6735250260866281), (GPR18, 0.8834268..."
PAX5,dbcorrdb__SPI1__ENCSR000BIJ_1__m1,0.091386,gene is annotated for similar motif dbcorrdb__...,"(hg38__refseq-r80__10kb_up_and_down_tss.mc9nr,...",0.000008,4.181842,1.0,2331,"[(ALOX5AP, 1.6735250260866281), (GPR18, 0.8834..."
PAX5,dbcorrdb__EP300__ENCSR000DZG_1__m1,0.081365,motif similar to dbcorrdb__PAX5__ENCSR000BHJ_1...,"(hg38__refseq-r80__10kb_up_and_down_tss.mc9nr,...",0.000000,3.367034,1.0,2134,"[(GPR18, 1.6735250260866281), (ZEB2, 0.8834268..."


In [8]:
#display_logos(df_motifs.head())
display_logos( df_motifs_sel.sort_values([('Enrichment','NES')], ascending=False).head(9))

Unnamed: 0_level_0,Unnamed: 1_level_0,Enrichment,Enrichment,Enrichment,Enrichment,Enrichment,Enrichment,Enrichment,Enrichment,Enrichment
Unnamed: 0_level_1,Unnamed: 1_level_1,AUC,Annotation,Context,MotifSimilarityQvalue,NES,OrthologousIdentity,RankAtMax,TargetGenes,MotifLogo
TF,MotifID,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
PAX5,cisbp__M4489,0.106027,"gene is annotated for similar motif dbcorrdb__PAX5__ENCSR000BHJ_1__m2 ('PAX5 (ENCSR000BHJ-1, motif 2)'; q-value = 1.72e-06)","(hg38__refseq-r80__10kb_up_and_down_tss.mc9nr, activating, top50perTarget)",2e-06,5.372377,1.0,1425,"[(PLAC8, 0.0011768970297239732), (LAPTM5, 0.1250222349227719), (CXCR4, 0.1513492750572744)]",
PAX5,cisbp__M4489,0.111169,"gene is annotated for similar motif dbcorrdb__PAX5__ENCSR000BHJ_1__m2 ('PAX5 (ENCSR000BHJ-1, motif 2)'; q-value = 1.72e-06)","(weight>75.0%, hg38__refseq-r80__10kb_up_and_down_tss.mc9nr, activating)",2e-06,5.189401,1.0,2681,"[(MEF2C, 0.43093183532262497), (COL19A1, 0.4340176830003246), (BAZ2A, 0.4493684229996896)]",
PAX5,homer__AAAGRGGAAGTG_SpiB,0.103005,"gene is annotated for similar motif dbcorrdb__PAX5__ENCSR000BHJ_1__m2 ('PAX5 (ENCSR000BHJ-1, motif 2)'; q-value = 7.11e-05)","(hg38__refseq-r80__10kb_up_and_down_tss.mc9nr, activating, top50perTarget)",7.1e-05,5.126659,1.0,1241,"[(TAGAP, 0.0011768970297239732), (MANBA, 0.1250222349227719), (ALOX5AP, 0.1513492750572744)]",
PAX5,dbcorrdb__PAX5__ENCSR000BJI_1__m1,0.186335,gene is directly annotated,"(hg38__refseq-r80__10kb_up_and_down_tss.mc9nr, top5perTarget, activating)",0.0,5.061688,1.0,1157,"[(SMIM14, 0.08699154186886034), (LYST, 0.19399471310044436), (MCTP2, 0.5451611881504062)]",
PAX5,factorbook__PU1,0.101168,"gene is annotated for similar motif dbcorrdb__PAX5__ENCSR000BHJ_1__m2 ('PAX5 (ENCSR000BHJ-1, motif 2)'; q-value = 6.12e-08)","(hg38__refseq-r80__10kb_up_and_down_tss.mc9nr, activating, top50perTarget)",0.0,4.977292,1.0,1247,"[(APLP2, 0.0011768970297239732), (POLD4, 0.1250222349227719), (HLA-DMB, 0.1513492750572744)]",
