In [None]:
import pandas as pd
import re
import numpy as np
import rpy2.robjects as ro
from rpy2.robjects import pandas2ri
from rpy2.robjects.conversion import localconverter
import glob

In [None]:
def limit_go_name(s, maxlen=45):
    """Shorten a label so it is at most ``maxlen`` characters long.

    Parameters
    ----------
    s : str
        Label to shorten.
    maxlen : int, optional
        Maximum length of the returned string (default 45).

    Returns
    -------
    str
        ``s`` unchanged when it already fits; otherwise the leading part of
        ``s`` followed by ``'...'``, with a total length of ``maxlen``.
    """
    if len(s) <= maxlen:
        return s

    ellipsis = '...'
    if maxlen <= len(ellipsis):
        # No room for any text next to the marker. Without this guard
        # `maxlen - 3` turns negative and the slice below would keep almost
        # the whole string, producing a result longer than `maxlen`.
        return ellipsis[:max(maxlen, 0)]
    return s[:maxlen - len(ellipsis)] + ellipsis

def go_df_for_plotting(df, name):
    """Derive the table used for plotting from a GO enrichment result frame.

    Only rows whose ``enrichment`` column equals ``'e'`` are kept (presumably
    the "enriched" terms -- confirm against the tool that wrote the table).
    Four columns are added to a copy of those rows:

    * ``Log10``      -- ``-log10`` of ``p_fdr_bh``
    * ``Feature``    -- the ``name`` argument, repeated on every row
    * ``prettyname`` -- the ``name`` column passed through ``limit_go_name``
    * ``OddsRatio``  -- the ``ratio_in_study`` fraction divided by the
      ``ratio_in_pop`` fraction, both parsed from ``'a/b'`` strings

    Returns the augmented copy without the ``study_items`` column, sorted by
    ascending ``p_uncorrected``. The input frame is not modified.
    """
    def _fraction_quotient(study_ratio, pop_ratio):
        # Both arguments look like 'a/b'; return (a/b of study) / (a/b of pop).
        study_num, study_den = study_ratio.split('/')
        pop_num, pop_den = pop_ratio.split('/')
        return (int(study_num) / int(study_den)) / (int(pop_num) / int(pop_den))

    is_enriched = df['enrichment'] == 'e'
    enriched = df[is_enriched].copy()

    enriched['Log10'] = -np.log10(enriched['p_fdr_bh'])
    enriched['Feature'] = name
    enriched['prettyname'] = enriched['name'].apply(limit_go_name)
    enriched['OddsRatio'] = [
        _fraction_quotient(study_ratio, pop_ratio)
        for study_ratio, pop_ratio in zip(enriched['ratio_in_study'],
                                          enriched['ratio_in_pop'])
    ]

    without_items = enriched.drop(columns=['study_items'])
    return without_items.sort_values('p_uncorrected')
    


def plot_go(df, name, filename):
    """Draw a GO enrichment dot plot with ggplot2 (via rpy2) and save it.

    The table returned by ``go_df_for_plotting(df, name)`` is sorted by
    ``NS`` and then ``p_uncorrected``, and its first 15 rows are plotted:
    x is ``Log10``, y is ``prettyname``, point size is ``OddsRatio``, with a
    dotted vertical line at ``-log10(0.05)``.

    Parameters
    ----------
    df : pandas.DataFrame
        GO enrichment table accepted by ``go_df_for_plotting``; it must also
        carry an ``NS`` column.
    name : str
        Label stored in the ``Feature`` column and used as the facet title.
    filename : str
        Output path without extension; ``.pdf``, ``.svg`` and ``.png`` files
        are written with ``ggsave(width=10, height=6)``.

    Side effects: assigns ``r_godf`` and ``r_filename`` in R's global
    environment and writes the three image files. When no row survives the
    filtering, nothing is plotted and a message is printed instead.
    """
    godf = go_df_for_plotting(df, name).sort_values(['NS', 'p_uncorrected']).head(15)
    if godf.empty:
        # facet_grid() aborts on a zero-row data frame, so there is nothing
        # to draw; skip instead of failing inside R.
        print("No enriched GO terms for %s; skipping %s" % (name, filename))
        return

    with localconverter(ro.default_converter + pandas2ri.converter):
        r_godf = ro.conversion.py2rpy(godf)
    ro.globalenv['r_godf'] = r_godf
    ro.globalenv['r_filename'] = filename
    ro.r("""

library(ggplot2)
df1 = r_godf

# Order the y axis by significance. unique() is required because shortened
# labels can coincide and factor() rejects duplicated levels.
df1$prettyname <- factor(df1$prettyname, levels = unique(df1$prettyname[order(df1$Log10, df1$p_uncorrected, df1$name, decreasing=FALSE)]))

gg1 = (ggplot(df1, aes(x=Log10, y=prettyname, size=OddsRatio)) + 
       geom_point(shape=18, col='#f8766d') + 
       labs(y='', x='-log10(FDR)') + theme_bw() +
       facet_grid('.~Feature') +
       geom_vline(xintercept = -log10(0.05), linetype = "dotted") +
       theme(axis.text=element_text(size=14),
             axis.title=element_text(size=18, face='bold'),
             strip.text=element_text(size=18, face='bold')
             ))

print(r_filename)

ggsave(file=paste(sep='', r_filename, '.pdf'), plot=gg1, width=10, height=6)
ggsave(file=paste(sep='', r_filename, '.svg'), plot=gg1, width=10, height=6)
ggsave(file=paste(sep='', r_filename, '.png'), plot=gg1, width=10, height=6)

    """)
    


In [None]:
# Render one GO enrichment plot per module result table.
# sorted(): glob returns paths in arbitrary order; sorting keeps runs reproducible.
for fn in sorted(glob.glob('../../_m/module*_go_enrichment.tsv')):
    m = re.search(r'module(\d+)', fn)
    if m is None:
        # The glob's '*' also matches file names that carry no module number.
        print('skipping %s: no module number in file name' % fn)
        continue
    # group(1) is the captured digit string; groups(1) would return a tuple
    # (its argument is the default for unmatched groups, not a group index).
    module_number = m.group(1)
    name = "Module %s" % module_number
    filename = 'module%s_go_enrichment' % module_number
    df = pd.read_csv(fn, sep="\t")
    plot_go(df, name, filename)
    print(filename)