In [None]:
%matplotlib inline

import pandas as pd
import gseapy as gp
from gseapy.plot import gseaplot, heatmap

In [None]:
ls -lah ../results/

In [None]:
def gene_exp_from_deseq(filename, prefix):
    """Build a GSEA-style expression table from a DESeq CSV export.

    Parameters
    ----------
    filename : path or file-like
        Anything ``pd.read_csv`` accepts. The table must contain a
        ``SYMBOL`` column.
    prefix : str
        Columns whose name starts with this prefix are kept as the
        expression columns.

    Returns
    -------
    pandas.DataFrame
        Columns ``NAME`` (upper-cased ``SYMBOL``), ``DESCRIPTION``
        (constant ``'NA'``), followed by the prefix-matched columns in
        their original order.
    """
    table = pd.read_csv(filename)
    sample_cols = [name for name in table.columns if name.startswith(prefix)]
    return (
        table[sample_cols + ['SYMBOL']]
        .rename(columns={'SYMBOL': 'NAME'})
        .assign(DESCRIPTION='NA')
        .reindex(columns=['NAME', 'DESCRIPTION'] + sample_cols)
        # NAME already exists, so this replaces it in place (position kept).
        .assign(NAME=lambda frame: frame['NAME'].str.upper())
    )

# `DMD` vs `CNTRL`

In [None]:
# Load the combined DESeq table and keep the sample columns named `da*`.
gene_exp = gene_exp_from_deseq(
    filename='../results/DMD_combined.genes.deseq.csv',
    prefix='da',
)
gene_exp

In [None]:
# Non-sample columns placed ahead of the expression columns.
gene_cols = ["NAME", "DESCRIPTION"]
# Sample columns for each group.
cntrl_col = ["da04", "da05", "da06", "da07"]
dmd_col = ["da11", "da12", "da13"]
# One phenotype label per sample column, in the order cntrl_col + dmd_col.
class_vector = ['CTRL'] * len(cntrl_col) + ['DMD'] * len(dmd_col)
class_vector

In [None]:
# Run GSEA against the 'KEGG_2019_Mouse' library on control vs. DMD samples.
# The sample columns are passed in the same order as the labels in
# `class_vector` (controls first, then DMD).
# GSEA significance comes from random permutations, so `seed` is fixed to
# keep p-values/FDR, and the CSV exported from this result, identical
# across re-runs.
gs_res = gp.gsea(data=gene_exp[gene_cols + cntrl_col + dmd_col],
                gene_sets='KEGG_2019_Mouse',
                cls=class_vector,
                max_size=500,
                min_size=15,
                method='s2n',
                processes=64,
                format='png',
                seed=42,
                verbose=True)

# Preview the most significant terms.
gs_res.res2d.sort_values(by='fdr', ascending=True).head()

In [None]:
# Enrichment plot for the first term in the result table.
# `terms` is reused by the heatmap cells that follow.
terms = gs_res.res2d.index
first_term = terms[0]
gseaplot(gs_res.ranking, term=first_term, **gs_res.results[first_term])

In [None]:
# Heatmap of the genes listed for the first row of the result table.
# `res2d` appears to be indexed by term name (see `terms`), so the row is
# picked by position with `.iloc`; plain `[0]` on such a Series relies on
# the integer positional fallback that pandas has deprecated.
genes = gs_res.res2d['genes'].iloc[0].split(";")
heatmap(df=gs_res.heatmat.loc[genes], z_score=0, title=terms[0], figsize=(12,9))

In [None]:
# Heatmap of the genes listed for the second row of the result table.
# `res2d` appears to be indexed by term name (see `terms`), so the row is
# picked by position with `.iloc`; plain `[1]` on such a Series relies on
# the integer positional fallback that pandas has deprecated.
genes = gs_res.res2d['genes'].iloc[1].split(";")
heatmap(df=gs_res.heatmat.loc[genes], z_score=0, title=terms[1], figsize=(12,9))

In [None]:
gs_res.res2d.sort_values(by='fdr', ascending=True).to_csv('../results/DMD_combined.gsea.csv')
!head -n 2 ../results/DMD_combined.gsea.csv

# `Dual` vs `CNTRL`

In [None]:
# Sample columns of the Dual group.
dual_col = ["da14", "da15", "da16"]
# One label per sample column: the 4 control columns first, then one 'Dual'
# label for each column in dual_col.
class_vector = ['CTRL'] * 4 + ['Dual'] * len(dual_col)
class_vector

In [None]:
# Run GSEA against the 'KEGG_2019_Mouse' library on control vs. Dual samples.
# The sample columns are passed in the same order as the labels in
# `class_vector` (controls first, then Dual).
# GSEA significance comes from random permutations, so `seed` is fixed to
# keep p-values/FDR, and the CSV exported from this result, identical
# across re-runs.
gs_res = gp.gsea(data=gene_exp[gene_cols + cntrl_col + dual_col],
                gene_sets='KEGG_2019_Mouse',
                cls=class_vector,
                max_size=500,
                min_size=15,
                method='s2n',
                processes=64,
                format='png',
                seed=42,
                verbose=True)

# Preview the most significant terms.
gs_res.res2d.sort_values(by='fdr', ascending=True).head()

In [None]:
# Enrichment plot for the first term in the result table.
# `terms` is reused by the heatmap cells that follow.
terms = gs_res.res2d.index
first_term = terms[0]
gseaplot(gs_res.ranking, term=first_term, **gs_res.results[first_term])

In [None]:
# Heatmap of the genes listed for the first row of the result table.
# `res2d` appears to be indexed by term name (see `terms`), so the row is
# picked by position with `.iloc`; plain `[0]` on such a Series relies on
# the integer positional fallback that pandas has deprecated.
genes = gs_res.res2d['genes'].iloc[0].split(";")
heatmap(df=gs_res.heatmat.loc[genes], z_score=0, title=terms[0], figsize=(12,9))

In [None]:
# Heatmap of the genes listed for the second row of the result table.
# `res2d` appears to be indexed by term name (see `terms`), so the row is
# picked by position with `.iloc`; plain `[1]` on such a Series relies on
# the integer positional fallback that pandas has deprecated.
genes = gs_res.res2d['genes'].iloc[1].split(";")
heatmap(df=gs_res.heatmat.loc[genes], z_score=0, title=terms[1], figsize=(12,9))

In [None]:
gs_res.res2d.sort_values(by='fdr', ascending=True).to_csv('../results/Dual_combined.gsea.csv')
!head -n 2 ../results/Dual.gsea.csv

# `hAUF` vs `CNTRL`

In [None]:
# Sample columns of the hAUF group.
hauf_col = ["da08", "da09", "da10"]
# One label per sample column: the 4 control columns first, then one 'hAUF'
# label for each column in hauf_col.
class_vector = ['CTRL'] * 4 + ['hAUF'] * len(hauf_col)
class_vector

In [None]:
# Run GSEA against the 'KEGG_2019_Mouse' library on control vs. hAUF samples.
# The sample columns are passed in the same order as the labels in
# `class_vector` (controls first, then hAUF).
# GSEA significance comes from random permutations, so `seed` is fixed to
# keep p-values/FDR, and the CSV exported from this result, identical
# across re-runs.
gs_res = gp.gsea(data=gene_exp[gene_cols + cntrl_col + hauf_col],
                gene_sets='KEGG_2019_Mouse',
                cls=class_vector,
                max_size=500,
                min_size=15,
                method='s2n',
                processes=64,
                format='png',
                seed=42,
                verbose=True)

# Preview the most significant terms.
gs_res.res2d.sort_values(by='fdr', ascending=True).head()

In [None]:
# Enrichment plot for the first term in the result table.
# `terms` is reused by the heatmap cells that follow.
terms = gs_res.res2d.index
first_term = terms[0]
gseaplot(gs_res.ranking, term=first_term, **gs_res.results[first_term])

In [None]:
# Heatmap of the genes listed for the first row of the result table.
# `res2d` appears to be indexed by term name (see `terms`), so the row is
# picked by position with `.iloc`; plain `[0]` on such a Series relies on
# the integer positional fallback that pandas has deprecated.
genes = gs_res.res2d['genes'].iloc[0].split(";")
heatmap(df=gs_res.heatmat.loc[genes], z_score=0, title=terms[0], figsize=(12,9))

In [None]:
# Heatmap of the genes listed for the second row of the result table.
# `res2d` appears to be indexed by term name (see `terms`), so the row is
# picked by position with `.iloc`; plain `[1]` on such a Series relies on
# the integer positional fallback that pandas has deprecated.
genes = gs_res.res2d['genes'].iloc[1].split(";")
heatmap(df=gs_res.heatmat.loc[genes], z_score=0, title=terms[1], figsize=(12,9))

In [None]:
gs_res.res2d.sort_values(by='fdr', ascending=True).to_csv('../results/hAUF_combined.gsea.csv')
!head -n 2 ../results/hAUF_combined.gsea.csv

# `DMD` vs `Dual`

In [None]:
# One label per sample column: three 'DMD' labels followed by three 'Dual'.
class_vector = [group for group in ('DMD', 'Dual') for _ in range(3)]
class_vector

In [None]:
# Run GSEA against the 'KEGG_2019_Mouse' library on DMD vs. Dual samples.
# The sample columns are passed in the same order as the labels in
# `class_vector` (DMD first, then Dual).
# GSEA significance comes from random permutations, so `seed` is fixed to
# keep p-values/FDR, and the CSV exported from this result, identical
# across re-runs.
gs_res = gp.gsea(data=gene_exp[gene_cols + dmd_col + dual_col],
                gene_sets='KEGG_2019_Mouse',
                cls=class_vector,
                max_size=500,
                min_size=15,
                method='s2n',
                processes=64,
                format='png',
                seed=42,
                verbose=True)

# Preview the most significant terms.
gs_res.res2d.sort_values(by='fdr', ascending=True).head()

In [None]:
# Enrichment plot for the first term in the result table.
# `terms` is reused by the heatmap cells that follow.
terms = gs_res.res2d.index
first_term = terms[0]
gseaplot(gs_res.ranking, term=first_term, **gs_res.results[first_term])

In [None]:
# Heatmap of the genes listed for the first row of the result table.
# `res2d` appears to be indexed by term name (see `terms`), so the row is
# picked by position with `.iloc`; plain `[0]` on such a Series relies on
# the integer positional fallback that pandas has deprecated.
genes = gs_res.res2d['genes'].iloc[0].split(";")
heatmap(df=gs_res.heatmat.loc[genes], z_score=0, title=terms[0], figsize=(12,9))

In [None]:
# Heatmap of the genes listed for the second row of the result table.
# `res2d` appears to be indexed by term name (see `terms`), so the row is
# picked by position with `.iloc`; plain `[1]` on such a Series relies on
# the integer positional fallback that pandas has deprecated.
genes = gs_res.res2d['genes'].iloc[1].split(";")
heatmap(df=gs_res.heatmat.loc[genes], z_score=0, title=terms[1], figsize=(12,9))

In [None]:
gs_res.res2d.sort_values(by='fdr', ascending=True).to_csv('../results/DMDvsDual_combined.gsea.csv')
!head -n 2 ../results/DMDvsDual_combined.gsea.csv