In [None]:
'''
Goal: Starting from an Excel sheet with two gene lists (oxytocin pathway and calcium signaling pathway), create a volcano plot for each pathway for each option, and a heatmap for each pathway using these two lists.
Author: Carsten Knutsen
Date: March 01 2023
conda_env: bulk_rnaseq
'''

In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import os


In [None]:
# Common root of the project data; both paths below are spelled relative to it
# so the shared prefix is written only once.
_DATA_ROOT = '/home/carsten/alvira_bioinformatics/myometrium/data'

# Excel workbook that is read as the differential-expression table.
DEG_FILE = (
    f'{_DATA_ROOT}/2302_bulk_sequencing'
    '/03.Result_X202SC22123874-Z01-F001_Homo_sapiens'
    '/Result_X202SC22123874-Z01-F001'
    '/4.Differential/1.deglist/REDvsGD/REDvsGD_deg.xlsx'
)
# Directory used as the prefix of every saved figure path.
OUTPUT = f'{_DATA_ROOT}/230301_daiana_grant/output'

In [None]:
# Pathway gene lists as supplied (repeated symbols included). Immediately after
# the literal each list is collapsed to its unique symbols in sorted order.
gene_dict = {
    'Oxytocin genes': [
        'CAMKK1', 'GUCY1B2', 'NPR1', 'PGF', 'PGF', 'PTGDS', 'PTGDS',
        'PTGES3P2', 'PTGES3P2', 'ACTA1', 'ACTC1', 'EGFL6', 'GNB1L', 'GNB4',
        'GNG4', 'GNPNAT1', 'GNPTG', 'HBEGF', 'KCNH7', 'KCNIP4', 'MAPK4',
        'MAPK8IP2', 'MAPKBP1',
        # 'MAPL2P8',# What gene is this supposed to be?
        'MYH13', 'MYH3', 'NPPA', 'NPPB', 'PLCB1', 'PLCD1', 'PRKAA2', 'PRKG1',
        'RGS2', 'RGS2', 'RGS22', 'RGS22', 'RGS7BP', 'RGS7BP', 'RHOB', 'RYR2',
        'TGFB1', 'TGFB2', 'TRPM5',
    ],
    'Calcium genes': [
        'ATP10D', 'ATP8B1', 'ATP8B2', 'EGR1', 'EGR2', 'EGR3', 'TNC', 'MYH13',
        'MYH3', 'MYO1B', 'MYO1E', 'ACTA1', 'ACTC1', 'TNNT1', 'BAHCC1', 'ATF3',
        'EGR1', 'GNG4', 'GUCY1B2', 'ITPRIPL2', 'NOS3', 'PDE4C', 'PGF', 'PLCB1',
        'PLCD1', 'PRKG1', 'PTGDS', 'RGS2', 'RYR2', 'TNNT1', 'ITPRIP',
    ],
}
# Only values are replaced (no keys added or removed), so iterating the dict
# while assigning into it is safe.
for key in gene_dict:
    gene_dict[key] = sorted({*gene_dict[key]})

In [None]:
# Read the DEG workbook, using the sheet column at position 1 as the row index.
deg_df = pd.read_excel(DEG_FILE, index_col=1)
# The columns at positions 1-10 of the loaded frame form the count table
# (presumably the ten per-sample count columns -- TODO confirm against the sheet).
sample_columns = deg_df.columns[1:11]
count_df = deg_df[sample_columns]


In [None]:
# Keep only the statistics needed for the volcano plots and drop rows where any
# of them is missing. Building the frame in one non-inplace chain ending in
# .copy() yields an independent DataFrame instead of a slice of deg_df, so the
# column assignments below neither raise SettingWithCopyWarning nor risk
# writing into a temporary object.
volcano_df = (
    deg_df[['pvalue', 'padj', 'log2FoldChange']]
    .dropna(how='any')
    .copy()
)
# Vectorised -log10 of the adjusted p-value (same values as an element-wise apply).
volcano_df['$-Log_{10}$(FDR)'] = -np.log10(volcano_df['padj'])
# Duplicate of log2FoldChange under a mathtext column name, used as the x-axis label.
volcano_df['$Log_{2}$(FC)'] = volcano_df['log2FoldChange']

def conditions(s, lfc_cutoff=0.5, padj_cutoff=0.1):
    """Classify one differential-expression record for volcano colouring.

    Parameters
    ----------
    s : mapping-like (e.g. a DataFrame row)
        Must provide ``'log2FoldChange'`` and ``'padj'``.
    lfc_cutoff : float, default 0.5
        Magnitude of log2 fold change that must be strictly exceeded.
    padj_cutoff : float, default 0.1
        Adjusted p-value that must be strictly undercut.

    Returns
    -------
    str
        ``'Upregulated'``, ``'Downregulated'`` or ``'NS'``.
    """
    # Comparisons with NaN are False, so a missing padj is reported as 'NS'.
    if not s['padj'] < padj_cutoff:
        return 'NS'
    if s['log2FoldChange'] > lfc_cutoff:
        return 'Upregulated'
    if s['log2FoldChange'] < -lfc_cutoff:
        return 'Downregulated'
    return 'NS'
# Tag every row as Upregulated / Downregulated / NS; this drives the point colours.
volcano_df['color'] = volcano_df.apply(conditions, axis=1)

# One genome-wide volcano plot per pathway, annotated with that pathway's genes.
for key in gene_dict.keys():
    fig, ax = plt.subplots(1, 1, figsize=(3, 4))

    sns.scatterplot(data=volcano_df,
                    y='$-Log_{10}$(FDR)',
                    x='$Log_{2}$(FC)',
                    hue='color',
                    hue_order=['Upregulated', 'NS', 'Downregulated'],
                    palette=['Red', 'grey', 'Blue'],
                    s=10,
                    linewidth=0,
                    ax=ax
                    )
    # volcano_df only holds rows with complete statistics, so a listed gene may
    # have no point to annotate. Skip those genes (and report them) instead of
    # aborting the whole loop with a KeyError.
    present = [gene for gene in gene_dict[key] if gene in volcano_df.index]
    skipped = [gene for gene in gene_dict[key] if gene not in volcano_df.index]
    if skipped:
        print(f'{key}: not labelled (absent from volcano_df): {", ".join(skipped)}')
    for gene in present:
        # Annotate on this figure's own axes rather than pyplot's "current" axes.
        ax.text(volcano_df.loc[gene, '$Log_{2}$(FC)'],
                volcano_df.loc[gene, '$-Log_{10}$(FDR)'],
                gene,
                size=8)
    ax.get_legend().remove()
    ax.set_title(key)
    fig.savefig(f'{OUTPUT}/volcano_{key}_fdr.png', dpi=300, bbox_inches="tight")

In [None]:
# Genome-wide volcano plot annotated with a hand-picked set of genes.
gene_ls = ['RYR2', 'TRPV4', 'OXTR']
fig, ax = plt.subplots(1, 1, figsize=(3, 4))

sns.scatterplot(data=volcano_df,
                y='$-Log_{10}$(FDR)',
                x='$Log_{2}$(FC)',
                hue='color',
                hue_order=['Upregulated', 'NS', 'Downregulated'],
                palette=['Red', 'grey', 'Blue'],
                s=10,
                linewidth=0,
                ax=ax
                )
# A requested gene may be missing from volcano_df; skip it (and say so) rather
# than failing with a KeyError.
skipped = [gene for gene in gene_ls if gene not in volcano_df.index]
if skipped:
    print(f'custom: not labelled (absent from volcano_df): {", ".join(skipped)}')
for gene in gene_ls:
    if gene in volcano_df.index:
        # Annotate on this figure's own axes rather than pyplot's "current" axes.
        ax.text(volcano_df.loc[gene, '$Log_{2}$(FC)'],
                volcano_df.loc[gene, '$-Log_{10}$(FDR)'],
                gene,
                size=8)
ax.get_legend().remove()
ax.set_title('')
# The file name previously interpolated `key`, a loop variable left over from an
# earlier cell, which labelled this plot with an unrelated pathway name and made
# the cell depend on hidden kernel state. The name is now self-contained.
fig.savefig(f'{OUTPUT}/volcano_custom_fdr.png', dpi=300, bbox_inches="tight")

In [None]:
# Preview the first 50 rows of the volcano table.
volcano_df.head(50)

In [None]:
# Volcano plots restricted to the genes of each pathway.
for key in gene_dict.keys():
    # .loc with a list of labels raises KeyError if any label is absent, so keep
    # only the pathway genes that actually have a row in volcano_df.
    pathway_genes = [gene for gene in gene_dict[key] if gene in volcano_df.index]
    skipped = [gene for gene in gene_dict[key] if gene not in volcano_df.index]
    if skipped:
        print(f'{key}: not plotted (absent from volcano_df): {", ".join(skipped)}')
    sub_df = volcano_df.loc[pathway_genes]
    fig, ax = plt.subplots(1, 1, figsize=(3, 4))

    sns.scatterplot(data=sub_df,
                    y='$-Log_{10}$(FDR)',
                    x='$Log_{2}$(FC)',
                    hue='color',
                    hue_order=['Upregulated', 'NS', 'Downregulated'],
                    palette=['Red', 'grey', 'Blue'],
                    s=10,
                    linewidth=0,
                    ax=ax
                    )
    for gene in pathway_genes:
        # Annotate on this figure's own axes rather than pyplot's "current" axes.
        ax.text(sub_df.loc[gene, '$Log_{2}$(FC)'],
                sub_df.loc[gene, '$-Log_{10}$(FDR)'],
                gene,
                size=8)
    # No legend is created when nothing was plotted; guard the removal.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()
    ax.set_title(key)
    fig.savefig(f'{OUTPUT}/volcano_{key}_fdr_sub.png', dpi=300, bbox_inches="tight")

In [None]:
# Display the differential-expression table for inspection.
deg_df

In [None]:
# Row z-scored, row-clustered heatmap of the count columns for each pathway.
for key in gene_dict.keys():
    # .loc with a list of labels raises KeyError if any label is absent, so keep
    # only the pathway genes that have a row in the count table.
    pathway_genes = [gene for gene in gene_dict[key] if gene in count_df.index]
    skipped = [gene for gene in gene_dict[key] if gene not in count_df.index]
    if skipped:
        print(f'{key}: not in count table, left out: {", ".join(skipped)}')
    # Explicit copy so that renaming the columns can never touch count_df.
    heatmap_df = count_df.loc[pathway_genes].copy()
    heatmap_df.columns = ['PC-1','PC-2','PC-3','PC-4','PC-5','GC-1','GC-2','GC-3','GC-4','GC-5']
    # z_score=0 divides each row by its standard deviation; a constant row turns
    # into NaN, and the hierarchical clustering rejects non-finite values. Drop
    # such rows up front and report them.
    varying = heatmap_df.std(axis=1) > 0
    if not varying.all():
        flat = [str(label) for label in heatmap_df.index[~varying]]
        print(f'{key}: no variation across samples, left out: {", ".join(flat)}')
    heatmap_df = heatmap_df.loc[varying]
    g = sns.clustermap(data=heatmap_df,
                       z_score=0,
                       col_cluster=False,
                       square=True,
                       yticklabels=True,
                       cmap='RdBu_r',
                       center=0,
                       cbar_pos=(0.95,0.3,0.05,0.25),
                       figsize=(4, 8)
                       )
    g.ax_heatmap.set_ylabel("")
    g.ax_heatmap.set_yticklabels(g.ax_heatmap.get_ymajorticklabels(), fontsize = 8)
    g.ax_cbar.set_ylabel('Z-score')
    g.ax_heatmap.set_title(key)
    # Save through the grid object so the clustermap figure itself is written,
    # independent of which figure pyplot currently considers active.
    g.savefig(f'{OUTPUT}/clustermap_{key}.png', dpi=300, bbox_inches="tight")

In [None]:
# Row-clustered heatmap of log10(count + 1) for each pathway.
for key in gene_dict.keys():
    # .loc with a list of labels raises KeyError if any label is absent, so keep
    # only the pathway genes that have a row in the count table.
    pathway_genes = [gene for gene in gene_dict[key] if gene in count_df.index]
    skipped = [gene for gene in gene_dict[key] if gene not in count_df.index]
    if skipped:
        print(f'{key}: not in count table, left out: {", ".join(skipped)}')
    # The +1 pseudocount keeps zero counts finite under the log transform.
    heatmap_df = np.log10(count_df.loc[pathway_genes] + 1)
    heatmap_df.columns = ['PC-1','PC-2','PC-3','PC-4','PC-5','GC-1','GC-2','GC-3','GC-4','GC-5']
    g = sns.clustermap(data=heatmap_df,
                       col_cluster=False,
                       square=True,
                       yticklabels=True,
                       cmap='magma',
                       cbar_pos=(0.95,0.3,0.05,0.25),
                       figsize=(4, 8)
                       )
    g.ax_heatmap.set_ylabel("")
    g.ax_heatmap.set_yticklabels(g.ax_heatmap.get_ymajorticklabels(), fontsize = 8)
    g.ax_cbar.set_ylabel('Log10(MoR)')
    g.ax_heatmap.set_title(key)
    # Save through the grid object so the clustermap figure itself is written,
    # independent of which figure pyplot currently considers active.
    g.savefig(f'{OUTPUT}/clustermap_{key}_log.png', dpi=300, bbox_inches="tight")