In [None]:
import pandas as pd
import numpy as np
import gseapy as gp
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy.stats import wilcoxon, ttest_ind, ttest_rel, mannwhitneyu
from statsmodels.stats.multitest import multipletests
from statannotations.Annotator import Annotator
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['figure.dpi'] = 300
from scipy.stats import pearsonr, spearmanr

In [None]:
def prepare_volcano(df_tur, df_cys, gene_log = False):
    """Build a volcano-plot table comparing two paired sample sets, feature by feature.

    Parameters
    ----------
    df_tur, df_cys : pandas.DataFrame
        Samples in rows, features in columns. Rows are paired through their
        index labels; only labels present in both frames enter the test.
    gene_log : bool, default False
        When true, values are back-transformed with ``2**x - 1`` before
        averaging. NOTE: as in the original implementation this is only applied
        when no feature changes sign between the two frames.

    Returns
    -------
    pandas.DataFrame
        Indexed by feature, with columns ``log_FC`` (log2 of mean(df_cys) /
        mean(df_tur)), ``pvalue`` (Wilcoxon signed-rank test) and ``pv_cor``
        (Benjamini-Hochberg adjusted p-value), sorted by ``pvalue``.
        Features with a missing ``log_FC`` or ``pvalue`` are dropped.
    """
    meant = df_tur.mean().astype(float)
    meanc = df_cys.mean().astype(float)
    mean_product = meant.multiply(meanc)
    # A negative product means the two means have opposite signs, so the
    # ratio (and its log2) is undefined for that feature.
    sign_ch = list(mean_product[mean_product < 0].index)
    if len(sign_ch) > 0:
        print("The following scores have changed their sign after chemo:\n", sign_ch)
        meanc = meanc.drop(sign_ch)
        meant = meant.drop(sign_ch)
    elif gene_log:
        meant = (2**df_tur - 1).mean()
        meanc = (2**df_cys - 1).mean()
    logfc = np.log2(meanc.loc[meant.index].divide(meant))
    logfc.name = 'log_FC'

    a_list = []
    for ph in meant.index:
        try:
            u = df_tur.loc[:, ph]
            v = df_cys.loc[:, ph]
            # Keep only samples present in both frames, in the same order.
            newi = u.index.intersection(v.index)
            u = u.loc[newi]
            v = v.loc[newi]
            a_list.append(wilcoxon(u, v, nan_policy = 'omit').pvalue)
        except Exception as err:
            # Keep a_list aligned with meant.index: a failed feature gets NaN
            # (and is removed by dropna below) instead of silently shortening
            # the list, which made the DataFrame construction fail.
            print(f"{ph}: Wilcoxon test failed ({err})")
            a_list.append(np.nan)
    pdf = pd.DataFrame({'pvalue': a_list, 'iind': list(meant.index)})
    dendf = pd.concat([logfc, pdf.set_index('iind')], axis=1).dropna()
    return dendf.assign(pv_cor = multipletests(dendf.pvalue, method = 'fdr_bh')[1]).sort_values('pvalue')

def compare_wcontrol(df_tur, df_cys, gene_log = False):
    """Compare two independent sample sets feature by feature.

    Parameters
    ----------
    df_tur, df_cys : pandas.DataFrame
        Samples in rows, features in columns. The two frames are treated as
        unpaired groups, so their row indices need not match.
    gene_log : bool, default False
        Unused; kept so the signature matches ``prepare_volcano``.

    Returns
    -------
    pandas.DataFrame
        Indexed by feature, with columns ``log_FC`` (mean(df_cys) minus
        mean(df_tur)), ``pvalue`` (Mann-Whitney U test) and ``pv_cor``
        (Benjamini-Hochberg adjusted p-value), sorted by ``pvalue``.
        Features with a missing ``log_FC`` or ``pvalue`` are dropped.
    """
    meant = df_tur.mean()
    meanc = df_cys.mean()
    # The difference of means is used as-is for the 'log_FC' column.
    logfc = meanc.loc[meant.index] - meant
    logfc.name = 'log_FC'

    a_list = []
    for ph in meant.index:
        try:
            u = df_tur.loc[:, ph]
            v = df_cys.loc[:, ph]
            a_list.append(mannwhitneyu(u, v, nan_policy = 'omit').pvalue)
        except Exception as err:
            # Keep a_list aligned with meant.index: a failed feature gets NaN
            # (and is removed by dropna below) instead of silently shortening
            # the list, which made the DataFrame construction fail.
            print(f"{ph}: Mann-Whitney U test failed ({err})")
            a_list.append(np.nan)
    pdf = pd.DataFrame({'pvalue': a_list, 'iind': list(meant.index)})
    dendf = pd.concat([logfc, pdf.set_index('iind')], axis=1).dropna()
    return dendf.assign(pv_cor = multipletests(dendf.pvalue, method = 'fdr_bh')[1]).sort_values('pvalue')

In [None]:
colors = ['#cc78bc', '#029e73']
def plot_boxplots_between_timepoints(cytokine, data, af):
    """Draw Baseline vs Postinduction box/swarm plots of one variable on ``af``.

    Parameters
    ----------
    cytokine : str
        Name of the column of ``data`` plotted on the y axis.
    data : pandas.DataFrame
        Must contain the columns ``Timepoint``, ``StudyID`` and ``cytokine``.
    af : matplotlib.axes.Axes
        Axes to draw on.

    Grey lines connect the rows sharing a ``StudyID``; the Baseline vs
    Postinduction comparison is annotated with a Wilcoxon test.
    """
    timepoint_order = ["Baseline", "Postinduction"]
    pairs = [("Postinduction", "Baseline")]

    # sns.set() is an alias of sns.set_theme(), so a single call is enough.
    sns.set_theme(style='white', palette=colors)
    sns.boxplot(x="Timepoint", y=cytokine, data=data, palette=colors, order=timepoint_order,
                ax=af, showfliers=False, hue="Timepoint", legend=False)
    sns.swarmplot(x="Timepoint", y=cytokine, data=data, order=timepoint_order,
                  color=".25", dodge=True, ax=af, size=3)
    sns.lineplot(x="Timepoint", y=cytokine, data=data, ax=af, units='StudyID',
                 estimator=None, linewidth=0.25, color='grey')

    # The Wilcoxon test is paired: the i-th Baseline value is compared with the
    # i-th Postinduction value in row order. Sort by StudyID so both groups list
    # their subjects in the same order regardless of how `data` arrived.
    # Only the annotator receives the sorted copy; the plots are unchanged.
    paired_data = data.sort_values('StudyID', kind='stable')
    annotator = Annotator(af, pairs, x="Timepoint", y=cytokine,
                          data=paired_data, order=timepoint_order)
    annotator.configure(test='Wilcoxon', text_format='simple')
    annotator.apply_and_annotate()

In [None]:
# Expression table; the first CSV column becomes the row index.
# Columns are later selected by the metadata index (see the ssGSEA calls).
new_cohorts_tmm = pd.read_csv(
    '../../processed_data/TMM_counts_all_TONIC_batch_corrected.csv',
    index_col=0,
)

In [None]:
# Metadata table; the first CSV column becomes the row index.
ann = pd.read_csv('../../processed_data/tonic_final_not_full_metadata_response_add_sets.csv', sep=',', index_col=0)
# StudyIDs that have both a Baseline and a Postinduction row.
# The Baseline IDs are materialised once as a set instead of rebuilding the
# list for every Postinduction row (which made the lookup quadratic).
baseline_ids = set(ann[ann.Timepoint == 'Baseline'].StudyID.values.tolist())
two_tp = [x for x in ann[ann.Timepoint == 'Postinduction'].StudyID.values.tolist() if x in baseline_ids]
ann_two_tp = ann[ann.StudyID.isin(two_tp)]

In [None]:
# Per-arm metadata subsets, restricted to the two time points of interest.
is_paired_timepoint = ann_two_tp.Timepoint.isin(['Postinduction', 'Baseline'])
is_doxo = ann_two_tp.Induction == 'Doxorubicin'
is_cis = ann_two_tp.Induction == 'Cisplatin'
is_control = (ann_two_tp.Cohort == 'T1_1') & (ann_two_tp.Induction == 'Control')

ann_doxo = ann_two_tp[is_doxo & is_paired_timepoint]
ann_cis = ann_two_tp[is_cis & is_paired_timepoint]
ann_contr = ann_two_tp[is_control & is_paired_timepoint]

In [None]:
# Stack the three arm-specific metadata tables row-wise (doxorubicin, cisplatin, control).
ann_sum = pd.concat((ann_doxo, ann_cis, ann_contr), axis=0)

In [None]:
def two_df(df, ann_doxo):
    """Split a score table into Baseline and Postinduction frames keyed by StudyID.

    Parameters
    ----------
    df : pandas.DataFrame
        Scores with one row per entry of ``ann_doxo.index``.
    ann_doxo : pandas.DataFrame
        Metadata with ``StudyID`` and ``Timepoint`` columns, indexed like ``df``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(baseline, postinduction)``; each is sorted by and indexed on
        ``StudyID`` and contains only the columns of ``df``.
    """
    combined = pd.concat(
        [df.loc[ann_doxo.index], ann_doxo.loc[:, ['StudyID', 'Timepoint']]],
        axis=1,
    )

    def _at_timepoint(label):
        # Rows of one time point, re-keyed by StudyID, metadata column removed.
        rows = combined[combined.Timepoint == label]
        return rows.sort_values('StudyID').set_index('StudyID').drop(columns='Timepoint')

    return (_at_timepoint('Baseline'), _at_timepoint('Postinduction'))
    
def prep_annot(selected_r, selected_p):
    """Build heatmap cell labels: a 2-decimal value plus significance stars.

    Parameters
    ----------
    selected_r : pandas.DataFrame
        Values to print (formatted with ``'{:.2f}'``).
    selected_p : pandas.DataFrame
        p-values with the same shape as ``selected_r``.

    Returns
    -------
    numpy.ndarray
        Object array of strings. A newline followed by ``*``, ``**`` or ``***``
        is appended when the p-value is <= 0.05, <= 0.01 or <= 0.001.
        Missing (NaN) p-values get no stars.
    """
    thresholds = (0.05, 0.01, 0.001)

    def _stars(pval):
        # `<=` is False for NaN, so a missing p-value yields no stars.
        # The previous `>`-based test treated NaN as below every cut-off
        # and labelled it '***'.
        n_stars = sum(1 for cutoff in thresholds if abs(pval) <= cutoff)
        return '\n' + '*' * n_stars if n_stars else ''

    stars = np.array(
        [_stars(val) for row in selected_p.to_numpy() for val in row],
        dtype=object,
    ).reshape(selected_p.shape)

    fmt = '{:.2f}'.format
    # DataFrame.applymap is deprecated in favour of DataFrame.map (pandas >= 2.1).
    if hasattr(pd.DataFrame, 'map'):
        formatted = selected_r.map(fmt)
    else:
        formatted = selected_r.applymap(fmt)
    return formatted.to_numpy(dtype=object) + stars

def simple_scatter_spearman(df, x, y, ax):
    """Scatter ``y`` against ``x`` with a robust regression line and a Spearman title.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the columns ``x`` and ``y``.
    x, y : str
        Column names plotted on the horizontal / vertical axis.
    ax : matplotlib.axes.Axes
        Axes to draw on.

    Rows where either column is missing are excluded from both the plot and
    the correlation reported in the title.
    """
    x1 = df.loc[:, x]
    y1 = df.loc[:, y]
    # Positional boolean mask of complete rows. Selecting rows by label
    # (df.loc[x1.index]) duplicated rows whenever index labels repeat, so the
    # plotted points no longer matched the data used for the correlation.
    keep = (x1.notna() & y1.notna()).to_numpy()
    x1 = x1[keep]
    y1 = y1[keep]
    df_delta = df.loc[keep]
    sns.regplot(x = x, y=y, data=df_delta, color=".3", line_kws=dict(color="r", linewidth = 0.8), scatter_kws=dict(s = 2), ax=ax, robust = True)
    ax.set_title('Spearman R = {:.2f}, p = {:.2g}'.format(*spearmanr(x1,y1)), fontsize=5, pad = 2.6)

def simple_scatter_both(df, x, y, ax):
    """Scatter ``y`` against ``x`` with a regression line; title shows Spearman and Pearson.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the columns ``x`` and ``y``.
    x, y : str
        Column names plotted on the horizontal / vertical axis.
    ax : matplotlib.axes.Axes
        Axes to draw on.

    Rows where either column is missing are excluded from both the plot and
    the correlations reported in the title. Thin grey lines mark x = 0 and
    y = 0.
    """
    x1 = df.loc[:, x]
    y1 = df.loc[:, y]
    # Positional boolean mask of complete rows. Selecting rows by label
    # (df.loc[x1.index]) duplicated rows whenever index labels repeat, so the
    # plotted points no longer matched the data used for the correlations.
    keep = (x1.notna() & y1.notna()).to_numpy()
    x1 = x1[keep]
    y1 = y1[keep]
    df_delta = df.loc[keep]
    sns.regplot(x = x, y=y, data=df_delta, color=".3", line_kws=dict(color="r", linewidth = 0.8), scatter_kws=dict(s = 2), ax=ax, robust = False)
    ax.set_title('Spearman R = {:.2f}, p = {:.2g}\nPearson R = {:.2f}, p = {:.2g}'.format(*spearmanr(x1,y1),*pearsonr(x1,y1)), fontsize=5, pad = 2.6)
    ax.axhline(y=0, color='grey', lw= 0.1)
    ax.axvline(x=0, color='grey', lw= 0.1)

## Hallmarks

In [None]:
# Single-sample GSEA over the selected samples with the Hallmark gene-set collection.
ss_gen = gp.ssgsea(
    data=new_cohorts_tmm.loc[:, ann_sum.index],
    gene_sets='MSigDB_Hallmark_2020',
    outdir='./ssgsea_output/',
    sample_norm_method='rank',  # 'custom' is the option for a user-supplied rank list
    permutation_num=0,          # permutation procedure is skipped; not needed here
    no_plot=True,               # plotting is skipped; the figures are not needed
    processes=32,
    format='png',
    seed=9,
)

In [None]:
# Wide score table: one row per 'Name', one column per 'Term', NES as values.
ss_sum = ss_gen.res2d.pivot_table(index='Name', columns='Term', values='NES').astype(float)

In [None]:
# Per-subject change in hallmark scores (Postinduction minus Baseline) for each arm.
# two_df returns (baseline, postinduction), so each split is computed once.
delta_cis, delta_doxo, delta_contr = (
    post - base
    for base, post in (two_df(ss_sum, arm_ann) for arm_ann in (ann_cis, ann_doxo, ann_contr))
)

In [None]:
# Ordered list of hallmark term names.
# NOTE(review): spellings are kept verbatim (e.g. 'Pperoxisome', 'heme Metabolism',
# the double space in 'PI3K/AKT/mTOR  Signaling'); presumably they have to match the
# 'Term' labels produced by the gene-set library — confirm before normalizing them.
hallmark_order = [
    'Apical Junction',
    'Epithelial Mesenchymal Transition',
    'Angiogenesis',
    'UV Response Dn',
    'Myogenesis',
    'p53 Pathway',
    'Hypoxia',
    'KRAS Signaling Dn',
    'Wnt-beta Catenin Signaling',
    'TGF-beta Signaling',
    'Estrogen Response Early',
    'Hedgehog Signaling',
    'Androgen Response',
    'Notch Signaling',
    'Apical Surface',
    'Interferon Gamma Response',
    'Xenobiotic Metabolism',
    'Apoptosis',
    'Coagulation',
    'IL-6/JAK/STAT3 Signaling',
    'KRAS Signaling Up',
    'Allograft Rejection',
    'Complement',
    'IL-2/STAT5 Signaling',
    'heme Metabolism',
    'Pancreas Beta Cells',
    'Interferon Alpha Response',
    'PI3K/AKT/mTOR  Signaling',
    'UV Response Up',
    'Inflammatory Response',
    'TNF-alpha Signaling via NF-kB',
    'DNA Repair',
    'Spermatogenesis',
    'Oxidative Phosphorylation',
    'Reactive Oxygen Species Pathway',
    'Adipogenesis',
    'Pperoxisome',
    'Bile Acid Metabolism',
    'Fatty Acid Metabolism',
    'Estrogen Response Late',
    'Mitotic Spindle',
    'E2F Targets',
    'G2-M Checkpoint',
    'Myc Targets V1',
    'Myc Targets V2',
    'mTORC1 Signaling',
    'Cholesterol Homeostasis',
    'Glycolysis',
    'Protein Secretion',
    'Unfolded Protein Response',
]

In [None]:
# Subset of hallmark term names (all of them also appear in hallmark_order).
bold_list = [
    'Interferon Gamma Response',
    'IL-6/JAK/STAT3 Signaling',
    'Allograft Rejection',
    'Complement',
    'IL-2/STAT5 Signaling',
    'Interferon Alpha Response',
    'Inflammatory Response',
    'TNF-alpha Signaling via NF-kB',
]

In [None]:
# Hallmark columns carried into the combined signature tables.
selected_hm = [
    'Interferon Gamma Response',
    'IL-6/JAK/STAT3 Signaling',
    'Complement',
    'Inflammatory Response',
    'Epithelial Mesenchymal Transition',
    'Angiogenesis',
]

## TME signatures (Bagaev et al.)

In [None]:
# Single-sample GSEA over the same samples, using gene sets read from a local .gmt file.
# NOTE(review): gene_sets is an absolute, user-specific path; the notebook only runs
# where that file exists.
ss_bag = gp.ssgsea(
    data=new_cohorts_tmm.loc[:, ann_sum.index],
    gene_sets="/home/m.chelushkin/Michiel_projects/rna_seq/cancer_cell/gene_signatures.gmt",
    outdir='./ssgsea_output/',
    min_size=2,
    sample_norm_method='rank',  # 'custom' is the option for a user-supplied rank list
    permutation_num=0,          # permutation procedure is skipped; not needed here
    no_plot=True,               # plotting is skipped; the figures are not needed
    processes=32,
    format='png',
    seed=9,
)

In [None]:
# Wide score table: one row per 'Name', one column per 'Term', NES as values.
ss_sum_bg = ss_bag.res2d.pivot_table(index='Name', columns='Term', values='NES').astype(float)

In [None]:
# Per-subject change in signature scores (Postinduction minus Baseline) for each arm.
# two_df returns (baseline, postinduction), so each split is computed once.
delta_cis_bg, delta_doxo_bg, delta_contr_bg = (
    post - base
    for base, post in (two_df(ss_sum_bg, arm_ann) for arm_ann in (ann_cis, ann_doxo, ann_contr))
)

In [None]:
# Signature columns carried into the combined signature tables.
selected_bg = [
    'EMT_signature',
    'CAF',
]

In [None]:
# Per arm: selected hallmark deltas side by side with selected signature deltas.
sigs_cis = pd.concat(
    [delta_cis[selected_hm], delta_cis_bg[selected_bg]],
    axis='columns',
)
sigs_dox = pd.concat(
    [delta_doxo[selected_hm], delta_doxo_bg[selected_bg]],
    axis='columns',
)
sigs_contr = pd.concat(
    [delta_contr[selected_hm], delta_contr_bg[selected_bg]],
    axis='columns',
)

In [None]:
# Three panels: change in 'Interferon Gamma Response' against three other
# signature changes, using the sigs_cis table.
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(5, 1.6))
af = axs.flat
for panel_idx, x_signature in enumerate(('Epithelial Mesenchymal Transition', 'Angiogenesis', 'CAF')):
    simple_scatter_both(sigs_cis, x=x_signature, y='Interferon Gamma Response', ax=af[panel_idx])
fig.set_tight_layout(True)
plt.savefig("SFig7E_corr_IFNg_cispl.pdf", format="pdf", bbox_inches='tight')

In [None]:
# Three panels: change in 'Inflammatory Response' against three other
# signature changes, using the sigs_cis table.
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(5, 1.6))
af = axs.flat
for panel_idx, x_signature in enumerate(('Epithelial Mesenchymal Transition', 'Angiogenesis', 'CAF')):
    simple_scatter_both(sigs_cis, x=x_signature, y='Inflammatory Response', ax=af[panel_idx])
fig.set_tight_layout(True)
plt.savefig("SFig7D_corr_Infl_cispl.pdf", format="pdf", bbox_inches='tight')