In [None]:
import pandas as pd
import numpy as np
import os
import gseapy as gp
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy.stats import wilcoxon, ttest_ind, ttest_rel, mannwhitneyu
from statsmodels.stats.multitest import multipletests
from statannotations.Annotator import Annotator
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['figure.dpi'] = 300

In [None]:
# Overwrite PATH for this kernel with a fixed list that includes /opt/R/4.2.3/bin.
# NOTE(review): presumably this is so rpy2 finds that R installation; the paths are
# machine-specific, so confirm/adjust before running elsewhere.
os.environ['PATH'] = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/opt/R/4.2.3/bin:/usr/bin:/sbin:/bin:/opt/miniconda3/conda/bin:/opt/miniconda3/bin"
# Load the rpy2 IPython extension that provides the %R / %%R magics used in later cells.
%load_ext rpy2.ipython

In [None]:
%%R
library(nparLD)

In [None]:
def prepare_volcano(df_tur, df_cys, gene_log = False):
    """Build a volcano-plot table (log fold change + paired Wilcoxon p-values).

    Both inputs are indexed by sample/subject (rows) and by feature (columns);
    column means are compared and rows are paired through the shared index.

    Parameters
    ----------
    df_tur : pandas.DataFrame
        Reference condition (denominator of the fold change).
    df_cys : pandas.DataFrame
        Comparison condition (numerator of the fold change).
    gene_log : bool, default False
        If true, values are back-transformed with ``2**x - 1`` before the
        means are taken. This is only applied when no feature changes sign
        between the two conditions (the sign-change branch takes precedence).

    Returns
    -------
    pandas.DataFrame
        Indexed by feature with columns ``log_FC`` (log2 of mean(df_cys) /
        mean(df_tur)), ``pvalue`` (Wilcoxon signed-rank) and ``pv_cor``
        (Benjamini-Hochberg adjusted), sorted by ``pvalue``. Features with a
        missing fold change or p-value are dropped.
    """
    meant = df_tur.mean().astype(float)
    meanc = df_cys.mean().astype(float)
    mean_product = meant.multiply(meanc)
    sign_ch = list(mean_product[mean_product < 0].index)
    if len(sign_ch) > 0:
        # A ratio of means with opposite signs has no log; drop those features.
        print("The following scores have changed their sign after chemo:\n", sign_ch)
        meanc = meanc.drop(sign_ch)
        meant = meant.drop(sign_ch)
    elif gene_log:
        meant = (2**df_tur - 1).mean()
        meanc = (2**df_cys - 1).mean()
    logfc = np.log2(meanc.loc[meant.index].divide(meant))
    logfc.name = 'log_FC'

    a_list = []
    for ph in meant.index:
        try:
            u = df_tur.loc[:, ph]
            v = df_cys.loc[:, ph]
            # Pair observations on the subjects present in both frames.
            newi = u.index.intersection(v.index)
            pvalue = wilcoxon(u.loc[newi], v.loc[newi], nan_policy = 'omit').pvalue
        except Exception as err:
            # Keep one entry per feature so the p-values stay aligned with
            # meant.index; the NaN row is removed by dropna() below.
            print("Wilcoxon test failed for {}: {}".format(ph, err))
            pvalue = np.nan
        a_list.append(pvalue)

    pdf = pd.DataFrame({'pvalue': a_list, 'iind': list(meant.index)})
    dendf = pd.concat([logfc, pdf.set_index('iind')], axis=1).dropna()
    return dendf.assign(pv_cor = multipletests(dendf.pvalue, method = 'fdr_bh')[1]).sort_values('pvalue')

def compare_wcontrol(df_tur, df_cys, gene_log = False):
    """Compare two independent groups feature by feature with Welch-free t-tests.

    Parameters
    ----------
    df_tur : pandas.DataFrame
        First group; rows are observations, columns are features.
    df_cys : pandas.DataFrame
        Second group with the same feature columns.
    gene_log : bool, default False
        Not used by this function; kept so the signature matches
        ``prepare_volcano``.

    Returns
    -------
    pandas.DataFrame
        Indexed by feature with columns ``log_FC`` (here the plain difference
        mean(df_cys) - mean(df_tur), not a log ratio), ``pvalue``
        (``scipy.stats.ttest_ind`` with NaNs omitted) and ``pv_cor``
        (Benjamini-Hochberg adjusted), sorted by ``pvalue``.
    """
    meant = df_tur.mean()
    meanc = df_cys.mean()
    logfc = meanc.loc[meant.index] - meant
    logfc.name = 'log_FC'

    a_list = []
    for ph in meant.index:
        try:
            pvalue = ttest_ind(df_tur.loc[:, ph], df_cys.loc[:, ph], nan_policy = 'omit').pvalue
        except Exception as err:
            # Record NaN instead of skipping so a_list keeps one entry per
            # feature; otherwise the DataFrame below fails on unequal lengths.
            print("t-test failed for {}: {}".format(ph, err))
            pvalue = np.nan
        a_list.append(pvalue)

    pdf = pd.DataFrame({'pvalue': a_list, 'iind': list(meant.index)})
    dendf = pd.concat([logfc, pdf.set_index('iind')], axis=1).dropna()
    return dendf.assign(pv_cor = multipletests(dendf.pvalue, method = 'fdr_bh')[1]).sort_values('pvalue')

In [None]:
def interaction_test(data_df, induction, timepoint):
    """Run nparLD's f1.ld.f1 on every score column for one induction arm.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Score columns plus the metadata columns 'Timepoint', 'Induction',
        'StudyID' and 'pdl1_status'.
    induction : str
        Value of the 'Induction' column to keep.
    timepoint : str
        Second level of the time factor (the first is always 'Baseline').

    Returns
    -------
    pandas.DataFrame
        Indexed by score column with 'pvalue' and 'pv_cor'
        (Benjamini-Hochberg adjusted), sorted by 'pvalue'.
    """
    # Keep only the rows of the requested induction arm.
    test_df = data_df[data_df.Induction.isin([induction])]
    # Push the inputs into the embedded R session.
    %R -i test_df
    # induction is pushed as well, although the R call below does not reference it.
    %R -i induction
    %R -i timepoint
    pv = {}
    # Iterate over every column that is not one of the four metadata columns.
    for u in data_df.drop(['Timepoint', 'Induction', 'StudyID', 'pdl1_status'], axis=1).columns:
        %R -i u
        # Time = Timepoint, group = pdl1_status, subject = StudyID; only the ANOVA.test table is kept.
        %R res_int <- f1.ld.f1(y = test_df[[u]], time = test_df$Timepoint, group = test_df$pdl1_status, subject = test_df$StudyID, time.order = c('Baseline', timepoint), group.order = c('PDL1_negative', 'PDL1_positive'), description=F, plot.RTE = F)$ANOVA.test
        # NOTE(review): -o appears to export res_int into the notebook's global
        # namespace, so the lookup below reads a global rather than a local — confirm.
        %R -o res_int
        # Third row, third column (0-based [2,2]) of the table.
        # NOTE(review): presumably the group-by-time interaction p-value — confirm against nparLD docs.
        pv.update({u: res_int[2,2]})
    ps = pd.Series(pv)
    # Adjust across all tested columns and sort by the raw p-value.
    res_df = pd.DataFrame({'pvalue': ps, 'pv_cor': multipletests(ps, method = 'fdr_bh')[1]}).sort_values('pvalue')
    return res_df

In [None]:
colors = ['#cc78bc', '#029e73']
def plot_boxplots_between_timepoints(cytokine, data, af):
    """Draw a paired Baseline-vs-Postinduction panel for one variable on axes `af`.

    Layers, in drawing order: box plot (fliers hidden), swarm plot, one thin grey
    line per 'StudyID', and a Wilcoxon annotation from statannotations.
    `data` must contain the columns 'Timepoint', 'StudyID' and `cytokine`.
    The global seaborn theme is reset on every call.
    """
    timepoints = ["Baseline", "Postinduction"]
    pairs = [("Baseline", "Postinduction")]

    sns.set()
    sns.set_theme(style='white', palette=colors)

    sns.boxplot(data=data, x="Timepoint", y=cytokine, hue="Timepoint", order=timepoints,
                palette=colors, showfliers=False, legend=False, ax=af)
    sns.swarmplot(data=data, x="Timepoint", y=cytokine, order=timepoints,
                  color=".25", dodge=True, size=3, ax=af)
    # One line per subject connects its observations across the timepoints.
    sns.lineplot(data=data, x="Timepoint", y=cytokine, units='StudyID', estimator=None,
                 linewidth=0.25, color='grey', ax=af)

    annotator = Annotator(af, pairs, data=data, x="Timepoint", y=cytokine, order=timepoints)
    annotator.configure(test='Wilcoxon', text_format='simple')
    annotator.apply_and_annotate()

def dyn_comp_delta(u, ax, lin_dens3, title, xlabel, to_compare, order, test = 'Mann-Whitney', size=1.3, ofs = 1):
  sns.boxplot(y=u, x=to_compare, data = lin_dens3, ax=ax, order = order,
                    color = 'lightblue', showfliers=False)
  sns.swarmplot(y=u, x=to_compare, data = lin_dens3, ax=ax, order = order,
                    color='black', size=size)
  ax.set_xlabel('')
  ax.set_ylabel(xlabel)
  ax.set_title(title)
  
  annotator = Annotator(ax, pairs = [order], data = lin_dens3, x=to_compare, y=u, order=order)
  annotator.configure(test=test, text_format = 'simple', comparisons_correction='BH', line_offset = ofs, line_offset_to_group = ofs, line_width = 0.65)
  _, test_results = annotator.apply_test()._get_output()
  pv = test_results[0].data.pvalue
  annotator.set_custom_annotations(['{} p={:.2g}'.format(test, pv)])
  annotator.annotate(line_offset = ofs, line_offset_to_group = ofs)

In [None]:
# Expression table; the first CSV column becomes the row index and the columns
# are later selected by sample id (ann_sum.index).
new_cohorts_tmm = pd.read_csv(
    '../../processed_data/TMM_counts_all_TONIC_batch_corrected.csv',
    index_col=0,
)

In [None]:
# Sample annotation table, indexed by the first CSV column.
ann = pd.read_csv('../../processed_data/tonic_final_not_full_metadata_response_add_sets.csv', sep=',', index_col=0)
# StudyIDs with a Baseline sample, collected once so the membership test below
# does not re-filter the DataFrame for every Postinduction sample.
baseline_ids = set(ann[ann.Timepoint == 'Baseline'].StudyID.values.tolist())
# StudyIDs that have both a Postinduction and a Baseline sample.
two_tp = [x for x in ann[ann.Timepoint == 'Postinduction'].StudyID.values.tolist() if x in baseline_ids]
ann_two_tp = ann[ann.StudyID.isin(two_tp)]

In [None]:
# Restrict to the two analysed timepoints, then split by induction arm.
in_timepoints = ann_two_tp.Timepoint.isin(['Postinduction', 'Baseline'])
ann_doxo = ann_two_tp[(ann_two_tp.Induction == 'Doxorubicin') & in_timepoints]
ann_cis = ann_two_tp[(ann_two_tp.Induction == 'Cisplatin') & in_timepoints]
# Controls are additionally limited to cohort 'T1_1'.
ann_contr = ann_two_tp[
    (ann_two_tp.Cohort == 'T1_1')
    & (ann_two_tp.Induction == 'Control')
    & in_timepoints
]

In [None]:
# Stack the three arm-specific annotation tables row-wise.
arm_annotations = [ann_doxo, ann_cis, ann_contr]
ann_sum = pd.concat(arm_annotations)

In [None]:
# PD-L1 table, indexed by its second column (patient identifiers).
pdl1 = pd.read_csv('../../processed_data/PDL1 status.csv', sep=';', index_col=1)
# Translate the 'Pat_' / 'Pat2_' prefixes to the 'T1_' / 'T2_' form before
# looking the identifiers up against StudyID.
pdl1_dict = {
    patient.replace('Pat_', 'T1_').replace('Pat2_', 'T2_'): cps
    for patient, cps in pdl1.PDL1_CPS.to_dict().items()
}
pdl1_scores = ann_sum.StudyID.replace(pdl1_dict).infer_objects(copy=False)
ann_sum = ann_sum.assign(pdl1 = pdl1_scores)
# CPS >= 10 is labelled positive; everything else, including NaN scores, is labelled negative.
pdl1_labels = ann_sum.pdl1.apply(lambda x: 'PDL1_positive' if x >= 10 else 'PDL1_negative')
ann_sum = ann_sum.assign(pdl1_status = pdl1_labels)

In [None]:
def two_df(df, ann_doxo):
    """Split `df` into Baseline and Postinduction frames indexed by StudyID.

    `ann_doxo` is any annotation subset (not only doxorubicin) whose index
    selects rows of `df` and which carries 'StudyID' and 'Timepoint' columns.
    Returns ``(baseline, postinduction)``, each sorted by StudyID.
    """
    merged = pd.concat([df.loc[ann_doxo.index], ann_doxo.loc[:, ['StudyID', 'Timepoint']]], axis=1)

    def _at_timepoint(label):
        rows = merged[merged.Timepoint == label]
        return rows.sort_values('StudyID').set_index('StudyID').drop('Timepoint', axis=1)

    return (_at_timepoint('Baseline'), _at_timepoint('Postinduction'))

In [None]:
# Boolean masks for each induction arm and each PD-L1 group.
is_doxo = ann_sum.Induction == 'Doxorubicin'
is_cis = ann_sum.Induction == 'Cisplatin'
is_contr = ann_sum.Induction == 'Control'
is_ppos = ann_sum.pdl1_status == 'PDL1_positive'
is_pneg = ann_sum.pdl1_status == 'PDL1_negative'

ann_doxo_ppos = ann_sum[is_doxo & is_ppos]
ann_cis_ppos = ann_sum[is_cis & is_ppos]
ann_contr_ppos = ann_sum[is_contr & is_ppos]

ann_doxo_pneg = ann_sum[is_doxo & is_pneg]
ann_cis_pneg = ann_sum[is_cis & is_pneg]
ann_contr_pneg = ann_sum[is_contr & is_pneg]

## Hallmarks

In [None]:
# Single-sample GSEA on the Hallmark gene sets for the samples listed in ann_sum.
ss_gen = gp.ssgsea(
    data=new_cohorts_tmm.loc[:, ann_sum.index],
    gene_sets='MSigDB_Hallmark_2020',
    outdir='./ssgsea_output/',
    sample_norm_method='rank',  # 'custom' would be used with a user-supplied ranking
    permutation_num=0,          # no permutation procedure
    no_plot=True,               # no per-term figures
    processes=32,
    format='png',
    seed=9,
)

In [None]:
# Reshape the long ssGSEA result into a Name x Term table of NES values.
ssgsea_long = ss_gen.res2d
ss_sum = pd.pivot_table(ssgsea_long, index='Name', columns='Term', values='NES').astype(float)

In [None]:
# Attach the metadata columns that interaction_test expects to the NES table.
sample_meta = ann_sum[['Timepoint', 'StudyID', 'Induction', 'pdl1_status']]
data_df = pd.concat([ss_sum, sample_meta], axis=1)

In [None]:
def _post_minus_baseline(scores, annotation):
    """Per-StudyID change in score: Postinduction minus Baseline."""
    baseline, postinduction = two_df(scores, annotation)
    return postinduction - baseline

delta_cis_ppos = _post_minus_baseline(ss_sum, ann_cis_ppos)
delta_doxo_ppos = _post_minus_baseline(ss_sum, ann_doxo_ppos)
delta_contr_ppos = _post_minus_baseline(ss_sum, ann_contr_ppos)

delta_cis_pneg = _post_minus_baseline(ss_sum, ann_cis_pneg)
delta_doxo_pneg = _post_minus_baseline(ss_sum, ann_doxo_pneg)
delta_contr_pneg = _post_minus_baseline(ss_sum, ann_contr_pneg)

In [None]:
# PD-L1 positive vs negative change scores per arm; the resulting log_FC column
# is mean(PD-L1 negative) - mean(PD-L1 positive).
cis_delta = compare_wcontrol(df_tur=delta_cis_ppos, df_cys=delta_cis_pneg)
doxo_delta = compare_wcontrol(df_tur=delta_doxo_ppos, df_cys=delta_doxo_pneg)
contr_delta = compare_wcontrol(df_tur=delta_contr_ppos, df_cys=delta_contr_pneg)

In [None]:
# Interaction test per induction arm, Baseline vs Postinduction.
cis_int = interaction_test(data_df, induction='Cisplatin', timepoint='Postinduction')
dox_int = interaction_test(data_df, induction='Doxorubicin', timepoint='Postinduction')
contr_int = interaction_test(data_df, induction='Control', timepoint='Postinduction')

In [None]:
# Display order of the Hallmark terms on the heatmap x-axis.
# NOTE(review): the unusual spellings ('heme Metabolism', 'Pperoxisome', the
# double space in 'PI3K/AKT/mTOR  Signaling') are kept verbatim because this list
# is used to reindex the result tables; presumably they match the term names of
# the gene-set library — confirm before "correcting" them.
hallmark_order = [
    'Apical Junction',
    'Epithelial Mesenchymal Transition',
    'Angiogenesis',
    'UV Response Dn',
    'Myogenesis',
    'p53 Pathway',
    'Hypoxia',
    'KRAS Signaling Dn',
    'Wnt-beta Catenin Signaling',
    'TGF-beta Signaling',
    'Estrogen Response Early',
    'Hedgehog Signaling',
    'Androgen Response',
    'Notch Signaling',
    'Apical Surface',
    'Interferon Gamma Response',
    'Xenobiotic Metabolism',
    'Apoptosis',
    'Coagulation',
    'IL-6/JAK/STAT3 Signaling',
    'KRAS Signaling Up',
    'Allograft Rejection',
    'Complement',
    'IL-2/STAT5 Signaling',
    'heme Metabolism',
    'Pancreas Beta Cells',
    'Interferon Alpha Response',
    'PI3K/AKT/mTOR  Signaling',
    'UV Response Up',
    'Inflammatory Response',
    'TNF-alpha Signaling via NF-kB',
    'DNA Repair',
    'Spermatogenesis',
    'Oxidative Phosphorylation',
    'Reactive Oxygen Species Pathway',
    'Adipogenesis',
    'Pperoxisome',
    'Bile Acid Metabolism',
    'Fatty Acid Metabolism',
    'Estrogen Response Late',
    'Mitotic Spindle',
    'E2F Targets',
    'G2-M Checkpoint',
    'Myc Targets V1',
    'Myc Targets V2',
    'mTORC1 Signaling',
    'Cholesterol Homeostasis',
    'Glycolysis',
    'Protein Secretion',
    'Unfolded Protein Response',
]

In [None]:
# Terms whose x-axis tick labels are rendered in bold on the heatmap.
bold_list = [
    'Interferon Gamma Response',
    'IL-6/JAK/STAT3 Signaling',
    'Allograft Rejection',
    'Complement',
    'IL-2/STAT5 Signaling',
    'Interferon Alpha Response',
    'Inflammatory Response',
    'TNF-alpha Signaling via NF-kB',
]

In [None]:
# Effect sizes (heatmap colours) come from compare_wcontrol.
delta_result = pd.DataFrame({'Doxorubicin': doxo_delta.log_FC, 'Cisplatin': cis_delta.log_FC, 'No induction': contr_delta.log_FC}).astype(float)
# Significance comes from interaction_test: adjusted p-values ...
delta_fdr = pd.DataFrame({'Doxorubicin': dox_int.pv_cor, 'Cisplatin': cis_int.pv_cor, 'No induction': contr_int.pv_cor}).astype(float)
# ... and raw p-values. All three columns must use `pvalue`; taking `pv_cor` for
# the control arm would make its raw-p markers depend on adjusted values.
delta_pv = pd.DataFrame({'Doxorubicin': dox_int.pvalue, 'Cisplatin': cis_int.pvalue, 'No induction': contr_int.pvalue}).astype(float)

# Put the rows of all three tables into the display order.
delta_result = delta_result.reindex(hallmark_order)
delta_fdr = delta_fdr.reindex(hallmark_order)
delta_pv = delta_pv.reindex(hallmark_order)

In [None]:
# Marker tables (rows = arms, columns = terms): a dot where adjusted p < 0.25
# and where raw p < 0.05 respectively.
fdr = (delta_fdr < 0.25).replace({True: "•", False: ''}).T
pv = (delta_pv < 0.05).replace({True: "•", False: ''}).T

combined_annot = pd.DataFrame('', index=fdr.index, columns=fdr.columns)
text_colors = pd.DataFrame('', index=fdr.index, columns=fdr.columns)

# The adjusted-p hit takes precedence (black); a raw-p-only hit is grey.
# Cells without a hit get an empty marker, so their 'red' colour is never drawn.
n_rows, n_cols = fdr.shape
for i in range(n_rows):
    for j in range(n_cols):
        if fdr.iloc[i, j] == '•':
            mark, shade = '•', 'black'
        elif pv.iloc[i, j] == '•':
            mark, shade = '•', '#858181'
        else:
            mark, shade = '', 'red'
        combined_annot.iloc[i, j] = mark
        text_colors.iloc[i, j] = shade

In [None]:
mpl.rcParams['font.size'] = 5

# Heatmap of the effect sizes: rows = arms, columns = Hallmark terms.
ax = sns.heatmap(
    delta_result.T,
    center=0,
    cmap=sns.color_palette("vlag", as_cmap=True),
    square=True,
    cbar=True,
    cbar_kws={'shrink': 0.1, 'anchor': (0.0, 0.5), 'aspect': 6, 'pad': 0.025},
    xticklabels=True,
    yticklabels=True,
    annot=False,
    fmt='',
    annot_kws={'color': 'black', 'fontsize': 8},
)

# Overlay the significance markers at the centre of each cell.
n_rows, n_cols = fdr.shape
for i in range(n_rows):
    for j in range(n_cols):
        text = combined_annot.iloc[i, j]
        if not text:
            continue
        ax.text(j + 0.5, i + 0.5, text, ha='center', va='center',
                color=text_colors.iloc[i, j], fontsize=8)

ax.set(xlabel=None)
# The colour bar is the last axes added to the figure.
cax = ax.figure.axes[-1]
cax.tick_params(width=0.5, length=3)
ax.tick_params(width=0, length=3)

# Bold the tick labels of the terms listed in bold_list.
for lab in ax.get_xticklabels():
    if lab.get_text() in bold_list:
        lab.set_weight('bold')

plt.savefig("Supp_Fig4A_Hallmarks_pdl1_interaction.pdf", format="pdf", bbox_inches = 'tight')