In [1]:
import scanpy as sc
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import scipy.stats as scistats

In [2]:
import warnings
# Blanket filter: every warning category is suppressed for the rest of the session.
# NOTE(review): this also hides deprecation and data-modification warnings from
# the plotting libraries; consider narrowing the filter if a result looks off.
warnings.filterwarnings('ignore')

In [3]:
# Load the dataset that every later cell subsets and plots.
all_samples = sc.read('all_groups.h5ad')

In [4]:
# Use a higher figure resolution (200 dpi) for everything drawn below.
sc.set_figure_params(dpi=200)

In [5]:
# Directory (relative to the working directory) where the custom violin-plot
# helpers write their PDF files.
save_path = 'figures'

In [6]:
# Combine timepoint and clinical score into one per-cell group label of the
# form 'Week<N>    cs<score>' and store it as obs['cs_and_tp'].
tp = all_samples.obs['timepoint']
clin_sc = all_samples.obs['clinical_score']

# The week number is whatever follows the 'k' in the raw timepoint string.
tp_and_cs = [
    'Week{}    cs{}'.format(timepoint.split("k")[1], score)
    for timepoint, score in zip(tp, clin_sc)
]

all_samples.obs['cs_and_tp'] = tp_and_cs

In [7]:
# Human-readable axis label for each raw timepoint code.
timepoint_labels = {'week2': 'Week 2', 'week4': 'Week 4', 'week6': 'Week 6'}

# Fail with a clear message on an unexpected code. Silently skipping such rows
# would make the label list shorter than obs and only surface later as an
# obscure length-mismatch error on assignment.
unknown_timepoints = set(all_samples.obs['timepoint']) - set(timepoint_labels)
if unknown_timepoints:
    raise ValueError(
        f"Unexpected timepoint value(s): {sorted(map(str, unknown_timepoints))}; "
        f"expected one of {sorted(timepoint_labels)}"
    )

# The loop variable is deliberately not called `tp`, so a `tp` defined by an
# earlier cell is not overwritten.
new_tp = [timepoint_labels[raw_timepoint] for raw_timepoint in all_samples.obs['timepoint']]
all_samples.obs['new_timepoint'] = new_tp

In [8]:
# Per-cell-type subsets used by the plots below, selected on obs['Cell type'].
monos = all_samples[all_samples.obs['Cell type'].eq('Monocyte')]
b_cells = all_samples[all_samples.obs['Cell type'].eq('B cell')]

In [9]:
# Sanity check: dimensions of the monocyte subset.
monos.shape

(17567, 365)

# Stacked violin with p-values: Different methods to create plots

In [10]:
def _gene_column_index(var_names, gene):
    """Return the position of ``gene`` in ``var_names``.

    Raises:
        KeyError: if ``gene`` does not occur in ``var_names``.
    """
    matches = [idx for idx, name in enumerate(var_names) if name == gene]
    if not matches:
        raise KeyError(f"gene {gene!r} not found in var_names")
    # Should a name occur more than once, the last occurrence is used.
    return matches[-1]


def _p_value_label(p_value):
    """Return ``(text, is_short)`` for annotating a p-value.

    A p-value of exactly 0, or one >= 1, is shown as the integer 0 / 1
    (``is_short`` is True); anything else, including NaN, is formatted in
    scientific notation with two decimals.
    """
    if p_value == 0 or p_value >= 1:
        return str(min(int(p_value), 1)), True
    return '{:.2e}'.format(p_value), False


def violin_with_p_gene(adata_temp, clinical_score, gene, cell_type, axarr, gene_nr, custom_top, altern_H):
    """Draw per-timepoint violins of one gene on ``axarr[gene_nr]`` and annotate
    pairwise Wilcoxon rank-sum p-values.

    Expression values are read from ``layers['two_batch_correction']``. The three
    timepoints (obs['timepoint'] in 'week2' / 'week4' / 'week6') are compared
    pairwise with ``scipy.stats.ranksums``. The annotated p-values are the raw
    test results; no multiple-testing correction is applied.

    Args:
        adata_temp: annotated data object providing ``var_names``, ``obs`` columns
            'clinical_score', 'Cell type', 'timepoint' and 'new_timepoint', and the
            'two_batch_correction' layer. It is not modified.
        clinical_score: value of obs['clinical_score'] to restrict to, or 'all'.
        gene: name of the gene to plot; must be present in ``var_names``.
        cell_type: value of obs['Cell type'] to restrict to, or 'all cell'.
        axarr: indexable collection of matplotlib Axes.
        gene_nr: index into ``axarr`` of the Axes to draw on.
        custom_top: upper y-axis limit applied after drawing.
        altern_H: passed as ``alternative`` to ``scipy.stats.ranksums``.

    Raises:
        KeyError: if ``gene`` is not in ``adata_temp.var_names``.
        ValueError: if no cells remain after filtering.
    """
    # Resolve the gene before anything is drawn: an unknown name must not
    # silently fall back to some other column of the expression matrix.
    position_gene = _gene_column_index(adata_temp.var_names, gene)

    ax = axarr[gene_nr]
    # Also make it the current Axes, since callers follow up with pyplot calls.
    plt.sca(ax)

    adata_subset = adata_temp
    if clinical_score != 'all':
        adata_subset = adata_subset[adata_subset.obs['clinical_score'] == clinical_score]
    if cell_type != 'all cell':
        adata_subset = adata_subset[adata_subset.obs['Cell type'] == cell_type]

    sc.settings.verbosity = 0

    expression = adata_subset.layers['two_batch_correction'][:, position_gene]
    if len(expression) == 0:
        raise ValueError(
            f"no cells left for clinical_score={clinical_score!r}, cell_type={cell_type!r}"
        )

    df = pd.DataFrame({"Timepoint": adata_subset.obs['new_timepoint'],
                       "expression": expression})
    sns.violinplot(data=df, y="expression", x="Timepoint",
                   order=['Week 2', 'Week 4', 'Week 6'], ax=ax)

    if clinical_score == 'all':
        ax.set_ylabel(gene)
    else:
        ax.set_ylabel(f'c.s. {clinical_score}')

    # Expression values split by raw timepoint code.
    timepoint = adata_subset.obs['timepoint']
    by_week = {code: expression[(timepoint == code).to_numpy()]
               for code in ('week2', 'week4', 'week6')}

    # Brackets are anchored just above the highest observed value.
    top = np.ceil(np.max(expression))

    # One row per comparison:
    # (group a, group b, bracket x-start, bracket x-end, bracket y,
    #  label y, label x for scientific notation, label x for the short 0/1 form)
    comparisons = (
        ('week2', 'week4', 0.15, 0.85, top, top + 0.15, 0.2, 0.4),
        ('week4', 'week6', 1.15, 1.85, top, top + 0.15, 1.2, 1.4),
        ('week2', 'week6', 0.15, 1.85, top + 1.25, top + 1.4, 0.8, 1),
    )
    for first, second, x_start, x_end, y_line, y_text, x_long, x_short in comparisons:
        p_value = scistats.ranksums(by_week[first], by_week[second],
                                    alternative=altern_H)[1]
        # An arrow without heads serves as the horizontal bracket line.
        ax.annotate("", xy=(x_start, y_line), xytext=(x_end, y_line),
                    arrowprops={'arrowstyle': '-'}, va='center')
        text, is_short = _p_value_label(p_value)
        ax.annotate(text, xy=(x_short if is_short else x_long, y_text), fontsize=9)

    ax.set_ylim(top=custom_top)

In [11]:
def custom_violin_plot(gene_list, adata, custom_top, save_name):
    """Stack one violin panel per gene (all cells of ``adata``, two-sided test)
    and save the figure as ``<save_path>/<save_name>.pdf``.

    Args:
        gene_list: non-empty sequence of gene names, one panel each.
        adata: annotated data object handed to ``violin_with_p_gene``.
        custom_top: shared upper y-axis limit of the panels.
        save_name: file name (without extension) of the PDF.

    Raises:
        ValueError: if ``gene_list`` is empty.
    """
    import os

    n_panels = len(gene_list)
    if n_panels == 0:
        raise ValueError("gene_list must contain at least one gene")

    # The size is given to this figure directly. Setting the figsize rcParam
    # after the figure exists would only affect figures created later.
    fig = plt.figure(figsize=(3, n_panels * 2))
    gs = fig.add_gridspec(n_panels, hspace=0)
    # squeeze=False always yields a 2-D array, so a single gene works as well.
    axs = gs.subplots(sharex=True, sharey=True, squeeze=False)[:, 0]
    for panel, gene in enumerate(gene_list):
        violin_with_p_gene(adata, clinical_score='all', gene=gene, cell_type='all cell',
                           axarr=axs, gene_nr=panel, custom_top=custom_top,
                           altern_H='two-sided')

    # Hide x labels and tick labels for all but bottom plot.
    for ax in axs:
        ax.label_outer()

    # Drop the remaining x-axis title of the bottom panel.
    axs[-1].set_xlabel("")

    os.makedirs(save_path, exist_ok=True)
    fig.savefig(f'{save_path}/{save_name}.pdf', bbox_inches='tight')

In [12]:
def violin_stacked_by_cs(gene, adata, custom_top, clinical_scores=(0, 11, 14, 17, 19, 26)):
    """Stack one violin panel per clinical score for a single gene (one-sided
    'greater' test) and save it as ``<save_path>/<gene>_stacked_by_cs.pdf``.

    Args:
        gene: name of the gene to plot.
        adata: annotated data object handed to ``violin_with_p_gene``.
        custom_top: shared upper y-axis limit of the panels.
        clinical_scores: obs['clinical_score'] values to plot, top to bottom.
            Defaults to the six scores this notebook has always used.

    Raises:
        ValueError: if ``clinical_scores`` is empty.
    """
    import os

    clinical_scores = list(clinical_scores)
    n_panels = len(clinical_scores)
    if n_panels == 0:
        raise ValueError("clinical_scores must contain at least one score")

    # The size is given to this figure directly. Setting the figsize rcParam
    # after the figure exists would only affect figures created later.
    fig = plt.figure(figsize=(3, n_panels * 2))
    gs = fig.add_gridspec(n_panels, hspace=0)
    # squeeze=False always yields a 2-D array, so a single score works as well.
    axs = gs.subplots(sharex=True, sharey=True, squeeze=False)[:, 0]
    for panel, cs in enumerate(clinical_scores):
        violin_with_p_gene(adata, clinical_score=cs, gene=gene, cell_type='all cell',
                           axarr=axs, gene_nr=panel, custom_top=custom_top,
                           altern_H='greater')

    # Hide x labels and tick labels for all but bottom plot.
    for ax in axs:
        ax.label_outer()

    # Drop the remaining x-axis title of the bottom panel.
    axs[-1].set_xlabel("")

    os.makedirs(save_path, exist_ok=True)
    fig.savefig(f'{save_path}/{gene}_stacked_by_cs.pdf', bbox_inches='tight')

# Cytokines

In [13]:
# Genes shown in the cytokine violin panels.
cytokines = ['IL10', 'IL12A', 'IL15', 'IL18', 'IL1A', 'IL1B', 'IL6', 'TGFB1', 'TNF', 'CXCL8']

In [None]:
# Ten cytokines split over two figures of five panels each, monocytes only.
custom_violin_plot(cytokines[:5], monos, custom_top=10, save_name='cyto_1')
custom_violin_plot(cytokines[5:], monos, custom_top=10, save_name='cyto_2')

# Chemokines

In [15]:
# Genes shown in the chemokine violin panels.
chemokines = ['CCL22', 'CCL3', 'CCL4', 'CCL5', 'CXCL10']

In [None]:
# One figure with a panel per chemokine, monocytes only; y-axis capped at 12.5.
custom_violin_plot(chemokines, monos, custom_top=12.5, save_name='chemokines')

# MHC class I and II

In [17]:
# Collect every gene in the dataset whose name begins with 'HLA'.
hla_mask = all_samples.var_names.str.startswith('HLA')
mhcs = all_samples.var_names[hla_mask].tolist()
print(mhcs)

['HLA-A', 'HLA-C', 'HLA-DMA', 'HLA-DMB', 'HLA-DPA1', 'HLA-DPB1', 'HLA-DQA1', 'HLA-DQB1', 'HLA-DRA', 'HLA-DRB3']


In [18]:
# Group order for the stacked violin plots: weeks descending (6, 4, 2) and,
# within each week, clinical scores ascending. The labels follow the
# 'Week<N>    cs<score>' pattern with four spaces between the two parts.
cat_order = [
    f'Week{week}    cs{score}'
    for week in (6, 4, 2)
    for score in (0, 11, 14, 17, 19, 26)
]

In [19]:
# Stacked violins of the HLA genes in monocytes, grouped by the combined
# timepoint / clinical-score label in the order given by cat_order.
# show=False makes the call return its axes, which the cell then displays.
sc.pl.stacked_violin(monos, mhcs, groupby='cs_and_tp', categories_order=cat_order, show=False,
                     save='HLA_expression_monos.pdf',
                     bbox_inches='tight', cmap='viridis')

{'mainplot_ax': <AxesSubplot:>,
 'color_legend_ax': <AxesSubplot:title={'center':'Median expression\nin group'}>}

In [20]:
# Same HLA stacked-violin plot as for the monocytes, here for the B-cell subset.
# NOTE(review): the file name has a double underscore, unlike the monocyte one.
sc.pl.stacked_violin(b_cells, mhcs, groupby='cs_and_tp', categories_order=cat_order, show=False,
                     save='HLA_expression__bcells.pdf', bbox_inches='tight', cmap='viridis')

{'mainplot_ax': <AxesSubplot:>,
 'color_legend_ax': <AxesSubplot:title={'center':'Median expression\nin group'}>}

# Individual genes: Violin plots stacked by cs

In [None]:
# IL1B in monocytes, one panel per clinical score; y-axis capped at 11.
violin_stacked_by_cs('IL1B', monos, 11)

In [None]:
# CXCL8 in monocytes, one panel per clinical score; y-axis capped at 11.5.
violin_stacked_by_cs('CXCL8', monos, 11.5)

In [None]:
# CCL3 in monocytes, one panel per clinical score; y-axis capped at 11.
violin_stacked_by_cs('CCL3', monos, 11)