In [8]:
import scanpy as sc
import statistics
import math
import matplotlib.pyplot as plt
import scipy.stats as scistats
import pandas as pd
import seaborn as sns
import numpy as np

In [9]:
import warnings
warnings.filterwarnings('ignore')

In [10]:
# Load the dataset from an .h5ad file; the path is relative to the current
# working directory.
all_samples = sc.read('all_groups.h5ad')

In [11]:
# Directory prefix used in the plt.savefig(...) paths of the plotting cells.
# NOTE(review): nothing in this notebook creates the directory - confirm it
# exists before running the plotting cells.
save_path = 'figures'

In [12]:
# Apply scanpy's figure defaults with a resolution of 80 dpi.
sc.set_figure_params(dpi=80)

In [13]:
# Translate the raw timepoint codes into the display labels used on plot axes.
timepoint_labels = {'week2': 'Week 2', 'week4': 'Week 4', 'week6': 'Week 6'}
raw_tp = all_samples.obs['timepoint'].astype(str)

# An unmapped code would otherwise be dropped silently and only surface as a
# confusing length-mismatch error on assignment, so report it explicitly.
unknown_tp = sorted(set(raw_tp) - set(timepoint_labels))
if unknown_tp:
    raise ValueError(f"Unexpected value(s) in obs['timepoint']: {unknown_tp}")

new_tp = raw_tp.map(timepoint_labels).tolist()
all_samples.obs['new_timepoint'] = new_tp

In [14]:
# Label the cells of cluster '18' in obs['leiden_clusters_res15'] as 'SOI'
# (subcluster of interest); every other cell keeps its obs['Cell type'] value.
all_samples.obs['doublet_SOI'] = np.where(all_samples.obs['leiden_clusters_res15'] == '18', 'SOI', all_samples.obs['Cell type'])

In [15]:
# Keep only the cells labelled 'SOI'.
# NOTE(review): `soi` is assigned again in the UMI violin cell (same label
# filter, applied to `annotated_cells`); later cells use whichever ran last.
soi = all_samples[all_samples.obs['doublet_SOI'] == 'SOI']

In [16]:
# Exclude cells whose label is 'n.a.' (presumably cells without a cell type
# annotation - confirm against the upstream annotation step).
annotated_cells = all_samples[(all_samples.obs['doublet_SOI'] != 'n.a.')]

# 4b: Marker gene expression

In [17]:
# Marker genes per group label; a value is either one gene name or a list of
# gene names.
anno = {
    'T cell': ['CD3D', 'CD3E'],
    'abT cell': ['TRAC', 'TRBC2'],
    'CD4 T cell': ['CD4', 'IL7R'],
    'CD8 T cell': ['CD8A', 'CD8B'],
    'gdT cell': 'TRDC',
    'NK/NKT cell': 'KLRB1',
    'NK cell': ['GNLY', 'GZMB', 'NKG7'],
    'B cell': 'MS4A1',
    'Monocyte': 'LGALS3',
    'Dendritic cell': 'IRF4',
}

# Dot plot of the markers grouped by obs['doublet_SOI']; the figure is not
# shown inline (show=False) and the file name is handed to scanpy via `save`.
ax = sc.pl.dotplot(
    annotated_cells,
    anno,
    groupby='doublet_SOI',
    use_raw=False,
    dendrogram=False,
    swap_axes=False,
    var_group_rotation=90,
    show=False,
    save="new_SOI_Marker_genes_dotplot.pdf",
)



# 4d: Violin plots depicting UMI counts

In [None]:
sc.settings.verbosity = 0
plt.rcParams["figure.figsize"] = 6, 4

# One row per annotated cell: its group label and its '#reads' value, which is
# plotted as the UMI count.
df = pd.DataFrame({"Cell type": annotated_cells.obs['doublet_SOI'],
                   "#UMI": annotated_cells.obs['#reads']})
order = ['SOI', 'B cell', 'CD4 T cell', 'CD8 T cell', 'DC', 'Monocyte', 'NK T cell', 'NK cell', 'gdT cell']

ax = sns.violinplot(data=df, x="Cell type", y="#UMI", order=order, color='orange')
ax.tick_params(axis='x', rotation=70)
ax.set_ylabel('UMI count')
ax.set_ylim(bottom=0, top=1250)

# One-sided Wilcoxon rank-sum test: are '#reads' values greater in SOI cells
# than in monocytes? Despite the variable name, no multiple-testing adjustment
# is applied to this single p-value.
soi = annotated_cells[annotated_cells.obs['doublet_SOI'] == 'SOI']
monos = annotated_cells[annotated_cells.obs['doublet_SOI'] == 'Monocyte']
p_adj_soi_vs_monos = scistats.ranksums(
    soi.obs['#reads'],
    monos.obs['#reads'],
    alternative='greater',
).pvalue
print(p_adj_soi_vs_monos)

plt.savefig(f"{save_path}/Violin_UMI_count_SOI.pdf", bbox_inches='tight')
plt.show()

In [19]:
# Largest '#reads' value among the annotated cells; compare with the upper
# y-axis limit of 1250 set in the UMI violin plot.
max(annotated_cells.obs['#reads'])

1248.0

# 4f: Correlation: HIF1A and clinical score

In [20]:
def correlation_logfc_cs(adata, gene, cell_type):
    """Plot the log2 fold change of one gene against the clinical score.

    For every distinct value in ``adata.obs['clinical_score']`` the mean of
    ``gene`` in the ``'two_batch_correction'`` layer is computed separately for
    the cells with ``obs['timepoint']`` equal to ``'week2'``, ``'week4'`` and
    ``'week6'``. The log2 ratios week2/week4 and week2/week6 are scattered
    against the clinical score, a degree-1 least-squares line is drawn for
    each series, and the Pearson correlation coefficient with its p-value is
    printed and written into the plot.

    The figure is saved as ``{save_path}/{cell_type}_{gene}_cs_correlation.pdf``
    (``save_path`` is a notebook-level variable).

    Parameters
    ----------
    adata
        Annotated data object providing ``obs['doublet_SOI']``,
        ``obs['clinical_score']``, ``obs['timepoint']``, ``var_names`` and the
        layer ``'two_batch_correction'``.
    gene : str
        Name of the gene, looked up in ``adata.var_names``.
    cell_type : str
        Value of ``obs['doublet_SOI']`` to restrict the analysis to, or
        ``'all'`` to use every cell.

    Raises
    ------
    KeyError
        If ``gene`` does not occur in ``adata.var_names``.
    """
    if cell_type != 'all':
        adata = adata[adata.obs['doublet_SOI'] == cell_type]

    # Fail loudly on an unknown gene: an index of -1 would silently select the
    # last column of the layer. With duplicate names the last match is used.
    gene_hits = np.flatnonzero(np.asarray(adata.var_names) == gene)
    if gene_hits.size == 0:
        raise KeyError(f"gene {gene!r} not found in adata.var_names")
    pos_gene = int(gene_hits[-1])

    def _mean_expression(subset, timepoint):
        # Mean of the layer values (per the original note: normalized, NOT
        # log-normalized counts) over the cells of one timepoint.
        cells = subset[subset.obs['timepoint'] == timepoint]
        return statistics.mean(cells.layers['two_batch_correction'][:, pos_gene])

    cs_values, logfc_values_week4, logfc_values_week6 = [], [], []
    # sorted() makes the iteration order reproducible between runs.
    for cs in sorted(set(adata.obs['clinical_score'])):
        adata_cs = adata[adata.obs['clinical_score'] == cs]
        mean_week2 = _mean_expression(adata_cs, 'week2')
        mean_week4 = _mean_expression(adata_cs, 'week4')
        mean_week6 = _mean_expression(adata_cs, 'week6')

        logfc_values_week4.append(math.log2(mean_week2 / mean_week4))
        logfc_values_week6.append(math.log2(mean_week2 / mean_week6))
        cs_values.append(cs)

    plt.figure(figsize=(4, 4))
    x = np.array(cs_values)
    x_line = np.unique(x)
    # (values, colour, legend label, printed label, y position of the text in
    # data coordinates)
    comparisons = (
        (logfc_values_week4, 'blue', "Week 2 vs 4", "week 2 vs 4", -0.25),
        (logfc_values_week6, 'red', "Week 2 vs 6", "week 2 vs 6", -0.4),
    )
    for values, colour, legend_label, print_label, text_y in comparisons:
        y = np.array(values)
        plt.scatter(x, y, color=colour)
        plt.plot(x_line, np.poly1d(np.polyfit(x, y, 1))(x_line), color=colour, label=legend_label)

        pcc, p_val = scistats.mstats.pearsonr(x, y)
        print(f"{print_label}: PCC: {pcc}; p-value: {p_val}")
        plt.text(11, text_y, f'PCC:{round(pcc, 2)}; p:{round(p_val, 2)}', color=colour)

    plt.legend()
    plt.xlabel('Clinical score')
    plt.ylabel('Log2 fold change')
    # The file name follows the arguments; for ('HIF1A', 'SOI') it is the
    # previously hard-coded 'SOI_HIF1A_cs_correlation.pdf'.
    plt.savefig(f'{save_path}/{cell_type}_{gene}_cs_correlation.pdf', bbox_inches='tight')

In [None]:
# Panel 4f: HIF1A fold change vs. clinical score, restricted to the cells
# labelled 'SOI' in obs['doublet_SOI'].
correlation_logfc_cs(all_samples, 'HIF1A', 'SOI')

# 4c: UMAP: Marker gene expression

In [22]:
# Compute the neighbourhood graph and the UMAP embedding on the SOI subset
# only, with scanpy's default settings.
# NOTE(review): `soi` was created by boolean subsetting and is presumably an
# AnnData view at this point - confirm that modifying it here is intended.
sc.pp.neighbors(soi)
sc.tl.umap(soi)

In [23]:
# Genes used to colour the SOI UMAPs; the plotting cell slices this list by
# position into four figure parts, so the order matters.
marker_genes = ['CD3D', 'TRAC', 'CD8A', 'TRDC', 'IL7R', 'LGALS3', 'KLRB1', 'GNLY', 'GZMB', 'NKG7']

In [None]:
plt.rcParams["figure.figsize"] = 4, 3

# (start, stop) index ranges into marker_genes; each range becomes one saved
# UMAP figure, numbered from 1.
umap_parts = ((0, 2), (2, 5), (5, 7), (7, 10))
for part_nr, (start, stop) in enumerate(umap_parts, start=1):
    sc.pl.umap(soi, color=marker_genes[start:stop], save=f'SOI_part{part_nr}_V1.pdf')

# 4e: Violin plots stacked by clinical score (c.s.)

In [25]:
def _annotate_p_value(ax, p_value, x_exact, x_sci, y):
    """Write one p-value at height ``y``: '0' or '1' for the boundary values, else scientific notation."""
    if p_value == 0 or p_value >= 1:
        ax.annotate(str(min(int(p_value), 1)), xy=(x_exact, y), fontsize=9)
    else:
        ax.annotate('{:.2e}'.format(p_value), xy=(x_sci, y), fontsize=9)


def violin_with_p_gene(adata_temp, clinical_score, gene, cell_type, axarr, gene_nr, custom_top):
    """Draw one violin panel of a gene's expression per timepoint, with p-values.

    The values of ``gene`` in the ``'two_batch_correction'`` layer are plotted
    as violins for 'Week 2', 'Week 4' and 'Week 6' on ``axarr[gene_nr]``.
    Three one-sided Wilcoxon rank-sum tests (``alternative='greater'``:
    week 2 vs 4, week 2 vs 6, week 4 vs 6) are run and their p-values are
    written above brackets over the violins. Despite the ``p_adj`` naming in
    earlier versions of this code, the p-values are not adjusted for multiple
    testing.

    Unlike earlier versions, the function no longer writes a dummy ``'value'``
    column into ``.obs``; the input object is left unmodified.

    Parameters
    ----------
    adata_temp
        Annotated data object providing ``obs['clinical_score']``,
        ``obs['Cell type']``, ``obs['timepoint']``, ``obs['new_timepoint']``,
        ``var_names`` and the layer ``'two_batch_correction'``.
    clinical_score
        Value of ``obs['clinical_score']`` to restrict to, or ``'all'``.
    gene : str
        Name of the gene, looked up in ``var_names``.
    cell_type : str
        Value of ``obs['Cell type']`` to restrict to, or ``'all cell'``.
    axarr
        Indexable collection of matplotlib axes.
    gene_nr : int
        Index into ``axarr`` of the panel to draw on.
    custom_top : float
        Upper y-axis limit of the panel.

    Raises
    ------
    KeyError
        If ``gene`` does not occur in ``var_names``.
    """
    panel_ax = axarr[gene_nr]
    plt.sca(panel_ax)  # plt.xticks below acts on the current axes
    sc.settings.verbosity = 0

    adata_cs_type = adata_temp
    if clinical_score != 'all':
        adata_cs_type = adata_cs_type[adata_cs_type.obs['clinical_score'] == clinical_score]
    if cell_type != 'all cell':
        adata_cs_type = adata_cs_type[adata_cs_type.obs['Cell type'] == cell_type]

    # Fail loudly on an unknown gene: an index of -1 would silently select the
    # last column of the layer. With duplicate names the last match is used.
    gene_hits = np.flatnonzero(np.asarray(adata_cs_type.var_names) == gene)
    if gene_hits.size == 0:
        raise KeyError(f"gene {gene!r} not found in var_names")
    position_gene = int(gene_hits[-1])

    expression = np.asarray(adata_cs_type.layers['two_batch_correction'][:, position_gene])

    df = pd.DataFrame({"Timepoint": adata_cs_type.obs['new_timepoint'],
                       "expression": expression})
    ax = sns.violinplot(data=df, y="expression", x="Timepoint",
                        order=['Week 2', 'Week 4', 'Week 6'], ax=panel_ax)
    ax.set_xlabel(f'c.s. {clinical_score}')

    # Split the expression vector by raw timepoint code for the tests.
    timepoints = adata_cs_type.obs['timepoint'].to_numpy()
    week2 = expression[timepoints == 'week2']
    week4 = expression[timepoints == 'week4']
    week6 = expression[timepoints == 'week6']

    p_week2vs4 = scistats.ranksums(week2, week4, alternative='greater')[1]
    p_week2vs6 = scistats.ranksums(week2, week6, alternative='greater')[1]
    p_week4vs6 = scistats.ranksums(week4, week6, alternative='greater')[1]

    # Height of the lower brackets: one unit above the rounded-up maximum.
    top = np.ceil(np.max(expression)) + 1

    # Brackets: week 2-4 and week 4-6 at `top`, week 2-6 above them.
    for x_start, x_end, y in ((0.15, 0.85, top), (1.15, 1.85, top), (0.15, 1.85, top + 1.25)):
        ax.annotate("", xy=(x_start, y), xytext=(x_end, y),
                    arrowprops={'arrowstyle': '-'}, va='center')

    # Short labels ('0'/'1') are placed further right than the longer
    # scientific-notation labels so both appear centred over their bracket.
    _annotate_p_value(ax, p_week2vs6, x_exact=1, x_sci=0.8, y=top + 1.4)
    _annotate_p_value(ax, p_week2vs4, x_exact=0.4, x_sci=0.2, y=top + 0.15)
    _annotate_p_value(ax, p_week4vs6, x_exact=1.4, x_sci=1.2, y=top + 0.15)

    plt.xticks(rotation=45, ha='right', fontsize=12.5)
    ax.set_ylim(top=custom_top)

In [26]:
def violin_stacked_by_cs(gene, adata, custom_top, clinical_scores=(0, 11, 14, 17, 19, 26)):
    """Draw one row of violin panels for a gene, one panel per clinical score.

    Each panel is produced by ``violin_with_p_gene`` with
    ``cell_type='all cell'``. The panels share both axes and are placed side
    by side without horizontal spacing. The figure is saved as
    ``{save_path}/{gene}_SOI_stacked_by_cs.pdf`` (``save_path`` is a
    notebook-level variable).

    The figure size is passed to ``plt.figure`` directly. Previously it was
    written to ``plt.rcParams`` after the figure had been created, so it did
    not apply to the figure being drawn and changed the global default for
    later figures instead.

    Parameters
    ----------
    gene : str
        Name of the gene to plot.
    adata
        Annotated data object handed on to ``violin_with_p_gene``.
    custom_top : float
        Upper y-axis limit applied to the panels.
    clinical_scores : sequence, optional
        Clinical score values, one panel each, in plotting order. Defaults to
        the six scores that were previously hard-coded.
    """
    n_panels = len(clinical_scores)
    # 2.5 inches of width per panel, 2.3 inches of height.
    fig = plt.figure(figsize=(n_panels * 2.5, 2.3))
    gs = fig.add_gridspec(1, n_panels, wspace=0)
    # squeeze=False keeps a 2-D array even for a single panel; take its only row.
    axs = gs.subplots(sharex=True, sharey=True, squeeze=False)[0]

    for i, cs in enumerate(clinical_scores):
        violin_with_p_gene(adata, clinical_score=cs, gene=gene, cell_type='all cell',
                           axarr=axs, gene_nr=i, custom_top=custom_top)

    # Single-row layout: keep labels and tick labels only on the outer edges
    # of the grid.
    for ax in axs:
        ax.label_outer()

    fig.savefig(f'{save_path}/{gene}_SOI_stacked_by_cs.pdf', bbox_inches='tight')

In [None]:
# Panel 4e: HIF1A in the SOI cells; 11 is passed as custom_top, the upper
# y-axis limit of each panel.
violin_stacked_by_cs('HIF1A', soi, 11)