# cytokines in cell types

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import scanpy as sc
import os
from matplotlib import rcParams
from matplotlib import font_manager
import matplotlib.pyplot as plt
# Global plotting configuration for every figure produced below.
# pdf.fonttype 42 embeds TrueType fonts so text in exported PDFs stays editable.
rcParams['pdf.fonttype'] = 42

# Register the Arial font file with matplotlib and print the file it resolves to.
font_manager.fontManager.addfont("...software/Arial.ttf")
print(font_manager.findfont("Arial"))
plt.rcParams["font.sans-serif"] = ["Arial"]

# One scanpy figure configuration. An earlier duplicate call with dpi_save=150
# was removed because this call (dpi_save=300) overrode it anyway.
# NOTE(review): set_figure_params may reset font-related rcParams; confirm
# Arial is still the active sans-serif font after this call.
sc.settings.set_figure_params(dpi = 150, color_map = 'RdPu', dpi_save = 300, vector_friendly = True, format = 'pdf')

In [None]:
# Switch into the cleaned-data directory, then load the AnnData object
# from the h5ad file located there.
clean_dir = '.../clean/'
h5ad_name = 'adata_full_rev_4_clean_scvi.h5ad'
os.chdir(clean_dir)
adata = sc.read_h5ad(h5ad_name)

In [None]:
# Bare expression: in a notebook this renders the summary repr of the loaded object.
adata

In [None]:
# Sorted unique level-0 cell-type labels present in the dataset.
# numpy is imported here because no earlier cell imports it; without this the
# cell raises NameError when the notebook is run top to bottom.
import numpy as np

cells = list(np.unique(adata.obs['cell_type_level_0']))
cells

In [None]:
# Number of cells per age group, restricted to the three T_* level-0 labels.
t_labels = ['T_DN','T_DP','T_SP']
is_t_label = adata.obs['cell_type_level_0'].isin(t_labels)
adata[is_t_label].obs['age_group'].value_counts()

In [None]:
# Number of cells per study group, restricted to the level-0 labels listed below.
other_labels = [
    'B',
    'Epithelial',
    'Myeloid',
    'RBC',
    'Schwann',
    'Stroma',
]
is_other_label = adata.obs['cell_type_level_0'].isin(other_labels)
adata[is_other_label].obs['study_group'].value_counts()

In [None]:
# Normalise per-cell totals with scanpy's default settings, then apply log1p.
# Both calls modify `adata` in place, so running this cell twice transforms the data twice.
# NOTE(review): assumes adata.X holds raw counts when loaded — confirm.
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)

In [None]:
# Move to the Figure 3 working directory and load the ANOVA results table
# (one row per gene, judging by the 'Gene' column used below).
fig3_dir = '...Figure_3_nat/'
anova_csv = '...Figure_3_nat/anova_model_results.csv'
os.chdir(fig3_dir)
anova_results = pd.read_csv(anova_csv)

In [None]:
# Keep genes that either have low cosine similarity (< 0.5) or were significantly
# different (Bonferroni-corrected, < 0.05) in the ANOVA interaction test.
low_cos_similarity = (anova_results['Cosine Similarity'] < 0.5).rename('low_cos_similarity')
interaction_rejected = anova_results['Interaction_rejected'].rename('interaction_H0_rejected')

# One row per gene, carrying both boolean flags.
diff_genes = pd.concat([anova_results['Gene'], low_cos_similarity, interaction_rejected], axis=1)

# Retain rows where at least one of the two flags is set.
either_flag = diff_genes['low_cos_similarity'] | diff_genes['interaction_H0_rejected']
diff_genes = diff_genes[either_flag]
diff_genes

In [None]:
# Write the selected genes and their two flags to CSV (the row index is written too,
# since `index=False` is not passed).
diff_genes.to_csv('...Figure_3_nat/diff_genes.csv')

In [None]:
# Dot plot of the selected genes for every (cell type, age group) combination
# that has at least 50 cells.
import numpy as np  # not used in this cell; kept because later cells rely on `np`

anno = 'cell_type_level_1'
cells = ['B', 'DC', 'EC', 'Fb', 'Macrophage', 'Mast', 'Mesothelium', 'Mono',
       'Myeloid_progenitor', 'Schwann', 'TEC',
       'TEC-mimetic', 'T_CD4', 'T_CD8', 'T_DN', 'T_DN(early)', 'T_DP',
       'T_NK', 'T_Treg', 'T_innate', 'T_αβT(entry)', 'Vascular',]

genes = diff_genes['Gene'].tolist()

# Work out which cells to keep from the obs table alone, so that only the
# final subset is copied (instead of copying the whole dataset up front).
obs = adata.obs
in_selected_cells = obs[anno].isin(cells)
# Combined "<cell type>_<age group>" label, built with vectorised string
# concatenation rather than a row-wise apply.
combined_labels = obs[anno].astype(str) + '_' + obs['age_group'].astype(str)

# Filter for groups with at least 50 cells (counted among the selected cell types only)
group_sizes = combined_labels[in_selected_cells].value_counts()
groups_to_keep = group_sizes[group_sizes >= 50].index
keep_cells = in_selected_cells & combined_labels.isin(groups_to_keep)

# Explicit copy: obs columns are assigned below, and writing into a view
# would trigger an implicit copy with a warning.
adata_tmp = adata[keep_cells.to_numpy()].copy()
adata_tmp.obs[anno] = pd.Categorical(adata_tmp.obs[anno], categories=cells, ordered=True)
# Positional assignment (to_numpy) avoids index alignment; row order matches the mask.
adata_tmp.obs['groupby_combined'] = combined_labels[keep_cells].to_numpy()

# plotting; return_fig=True yields the plot object so it can be styled and saved later
fig = sc.pl.dotplot(adata_tmp,
                    var_names=genes,
                    groupby='groupby_combined',
                    standard_scale='var',
                    return_fig=True,)

In [None]:
# Add the per-group totals panel and style the dots, then export the figure as PDF.
fig.add_totals().style(dot_edge_color='black', dot_edge_lw=0.5, cmap="Reds")

# Saving does not create missing directories, so make sure the target folder
# exists (relative to the current working directory) before writing.
os.makedirs('figures', exist_ok=True)
fig.savefig('figures/Diff_cyt.pdf')

In [None]:
# plot for figure 6 

In [None]:
# Normalise and log-transform only if that has not happened yet.
# `adata` is already normalised and log1p-transformed by an earlier cell, and
# repeating both steps would log-transform the log-transformed values again.
# sc.pp.log1p records its run in adata.uns['log1p'], which serves as the guard.
if 'log1p' not in adata.uns:
    sc.pp.normalize_total(adata)
    sc.pp.log1p(adata)

In [None]:
# Genes shown in the figure 6 dot plot, in plotting order.
genes = [
    'CCL25',
    'CXCL12',
    'CCL19',
    'CCL21',
    'CCL17',
    'CCL22',
    'CXCL9',
    'CXCL10',
    'CXCL11',
]

In [None]:
# Build `bdata`: non-fetal cells, restricted to cell types that are both
# sufficiently abundant and express at least one of the plotted genes.
import numpy as np

anno = 'cell_type_level_4'

# 1) Drop fetal cells (subset to paediatric samples).
bdata = adata[adata.obs['age_group'] != 'fetal']

# 2) Keep cell types with more than 50 cells.
value_counts = bdata.obs[anno].value_counts()
filtered_value_counts = value_counts[value_counts > 50]
filtered_cell_types = list(filtered_value_counts.index)
bdata = bdata[bdata.obs[anno].isin(filtered_cell_types)]

# 3) Keep cell types by relative expression: the mean expression of each gene
#    per cell type is scaled by that gene's maximum across cell types.
data_subset = bdata[:, genes].to_df()
data_subset[anno] = bdata.obs[anno]
# observed=True groups only on labels that actually occur, and avoids relying
# on the deprecated default for categorical groupers.
mean_expression_per_cell_type = data_subset.groupby(anno, observed=True).mean()
scaled_expression = mean_expression_per_cell_type.div(mean_expression_per_cell_type.max())
# Remove cell types that do not have at least one gene with scaled expression above 0.2.
has_expressed_gene = (scaled_expression > 0.2).any(axis=1)
filtered_scaled_expression = scaled_expression[has_expressed_gene]

# Materialise the final subset as a real copy: later cells write to bdata.obs,
# and writing into a view of `adata` triggers an implicit copy with a warning.
bdata = bdata[bdata.obs[anno].isin(filtered_scaled_expression.index.tolist())].copy()

In [None]:
# Plotting order of the cell types in the dot plot.
ordered_categories_list = [
 'cTECIII',
 'cTECII',
 'cTECI',
 'mcTEC',
 'mcTEC-Prolif',
 'mTECI',
 'mTECII',
 'mTECIII',
 'mTECI-trans',
 'medFb',
 'medFB-MHCIIh',
 'medFb-RGS5',
 'EC-Art',
 'EC-Art-ELN',
 'EC-Ven',
 'EC-Cap',
 'EC-Lymphatic',
 'InterloFb',
 'PeriloFb',
 'Pericyte_CCL19',
 'Pericyte_COL1A1',
 'B-Prolif',
 'B-memory',
 'Macrophage-APOC2',
 'DC1',
 'DC2',
 'DC2-Prolif',
 'aDC1',
 'aDC2',
 'aDC3',
 ]

# Check that nothing is dropped by the re-ordering. Any label present in bdata
# but absent from the list becomes NaN in the Categorical built below, so the
# mismatch is reported explicitly instead of comparing two lengths that are
# never displayed.
present_types = set(bdata.obs[anno].dropna().astype(str))
listed_types = set(ordered_categories_list)
missing_from_list = sorted(present_types - listed_types)
absent_from_data = sorted(listed_types - present_types)
print(f"{len(listed_types)} cell types listed, {len(present_types)} present in bdata")
if missing_from_list:
    print("WARNING: cell types present in bdata but missing from "
          f"ordered_categories_list (they will become NaN): {missing_from_list}")
if absent_from_data:
    print(f"Listed cell types with no cells in bdata: {absent_from_data}")

# Assigning an obs column on a view triggers an implicit copy; make it explicit.
if bdata.is_view:
    bdata = bdata.copy()
bdata.obs[anno] = pd.Categorical(bdata.obs[anno], categories=ordered_categories_list, ordered=True)


In [None]:
# Dot plot of the selected genes per cell type (figure 6), exported as PDF.
# standard_scale='var' scales each gene between 0 and 1 across the groups;
# return_fig=True yields the plot object so it can be styled before saving.
fig = sc.pl.dotplot(bdata,
                    var_names=genes,
                    groupby=anno,
                    standard_scale='var',
                    return_fig=True,)
fig.add_totals().style(dot_edge_color='black', dot_edge_lw=0.5, cmap="Reds")

# Save into the Figure 6 working directory. The 'figures' sub-folder is
# created if needed, because saving fails when the directory is missing.
os.chdir('...Figure_6_nat/')
os.makedirs('figures', exist_ok=True)
fig.savefig('figures/cytokines_cells_paed_count_over_50_expression_over_02.pdf')