# Setup   
## Goals   
   
1. Identify cell types captured in MS1 that express analytes assayed in both in-house and Evetech cytokine assays   
   
## Import packages   

In [1]:
import scanpy as sc
import pandas as pd
import numpy as np
import os

In [5]:
print(sc.__version__)
print(pd.__version__)
print(np.__version__)

1.9.1
1.5.1
1.23.4


## Import data   

In [2]:
ms1 = sc.read_h5ad('/projects/b1038/Pulmonary/nmarkov/script/data/33tram-day0/30obj-export.h5ad')
ms1_covid = sc.read_h5ad('/projects/b1038/Pulmonary/nmarkov/script/data/33tram-day0/30obj-covid-export.h5ad')
print(ms1)

AnnData object with n_obs × n_vars = 105715 × 21819
    obs: 'Patient', 'Day after intubation', 'Sample type', 'Sample', 'Sample status', 'Cluster', 'COVID-19', 'No of SARS-CoV-2 genes', 'IFNA Response', 'IFNG Response', 'No of genes', 'No of UMIs', '% of mito genes', '% of ribo genes'
    var: 'gene_ids-0', 'feature_types-0', 'n_cells-0', 'gene_ids-1', 'feature_types-1', 'n_cells-1', 'gene_ids-10', 'feature_types-10', 'n_cells-10', 'gene_ids-11', 'feature_types-11', 'n_cells-11', 'gene_ids-12', 'feature_types-12', 'n_cells-12', 'gene_ids-13', 'feature_types-13', 'n_cells-13', 'gene_ids-14', 'feature_types-14', 'n_cells-14', 'gene_ids-15', 'feature_types-15', 'n_cells-15', 'gene_ids-16', 'feature_types-16', 'n_cells-16', 'gene_ids-17', 'feature_types-17', 'n_cells-17', 'gene_ids-18', 'feature_types-18', 'n_cells-18', 'gene_ids-2', 'feature_types-2', 'n_cells-2', 'gene_ids-3', 'feature_types-3', 'n_cells-3', 'gene_ids-4', 'feature_types-4', 'n_cells-4', 'gene_ids-5', 'feature_types-5', 

# Identify key expressors   
## Flag genes of interest   

In [3]:
goi = ['CCL21', 'CXCL13', 'CCL27', 'EGF', 'CXCL5',
       'CCL11', 'CCL24', 'CCL26', 'FGF2', 'FLT3LG',
       'CX3CL1', 'CSF3', 'CSF2', 'CXCL1', 'CCL1',
       'IFNA2', 'IFNG', 'IL10', 'IL13', 'IL15',
       'IL16', 'IL17A', 'IL1A', 'IL1B', 'IL1RN',
       'IL2', 'IL20', 'IL21', 'IL23', 'IFNL2',
       'IL3', 'IL33', 'IL4', 'IL5', 'IL6',
       'IL7', 'IL8', 'IL9', 'CXCL10', 'LIF',
       'CCL2', 'CCL8', 'CCL7', 'CCL13', 'CCL22',
       'CCL3', 'CCL4', 'CCL15', 'CD40LG', 'KITLG',
       'CXCL12', 'CCL17', 'TGFA', 'TNF', 'LTA',
       'THPO', 'TNFSF10', 'TSLP', 'VEGFA']

detected = np.intersect1d(goi, ms1.var_names)
undetected = np.setdiff1d(goi, ms1.var_names)
len(detected)

48

## By celltype (COVID only)
### Add celltype to dataset

In [4]:
celltype = [None] * len(ms1.obs["Cluster"])
for i in range(0, len(ms1.obs["Cluster"]) - 1):
    clus = ms1.obs["Cluster"][i]
    if clus == "iNKT cells":
        celltype[i] = "iNKT Cells"
    elif clus == "CD8 cytotoxic T cells" or clus == "CD8 cytotoxic TRM T cells" or clus == "CD8 prolif. T cells":
        celltype[i] = "CD8+ T Cells"
    elif clus == "MoAM1" or clus == "MoAM2" or clus == "MoAM3" or clus == "MoAM4":
        celltype[i] = "MoAM"
    elif clus == "CD4 CM T cells" or clus == "CD4 cytotoxic T cells" or clus == "CD4 prolif. T cells":
        celltype[i] = "CD4+ T Cells"
    elif clus == "TRAM1" or clus == "TRAM2" or clus == "Prolif. AM":
        celltype[i] = "TRAM"
    elif clus == "Treg":
        celltype[i] = "Treg"
    elif clus == "Ciliated cells":
        celltype[i] = "Ciliated Cells"
    elif clus == "Mixed myeloid":
        celltype[i] = "Mixed Myeloid"
    elif clus == "Club, Basal cells":
        celltype[i] = "Club, Basal Cells"
    elif clus == "AT2, AT1 cells":
        celltype[i] = "AT2, AT1 Cells"
    elif clus == "Infected AT2, AT1 cells":
        celltype[i] = "Infected AT2, AT1 Cells"
    elif clus == "B cells":
        celltype[i] = "B Cells"
    elif clus == "pDC":
        celltype[i] = "pDC"
    elif clus == "Plasma cells":
        celltype[i] = "Plasma Cells"
    elif clus == "DC1" or clus == "DC2" or clus == "Migratory DC":
        celltype[i] = "Dendritic Cells"
    elif clus == "Ionocytes":
        celltype[i] = "Ionocytes"
    elif clus == "Mast cells":
        celltype[i] = "Mast Cells"
        
#now add to AnnData
ms1.obs["Celltype"] = celltype

In [6]:
celltype = [None] * len(ms1_covid.obs["Cluster"])
for i in range(0, len(ms1_covid.obs["Cluster"]) - 1):
    clus = ms1_covid.obs["Cluster"][i]
    if clus == "iNKT cells":
        celltype[i] = "iNKT Cells"
    elif clus == "CD8 cytotoxic T cells" or clus == "CD8 cytotoxic TRM T cells" or clus == "CD8 prolif. T cells":
        celltype[i] = "CD8+ T Cells"
    elif clus == "MoAM1" or clus == "MoAM2" or clus == "MoAM3" or clus == "MoAM4":
        celltype[i] = "MoAM"
    elif clus == "CD4 CM T cells" or clus == "CD4 cytotoxic T cells" or clus == "CD4 prolif. T cells":
        celltype[i] = "CD4+ T Cells"
    elif clus == "TRAM1" or clus == "TRAM2" or clus == "Prolif. AM":
        celltype[i] = "TRAM"
    elif clus == "Treg":
        celltype[i] = "Treg"
    elif clus == "Ciliated cells":
        celltype[i] = "Ciliated Cells"
    elif clus == "Mixed myeloid":
        celltype[i] = "Mixed Myeloid"
    elif clus == "Club, Basal cells":
        celltype[i] = "Club, Basal Cells"
    elif clus == "AT2, AT1 cells":
        celltype[i] = "AT2, AT1 Cells"
    elif clus == "Infected AT2, AT1 cells":
        celltype[i] = "Infected AT2, AT1 Cells"
    elif clus == "B cells":
        celltype[i] = "B Cells"
    elif clus == "pDC":
        celltype[i] = "pDC"
    elif clus == "Plasma cells":
        celltype[i] = "Plasma Cells"
    elif clus == "DC1" or clus == "DC2" or clus == "Migratory DC":
        celltype[i] = "Dendritic Cells"
    elif clus == "Ionocytes":
        celltype[i] = "Ionocytes"
    elif clus == "Mast cells":
        celltype[i] = "Mast Cells"
        
#now add to AnnData
ms1_covid.obs["Celltype"] = celltype

In [7]:
cluster_expression = sc.get.obs_df(
    ms1_covid,
    keys=["Celltype", *detected],
    use_raw = True
    )

In [8]:
celltype_expression_means = cluster_expression.groupby("Celltype").mean()
celltype_expression_means.to_csv("/projects/b1038/Pulmonary/rgrant/microglia_aging_flu/scRNAseq/data/230125_celltype_expression.csv")

In [9]:
celltype_expression_means

Unnamed: 0_level_0,CCL1,CCL13,CCL15,CCL17,CCL2,CCL22,CCL24,CCL26,CCL27,CCL3,...,IL6,IL7,KITLG,LIF,LTA,TGFA,TNF,TNFSF10,TSLP,VEGFA
Celltype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"AT2, AT1 Cells",0.0,0.013579,0.0,0.0,0.117845,0.006232,0.0,0.0,0.0,0.019713,...,0.0,0.010698,0.103733,0.031643,0.0,0.05629,0.001545,0.35768,0.063897,0.337826
B Cells,0.0,0.004672,0.0,0.014647,0.279265,0.034092,0.0,0.0,0.0,0.076966,...,0.010517,0.082585,0.0,0.0,0.298613,0.0,0.151751,0.561098,0.0,0.011328
CD4+ T Cells,0.0,0.008248,0.0,0.003008,0.098054,0.000566,0.001022,0.0,9.5e-05,0.217793,...,0.000245,0.022664,0.0,0.005644,0.220298,0.004001,0.214534,0.759788,0.000548,0.001317
CD8+ T Cells,0.000194,0.00752,0.0,0.003115,0.192287,0.000446,0.000544,0.0,0.0,0.338988,...,0.002448,0.006674,0.000839,0.002054,0.112804,0.005365,0.142035,0.788746,0.000764,0.004459
Ciliated Cells,0.0,0.01452,0.079106,0.013772,0.180331,0.000598,0.002018,0.00047,0.0,0.033685,...,0.001657,0.11009,0.050296,0.020292,0.005361,0.052853,0.006987,0.541761,0.001728,0.01134
"Club, Basal Cells",0.0,0.010708,0.0,0.007545,0.102265,0.0,0.0,0.004745,0.0,0.02408,...,0.004865,0.026344,0.103811,0.012552,0.0,0.088301,0.0,1.222194,0.031204,0.172947
Dendritic Cells,0.0,0.00779,0.0,0.604419,0.257715,0.324969,0.003536,0.0,0.0,0.086309,...,0.00923,0.025739,0.00365,0.002102,0.013893,0.024146,0.151499,0.582671,0.0,0.046613
"Infected AT2, AT1 Cells",0.0,0.0,0.0,0.041722,0.152487,0.030698,0.0,0.0,0.0,0.032552,...,0.038165,0.0,0.190189,0.0,0.0,0.028374,0.044061,0.211419,0.0,0.416783
Ionocytes,0.0,0.023563,0.0,0.0,0.125282,0.0,0.0,0.0,0.0,0.13136,...,0.0,0.021421,0.138118,0.0,0.038362,0.100205,0.034779,0.547897,0.023549,0.114463
Mast Cells,0.0,0.0,0.0,0.0,0.122097,0.0,0.0,0.0,0.0,0.042059,...,0.0,0.0,0.0,0.212256,0.0,0.136337,0.0,1.633558,0.0,0.076221


## By subcluster (COVID only)

In [10]:
cluster_expression = sc.get.obs_df(
    ms1_covid,
    keys=["Cluster", *detected],
    use_raw = True
    )

cluster_expression_means = cluster_expression.groupby("Cluster").mean()
cluster_expression_means.to_csv("/projects/b1038/Pulmonary/rgrant/microglia_aging_flu/scRNAseq/data/230125_subcluster_expression.csv")

# Summarize by sample for correlations (including non-COVID)
## By celltype

In [11]:
cor_data_celltype = sc.get.obs_df(
    ms1,
    keys=["Celltype", "Patient", *detected],
    use_raw = True
    )

cor_data_celltype_mean = cor_data_celltype.groupby(["Celltype", "Patient"]).mean()
cor_data_celltype_mean.to_csv("/projects/b1038/Pulmonary/rgrant/microglia_aging_flu/scRNAseq/data/230125_celltype_expression_patient.csv")

## By cluster

In [12]:
cor_data_celltype = sc.get.obs_df(
    ms1,
    keys=["Cluster", "Patient", *detected],
    use_raw = True
    )

cor_data_celltype_mean = cor_data_celltype.groupby(["Cluster", "Patient"]).mean()
cor_data_celltype_mean.to_csv("/projects/b1038/Pulmonary/rgrant/microglia_aging_flu/scRNAseq/data/230125_subcluster_expression_patient.csv")