In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pickle
import os
import sys
import scanpy as sc
from pyprojroot import here
import session_info
import anndata as ad

# import Spectra as spc

# from Spectra import K_est as kst

# Put the project's `bin` folder on the import path before loading the palette
# helpers (presumably where customPalette.py lives — confirm).
sys.path.insert(1, str(here('bin')))
from customPalette import diseases_palette
from customPalette import diseaseCategories


import warnings
# Silences every warning for the rest of the session.
# NOTE(review): this also hides library warnings about implicit copies or
# index changes on AnnData objects — consider narrowing the filter.
warnings.filterwarnings('ignore')

In [2]:
# Folder of this analysis step, resolved against the project root by pyprojroot.
# NOTE(review): not referenced again in the cells shown here.
project_dir = here('03_downstream_analysis/07_gene_regulatory_network/')

# Load object

**adata**

In [3]:
# Load the gene-universe `.log1p` AnnData object.
# The location is resolved against the project root with pyprojroot, like the
# other paths in this notebook, instead of a machine-specific absolute path,
# so the notebook runs from any checkout of the repository.
input_file_path = here("03_downstream_analysis/02_gene_universe_definition/results/04_MAIN_geneUniverse.log1p.h5ad")
adata = sc.read_h5ad(input_file_path)

In [9]:
# Display the summary of the loaded AnnData (dimensions and obs/var/uns fields).
adata

AnnData object with n_obs × n_vars = 4435922 × 8253
    obs: 'studyID', 'libraryID', 'sampleID', 'chemistry', 'disease', 'sex', 'binned_age', 'Level1', 'Level2'
    var: 'hgnc_id', 'symbol', 'locus_group', 'HUGO_status', 'highly_variable'
    uns: 'log1p'

In [8]:
# Inspect the unstructured annotations stored on the object.
adata.uns

{'log1p': {}}

**Metadata**

In [14]:
# Sample-level SLE clinical annotations (SLEDAI score, flare and responder status).
# NOTE(review): the CSV yields an extra 'Unnamed: 0' column, presumably a saved
# row index — confirm whether it should be read with index_col=0.
sle_metadata_path = here("03_downstream_analysis/06_inflammation_signatures/results/figures/IFN_response_analysis/SLE_metadata.csv")
SLE_metadata = pd.read_csv(sle_metadata_path)
SLE_metadata.head()

Unnamed: 0,sampleID,SLEDAI_score,Flare,Responder,studyID
0,Perez2022_1004_T0,2.0,notF,,Perez2022
1,Perez2022_1014_T0,2.0,notF,,Perez2022
2,Perez2022_1019_T0,0.0,notF,,Perez2022
3,Perez2022_1022_T0,2.0,notF,,Perez2022
4,Perez2022_1031_T0,2.0,notF,,Perez2022


# Subset adata

**Cells: SLE or healthy, from the CNAG and Jimmie Ye studies**

In [9]:
# Keep only the cells that belong to one of the selected studies AND carry
# one of the selected disease labels.
studyIDs_of_interest = ['Perez2022', 'SCGT00']
diseases_of_interest = ['healthy', 'SLE']

study_mask = adata.obs['studyID'].isin(studyIDs_of_interest)
disease_mask = adata.obs['disease'].isin(diseases_of_interest)

# Boolean row indexing returns a view of `adata`, not a copy.
adata_subset = adata[study_mask & disease_mask]
adata_subset

View of AnnData object with n_obs × n_vars = 996812 × 8253
    obs: 'studyID', 'libraryID', 'sampleID', 'chemistry', 'disease', 'sex', 'binned_age', 'Level1', 'Level2'
    var: 'hgnc_id', 'symbol', 'locus_group', 'HUGO_status', 'highly_variable'
    uns: 'log1p'

**Genes: SPI1 and STAT1 targets**

In [11]:
# SPI1 / STAT1 target gene symbols, kept in their original order.
# Written as a whitespace-separated block and split into a list of strings.
genes_of_interest = """
    OAS1 IFITM3 S100A10 FTH1 ADGRG1 S100A6 GAPDH
    MX2 ACTB ISG20 MT2A IFI27 IFIT2 IFIT3 PRF1
    ADAR IFI6 IFIT1 IFITM1 ISG15 MX1 RAC2 LGALS1
    PSMB9 FOS SPI1 VIM S100A8 TKT TIMP1 GSTP1
    ITGAX SERPINA1 FGL2 LYZ TSPO SLC11A1 TNFSF13
    STXBP2 LGALS3 PSAP FBP1 TYMP COX4I1 CD86
    UPP1 LY96 LAP3 CALM1 SRSF7 FUS
""".split()
len(genes_of_interest)

51

In [10]:
# Restrict the variables to the genes whose symbol is in `genes_of_interest`.
gene_mask = adata_subset.var['symbol'].isin(genes_of_interest)
adata_subset = adata_subset[:, gene_mask]
adata_subset

View of AnnData object with n_obs × n_vars = 996812 × 51
    obs: 'studyID', 'libraryID', 'sampleID', 'chemistry', 'disease', 'sex', 'binned_age', 'Level1', 'Level2'
    var: 'hgnc_id', 'symbol', 'locus_group', 'HUGO_status', 'highly_variable'
    uns: 'log1p'

# Add Metadata

In [21]:
# Attach the sample-level SLE clinical annotations to every cell (left join on
# sampleID, so cells without metadata keep NaN in the new columns).
#
# A column-on-column merge ignores the left frame's index and returns a fresh
# 0..N-1 RangeIndex. Assigning that frame back to `.obs` would replace the
# original obs index (the cell identifiers), so it is restored explicitly.
#
# `validate='many_to_one'` raises if a sampleID occurs more than once in the
# metadata; duplicates would multiply cells and break the row-for-row
# correspondence with `adata_subset` that the index restoration relies on.
sle_columns = ["sampleID", "SLEDAI_score", "Flare", "Responder"]
merged_df = pd.merge(
    adata_subset.obs,
    SLE_metadata[sle_columns],
    on='sampleID',
    how='left',
    validate='many_to_one',
)
merged_df.index = adata_subset.obs.index

In [24]:
# Replace the cell annotations with the merged table. After this assignment
# the object is no longer reported as a "View of" the parent AnnData.
# NOTE(review): anndata takes the obs names from merged_df's index, so that
# index needs to hold the original cell identifiers — confirm.
adata_subset.obs = merged_df

In [25]:
# Confirm that the clinical columns are now part of `.obs`.
adata_subset

AnnData object with n_obs × n_vars = 996812 × 51
    obs: 'studyID', 'libraryID', 'sampleID', 'chemistry', 'disease', 'sex', 'binned_age', 'Level1', 'Level2', 'SLEDAI_score', 'Flare', 'Responder'
    var: 'hgnc_id', 'symbol', 'locus_group', 'HUGO_status', 'highly_variable'
    uns: 'log1p'

# Save processed adata

In [26]:
# Last look at the object right before it is written to disk.
adata_subset

AnnData object with n_obs × n_vars = 996812 × 51
    obs: 'studyID', 'libraryID', 'sampleID', 'chemistry', 'disease', 'sex', 'binned_age', 'Level1', 'Level2', 'SLEDAI_score', 'Flare', 'Responder'
    var: 'hgnc_id', 'symbol', 'locus_group', 'HUGO_status', 'highly_variable'
    uns: 'log1p'

In [27]:
# Persist the annotated subset as a gzip-compressed .h5ad in this step's results folder.
output_file_path = here("03_downstream_analysis/07_gene_regulatory_network/results/04_MAIN_geneUniverse.log1p_SLEsubset.h5ad")
adata_subset.write(output_file_path, compression="gzip")

In [28]:
# Show the absolute path that the project-relative output location resolves to.
here("03_downstream_analysis/07_gene_regulatory_network/results/04_MAIN_geneUniverse.log1p_SLEsubset.h5ad")

PosixPath('/scratch_isilon/groups/singlecell/shared/projects/Inflammation-PBMCs-Atlas/03_downstream_analysis/07_gene_regulatory_network/results/04_MAIN_geneUniverse.log1p_SLEsubset.h5ad')

In [31]:
# Number of cells contributed by each study in the saved subset.
adata_subset.obs['studyID'].value_counts()

Perez2022    860914
SCGT00       135898
Name: studyID, dtype: int64