In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pickle
import os
import sys
import scanpy as sc
from pyprojroot import here
import session_info
import anndata as ad

# import Spectra as spc

# from Spectra import K_est as kst

# Put the project's `bin` directory on the import path before importing from
# customPalette (presumably that is where the module lives — confirm).
sys.path.insert(1, str(here('bin')))
from customPalette import diseases_palette
from customPalette import diseaseCategories


import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including deprecation and data-integrity warnings from scanpy/anndata.
warnings.filterwarnings('ignore')

In [2]:
# Folder of this analysis step, resolved against the project root.
grn_relative_dir = '03_downstream_analysis/07_gene_regulatory_network/'
project_dir = here(grn_relative_dir)

# Load object

**adata**

In [3]:
# Resolve the input against the project root (via pyprojroot's `here`) rather
# than hard-coding the cluster mount point, so the notebook also runs from a
# checkout located elsewhere. Kept as `str` to preserve the variable's type.
input_file_path = str(
    here("03_downstream_analysis/02_gene_universe_definition/results/04_MAIN_geneUniverse.log1p.h5ad")
)

# Read the log1p gene-universe AnnData object from disk.
adata = sc.read_h5ad(input_file_path)

In [4]:
# Display the AnnData summary (dimensions and the obs / var / uns fields).
adata

AnnData object with n_obs × n_vars = 4435922 × 8253
    obs: 'studyID', 'libraryID', 'sampleID', 'chemistry', 'disease', 'sex', 'binned_age', 'Level1', 'Level2'
    var: 'hgnc_id', 'symbol', 'locus_group', 'HUGO_status', 'highly_variable'
    uns: 'log1p'

In [5]:
# Inspect the unstructured (`uns`) annotations carried by the object.
adata.uns

{'log1p': {}}

# Subset adata

In [10]:
# Selection criteria: studies and disease states to keep.
studyIDs_of_interest = ['SCGT00', 'SCGT04', 'SCGT02']
diseases_of_interest = ['healthy', 'cirrhosis']

# One boolean mask per criterion, all evaluated on the per-cell annotations.
cell_annotations = adata.obs
in_selected_study = cell_annotations['studyID'].isin(studyIDs_of_interest)
in_selected_disease = cell_annotations['disease'].isin(diseases_of_interest)
is_monocyte = cell_annotations['Level1'].isin(["Mono"])

# Keep the cells that satisfy all three criteria (the result is a view of `adata`).
adata_subset = adata[in_selected_study & in_selected_disease & is_monocyte]
adata_subset

View of AnnData object with n_obs × n_vars = 78149 × 8253
    obs: 'studyID', 'libraryID', 'sampleID', 'chemistry', 'disease', 'sex', 'binned_age', 'Level1', 'Level2'
    var: 'hgnc_id', 'symbol', 'locus_group', 'HUGO_status', 'highly_variable'
    uns: 'log1p'

In [11]:
# Cell counts per disease / study / Level1 combination in the subset.
count_columns = ["disease", "studyID", "Level1"]
adata_subset.obs[count_columns].value_counts()

disease    studyID  Level1
cirrhosis  SCGT04   Mono      63102
healthy    SCGT02   Mono       9594
           SCGT00   Mono       5453
Name: count, dtype: int64

**Genes: SP1 and STAT1 targets**

In [14]:
# Gene symbols used to restrict the expression matrix
# (described by the section heading as SP1 and STAT1 targets).
genes_of_interest = [
    "OAS1", "IFITM3", "S100A10", "FTH1", "ADGRG1",
    "S100A6", "GAPDH", "MX2", "ACTB", "ISG20",
    "MT2A", "IFI27", "IFIT2", "IFIT3", "PRF1",
    "ADAR", "IFI6", "IFIT1", "IFITM1", "ISG15",
    "MX1", "RAC2", "LGALS1", "PSMB9", "FOS",
    "SPI1", "VIM", "S100A8", "TKT", "TIMP1",
    "GSTP1", "ITGAX", "SERPINA1", "FGL2", "LYZ",
    "TSPO", "SLC11A1", "TNFSF13", "STXBP2", "LGALS3",
    "PSAP", "FBP1", "TYMP", "COX4I1", "CD86",
    "UPP1", "LY96", "LAP3", "CALM1", "SRSF7",
    "FUS",
]

# Number of requested genes.
len(genes_of_interest)

51

In [15]:
# Keep only the genes of interest, matched on the `symbol` column of `var`.
gene_symbols = adata_subset.var["symbol"]
gene_mask = gene_symbols.isin(genes_of_interest)

# `isin` silently ignores requested symbols that are absent from the object,
# so report them instead of letting the gene set shrink unnoticed.
missing_genes = sorted(set(genes_of_interest) - set(gene_symbols))
if missing_genes:
    print(f"Genes of interest not found in adata_subset.var['symbol']: {missing_genes}")

adata_subset = adata_subset[:, gene_mask]
adata_subset

View of AnnData object with n_obs × n_vars = 78149 × 51
    obs: 'studyID', 'libraryID', 'sampleID', 'chemistry', 'disease', 'sex', 'binned_age', 'Level1', 'Level2'
    var: 'hgnc_id', 'symbol', 'locus_group', 'HUGO_status', 'highly_variable'
    uns: 'log1p'

# Save processed adata

In [16]:
# Show the subset one last time before it is written to disk.
adata_subset

View of AnnData object with n_obs × n_vars = 78149 × 51
    obs: 'studyID', 'libraryID', 'sampleID', 'chemistry', 'disease', 'sex', 'binned_age', 'Level1', 'Level2'
    var: 'hgnc_id', 'symbol', 'locus_group', 'HUGO_status', 'highly_variable'
    uns: 'log1p'

In [17]:
# Destination of the subset, resolved against the project root.
output_file_path = here("03_downstream_analysis/07_gene_regulatory_network/results/04_MAIN_geneUniverse.log1p_Cirrhosissubset.h5ad")

# Create the results folder if needed; without it the write fails on a fresh checkout.
output_file_path.parent.mkdir(parents=True, exist_ok=True)

# Write the subset as a gzip-compressed h5ad file.
adata_subset.write(output_file_path, compression="gzip")

In [18]:
# Display the resolved absolute path of the file written above.
here("03_downstream_analysis/07_gene_regulatory_network/results/04_MAIN_geneUniverse.log1p_Cirrhosissubset.h5ad")

PosixPath('/scratch_isilon/groups/singlecell/shared/projects/Inflammation-PBMCs-Atlas/03_downstream_analysis/07_gene_regulatory_network/results/04_MAIN_geneUniverse.log1p_Cirrhosissubset.h5ad')