### Get the genes expressed in at least 1% of cells, so NicheNet can determine which receptors are expressed

In [6]:
#import the library
import scanpy as sc
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib import colors
import seaborn as sb
import seaborn as sns
import logging
import os
os.chdir(r'E:\AAA_Labwork\Tcell tissues\v2')
import importlib
import warnings
warnings.filterwarnings("ignore")

import pickle as pkl
from matplotlib.colors import LinearSegmentedColormap

In [2]:
def print_full(x):
    """Print ``x`` with pandas' row limit raised to ``len(x)``.

    The ``display.max_rows`` option is changed only for the duration of the
    print. Using a context manager means the caller's previous setting is
    restored afterwards (rather than being reset to the pandas default), and
    it is restored even if printing raises.

    Parameters
    ----------
    x : object
        Anything supporting ``len()`` and ``print()``; typically a pandas
        Series or DataFrame.
    """
    with pd.option_context('display.max_rows', len(x)):
        print(x)

In [3]:
# Categorical palette (25 hex colours) intended to be colorblind-friendly.
my_palette = [
    '#0351A8',
    '#8CB0E0',
    '#D56D11',
    '#FFBB78',
    '#234E08',
    '#53CB8B',
    '#D30083',
    '#CB788D',
    '#4E195A',
    '#C58CCF',
    '#AA290F',
    '#B03FD1',
    '#E8BCCF',
    '#64605F',
    '#B2AD9A',
    '#D2D30B',
    '#D1BD4F',
    '#06DCF2',
    '#9EDAE5',
    '#517219',
    '#5B43CF',
    '#D92F24',
    '#FFD900',
    '#002F33',
    '#B8A3A3',
]

In [4]:
from matplotlib.colors import LinearSegmentedColormap
# Two-stop colormap running from light grey (227, 227, 227) to red (255, 42, 18).
# NOTE: `colors` is rebound to a plain list here, which replaces the `colors`
# name imported from matplotlib earlier in this file.
values = [0, 1]
colors = [
    tuple(np.array(rgb) / 255)  # scale 0-255 channels to the 0-1 range
    for rgb in [(227, 227, 227), (255, 42, 18)]
]
my_cmap = LinearSegmentedColormap.from_list('', colors)

In [12]:
def get_expressed_genes(donors, adata, pct=0.01, batch_key='batch', layer='counts'):
    """Collect, per donor, the genes detected in at least ``pct`` of its cells.

    A gene counts as detected in a cell when its value in
    ``adata.layers[layer]`` is greater than zero.

    Parameters
    ----------
    donors : iterable
        Values of ``adata.obs[batch_key]`` to process; each one becomes a key
        of the returned dict.
    adata : AnnData-like
        Object exposing ``obs``, ``layers``, ``var_names`` and ``n_obs`` and
        supporting boolean indexing over observations.
    pct : float, default 0.01
        Minimum fraction of a donor's cells in which a gene must be detected.
    batch_key : str, default 'batch'
        Column of ``adata.obs`` that is compared against each donor value.
    layer : str, default 'counts'
        Key into ``adata.layers`` holding the matrix that is thresholded at > 0.

    Returns
    -------
    dict
        Maps each donor to a list of gene names. A donor with no matching
        cells maps to an empty list.
    """
    expressed_genes = {}

    for donor in donors:
        adata_sub = adata[adata.obs[batch_key] == donor]
        n_cells = adata_sub.n_obs

        # Without this guard the fraction below would be 0/0 (NaN plus a
        # RuntimeWarning); an empty donor simply has no expressed genes.
        if n_cells == 0:
            expressed_genes[donor] = []
            continue

        # Sparse layers return an np.matrix from sum(); asarray + ravel
        # flattens matrix and ndarray results alike to a 1-D vector.
        n_detected = np.asarray((adata_sub.layers[layer] > 0).sum(axis=0)).ravel()
        fraction = n_detected / n_cells
        expressed_genes[donor] = adata.var_names[fraction >= pct].tolist()

    return expressed_genes

In [7]:
# Load the annotated gut and liver datasets from the working directory.
adata_gut, adata_liver = (
    sc.read_h5ad(h5ad_path)
    for h5ad_path in (r"gut_annotated.h5ad", r"liver_annotated.h5ad")
)

In [9]:
# Gut cells restricted to batches '3' and '4', split into T cells and macrophages.
gut_batch_mask = adata_gut.obs['batch'].isin(['3', '4'])
adata_DEG_gut_T = adata_gut[
    adata_gut.obs['celltype_lowres'].isin(['T Cells']) & gut_batch_mask
]
adata_DEG_gut_Mac = adata_gut[
    adata_gut.obs['celltype_lowres'].isin(['Macrophages']) & gut_batch_mask
]

In [10]:
# Liver cells restricted to batches '3' and '4', split into T cells and macrophages.
liver_batch_mask = adata_liver.obs['batch'].isin(['3', '4'])
adata_DEG_liver_T = adata_liver[
    adata_liver.obs['celltype_lowres'].isin(['T Cells']) & liver_batch_mask
]
adata_DEG_liver_Mac = adata_liver[
    adata_liver.obs['celltype_lowres'].isin(['Macrophages']) & liver_batch_mask
]

In [11]:
# The four subsets, in the order: gut T, gut Mac, liver T, liver Mac.
adata_list = [
    adata_DEG_gut_T,
    adata_DEG_gut_Mac,
    adata_DEG_liver_T,
    adata_DEG_liver_Mac,
]

In [18]:
# Display the per-cell 'Donor ID' column of the gut dataset.
adata_gut.obs['Donor ID']

AAACCTGAGCGTGAAC-1-3    Donor AJD3280
AAACCTGAGCTTCGCG-1-3    Donor AJD3280
AAACCTGAGTATGACA-1-3    Donor AJD3280
AAACCTGAGTTGAGAT-1-3    Donor AJD3280
AAACCTGCACGTAAGG-1-3    Donor AJD3280
                            ...      
TTTGGTTTCTCGATGA-1-5    Donor AJKQ118
TTTGTCAAGGCATGGT-1-5    Donor AJKQ118
TTTGTCACACGAGGTA-1-5    Donor AJKQ118
TTTGTCACAGTCCTTC-1-5    Donor AJKQ118
TTTGTCATCGCACTCT-1-5    Donor AJKQ118
Name: Donor ID, Length: 14218, dtype: category
Categories (3, object): ['Donor AJD3280', 'Donor AJG2309', 'Donor AJKQ118']

In [15]:
# For every subset, compute the expressed genes of batches '3' and '4' and
# write each batch's gene list to '<batch>_<name>_receptors.csv'.
save_names = ['gutT', 'gutMac', 'liverT', 'liverMac']
for name, subset in zip(save_names, adata_list):
    expressed_genes = get_expressed_genes(['3', '4'], subset)
    for batch_id in ('3', '4'):
        gene_table = pd.DataFrame(expressed_genes[batch_id])
        gene_table.to_csv(batch_id + '_' + name + '_receptors.csv')

## Prepare cell type profiles

In [None]:
# For each donor, average the expression of every gene within each
# 'celltype_lowres' group (separately for liver and gut) and save the
# resulting table under 'nichenet/profiles/'.
donors = ['Donor AJD3280', 'Donor AJG2309']
for donor in donors:

    # ---- liver ----
    liver_donor = adata_liver[adata_liver.obs['Donor ID'].isin([donor])]
    liver_df = liver_donor.to_df().fillna(0)
    liver_df['cell type'] = liver_donor.obs['celltype_lowres']

    liver_count = liver_df.groupby(['cell type']).mean().unstack().unstack().T
    # Keep only the columns that are not zero in every row.
    liver_nonzero = ~(liver_count == 0).all(axis=0)
    liver_count = liver_count.iloc[:, list(liver_nonzero)]
    # Values below 0.01 are zeroed out.
    liver_count[liver_count < 0.01] = 0
    liver_count.to_csv('nichenet/profiles/' + donor + '_liver_profile.csv')

    # ---- gut ----
    gut_donor = adata_gut[adata_gut.obs['Donor ID'].isin([donor])]
    gut_df = gut_donor.to_df().fillna(0)
    gut_df['cell type'] = gut_donor.obs['celltype_lowres']

    gut_count = gut_df.groupby(['cell type']).mean().unstack().unstack().T
    gut_nonzero = ~(gut_count == 0).all(axis=0)
    gut_count = gut_count.iloc[:, list(gut_nonzero)]
    gut_count[gut_count < 0.01] = 0
    gut_count.to_csv('nichenet/profiles/' + donor + '_gut_profile.csv')