In [1]:
import scanpy as sc
import pandas as pd
import numpy as np

In [2]:
# Load the AnnData object from disk.
# NOTE(review): absolute, user-specific path; this cell only runs on a machine with this exact directory layout.
adata = sc.read("/Users/jinmr2/Dropbox/Code/data/batch2_all_raw_filtered.h5ad")

In [10]:
# Show the AnnData summary (dimensions plus obs / var / uns fields).
# NOTE(review): this cell was executed as In [10]; its output already lists uns['rank_genes_groups'],
# so it reflects the state after the ranking cell that appears further down.
adata

AnnData object with n_obs × n_vars = 24673 × 35635
    obs: 'tsne1', 'tsne2', 'ind', 'stim', 'cluster', 'cell', 'multiplets'
    var: 'gene symbol'
    uns: 'rank_genes_groups'

In [3]:
# Rank genes for each group in obs["cell"]; the results are stored in adata.uns["rank_genes_groups"].
# pts=True additionally stores the 'pts' / 'pts_rest' entries that format_DEGs reads.
sc.tl.rank_genes_groups(adata, groupby = "cell", pts = True)



In [4]:
# Inspect which result entries the ranking step stored.
adata.uns["rank_genes_groups"].keys()

dict_keys(['params', 'pts', 'pts_rest', 'names', 'scores', 'pvals', 'pvals_adj', 'logfoldchanges'])

In [7]:
# Flatten the ranking results into one long table (one row per group/gene pair).
# NOTE(review): format_DEGs is defined in the next cell (In [6]); on a fresh top-to-bottom run
# that cell has to be executed before this one, otherwise this raises NameError.
df_deg = format_DEGs(adata)

In [6]:
def format_DEGs(adata):
    """Flatten ``adata.uns["rank_genes_groups"]`` into one long table.

    Parameters
    ----------
    adata
        Object whose ``uns["rank_genes_groups"]`` mapping holds the per-group
        result arrays ("names", "scores", ...) and, optionally, the "pts" /
        "pts_rest" tables indexed by gene name with one column per group.

    Returns
    -------
    pandas.DataFrame
        One row per (group, ranked gene); groups are stacked in the column
        order of the "names" array.  The index holds the gene name (index name
        "names").  Columns are "Status" (the group label) followed by
        whichever of "scores", "logfoldchanges", "pvals", "pvals_adj", "pts",
        "pts_rest" are present in the results, in that order.

    Raises
    ------
    KeyError
        If "rank_genes_groups" or its "names" entry is missing.
    """
    results = adata.uns["rank_genes_groups"]

    names = pd.DataFrame(results["names"])
    groups = list(names.columns)
    n_ranked = names.shape[0]

    # Values are flattened column by column (order="F"), so the group label
    # must be repeated in that same column order.  Sorting the labels instead
    # mislabels every row whenever the groups are not already in sorted order
    # (e.g. naturally ordered cluster ids "0", "1", ..., "10").
    merged = pd.DataFrame(
        {"Status": [group for group in groups for _ in range(n_ranked)]},
        index=pd.Index(names.to_numpy().ravel(order="F"), name="names"),
    )

    # Per-rank statistics share the layout of "names"; entries that were not
    # computed are skipped instead of raising.
    for key in ("scores", "logfoldchanges", "pvals", "pvals_adj"):
        if key not in results:
            continue
        stat = pd.DataFrame(results[key])[groups]
        merged[key] = stat.to_numpy().ravel(order="F")

    # "pts" tables are indexed by gene rather than by rank, so each group's
    # values are looked up through that group's ranked gene names.
    for key in ("pts", "pts_rest"):
        if key not in results:
            continue
        fractions = pd.DataFrame(results[key])
        merged[key] = np.concatenate(
            [fractions.loc[names[group].to_numpy(), group].to_numpy() for group in groups]
        )

    return merged

In [8]:
# Preview the flattened table; pandas truncates the middle rows in the rendered output.
df_deg

Unnamed: 0_level_0,Status,scores,logfoldchanges,pvals,pvals_adj,pts,pts_rest
names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
CD74,B cells,51.954353,12.832310,0.000000e+00,0.000000e+00,0.990284,0.526109
CD79A,B cells,45.080624,7.622758,0.000000e+00,0.000000e+00,0.690245,0.012670
RPL18A,B cells,32.703323,5.058470,3.407655e-201,6.132919e-199,0.996891,0.947783
MS4A1,B cells,32.226830,7.371540,9.420891e-192,1.568754e-189,0.456277,0.005158
CD37,B cells,32.126408,2.349647,1.389609e-192,2.335789e-190,0.650991,0.296561
...,...,...,...,...,...,...,...
S100A10,NK cells,-72.303185,-5.342782,0.000000e+00,0.000000e+00,0.230808,0.541623
PABPC1,NK cells,-73.809273,-5.536338,0.000000e+00,0.000000e+00,0.434521,0.839638
FTL,NK cells,-76.292999,-72.683151,0.000000e+00,0.000000e+00,0.810336,0.875705
S100A11,NK cells,-77.062469,-6.259836,0.000000e+00,0.000000e+00,0.357250,0.590476


In [9]:
# Export the table as tab-separated text, relative to the current working directory.
# The 'names' index is written as the first column (to_csv writes the index by default).
df_deg.to_csv("DEGs_cell.txt", sep = "\t")