# Classify cells by previously published gene expression profiles

## Import statements

In [1]:
import os,sys
import datetime

In [2]:
import scanpy as sc
sc.logging.print_versions()
sc.logging.print_memory_usage()
sc.settings.verbosity = 2

scanpy==1.4.4.post1 anndata==0.6.22.post1 umap==0.3.7 numpy==1.15.4 scipy==1.3.1 pandas==0.23.4 scikit-learn==0.20.1 statsmodels==0.10.1 python-igraph==0.7.1 louvain==0.6.1
Memory usage: current 0.20 GB, difference +0.20 GB


In [3]:
## This cell is run once to download my custom functions and import statements from github
#
#!git clone --depth=1 https://github.com/rapolaszilionis/utility_functions
#    
## github doesn't seem to have an option to download a specific version of the repo from the history.
## So I download my utility functions and save the download time by appending it to the directory name.
## These utility functions to be shared together with the notebook.
#
#toappend = datetime.datetime.now().strftime('%y%m%d_%Hh%M')
#newname = "utility_functions_%s"%toappend
#print(newname)
#
#
## rename the py file with utility functions
#os.rename("utility_functions",newname)

In [4]:
# add the utility function folder to PATH
sys.path.append(os.path.abspath("utility_functions_200517_09h14/"))

from rz_import_statements import *
import rz_functions as rz
import rz_utility_spring as srz

python version: 3.6.7


## Load  scRNAseq data to classify

In [30]:
adata = sc.read_h5ad('backups/mito_total_counts_filt_raw_27563x40930_200517_10h29.h5ad')

In [6]:
# for the bayesian classifier, one should conceptually use raw counts
# this is because under the hood, the gene counts for a cell i are considered a
# sample from a multinomial distribution.
# in practise this doesn't really matter for finding the most likely profile (max likelihood),
# but does change the likelihood values. Raw counts are not always available.

# check that we are dealing with raw counts (total counts should be integers and different for each cell)
adata.X.sum(axis=1)[:5]

matrix([[14875.],
        [ 6876.],
        [ 4643.],
        [ 5972.],
        [ 5021.]], dtype=float32)

## Classify by Immunity 2019 profiles 
Data from supplementary tables 2 and 7 of [Zilionis et al, 2019 Immunity](https://www.cell.com/immunity/fulltext/S1074-7613(19)30126-8?_returnURL=https%3A%2F%2Flinkinghub.elsevier.com%2Fretrieve%2Fpii%2FS1074761319301268%3Fshowall%3Dtrue).

### Load data

In [7]:
cat0 = pd.read_excel('Zilionis_et_al_2019_supplementary_tables/Table_S2.xlsx',
                    sheet_name = 'mouse_imm_minor',
                    skiprows=1,index_col=0)
cat0.head()

Unnamed: 0,mB cells,mBasophils,mDC1,mDC2,mDC3,mMac1,mMac2,mMac3,mMac4,mMono1,...,mN2,mN3,mN4,mN5,mN6,mNK cells,mT1,mT2,mT3,mpDC
0610007P14Rik,0.089767,0.0,0.063403,0.174103,0.114397,0.12858,0.11421,0.038328,0.191185,0.064419,...,0.020442,0.010433,0.046293,0.068089,0.010836,0.114067,0.115641,0.098038,0.259043,0.290715
0610009B22Rik,0.017358,0.0,0.132578,0.024709,0.008071,0.040755,0.02549,0.0,0.037833,0.024209,...,0.008132,0.011201,0.037617,0.059525,0.003548,0.034531,0.007538,0.00793,0.0,0.066908
0610009L18Rik,0.003314,0.0,0.0,0.0,0.004949,0.0,0.0,0.007517,0.0,0.001792,...,0.0,0.0,0.007343,0.011623,0.016644,0.004382,0.006455,0.028907,0.0,0.018451
0610009O20Rik,0.015298,0.0,0.006752,0.010455,0.003718,0.004981,0.000929,0.0,0.014603,0.012035,...,0.0,0.025192,0.007082,0.008596,0.020377,0.013152,0.009441,0.024701,0.017964,0.036571
0610010F05Rik,0.009331,0.0,0.003668,0.064097,0.066309,0.029815,0.016213,0.0,0.028321,0.019101,...,0.015744,0.0,0.014237,0.020148,0.0,0.027219,0.010989,0.02676,0.059417,0.014259


### Check how the data is normalized and add pseudovalue

In [8]:
cat0.sum()[:5]

mB cells      2434.620456
mBasophils    2434.620456
mDC1          2434.620456
mDC2          2434.620456
mDC3          2434.620456
dtype: float64

In [9]:
# Decide on the pseudovalue to add. Results should not be particularly sensitive to this parameter.
# I will use 1 count-per-10k (equivalent to 100TPM)
pseudocptk = 1
pseudo = pseudocptk*cat0.sum()[0]/1e5
print(pseudo)

cat = cat0+pseudo

0.024346204556159864


### Find common gene names between the two datasets.
 Filtering on variable genes is also an option but I start by simply using all genes

In [10]:
Eraw = adata.X
print(type(Eraw))

gene_list = adata.var_names

<class 'scipy.sparse.csr.csr_matrix'>


In [11]:
# common genes
gmask = np.in1d(gene_list, cat.index)

# genes detected in the current dataset:
m2 = np.array(Eraw.sum(axis=0))[0]>0

# combine masks
gmask = gmask&m2


common_genes = gene_list[gmask]
print(len(gene_list),len(cat.index),len(common_genes))

40930 28205 20565


### Classify

In [12]:
print(Eraw.shape)

(27563, 40930)


In [13]:
start = time.time()
bays = []
i = 0
step=5000
comment = 'Zilionis2019_mouse_minor'
for j in range(step,Eraw.shape[0]+step,step):
    
    # Eraw - sparse cells x gene matrix
    j = min(j,Eraw.shape[0])
    tmp_dense = pd.DataFrame(Eraw.T[gmask][:,i:j].todense())
    tmp_dense.index = np.array(gene_list)[gmask]
    
    bay = rz.bayesian_classifier(tmp_dense,cat.loc[common_genes])
    bays.append(bay)
    i0 = i
    i = j
    
    print('%.2f min.'%((time.time()-start)/60.))
    print('cells from %d to %d done'%(i0,j))

# concatenate
bay = pd.concat(bays,axis=1)

# reset index
bay.columns = np.arange(bay.shape[1])

fname = 'backups/loglikelihoods_bay_classif_%s_%s'%(comment,rz.now())
print(fname)
rz.save_df(bay,fname)

0.18 min.
cells from 0 to 5000 done
0.35 min.
cells from 5000 to 10000 done
0.51 min.
cells from 10000 to 15000 done
0.68 min.
cells from 15000 to 20000 done
0.86 min.
cells from 20000 to 25000 done
0.96 min.
cells from 25000 to 27563 done
backups/loglikelihoods_bay_classif_Zilionis2019_mouse_minor_200517_11h31


## Classify by Immgen profiles

### Load data

In [15]:
# save as above but now storing results in a dictionary:

path_dict = {
    'Immgen':('Zilionis_et_al_2019_supplementary_tables/Table_S7.xlsx','Immgen'),
             }


# for reusability of the code when classifying by different reference datasets
# I store dataframe gene expression profiles to classify by in a dictionary
# 1 item in this case.

cat0_dict = {}

# super-slow...
start = time.time()
for key,value in path_dict.items():
    cat0_dict[key] = pd.read_excel(value[0],sheet_name=value[1],skiprows=3,index_col=0)
    print('%.1f min.'%((time.time()-start)/60.))
    print(cat0_dict[key].head(2))

1.1 min.
               SC_LTSL_BM  SC_STSL_BM  SC_LTSL_FL  SC_STSL_FL  SC_MPP34F_BM  \
0610007C21Rik       61.99       72.90       82.54       74.85         80.91   
0610007L01Rik      104.46      102.38      108.72      108.72        104.42   

               SC_ST34F_BM  SC_CMP_BM_DR  SC_MEP_BM  SC_GMP_BM  SC_CDP_BM  \
0610007C21Rik        76.30         86.65      64.56     132.78     114.41   
0610007L01Rik       111.69        139.07     141.16     220.25     135.14   

                ...    EO_AT_v2  Eo_BL_v2   MC_Tr   MC_To   MC_Sk   MC_PC  \
0610007C21Rik   ...       72.39     49.81  113.40   95.43  107.02  158.87   
0610007L01Rik   ...      126.39     91.14  182.37  188.27  184.97  153.00   

                MC_Es  MC_digest_PC   BA_Sp   BA_Bl  
0610007C21Rik  107.52        187.15   98.70  102.66  
0610007L01Rik  200.19        155.20  140.11  130.30  

[2 rows x 276 columns]


### Check how the data is normalized and add pseudovalue

In [16]:
# Check how the data is normalized, is there a pseudovalue added already?
for cat0 in cat0_dict.values():
    print(cat0.sum()[:5])
    print(cat0.min().min())

SC_LTSL_BM      1217550.00
SC_STSL_BM      1217550.16
SC_LTSL_FL      1217550.23
SC_STSL_FL      1217550.27
SC_MPP34F_BM    1217550.18
dtype: float64
10.98


In [17]:
# OK, the data is normalized to counts per million, and a pseudovalue of 10TPM is already added
# Just for consistency, make the pseudovalue 100TPM. This shouldn't matter too much
# add pseudovalues of 1cptk (=100 TPM)

pseudotpm = 100.
cat_dict = {}
for key,value in cat0_dict.items():
    print(key)
    themin = value.min().min()
    print('Previous pseudovalue:',themin)
    cat = value + pseudotpm - themin
    print('Current pseudovalue:',cat.min().min())
    print()
    cat_dict[key] = cat

Immgen
Previous pseudovalue: 10.98
Current pseudovalue: 100.0



### Find common gene names between the two datasets.

In [18]:
# find common genes detected in my data
common_gene_dict = {}
gmask_dict = {}

# genes detected in the current dataset:
m2 = np.array(Eraw.sum(axis=0))[0]>0

for key,cat in cat_dict.items():

    print(key)
    
    # common genes
    gmask = np.in1d(gene_list, cat.index)
    
    # combine masks
    gmask = gmask&m2
    
    common_genes = gene_list[gmask]
    print(len(gene_list),len(cat.index),len(common_genes))
    print()
    
    common_gene_dict[key] = common_genes
    gmask_dict[key] = gmask

Immgen
40930 21755 18667



### Classify

In [19]:
for key,cat in cat_dict.items():
    
    gmask = gmask_dict[key]
    common_genes = common_gene_dict[key]
    
    start = time.time()
    bays = []
    i = 0
    step=50
    comment = key
    print(comment)
    for j in range(step,Eraw.shape[0]+step,step):
        
        # Eraw - sparse cells x gene matrix
        j = min(j,Eraw.shape[0])
        tmp_dense = pd.DataFrame(Eraw.T[gmask][:,i:j].todense())
        tmp_dense.index = np.array(gene_list)[gmask]
        
        bay = rz.bayesian_classifier(tmp_dense,cat.loc[common_genes])
        bays.append(bay)
        i0 = i
        i = j
        
        print('%.2f min.'%((time.time()-start)/60.))
        print('cells from %d to %d done'%(i0,j))
    
    # concatenate
    bay = pd.concat(bays,axis=1)
    
    # reset index
    bay.columns = np.arange(bay.shape[1])
    
    fname = 'backups/loglikelihoods_bay_classif_%s_%s'%(comment,rz.now())
    print(fname)
    rz.save_df(bay,fname)

Immgen
0.04 min.
cells from 0 to 50 done
0.07 min.
cells from 50 to 100 done
0.10 min.
cells from 100 to 150 done
0.13 min.
cells from 150 to 200 done
0.16 min.
cells from 200 to 250 done
0.18 min.
cells from 250 to 300 done
0.21 min.
cells from 300 to 350 done
0.24 min.
cells from 350 to 400 done
0.26 min.
cells from 400 to 450 done
0.29 min.
cells from 450 to 500 done
0.32 min.
cells from 500 to 550 done
0.35 min.
cells from 550 to 600 done
0.38 min.
cells from 600 to 650 done
0.41 min.
cells from 650 to 700 done
0.44 min.
cells from 700 to 750 done
0.47 min.
cells from 750 to 800 done
0.50 min.
cells from 800 to 850 done
0.53 min.
cells from 850 to 900 done
0.56 min.
cells from 900 to 950 done
0.59 min.
cells from 950 to 1000 done
0.62 min.
cells from 1000 to 1050 done
0.65 min.
cells from 1050 to 1100 done
0.69 min.
cells from 1100 to 1150 done
0.72 min.
cells from 1150 to 1200 done
0.75 min.
cells from 1200 to 1250 done
0.77 min.
cells from 1250 to 1300 done
0.80 min.
cells from 1

6.62 min.
cells from 10550 to 10600 done
6.65 min.
cells from 10600 to 10650 done
6.68 min.
cells from 10650 to 10700 done
6.71 min.
cells from 10700 to 10750 done
6.75 min.
cells from 10750 to 10800 done
6.79 min.
cells from 10800 to 10850 done
6.82 min.
cells from 10850 to 10900 done
6.84 min.
cells from 10900 to 10950 done
6.87 min.
cells from 10950 to 11000 done
6.90 min.
cells from 11000 to 11050 done
6.93 min.
cells from 11050 to 11100 done
6.96 min.
cells from 11100 to 11150 done
7.00 min.
cells from 11150 to 11200 done
7.02 min.
cells from 11200 to 11250 done
7.05 min.
cells from 11250 to 11300 done
7.08 min.
cells from 11300 to 11350 done
7.11 min.
cells from 11350 to 11400 done
7.13 min.
cells from 11400 to 11450 done
7.16 min.
cells from 11450 to 11500 done
7.19 min.
cells from 11500 to 11550 done
7.22 min.
cells from 11550 to 11600 done
7.25 min.
cells from 11600 to 11650 done
7.28 min.
cells from 11650 to 11700 done
7.32 min.
cells from 11700 to 11750 done
7.35 min.
cells 

12.55 min.
cells from 20450 to 20500 done
12.58 min.
cells from 20500 to 20550 done
12.60 min.
cells from 20550 to 20600 done
12.63 min.
cells from 20600 to 20650 done
12.65 min.
cells from 20650 to 20700 done
12.68 min.
cells from 20700 to 20750 done
12.70 min.
cells from 20750 to 20800 done
12.73 min.
cells from 20800 to 20850 done
12.75 min.
cells from 20850 to 20900 done
12.78 min.
cells from 20900 to 20950 done
12.80 min.
cells from 20950 to 21000 done
12.83 min.
cells from 21000 to 21050 done
12.85 min.
cells from 21050 to 21100 done
12.88 min.
cells from 21100 to 21150 done
12.90 min.
cells from 21150 to 21200 done
12.93 min.
cells from 21200 to 21250 done
12.96 min.
cells from 21250 to 21300 done
12.98 min.
cells from 21300 to 21350 done
13.01 min.
cells from 21350 to 21400 done
13.03 min.
cells from 21400 to 21450 done
13.06 min.
cells from 21450 to 21500 done
13.08 min.
cells from 21500 to 21550 done
13.11 min.
cells from 21550 to 21600 done
13.13 min.
cells from 21600 to 216

## Assign labels to cells

### Record the cell type with the max-log-likelihood 

In [20]:
bay.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,27553,27554,27555,27556,27557,27558,27559,27560,27561,27562
SC_LTSL_BM,-48309.073428,-24166.987301,-16681.988703,-20593.905932,-17453.812919,-13094.24698,-13022.315087,-16012.045148,-13797.766732,-13705.090593,...,-5332.45886,-3622.149746,-3903.128982,-3087.842732,-3756.318251,-3486.614135,-4081.198521,-2958.430976,-3705.007363,-2916.393444
SC_STSL_BM,-48212.832739,-24138.424449,-16686.12482,-20534.472901,-17396.036511,-13111.641847,-12959.500968,-15987.439478,-13815.897862,-13639.555531,...,-5371.313593,-3604.952214,-3916.363374,-3076.583056,-3757.173513,-3471.081026,-4095.877579,-2967.07535,-3703.645924,-2909.698209
SC_LTSL_FL,-48330.447718,-24218.101099,-16849.700853,-20693.430406,-17545.871888,-13298.445154,-13061.736784,-16110.263794,-14019.599815,-13750.353425,...,-5508.548715,-3645.063212,-3980.413607,-3103.417052,-3798.502413,-3502.001728,-4179.328231,-3027.020421,-3740.255818,-2928.739741
SC_STSL_FL,-48220.250855,-24165.128704,-16831.53339,-20651.34382,-17501.675599,-13282.189333,-13023.757246,-16085.04079,-14004.810211,-13700.41216,...,-5507.508254,-3635.098037,-3976.820818,-3093.805475,-3792.865974,-3489.170141,-4174.265413,-3024.3339,-3734.519434,-2919.642589
SC_MPP34F_BM,-48108.761104,-24091.382919,-16776.401344,-20573.141218,-17419.266879,-13248.45969,-12968.768448,-16008.23372,-13973.075014,-13639.310963,...,-5499.018555,-3616.956926,-3959.911966,-3080.572664,-3778.487295,-3482.031985,-4162.78286,-3013.622824,-3722.262368,-2918.284831


In [31]:
# whoops, I overwrote the "bay" variable, I'll need to load results from backup
import glob

path_dict = {
    'Immgen':glob.glob('backups/loglikelihoods_bay_classif_Immgen_200517*')[0],
    'Zilionis2019_mouse_minor':
    glob.glob('backups/loglikelihoods_bay_classif_Zilionis2019_mouse_minor_200517*')[0]
}

bay_dict = {key:rz.load_df(value) for key,value in path_dict.items()}

# this is what the classification results looks like, as log-likelihoods
bay_dict['Immgen']

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,27553,27554,27555,27556,27557,27558,27559,27560,27561,27562
SC_LTSL_BM,-48309.073428,-24166.987301,-16681.988703,-20593.905932,-17453.812919,-13094.246980,-13022.315087,-16012.045148,-13797.766732,-13705.090593,...,-5332.458860,-3622.149746,-3903.128982,-3087.842732,-3756.318251,-3486.614135,-4081.198521,-2958.430976,-3705.007363,-2916.393444
SC_STSL_BM,-48212.832739,-24138.424449,-16686.124820,-20534.472901,-17396.036511,-13111.641847,-12959.500968,-15987.439478,-13815.897862,-13639.555531,...,-5371.313593,-3604.952214,-3916.363374,-3076.583056,-3757.173513,-3471.081026,-4095.877579,-2967.075350,-3703.645924,-2909.698209
SC_LTSL_FL,-48330.447718,-24218.101099,-16849.700853,-20693.430406,-17545.871888,-13298.445154,-13061.736784,-16110.263794,-14019.599815,-13750.353425,...,-5508.548715,-3645.063212,-3980.413607,-3103.417052,-3798.502413,-3502.001728,-4179.328231,-3027.020421,-3740.255818,-2928.739741
SC_STSL_FL,-48220.250855,-24165.128704,-16831.533390,-20651.343820,-17501.675599,-13282.189333,-13023.757246,-16085.040790,-14004.810211,-13700.412160,...,-5507.508254,-3635.098037,-3976.820818,-3093.805475,-3792.865974,-3489.170141,-4174.265413,-3024.333900,-3734.519434,-2919.642589
SC_MPP34F_BM,-48108.761104,-24091.382919,-16776.401344,-20573.141218,-17419.266879,-13248.459690,-12968.768448,-16008.233720,-13973.075014,-13639.310963,...,-5499.018555,-3616.956926,-3959.911966,-3080.572664,-3778.487295,-3482.031985,-4162.782860,-3013.622824,-3722.262368,-2918.284831
SC_ST34F_BM,-48138.187077,-24094.003282,-16785.503014,-20586.739706,-17441.494544,-13258.683026,-12987.125919,-16015.977545,-13975.670274,-13658.797086,...,-5502.202969,-3622.176462,-3962.985758,-3084.776016,-3779.631657,-3485.828741,-4166.437506,-3013.086261,-3727.026494,-2921.606262
SC_CMP_BM_DR,-48081.302452,-24076.129368,-16808.696344,-20598.188596,-17442.788605,-13289.203150,-12966.268006,-16019.100446,-13989.588600,-13641.384236,...,-5516.331848,-3624.793537,-3973.099819,-3086.515861,-3789.236317,-3487.942806,-4176.536737,-3023.448033,-3733.569882,-2922.480502
SC_MEP_BM,-48235.335613,-24243.865463,-16996.009022,-20802.338620,-17638.358739,-13467.557293,-13055.102538,-16195.546242,-14095.170382,-13744.140668,...,-5604.771981,-3656.088036,-4037.163440,-3108.606666,-3850.963576,-3518.639440,-4244.500697,-3069.877294,-3789.023887,-2956.728948
SC_GMP_BM,-48289.869126,-24237.418435,-16811.504020,-20625.488400,-17422.782592,-13314.283825,-13041.392260,-15997.988124,-14007.826809,-13711.410585,...,-5512.287852,-3649.159231,-3967.551712,-3104.798550,-3784.311858,-3523.160779,-4188.678998,-3031.611566,-3734.394879,-2940.573797
SC_CDP_BM,-48142.094163,-24100.422625,-16695.502753,-20508.487601,-17294.365469,-13202.228076,-13028.558082,-15900.319330,-13944.842267,-13687.352368,...,-5454.038751,-3621.543933,-3931.347622,-3082.773095,-3750.594021,-3486.540441,-4142.825182,-3000.545679,-3694.521450,-2910.387230


### Add results to adata.obs

In [32]:
# here is the per-cell info I have so far
adata.obs.head(3)

Unnamed: 0_level_0,barcode,library,total_counts,pct_counts_mito,_library_before_renaming,mouse,condition
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,bcECPI,CSF1Ri_1_1,14875.0,5.815126,Blz1a,CSF1Ri_1,CSF1Ri
1,bcESAZ,CSF1Ri_1_1,6876.0,4.217568,Blz1a,CSF1Ri_1,CSF1Ri
2,bcIBUV,CSF1Ri_1_1,4643.0,2.864527,Blz1a,CSF1Ri_1,CSF1Ri


In [33]:
# add the classification results: for each cell, at which profile (row) has the largest log-likelihood?
for key,value in bay_dict.items():
    newkey = 'closest_'+key
    print(newkey)
    adata.obs[newkey] = value.idxmax().values
    
    # note: important to add ".values" at the end, it turns a Pandas Series into an index-less np.array.
    
adata.obs.head(3)

closest_Immgen
closest_Zilionis2019_mouse_minor


Unnamed: 0_level_0,barcode,library,total_counts,pct_counts_mito,_library_before_renaming,mouse,condition,closest_Immgen,closest_Zilionis2019_mouse_minor
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,bcECPI,CSF1Ri_1_1,14875.0,5.815126,Blz1a,CSF1Ri_1,CSF1Ri,B1a_Sp,mB cells
1,bcESAZ,CSF1Ri_1_1,6876.0,4.217568,Blz1a,CSF1Ri_1,CSF1Ri,NK_DAP10-_Sp,mT3
2,bcIBUV,CSF1Ri_1_1,4643.0,2.864527,Blz1a,CSF1Ri_1,CSF1Ri,GN_Arth_SynF,mN4


In [34]:
# also add simplified immgen
adata.obs['closest_Immgen_simplified'] = [i.split('_')[0] for i in adata.obs['closest_Immgen']]
adata.obs.head(3)

Unnamed: 0_level_0,barcode,library,total_counts,pct_counts_mito,_library_before_renaming,mouse,condition,closest_Immgen,closest_Zilionis2019_mouse_minor,closest_Immgen_simplified
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,bcECPI,CSF1Ri_1_1,14875.0,5.815126,Blz1a,CSF1Ri_1,CSF1Ri,B1a_Sp,mB cells,B1a
1,bcESAZ,CSF1Ri_1_1,6876.0,4.217568,Blz1a,CSF1Ri_1,CSF1Ri,NK_DAP10-_Sp,mT3,NK
2,bcIBUV,CSF1Ri_1_1,4643.0,2.864527,Blz1a,CSF1Ri_1,CSF1Ri,GN_Arth_SynF,mN4,GN


In [35]:
help(rz.save_df)

Help on function save_df in module rz_functions:

save_df(obj, filename)
    # From Adrian Veres for saving and loading pandas dataframes (modified)



### Save an updated version of adata.obs

In [36]:
# save the update obs dataframe
# no need to save the entire adata object, counts didn't change

fname = 'backups/obs_info_%dx%d_%s'%(adata.obs.shape[0],adata.obs.shape[1],rz.now())
print(fname)
rz.save_df(adata.obs,fname)

backups/obs_info_27563x10_200517_12h03
