# Explore the T-cell-only SPRING plot to define the observed subsets  
Notably, the analysis reveals two doublet clusters, which are excluded from further analysis.

## Import statements

In [2]:
import os,sys
import datetime

In [3]:
import scanpy as sc
# print the versions of scanpy and its main dependencies (kept in the output for reproducibility)
sc.logging.print_versions()
# print the current memory usage of the session
sc.logging.print_memory_usage()
# scanpy logging verbosity level
sc.settings.verbosity = 2

scanpy==1.4.4.post1 anndata==0.6.22.post1 umap==0.3.7 numpy==1.15.4 scipy==1.3.1 pandas==0.23.4 scikit-learn==0.20.1 statsmodels==0.10.1 python-igraph==0.7.1 louvain==0.6.1
Memory usage: current 0.20 GB, difference +0.20 GB


In [4]:
## This cell is run once to download my custom functions and import statements from github
#
#!git clone --depth=1 https://github.com/rapolaszilionis/utility_functions
#    
## GitHub doesn't seem to have an option to download a specific version of the repo from the history,
## so I download my utility functions and record the download time by appending it to the directory name.
## These utility functions are to be shared together with the notebook.
#
#toappend = datetime.datetime.now().strftime('%y%m%d_%Hh%M')
#newname = "utility_functions_%s"%toappend
#print(newname)
#
#
## rename the py file with utility functions
#os.rename("utility_functions",newname)

In [5]:
# add the utility-function folder to sys.path (the module search path) so the rz_* modules can be imported
sys.path.append(os.path.abspath("utility_functions_200517_09h14/"))

# NOTE(review): later cells use `np` and `copy` without importing them here,
# so they presumably come from this star import - confirm before replacing it
from rz_import_statements import *
import rz_functions as rz
import rz_fig_params # this adjusts mpl.rcParams, almost nothing to import; import after scanpy to overwrite rcParams
import rz_utility_spring as srz

python version: 3.6.7


## Load graph directly from the SPRING directory

In [6]:
# root folder of the SPRING data, the project folder inside it (with trailing
# slash), and the name of the subplot explored in this notebook
path1 = "/Users/rapolaszilionis/Google Drive/analyses/SPRING_dev/data/pittet"
project_dir = ''.join([path1, '/CSF1Ri/'])
plot_name = 'T_cells_only'

In [7]:
# read the categorical colortracks stored with the selected SPRING plot
coloring_path = project_dir+plot_name+'/categorical_coloring_data.json'
cg0 = srz.read_cell_groupings(coloring_path)

In [8]:
# list the names of the colortracks found in the plot's categorical coloring file
print(cg0.keys())

dict_keys(['closest_Immgen', 'closest_Immgen_simplified', 'closest_Zilionis2019_mouse_minor', 'condition', 'library', 'mouse', 'sp_cl_T_cells_only_10', 'sp_cl_T_cells_only_12', 'sp_cl_T_cells_only_15', 'sp_cl_T_cells_only_2', 'sp_cl_T_cells_only_20', 'sp_cl_T_cells_only_3', 'sp_cl_T_cells_only_4', 'sp_cl_T_cells_only_5', 'sp_cl_T_cells_only_6', 'sp_cl_T_cells_only_7', 'sp_cl_T_cells_only_8', 'sp_cl_T_cells_only_9', 'top10pct_dbtl_score', 'top3pct_dbtl_score', 'top5pct_dbtl_score'])


In [9]:
# colortrack holding the clustering used to define the T cell subsets
clustrack = "sp_cl_T_cells_only_10"

# positions of the plot's cells within the full dataset (used further down to index obs rows)
cell_filter_path = project_dir+plot_name+'/cell_filter.txt'
cellix = np.loadtxt(cell_filter_path,dtype=int)

# cluster labels stored under the selected colortrack
labels = cg0[clustrack]['label_list']

In [12]:
# population name -> list of cluster labels (strings) that make up that population
rev_renamer = {
    'T_CD4':['1','8'],
    'T_CD8':['0','6','9'],
    'T_reg':['7'],
    'T_Calca_?':['5'],
    'T_Cd163l1_?':['3'],
    'T_doublet_B':['2'],
    'T_doublet_Neutro':['4']
}

# invert the mapping: cluster label -> population name
renamer = {
    cluster: population
    for population, clusters in rev_renamer.items()
    for cluster in clusters
}

# A cluster label listed under two populations would be silently overwritten by
# the inversion above (last one wins), so fail loudly if that ever happens.
n_listed = sum(len(clusters) for clusters in rev_renamer.values())
assert len(renamer) == n_listed, "a cluster label is assigned to more than one population in rev_renamer"

In [13]:
# translate every cluster label into its population name
# (a label missing from renamer raises KeyError)
renamed = [renamer[cluster_label] for cluster_label in labels]

## Load obs

In [14]:
# annotation table backed up earlier (the file name records its shape and save time)
obs_backup = 'backups/obs_info_27563x27_200607_21h30.npz'
obs = rz.load_df(obs_backup)

In [15]:
# preview the first rows of the annotation table
obs.head()

Unnamed: 0,barcode,library,total_counts,pct_counts_mito,_library_before_renaming,mouse,condition,closest_Immgen,closest_Zilionis2019_mouse_minor,closest_Immgen_simplified,...,n_counts,removed_as_RBC,removed_as_Krt8hi,used_in_all_cells_clean_iter1_refCSF1Ri,sp_cl_all_cells_clean_iter1_refCSF1Ri_150,removed_as_dblt_2,used_in_all_cells_clean_iter2,sp_cl_all_cells_clean_iter2_100,*population,used_in_T_cells_only
0,bcECPI,CSF1Ri_1_1,14875,5.81513,Blz1a,CSF1Ri_1,CSF1Ri,B1a_Sp,mB cells,B1a,...,14875,False,False,True,19,False,True,68,mB cells,False
1,bcESAZ,CSF1Ri_1_1,6876,4.21757,Blz1a,CSF1Ri_1,CSF1Ri,NK_DAP10-_Sp,mT3,NK,...,6876,False,False,True,19,False,True,4,mNK cells,False
2,bcIBUV,CSF1Ri_1_1,4643,2.86453,Blz1a,CSF1Ri_1,CSF1Ri,GN_Arth_SynF,mN4,GN,...,4643,False,False,True,145,False,True,75,mN4,False
3,bcGWNX,CSF1Ri_1_1,5972,6.39652,Blz1a,CSF1Ri_1,CSF1Ri,Mo_6C+II-_LN,mMac1,Mo,...,5972,False,False,True,138,False,True,41,mMac1,False
4,bcCOWJ,CSF1Ri_1_1,5021,3.82394,Blz1a,CSF1Ri_1,CSF1Ri,Mo_6C+II-_LN,mMono1,Mo,...,5021,False,False,True,67,False,True,1,mMono1,False


In [16]:
# keep a copy of the current population labels before they are overwritten
previous_labels = obs['*population'].copy()
obs['archive_population'] = previous_labels

In [17]:
# overwrite the population label of the T cells (rows picked by position via cellix)
# with the new subset names
t_cell_rows = obs.index[cellix]
obs.loc[t_cell_rows,'*population'] = renamed

In [18]:
# back up obs; the file name encodes the table shape and the save time
n_rows, n_cols = obs.shape
fname = 'backups/obs_info_%dx%d_%s'%(n_rows,n_cols,rz.now())
print(fname)
rz.save_df(obs,fname)

backups/obs_info_27563x28_200607_21h52


## Append colortracks to SPRING plots

In [30]:
# SPRING data root and project folder used when appending colortracks.
# path1 ends with '/', so strip it before adding '/CSF1Ri/'; plain concatenation
# produced ".../pittet//CSF1Ri/" (double slash), unlike the project_dir defined
# at the top of the notebook.
path1 = "/Users/rapolaszilionis/Google Drive/analyses/SPRING_dev/data/pittet/"
project_dir = path1.rstrip('/') + '/CSF1Ri/'


In [42]:
# choose which SPRING plot receives the new colortracks; switch below:
# (keep the trailing '/': later cells concatenate file names directly onto plot_name)
plot_name = 'T_cells_only/'
#plot_name = 'all_cells_clean_iter2/'

In [43]:
# get cell filter: positions of this plot's cells within the full dataset.
# Build the path with os.path.join so it is correct whether or not plot_name
# ends with '/'; concatenating '/cell_filter.txt' onto 'T_cells_only/' gave a
# doubled slash.
cell_ix = np.loadtxt(os.path.join(project_dir, plot_name, 'cell_filter.txt'),dtype=int)

In [44]:
# read the colortracks currently stored with the plot
cg0 = srz.read_cell_groupings(project_dir+plot_name+'categorical_coloring_data.json')

# per colortrack: its color dictionary (dictionary of dictionaries)
cdd = dict((track, info['label_colors']) for track, info in cg0.items())

# per colortrack: its list of labels
cg = dict((track, info['label_list']) for track, info in cg0.items())

In [39]:
# columns of obs to expose as colortracks: the new and the archived population labels
cols_to_add = ['*population','archive_population']

# restrict obs to the cells of this plot, then turn each column into a list of strings
plot_obs = obs.iloc[cell_ix]
cg_to_add = plot_obs[cols_to_add].astype(str).to_dict(orient='list')

In [40]:
# I want to use specific colors: reuse the colors of the '*population'
# colortrack already stored with this plot for every track being added.
if '*population' not in cdd:
    # the bare KeyError raised by cdd['*population'] gives no hint about the cause
    raise KeyError("'*population' colortrack not found in the loaded cell groupings; "
                   "no colors to reuse for %s" % cols_to_add)

color_dict = copy.deepcopy(cdd['*population'])

for key in cols_to_add:
    # give every track its own copy: sharing one dict object between tracks
    # means a later change to one track's colors would also change the other's
    cdd[key] = copy.deepcopy(color_dict)
    cg[key] = cg_to_add[key]

In [45]:
# write the colortracks (labels in cg, colors in cdd) back to the plot directory
plot_dir = project_dir+plot_name
srz.append_cell_groupings(plot_dir,cg,colordd=cdd)