INFO: Run with conda environment sc-mar2021

# 0. Load packages

<a id='load_packages'></a>

In [1]:
import os
import sys
import glob
import re

import anndata
import scanpy as sc
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib import colors
import seaborn as sb

In [2]:
# Global plotting / logging configuration for the notebook.
plt.rcParams.update({'figure.figsize': (20, 12)})  # rescale figures
sc.settings.verbosity = 3  # scanpy logging verbosity level
#sc.set_figure_params(dpi=200, dpi_save=300)
# Record the package versions of the active environment in the notebook output.
sc.logging.print_versions()



-----
anndata     0.7.5
scanpy      1.7.1
sinfo       0.3.1
-----
PIL                 8.1.2
anndata             0.7.5
anyio               NA
attr                20.3.0
babel               2.9.0
backcall            0.2.0
brotli              NA
cairo               1.19.1
certifi             2021.05.30
cffi                1.14.5
chardet             4.0.0
constants           NA
cycler              0.10.0
cython_runtime      NA
dateutil            2.8.1
decorator           4.4.2
get_version         2.1
h5py                2.10.0
highs_wrapper       NA
idna                2.10
igraph              0.9.1
ipykernel           5.3.4
ipython_genutils    0.2.0
ipywidgets          7.6.3
jedi                0.18.0
jinja2              2.11.3
joblib              1.0.1
json5               NA
jsonschema          3.2.0
jupyter_server      1.4.1
jupyterlab_server   2.3.0
kiwisolver          1.3.1
legacy_api_wrap     1.2
leidenalg           0.8.4
llvmlite            0.35.0
louvain             0.7.0
markupsa

In [3]:
# Put the parent directory first on the import path so the project-local
# `paths_downsampling` module can be imported (presumably it lives one
# directory above this notebook -- confirm). Must run before the import below.
sys.path.insert(0,'..')
import paths_downsampling as paths
# `p` is the dict of project directories used by every later cell
# (keys printed below: basedir, rawdir_RNA, figdir, writedir, allendir).
p = paths.get_paths()
print(p)

{'basedir': '/psycl/g/mpsagbinder/mgp/workspace/SingleNuc_PostmortemBrain/', 'rawdir_RNA': '/psycl/g/mpsngs/HiSeq_Helmholtz/20210324_Anna_Froehlich_10X_RNAseq/03_downsampled/', 'figdir': '/psycl/g/mpsagbinder/mgp/workspace/SingleNuc_PostmortemBrain/figures/', 'writedir': '/psycl/g/mpsagbinder/mgp/workspace/SingleNuc_PostmortemBrain/scanpy_adata/', 'allendir': '/psycl/g/mpsagbinder/mgp/workspace/SingleNuc_PostmortemBrain/reference_data/allen_human/'}


# 1. Load data

<a id='load_data'></a>

In [17]:
# Read the expression matrix from the Allen reference directory; the first CSV
# column is used as row names (first_column_names=True).
# os.path.join keeps the path valid whether or not p['allendir'] ends with a
# path separator (plain string concatenation silently required one).
adata = sc.read_csv(os.path.join(p['allendir'], 'matrix.csv'), first_column_names=True)

In [18]:
# Inspect the per-observation table right after loading: at this point it only
# carries the sample names as index (no annotation columns yet).
adata.obs

F2S4_160113_027_A01
F2S4_160113_027_B01
F2S4_160113_027_C01
F2S4_160113_027_D01
F2S4_160113_027_E01
...
F2S4_190227_100_C01
F2S4_190227_100_E01
F2S4_190227_100_F01
F2S4_190227_100_G01
F2S4_190227_100_H01


In [22]:
# add phenotype data to adata.obs
# os.path.join keeps the path valid whether or not p['allendir'] ends with a
# path separator.
metadata = pd.read_csv(os.path.join(p['allendir'], 'metadata.csv'), sep=',')

# merge metadata to adata.obs
# Left merge on the obs index vs. the 'sample_name' column keeps one row per
# sample of the expression matrix, in matrix order. Samples without a metadata
# row get NaN in every metadata column.
# validate='many_to_one' makes pandas raise a MergeError if 'sample_name' is
# duplicated in the metadata, instead of silently duplicating observations
# (which would no longer match adata's n_obs).
obs_with_metadata = adata.obs.merge(
    metadata,
    how='left',
    left_index=True,
    right_on='sample_name',
    validate='many_to_one',
)
# The merge result does not keep the obs index, so restore the sample names as
# index before assigning the table back to adata.obs.
adata.obs = obs_with_metadata.set_index('sample_name')
adata.obs

Unnamed: 0_level_0,exp_component_name,specimen_type,cluster_color,cluster_order,cluster_label,class_color,class_order,class_label,subclass_color,subclass_order,...,cell_type_alt_alias_order,cell_type_alt_alias_label,cell_type_designation_color,cell_type_designation_order,cell_type_designation_label,external_donor_name_color,external_donor_name_order,external_donor_name_label,outlier_call,outlier_type
sample_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
F2S4_160113_027_A01,LS-15005h_S01_E1-50,nucleus,,,,,,,,,...,,,,,,#3DCC3D,2,H200.1025,True,Outlier L1-3 SST OR2AD1P
F2S4_160113_027_B01,LS-15005h_S02_E1-50,nucleus,#E170FE,32.0,Inh L2-5 VIP TOX2,#0066FF,4.0,GABAergic,#996517,3.0,...,32.0,,#E170FE,32.0,Neuron 032,#3DCC3D,2,H200.1025,False,
F2S4_160113_027_C01,LS-15005h_S03_E1-50,nucleus,#8E5864,2.0,Inh L1 LAMP5 GGT8P,#0066FF,4.0,GABAergic,#FF7373,1.0,...,2.0,,#8E5864,2.0,Neuron 002,#3DCC3D,2,H200.1025,False,
F2S4_160113_027_D01,LS-15005h_S04_E1-50,nucleus,#8B5862,1.0,Inh L1 LAMP5 NDNF,#0066FF,4.0,GABAergic,#FF7373,1.0,...,1.0,,#8B5862,1.0,Neuron 001,#3DCC3D,2,H200.1025,False,
F2S4_160113_027_E01,LS-15005h_S05_E1-50,nucleus,#CF6EC9,34.0,Inh L1-3 VIP ZNF322P1,#0066FF,4.0,GABAergic,#996517,3.0,...,34.0,,#CF6EC9,34.0,Neuron 034,#3DCC3D,2,H200.1025,False,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
F2S4_190227_100_C01,SM-GE4QU_S187_E1-50,nucleus,#312E27,111.0,Astro L1-6 FGFR3 ETNPPL,#17994B,3.0,Non-neuronal,#73ABFF,13.0,...,111.0,,#312E27,111.0,Non-neuron 001,#3DCC3D,2,H200.1025,False,
F2S4_190227_100_E01,SM-GE4QU_S189_E1-50,nucleus,#BFC124,75.0,Exc L6 THEMIS LINC00343,#AFCC3D,2.0,Glutamatergic,#52FF26,7.0,...,75.0,,#BFC124,75.0,Neuron 075,#3DCC3D,2,H200.1025,False,
F2S4_190227_100_F01,SM-GE4QU_S190_E1-50,nucleus,#8B5862,1.0,Inh L1 LAMP5 NDNF,#0066FF,4.0,GABAergic,#FF7373,1.0,...,1.0,,#8B5862,1.0,Neuron 001,#3DCC3D,2,H200.1025,False,
F2S4_190227_100_G01,SM-GE4QU_S191_E1-50,nucleus,#71AF9A,116.0,Oligo L4-6 OPALIN,#17994B,3.0,Non-neuronal,#311799,15.0,...,116.0,,#71AF9A,116.0,Non-neuron 006,#3DCC3D,2,H200.1025,False,


In [23]:
# Show the AnnData summary (dimensions and obs columns) after attaching the metadata.
adata

AnnData object with n_obs × n_vars = 49417 × 50281
    obs: 'exp_component_name', 'specimen_type', 'cluster_color', 'cluster_order', 'cluster_label', 'class_color', 'class_order', 'class_label', 'subclass_color', 'subclass_order', 'subclass_label', 'full_genotype_color', 'full_genotype_order', 'full_genotype_label', 'donor_sex_color', 'donor_sex_order', 'donor_sex_label', 'region_color', 'region_order', 'region_label', 'cortical_layer_color', 'cortical_layer_order', 'cortical_layer_label', 'cell_type_accession_color', 'cell_type_accession_order', 'cell_type_accession_label', 'cell_type_alias_color', 'cell_type_order', 'cell_type_alias_label', 'cell_type_alt_alias_color', 'cell_type_alt_alias_order', 'cell_type_alt_alias_label', 'cell_type_designation_color', 'cell_type_designation_order', 'cell_type_designation_label', 'external_donor_name_color', 'external_donor_name_order', 'external_donor_name_label', 'outlier_call', 'outlier_type'

In [24]:
# Check which dtype pandas inferred for each metadata column before saving.
adata.obs.dtypes

exp_component_name              object
specimen_type                   object
cluster_color                   object
cluster_order                  float64
cluster_label                   object
class_color                     object
class_order                    float64
class_label                     object
subclass_color                  object
subclass_order                 float64
subclass_label                  object
full_genotype_color            float64
full_genotype_order            float64
full_genotype_label            float64
donor_sex_color                 object
donor_sex_order                  int64
donor_sex_label                 object
region_color                    object
region_order                     int64
region_label                    object
cortical_layer_color            object
cortical_layer_order             int64
cortical_layer_label            object
cell_type_accession_color       object
cell_type_accession_order      float64
cell_type_accession_label

In [26]:
# Save adata object
# Written next to the reference data in p['allendir']; os.path.join keeps the
# path valid whether or not the directory string ends with a path separator.
adata.write(os.path.join(p['allendir'], 'adata_raw_allenBrain.h5ad'))

... storing 'specimen_type' as categorical
... storing 'cluster_color' as categorical
... storing 'cluster_label' as categorical
... storing 'class_color' as categorical
... storing 'class_label' as categorical
... storing 'subclass_color' as categorical
... storing 'subclass_label' as categorical
... storing 'donor_sex_color' as categorical
... storing 'donor_sex_label' as categorical
... storing 'region_color' as categorical
... storing 'region_label' as categorical
... storing 'cortical_layer_color' as categorical
... storing 'cortical_layer_label' as categorical
... storing 'cell_type_accession_color' as categorical
... storing 'cell_type_accession_label' as categorical
... storing 'cell_type_alias_color' as categorical
... storing 'cell_type_alias_label' as categorical
... storing 'cell_type_alt_alias_color' as categorical
... storing 'cell_type_alt_alias_label' as categorical
... storing 'cell_type_designation_color' as categorical
... storing 'cell_type_designation_label' as cat