# Human Brain Cell Types Analysis
Compares the source taxonomies of the Whole Human Brain Ontology with the CxG integrated adult human brain dataset to evaluate their compatibility

In [4]:
## IMPORT depedencies 
#!{sys.executable} -m pip install hdf5plugin

import os
import shutil
import pandas as pd
import anndata as ad
import hdf5plugin
import numpy as np
import urllib.request as request
import zipfile

## Read h5ad files

Please manually download the h5ad files from https://cellxgene.cziscience.com/collections/283d65eb-dd53-496d-adb7-7570c7caa443

In [3]:
ann_data_nn = ad.read_h5ad("h5ad-downloads/transcriptomic_diversity_of_cell_types_across_adult_human_brain_NN.h5ad", backed="r")
print(ann_data_nn.obs.shape)
ann_data_nn.obs.head(4)

(888263, 33)


Unnamed: 0_level_0,ROIGroup,ROIGroupCoarse,ROIGroupFine,roi,organism_ontology_term_id,disease_ontology_term_id,self_reported_ethnicity_ontology_term_id,assay_ontology_term_id,sex_ontology_term_id,development_stage_ontology_term_id,...,tissue_ontology_term_id,is_primary_data,cell_type,assay,disease,organism,sex,tissue,self_reported_ethnicity,development_stage
CellID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10X362_3:TCAGTGAGTATTGACC,Hypothalamus,Hypothalamus,Hypothalamus,Human MN,NCBITaxon:9606,PATO:0000461,HANCESTRO:0005,EFO:0009922,PATO:0000384,HsapDv:0000123,...,UBERON:0001898,False,oligodendrocyte,10x 3' v3,normal,Homo sapiens,male,hypothalamus,European,29-year-old human stage
10X362_5:TCCGTGTGTGAAAGTT,Hypothalamus,Hypothalamus,Hypothalamus,Human MN,NCBITaxon:9606,PATO:0000461,HANCESTRO:0005,EFO:0009922,PATO:0000384,HsapDv:0000136,...,UBERON:0001898,False,oligodendrocyte,10x 3' v3,normal,Homo sapiens,male,hypothalamus,European,42-year-old human stage
10X362_5:CACGGGTAGAGCAGAA,Hypothalamus,Hypothalamus,Hypothalamus,Human MN,NCBITaxon:9606,PATO:0000461,HANCESTRO:0005,EFO:0009922,PATO:0000384,HsapDv:0000136,...,UBERON:0001898,False,oligodendrocyte,10x 3' v3,normal,Homo sapiens,male,hypothalamus,European,42-year-old human stage
10X362_5:GATTCTTGTATGTCAC,Hypothalamus,Hypothalamus,Hypothalamus,Human MN,NCBITaxon:9606,PATO:0000461,HANCESTRO:0005,EFO:0009922,PATO:0000384,HsapDv:0000136,...,UBERON:0001898,False,oligodendrocyte,10x 3' v3,normal,Homo sapiens,male,hypothalamus,European,42-year-old human stage


## Read cell_to_cell_set_assignments (c2c_data)
Downloads, unzips, loads and merges the cell to cell set assingments data:

In [12]:
df_dtype = dict()
# df_dtype["sample_name"] = str
for i in range(1, 492):
    df_dtype["CS202210140_" + str(i)] = bool
def process_chunk(df):
    return df.astype(df_dtype) 

In [15]:
## Human:
c2c_path = os.path.join(os.getcwd(), "h5ad-downloads/cell_to_cell_set_assignments_CS202210140.csv")
if not os.path.exists(c2c_path):
    # Unzip
    with zipfile.ZipFile(os.path.join(os.getcwd(), "h5ad-downloads/adult-human-brain_v1.zip"), 'r') as zip_ref:
        zip_ref.extractall(os.path.join(os.getcwd(), "h5ad-downloads/CS202210140/"))
shutil.copyfile(os.path.join(os.getcwd(), "h5ad-downloads/CS202210140/cell_to_cell_set_assignments.csv"), c2c_path)

processed_chunks = [] 

chunksize = 10 ** 5
with pd.read_csv(c2c_path, chunksize=chunksize) as reader:
    for chunk in reader:
        processed = process_chunk(chunk)
        processed_chunks.append(processed)
#         print(processed.dtypes)

c2c_human_data = pd.concat(processed_chunks)
print(c2c_human_data.shape)

(3369219, 493)


In [16]:
c2c_human_data.head(4)

Unnamed: 0,sample_name,CS202210140_1,CS202210140_2,CS202210140_3,CS202210140_4,CS202210140_5,CS202210140_6,CS202210140_7,CS202210140_8,CS202210140_9,...,CS202210140_483,CS202210140_484,CS202210140_485,CS202210140_486,CS202210140_487,CS202210140_488,CS202210140_489,CS202210140_490,CS202210140_491,CS202210140_492
0,10X386_2:CATGGATTCTCGACGG,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,0
1,10X383_5:TCTTGCGGTGAATTGA,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,0
2,10X386_2:CTCATCGGTCGAGCAA,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,0
3,10X378_8:TTGGATGAGACAAGCC,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,0
