##### Copyright 2025 Google LLC.

In [2]:
pip install cellxgene-census

Collecting cellxgene-census
  Downloading cellxgene_census-1.17.0-py3-none-any.whl.metadata (5.2 kB)
Collecting tiledbsoma>=1.15.3 (from cellxgene-census)
  Downloading tiledbsoma-1.16.2-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (7.0 kB)
Collecting anndata (from cellxgene-census)
  Downloading anndata-0.11.4-py3-none-any.whl.metadata (9.3 kB)
Collecting s3fs>=2021.06.1 (from cellxgene-census)
  Downloading s3fs-2025.3.2-py3-none-any.whl.metadata (1.9 kB)
Collecting aiobotocore<3.0.0,>=2.5.4 (from s3fs>=2021.06.1->cellxgene-census)
  Downloading aiobotocore-2.22.0-py3-none-any.whl.metadata (24 kB)
Collecting scanpy>=1.9.2 (from tiledbsoma>=1.15.3->cellxgene-census)
  Downloading scanpy-1.11.1-py3-none-any.whl.metadata (9.9 kB)
Collecting somacore==1.0.28 (from tiledbsoma>=1.15.3->cellxgene-census)
  Downloading somacore-1.0.28-py3-none-any.whl.metadata (1.5 kB)
Collecting pyarrow-hotfix (from somacore==1.0.28->tiledbsoma>=1.15.3->cellxgene-census)
  Downloading pyarrow_hotfix-0.7-p

In [4]:
import cellxgene_census
import pandas as pd

with cellxgene_census.open_soma(census_version="2025-01-30") as census:

    # Hücre tipleri: microglial cell, neuron
    # Doku: brain
    # Cinsiyet: female
    # Hastalık: breast carcinoma

    cell_metadata = census["census_data"]["homo_sapiens"]["obs"].read(
        value_filter=(
            "sex == 'female' and "
            "cell_type in ['microglial cell', 'neuron'] and "
            "tissue_general == 'brain' and "
            "disease_ontology_term_id == 'MONDO:0007254' and "
            "is_primary_data == True"
        ),
        column_names=[
            "assay", "cell_type", "tissue", "tissue_general",
            "suspension_type", "disease", "sex", "disease_ontology_term_id"
        ]
    )

    cell_metadata = cell_metadata.concat().to_pandas()

print(cell_metadata.head())
print(f"\nToplam hücre sayısı: {len(cell_metadata)}")


       assay        cell_type                         tissue tissue_general  \
0  10x 3' v3  microglial cell  medial orbital frontal cortex          brain   
1  10x 3' v3  microglial cell  medial orbital frontal cortex          brain   
2  10x 3' v3  microglial cell  medial orbital frontal cortex          brain   
3  10x 3' v3  microglial cell  medial orbital frontal cortex          brain   
4  10x 3' v3  microglial cell  medial orbital frontal cortex          brain   

  suspension_type        disease     sex disease_ontology_term_id  \
0         nucleus  breast cancer  female            MONDO:0007254   
1         nucleus  breast cancer  female            MONDO:0007254   
2         nucleus  breast cancer  female            MONDO:0007254   
3         nucleus  breast cancer  female            MONDO:0007254   
4         nucleus  breast cancer  female            MONDO:0007254   

   is_primary_data  
0             True  
1             True  
2             True  
3             True  
4    

In [6]:
import cellxgene_census

with cellxgene_census.open_soma(census_version="2025-01-30") as census:
    obs_reader = census["census_data"]["homo_sapiens"]["obs"].read(
        column_names=["cell_type"]
    )

    # Iterator'den parça alıyoruz (örnek: ilk 50 bin satır)
    df_chunks = []
    count = 0
    max_rows = 50000

    for chunk in obs_reader:
        df = chunk.to_pandas()
        df_chunks.append(df)
        count += len(df)
        if count >= max_rows:
            break

    df_combined = pd.concat(df_chunks, ignore_index=True)

print("Benzersiz hücre tipleri (cell_type):")
print(df_combined["cell_type"].value_counts())


Benzersiz hücre tipleri (cell_type):
cell_type
neuron                                                       9693939
oligodendrocyte                                              4437030
L2/3-6 intratelencephalic projecting glutamatergic neuron    3861717
glutamatergic neuron                                         3125705
unknown                                                      2993011
                                                              ...   
macrophage dendritic cell progenitor                              31
hypertrophic chondrocyte                                          28
immature natural killer cell                                      26
microfold cell of epithelium of small intestine                   19
pancreatic endocrine cell                                          8
Name: count, Length: 819, dtype: int64


In [7]:
import cellxgene_census
import pandas as pd

with cellxgene_census.open_soma(census_version="2025-01-30") as census:
    # B hücrelerini içeren gözlemleri filtrele
    cell_metadata = census["census_data"]["homo_sapiens"]["obs"].read(
        value_filter="cell_type == 'B cell'",
        column_names=["cell_type", "assay", "tissue", "tissue_general", "sex", "disease"]
    )

    # Parçaları birleştir ve pandas'a çevir
    cell_metadata = cell_metadata.concat().to_pandas()

# Sonuçları göster
print(cell_metadata.head())
print(f"\nToplam B hücresi sayısı: {len(cell_metadata)}")


  cell_type      assay              tissue      tissue_general     sex  \
0    B cell  10x 3' v3  respiratory airway  respiratory system  female   
1    B cell  10x 3' v2              thymus     endocrine gland    male   
2    B cell  10x 3' v2              thymus     endocrine gland    male   
3    B cell  10x 3' v2              thymus     endocrine gland    male   
4    B cell  10x 3' v2              thymus     endocrine gland    male   

    disease  
0  COVID-19  
1    normal  
2    normal  
3    normal  
4    normal  

Toplam B hücresi sayısı: 1691023


In [10]:
import cellxgene_census
import pandas as pd
import plotly.express as px

# Veriyi al
with cellxgene_census.open_soma(census_version="2025-01-30") as census:
    obs_reader = census["census_data"]["homo_sapiens"]["obs"].read(
        column_names=["cell_type"]
    )

    # İlk 30.000 hücreyi oku (RAM dostu)
    chunks = []
    total = 0
    for chunk in obs_reader:
        df = chunk.to_pandas()
        chunks.append(df)
        total += len(df)
        if total >= 30000:
            break

    data = pd.concat(chunks, ignore_index=True)

# Hücre tipi frekanslarını say
cell_counts = data["cell_type"].value_counts()

# En sık geçen 9 hücre tipi (B cell hariç)
top_cell_types = cell_counts.drop(labels=["B cell"], errors="ignore").nlargest(9)

# B cell sayısını al
b_cell_count = cell_counts.get("B cell", 0)

# B cell serisi oluştur
b_cell_series = pd.Series({"B cell": b_cell_count})

# Birleştir (yeni pandas ile uyumlu şekilde)
final_counts = pd.concat([top_cell_types, b_cell_series])
final_counts = final_counts.sort_values(ascending=False).reset_index()
final_counts.columns = ["cell_type", "count"]

# Plotly çizimi
fig = px.bar(final_counts, x="cell_type", y="count", title="Top Cell Types (Including B cell)")
fig.update_layout(xaxis_title="Cell Type", yaxis_title="Count")
fig.show()


1.Neuron cell
The most abundant cell type according to the diagram.We can see that there is nearly 10M neuron cell in this data.

2.B cell
According to the diagram there is nearly 2M B cell.They produce antibodies and drives humoral immunity.Since there hasn't been that much B cell we might say that this data is not in trouble.

3.Fibroblast
According to the diagram there is nearly 3M fibroblast cell.

4.Oligodendrocyt
According to the diagram there is nearly 4.5M cell.

5.Macrophage
According to the diagram there is nearly 2M cell.