# Cell Type Distribution Analysis

This notebook explores cell type distributions across different tissues using the CZI CELLxGENE Census.

## Setup

In [None]:
import sys
from pathlib import Path

sys.path.append('../src')

import cellxgene_census
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from data_loader import load_cell_metadata, get_available_tissues, get_cell_type_summary
from visualization import plot_cell_type_heatmap, plot_top_cell_types

# Notebook-wide plot defaults: seaborn's white grid theme and a (12, 8)
# default figure size for any figure that does not pass its own figsize.
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 8)})

# Reaching this line means every import in this cell resolved.
print("✓ Imports successful!")

## 1. Explore Available Tissues

First, let's see which tissues are available in the Census and how many cells each one contributes.

In [None]:
# Ask the project helper for the per-tissue counts of the human Census data.
# NOTE(review): the "Top 20" wording assumes the helper returns the counts
# sorted in descending order — confirm in data_loader.
tissue_counts = get_available_tissues(organism="homo_sapiens")

# Print the heading, a blank line, then only the first 20 entries.
print("Top 20 tissues by cell count:\n", tissue_counts.head(20), sep="\n")

In [None]:
# Visualize tissue distribution: one horizontal bar per tissue for the
# first 20 entries of tissue_counts.
fig, ax = plt.subplots(figsize=(12, 8))
tissue_counts.head(20).plot(kind='barh', ax=ax, color='steelblue')

# barh draws the first row at the bottom of the axes. Flip the y-axis so the
# first entry (presumably the largest — assumes tissue_counts is sorted in
# descending order, TODO confirm) is at the top, consistent with the
# per-tissue bar charts in the Visualizations section, which also invert it.
ax.invert_yaxis()

ax.set_xlabel('Number of Cells')
ax.set_ylabel('Tissue')
ax.set_title('Top 20 Tissues by Cell Count')
plt.tight_layout()
plt.show()

## 2. Load Cell Metadata

Choose specific tissues to analyze in detail.

In [None]:
# Tissues examined throughout the rest of the notebook.
TARGET_TISSUES = ["blood", "lung", "brain", "heart"]

# Value forwarded as the loader's max_cells_per_tissue argument.
MAX_CELLS_PER_TISSUE = 5000

# Fetch the metadata table for the selected human tissues.
cell_data = load_cell_metadata(
    organism="homo_sapiens",
    tissues=TARGET_TISSUES,
    max_cells_per_tissue=MAX_CELLS_PER_TISSUE,
)

# Preview the first rows (last expression, so it is rendered as cell output).
cell_data.head()

In [None]:
# Headline numbers for the loaded table: row count, distinct tissues,
# distinct cell types, and the overall (rows, columns) shape.
print(
    f"Total cells: {len(cell_data):,}",
    f"Unique tissues: {cell_data['tissue'].nunique()}",
    f"Unique cell types: {cell_data['cell_type'].nunique()}",
    f"\nData shape: {cell_data.shape}",
    sep="\n",
)

## 3. Analyze Cell Type Distribution

In [None]:
# Build the cell type summary table with the project helper.
# NOTE(review): later cells plot this table as a heatmap with tissues on the
# x-axis and cell types on the y-axis, so it is presumably a
# cell-type-by-tissue count matrix — confirm in data_loader.
summary = get_cell_type_summary(cell_data)

# Show the first ten rows as the cell output.
summary.head(n=10)

In [None]:
# Ten most frequent cell types across all loaded cells
# (value_counts orders by frequency, highest first).
top_cell_types = cell_data['cell_type'].value_counts().head(10)

print("Top 10 most abundant cell types:\n")
# One line per cell type: right-aligned rank, label, thousands-separated count.
for rank, (label, n_cells) in enumerate(top_cell_types.items(), start=1):
    print(f"{rank:2d}. {label}: {n_cells:,} cells")

## 4. Visualizations

In [None]:
# Heatmap of the first 20 rows of the summary table, drawn on an explicit
# Axes rather than through the pyplot "current figure" state.
# NOTE(review): the title calls these the "Top 20" cell types, which assumes
# summary is ordered by abundance — confirm in get_cell_type_summary.
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(
    summary.head(20),
    cmap='YlOrRd',
    cbar_kws={'label': 'Cell Count'},
    linewidths=0.5,
    ax=ax,
)
ax.set_title('Cell Type Distribution Across Tissues (Top 20 Cell Types)', pad=20)
ax.set_xlabel('Tissue')
ax.set_ylabel('Cell Type')
fig.tight_layout()
plt.show()

In [None]:
# Cell type distribution per tissue: one horizontal-bar panel per entry in
# TARGET_TISSUES, showing that tissue's most frequent cell types.
TOP_N_CELL_TYPES = 10
N_COLS = 2

# Size the grid from TARGET_TISSUES instead of hard-coding 2x2, so editing the
# tissue list neither overruns the axes array nor leaves empty framed panels.
n_panels = len(TARGET_TISSUES)
n_rows = max(1, -(-n_panels // N_COLS))  # ceiling division, at least one row
fig, axes = plt.subplots(
    n_rows,
    N_COLS,
    figsize=(14, 6 * n_rows),  # (14, 12) for the default four tissues
    squeeze=False,             # always a 2-D array, even for a single row
)
axes = axes.flatten()

for ax, tissue in zip(axes, TARGET_TISSUES):
    tissue_data = cell_data[cell_data['tissue'] == tissue]
    cell_counts = tissue_data['cell_type'].value_counts().head(TOP_N_CELL_TYPES)

    ax.barh(range(len(cell_counts)), cell_counts.values, color='steelblue')
    ax.set_yticks(range(len(cell_counts)))
    ax.set_yticklabels(cell_counts.index, fontsize=9)
    ax.set_xlabel('Number of Cells')
    ax.set_title(f'{tissue.title()} - Top {TOP_N_CELL_TYPES} Cell Types')
    # Bars are drawn bottom-up; invert so the most frequent type is on top.
    ax.invert_yaxis()

# Hide grid slots that have no tissue (e.g. an odd number of tissues).
for unused_ax in axes[n_panels:]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()

## 5. Tissue-Specific Analysis

Take a closer look at the cell type composition of a single tissue.

In [None]:
# Tissue examined in this section; change it to inspect a different one.
FOCUS_TISSUE = "blood"

# Keep only the rows whose 'tissue' column equals the focus tissue.
tissue_cells = cell_data.loc[cell_data['tissue'].eq(FOCUS_TISSUE)]

# Report the subset size, its number of distinct cell types, and its ten
# most frequent cell types.
print(
    f"Analysis of {FOCUS_TISSUE.title()}:",
    f"Total cells: {len(tissue_cells):,}",
    f"Unique cell types: {tissue_cells['cell_type'].nunique()}",
    "\nCell type distribution:",
    tissue_cells['cell_type'].value_counts().head(10),
    sep="\n",
)

In [None]:
# Pie chart of cell composition for the focus tissue.
#
# ax.pie normalizes by the sum of the values it is given, so plotting only the
# top cell types would report shares of "the top 8" rather than of the whole
# tissue. The remaining types are therefore folded into an "Other" slice so
# the percentages describe all cells of the tissue.
TOP_N_SLICES = 8

fig, ax = plt.subplots(figsize=(10, 8))

type_counts = tissue_cells['cell_type'].value_counts()
# A categorical column can report categories with zero cells; drop them so
# they do not appear as empty labelled slices.
type_counts = type_counts[type_counts > 0]

cell_counts = type_counts.head(TOP_N_SLICES)
other_count = int(type_counts.iloc[TOP_N_SLICES:].sum())

slice_labels = [str(label) for label in cell_counts.index]
slice_values = [int(value) for value in cell_counts.values]
if other_count > 0:
    slice_labels.append('Other')
    slice_values.append(other_count)

if slice_values:
    ax.pie(
        slice_values,
        labels=slice_labels,
        autopct='%1.1f%%',
        startangle=90
    )
else:
    # No cells for this tissue: show a message instead of an empty pie.
    ax.text(0.5, 0.5, f'No cells loaded for {FOCUS_TISSUE}',
            ha='center', va='center', transform=ax.transAxes)
    ax.set_axis_off()

ax.set_title(f'Cell Type Composition in {FOCUS_TISSUE.title()}')
plt.show()

## 6. Custom Analysis

Add your own analysis here!

In [None]:
# Your custom analysis code here
# For example:
# - Compare disease vs normal samples
# - Analyze sex differences in cell composition
# - Look at specific cell types across tissues
# - etc.

## 7. Export Results

In [None]:
# Export the summary table and a small sample of the cell metadata as CSV.
RESULTS_DIR = Path('../results')

# to_csv does not create missing directories; make sure the target exists so
# the export also works on a fresh checkout.
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

# Save summary to CSV (index is kept, as in a plain to_csv call)
summary.to_csv(RESULTS_DIR / 'cell_type_summary.csv')
print("✓ Summary saved to results/cell_type_summary.csv")

# Save cell data sample: only the first 1000 rows, without the index column
cell_data.head(1000).to_csv(RESULTS_DIR / 'cell_data_sample.csv', index=False)
print("✓ Cell data sample saved to results/cell_data_sample.csv")