In [2]:
import scanpy as sc
import pandas as pd
import numpy as np
import os

# Read the processed crypt-enriched AnnData object from disk
h5ad_path = '/home/glennrdx/Documents/Research_Project/processed_h5ad/crypt_enriched_final.h5ad'
adata = sc.read_h5ad(h5ad_path)

In [6]:
# Export per-cell-type pseudobulk inputs for DESeq2.
# For every label in adata.obs['leiden15'] this cell sums adata.X over the
# cells of each 'Sample' and writes, into a directory named after the label:
#   - count_matrix.csv : one row per sample, one column per gene
#   - metadata.csv     : one row per sample with its 'Diet', in the SAME
#                        row order as count_matrix.csv
# NOTE(review): DESeq2 expects raw integer counts — confirm adata.X holds
# un-normalized counts at this point.

# Get unique cell types
cell_types = adata.obs['leiden15'].unique()

# Create main directory (each cell type gets its own sub-directory below)
output_root = "/home/glennrdx/Documents/Research_Project/scRNAseq-MSc-Analysis/3. upstream_analysis/crypt/differential_expression_deseq2/DESeq2_data_files/"
os.makedirs(output_root, exist_ok=True)

for cell_type in cell_types:
    # Create directory for each cell type
    cell_type_dir = os.path.join(output_root, cell_type)
    os.makedirs(cell_type_dir, exist_ok=True)

    # Filter data for the current cell type
    adata_subset = adata[adata.obs['leiden15'] == cell_type]

    # .X may be a scipy sparse matrix or an already-dense array; only sparse
    # matrices provide .toarray(), so fall back to np.asarray for dense data.
    expression = adata_subset.X
    if hasattr(expression, "toarray"):
        expression = expression.toarray()
    else:
        expression = np.asarray(expression)

    # Create count matrix (cells x genes)
    count_matrix = pd.DataFrame(expression,
                                index=adata_subset.obs_names,
                                columns=adata_subset.var_names)

    # Aggregate counts by sample; observed=True keeps only samples that
    # actually occur in this subset when 'Sample' is categorical.
    count_matrix = count_matrix.groupby(adata_subset.obs['Sample'], observed=True).sum()

    # Save count matrix
    count_matrix.to_csv(os.path.join(cell_type_dir, "count_matrix.csv"))

    # Create metadata: one (Sample, Diet) row per sample
    metadata = adata_subset.obs[['Sample', 'Diet']].drop_duplicates()
    metadata = metadata.set_index('Sample')

    # A sample appearing with more than one Diet would give duplicate index
    # entries and an ambiguous design table, so fail loudly instead.
    if not metadata.index.is_unique:
        ambiguous = metadata.index[metadata.index.duplicated()].unique().tolist()
        raise ValueError(
            f"Samples with more than one 'Diet' value in {cell_type}: {ambiguous}"
        )

    # drop_duplicates keeps first-appearance order while groupby sorts its
    # keys; reorder the metadata so both files list samples identically.
    metadata = metadata.reindex(count_matrix.index)

    # Save metadata
    metadata.to_csv(os.path.join(cell_type_dir, "metadata.csv"))

    print(f"Processed {cell_type}")

print("Done!")

Processed Enterocyte Progenitor
Processed Goblet
Processed EEC
Processed EEC Progenitor
Processed Tuft Progenitor
Processed ISC
Processed Goblet Progenitor
Processed Tuft
Processed Enterocyte
Processed Not Annotated
Processed Paneth
Processed Paneth Progenitor
Done!
