In [32]:
import scanpy as sc
import pandas as pd
import numpy as np
import os
import math

# Load your AnnData object
adata = sc.read_h5ad('/home/glennrdx/Documents/Research_Project/processed_h5ad/crypt_enriched_final.h5ad')

# Define the directory path where the CSVs will be saved
output_dir = "/home/glennrdx/Documents/Research_Project/scRNAseq-MSc-Analysis/upstream_analysis/crypt/differential_expression_scanpy"

# Ensure the directory exists (create it if it doesn't)
os.makedirs(output_dir, exist_ok=True)

# Get the unique cluster identifiers
leiden15 = adata.obs['leiden15'].unique()
results = {}

# Perform differential expression analysis for each cluster
for cluster in leiden15:

    # Subset the data for the cluster
    adata_cluster = adata[adata.obs['leiden15'] == cluster].copy()

    # Perform differential expression between Diet conditions
    sc.tl.rank_genes_groups(adata_cluster, groupby='Diet', method='t-test')  # or 'wilcoxon', 'logreg', etc.

    # Store the results
    results[cluster] = adata_cluster.uns['rank_genes_groups']

    # Unpack the results
    genes = results[cluster]['names']
    pvals = results[cluster]['pvals']
    pvals_adj = results[cluster]['pvals_adj']
    logfoldchanges = results[cluster]['logfoldchanges']

    # Flatten the tuples and create the DataFrame
    flat_results = []
    for i in range(len(genes)):
        for j in range(len(genes[i])):  # iterate over each item in the tuple
            logfc_value = logfoldchanges[i][j]

            flat_results.append({
                '': genes[i][j],
                'logFC': logfc_value,
                'P.Value': pvals[i][j],
                'adj.P.Val': pvals_adj[i][j]
            })
    
    # Convert to DataFrame
    df = pd.DataFrame(flat_results)

    # Define the file path for the current cluster
    file_path = os.path.join(output_dir, f'differential_expression_cluster_{cluster}.csv')
    
    # Save the DataFrame to a CSV file with the specified format
    df.to_csv(file_path, index=False)
    
    print(f"Saved results for cluster {cluster} to {file_path}")

Saved results for cluster Enterocyte Progenitor to /home/glennrdx/Documents/Research_Project/scRNAseq-MSc-Analysis/upstream_analysis/crypt/differential_expression_scanpy/differential_expression_cluster_Enterocyte Progenitor.csv
Saved results for cluster Goblet to /home/glennrdx/Documents/Research_Project/scRNAseq-MSc-Analysis/upstream_analysis/crypt/differential_expression_scanpy/differential_expression_cluster_Goblet.csv
Saved results for cluster EEC to /home/glennrdx/Documents/Research_Project/scRNAseq-MSc-Analysis/upstream_analysis/crypt/differential_expression_scanpy/differential_expression_cluster_EEC.csv
Saved results for cluster EEC Progenitor to /home/glennrdx/Documents/Research_Project/scRNAseq-MSc-Analysis/upstream_analysis/crypt/differential_expression_scanpy/differential_expression_cluster_EEC Progenitor.csv
Saved results for cluster Tuft Progenitor to /home/glennrdx/Documents/Research_Project/scRNAseq-MSc-Analysis/upstream_analysis/crypt/differential_expression_scanpy/diff

In [38]:
len(adata.var)

17573

In [19]:
adata_cluster_single = adata[adata.obs['leiden15'] == cluster]
sc.tl.rank_genes_groups(adata_cluster_single, groupby='Diet', method='t-test')

  adata.uns[key_added] = {}


In [21]:
adata_cluster_single.uns['rank_genes_groups']

{'params': {'groupby': 'Diet',
  'reference': 'rest',
  'method': 't-test',
  'use_raw': False,
  'layer': None,
  'corr_method': 'benjamini-hochberg'},
 'names': rec.array([('Rpl9-ps6', 'Fabp1'), ('Btg1', 'Agt'), ('Gm42418', 'Pdss1'),
            ..., ('Pdss1', 'Gm42418'), ('Agt', 'Btg1'),
            ('Fabp1', 'Rpl9-ps6')],
           dtype=[('CD', 'O'), ('HFHSD', 'O')]),
 'scores': rec.array([(  3.468802 , 10.030092 ), (  3.454072 ,  6.6669044),
            (  3.3278198,  4.2323904), ..., ( -4.2323904, -3.3278198),
            ( -6.6669044, -3.454072 ), (-10.030092 , -3.468802 )],
           dtype=[('CD', '<f4'), ('HFHSD', '<f4')]),
 'pvals': rec.array([(8.10150779e-04, 1.37964655e-17),
            (8.65007448e-04, 8.23993030e-10),
            (1.25490412e-03, 4.58004442e-05), ...,
            (4.58004442e-05, 1.25490412e-03),
            (8.23993030e-10, 8.65007448e-04),
            (1.37964655e-17, 8.10150779e-04)],
           dtype=[('CD', '<f8'), ('HFHSD', '<f8')]),
 'pvals_adj'