In [None]:
import igraph
import leidenalg
import numpy as np
import pandas as pd
import scanpy as sc

In [None]:
# Load the AnnData object stored in E9E10_all.h5ad (path is relative to the working directory)
adata = sc.read_h5ad("E9E10_all.h5ad")

In [None]:
# Inspect the data: print the AnnData summary of the loaded object
print(adata)

In [None]:
# Show the 'leiden' clusters stored in the file on the UMAP, with cluster
# labels drawn directly on the embedding.
# NOTE(review): 'Set2' is a small qualitative palette while cluster ids up to
# "16" are used below, so several clusters may end up with the same colour —
# confirm this is acceptable or switch to a larger palette.
sc.pl.umap(
    adata,
    color='leiden', palette='Set2',
    legend_loc='on data', title='Cluster Visualization',
    size=40,
)

# Subset neural and epithelial cells

In [None]:
## Keep only the clusters selected for the neural / epithelial subset
clusters_of_interest = ["9", "13", "6", "5", "2", "15", "8", "16", "12", "7"]

# Build a boolean mask over cells, then slice the AnnData object with it
in_selected_clusters = adata.obs['leiden'].isin(clusters_of_interest)
adata_subset = adata[in_selected_clusters]

# Write the subset to disk for further analysis
adata_subset.write_h5ad("E9E10_neural_and_epi.h5ad")


In [None]:
# Further restrict the subset to the NC and AC samples; EXC samples are discarded
samples_of_interest = ["E9NC", "E9AC", "E10NC", "E10AC"]

# Select cells whose 'orig.ident' is one of the samples listed above
in_selected_samples = adata_subset.obs['orig.ident'].isin(samples_of_interest)
adata_subset = adata_subset[in_selected_samples]

# Inspect the result (the author recorded 69296 cells at this point)
print(adata_subset)

# Write the NC/AC subset to disk for further analysis
adata_subset.write_h5ad("E9E10NC.AC_neural_and_epi.h5ad")


In [None]:
print(adata_subset)
# Cell count recorded by the author for this subset: 69296 cells

In [None]:
# Reload the NC/AC neural + epithelial subset from the file written above,
# so the following cells operate on the object read back from disk
adata_subset = sc.read_h5ad("E9E10NC.AC_neural_and_epi.h5ad")

In [None]:
# Number of cells contributed by each sample ('orig.ident')
orig_ident_counts = adata_subset.obs['orig.ident'].value_counts()
print("Counts of each 'orig.ident':", orig_ident_counts, sep="\n")


In [None]:
# UMAP of the subset, coloured by the 'leiden' labels carried over from the
# full dataset and labelled directly on the embedding
sc.pl.umap(
    adata_subset,
    color='leiden', palette='Set2',
    legend_loc='on data', title='E9E10_neural_and_epi',
    size=10,
)

# Preprocessing and running UMAP

In [None]:
# Normalize every cell to a total of 10,000 counts
sc.pp.normalize_total(adata_subset, target_sum=1e4)

# Log-transform the normalized values (log1p)
sc.pp.log1p(adata_subset)

# Identify the 2000 most highly variable genes
sc.pp.highly_variable_genes(adata_subset, n_top_genes=2000)

# Freeze the log-normalized expression in .raw BEFORE scaling.
# sc.pp.scale replaces X with clipped z-scores; without this snapshot the
# marker plots and sc.tl.rank_genes_groups further down would run on scaled
# values, and rank_genes_groups expects logarithmized data (its log fold
# changes are not meaningful on z-scores). scanpy plotting and
# rank_genes_groups use .raw by default when it is set.
# NOTE(review): this replaces any .raw already stored in the input file.
adata_subset.raw = adata_subset

# Scale each gene to unit variance and zero mean, clipping values at 10
sc.pp.scale(adata_subset, max_value=10)

In [None]:
# Compute PCA with the ARPACK SVD solver
sc.tl.pca(adata_subset, svd_solver='arpack')

# Optional: plot the variance ratio per principal component on a log scale;
# used to choose n_pcs for the neighborhood graph in the next step
sc.pl.pca_variance_ratio(adata_subset, log=True)


In [None]:
# Build the nearest-neighbor graph used by clustering and UMAP.
# n_pcs should be adjusted based on the PCA variance plot above.
sc.pp.neighbors(
    adata_subset,
    n_neighbors=10,
    n_pcs=30,
)


In [None]:
# Leiden clustering of the subset; the later plots colour by the resulting 'leiden' labels
sc.tl.leiden(adata_subset, resolution=0.5)  # adjust resolution to change cluster granularity
# Compute the UMAP embedding (no plot is drawn here; plotting happens in the next cells)
sc.tl.umap(adata_subset)

In [None]:
# UMAP of the re-clustered subset with cluster labels drawn on the embedding
sc.pl.umap(
    adata_subset,
    color='leiden',
    legend_loc='on data',
    palette='Dark2',
)

In [None]:
# Same clusters with the default legend placement and the 'Set2' palette
sc.pl.umap(
    adata_subset,
    color='leiden',
    palette='Set2',
)

In [None]:
## Save the labelled UMAP as a TIFF
# scanpy's `save=` argument only recognises '.pdf', '.png' and '.svg' suffixes;
# a name ending in '.tiff' is treated as part of the file stem and the figure is
# written as 'figures/umap<name>.tiff.<default format>' rather than as a TIFF.
# Request the Matplotlib figure instead and save it explicitly, so the output
# really is a TIFF with exactly this file name (written to the working directory).
umap_fig = sc.pl.umap(
    adata_subset,
    color='leiden',
    legend_loc='on data',
    palette='Dark2',
    return_fig=True,
)
umap_fig.savefig(
    "E9E10NC.AC_neural_and_epi_umap.tiff",
    dpi=300,               # resolution of the exported image
    bbox_inches="tight",   # trim surrounding whitespace
)

In [None]:
# Re-display the labelled UMAP inline
sc.pl.umap(
    adata_subset,
    color='leiden',
    legend_loc='on data',
    palette='Dark2',
)


In [None]:
# Save the processed object (normalized, scaled, clustered, with UMAP)
# NOTE(review): this overwrites the file that was written before preprocessing and
# that the reload cell above reads back; re-running from that reload cell would
# load already-processed data and preprocess it a second time. Consider a
# separate file name for the processed object.
adata_subset.write_h5ad("E9E10NC.AC_neural_and_epi.h5ad")

# Check marker genes for cluster annotation

In [None]:
# NC markers (Foxd3, Sox10) on the UMAP
sc.pl.umap(
    adata_subset,
    color=['Foxd3', 'Sox10'],
    size=10,
)

In [None]:
# Violin plots of the NC markers, one violin per Leiden cluster
sc.pl.violin(
    adata_subset,
    keys=['Foxd3', 'Sox10'],
    groupby='leiden',
)


In [None]:
# Neuron markers (Tubb3, Elavl3) on the UMAP
sc.pl.umap(
    adata_subset,
    color=['Tubb3', 'Elavl3'],
    size=10,
)

In [None]:
# Epithelial markers (Epcam, Krt8) on the UMAP
sc.pl.umap(
    adata_subset,
    color=['Epcam', 'Krt8'],
    size=10,
)

In [None]:
# Otic vesicle markers (Oc90, Pax2, Foxg1) on the UMAP
sc.pl.umap(
    adata_subset,
    color=['Oc90', 'Pax2', 'Foxg1'],
    size=10,
)

In [None]:
# Find marker genes for every Leiden cluster with a Wilcoxon rank-sum test
sc.tl.rank_genes_groups(
    adata_subset,
    groupby='leiden',
    method='wilcoxon',
)

# Plot the top 50 ranked genes per cluster, each panel with its own y-axis
sc.pl.rank_genes_groups(adata_subset, n_genes=50, sharey=False)

# Collect the ranked gene names into a table with one column per cluster
# and write it to CSV
ranked_names = adata_subset.uns['rank_genes_groups']['names']
markers_df = pd.DataFrame(
    {group: ranked_names[group] for group in ranked_names.dtype.names}
)
markers_df.to_csv("E9E10NC.AC_neural_and_epi.markers.csv")


In [None]:
# Extract marker gene results
result = adata_subset.uns['rank_genes_groups']
groups = result['names'].dtype.names  # Cluster names

# Build one table per cluster with all statistics and concatenate them in a
# single call. Growing a DataFrame with pd.concat inside the loop re-copies the
# accumulated rows on every iteration and triggers pandas' FutureWarning about
# concatenating an empty frame.
group_frames = []
for group in groups:
    group_df = pd.DataFrame({
        'gene': result['names'][group],
        'score': result['scores'][group],
        'logfoldchange': result['logfoldchanges'][group],
        'pval': result['pvals'][group],
        'pval_adj': result['pvals_adj'][group],
    })
    group_df['cluster'] = group  # Add cluster identifier
    group_frames.append(group_df)

# Long-format table: one row per (cluster, gene)
markers_df = pd.concat(group_frames, ignore_index=True)

# Save the full table to a CSV file
markers_df.to_csv("marker_genes_full.csv", index=False)

# Display the first few rows of the DataFrame
print(markers_df.head())
