In [None]:
import os
import warnings

import igraph
import leidenalg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns

In [None]:
# Show the kernel's current working directory (bare `pwd` relies on IPython automagic).
pwd

In [None]:
# Move the kernel one directory up (IPython automagic), presumably so the
# relative data path used by the loading cell resolves.
# NOTE(review): this is not idempotent — re-running the cell climbs one more
# level each time; confirm the intended working directory.
cd ..

In [None]:
# Load the combined neural + epithelial dataset from the working directory.
adata = sc.read(
    'E9E10NC.AC_neural_and_epi.h5ad',
)

In [None]:
# Silence every Python warning for the rest of the session so plot output
# stays readable. This is a kernel-wide switch that also affects all later
# cells; `warnings` is imported with the other modules at the top of the
# notebook.
warnings.filterwarnings('ignore')

# UMAP of the full dataset coloured by Leiden cluster, with the cluster labels
# drawn directly on the embedding.
sc.pl.umap(adata, color=['leiden'], s=10, legend_loc="on data")

In [None]:
# QC overlay: colour cells by the `percent.mt` annotation
# (presumably the percentage of mitochondrial reads — confirm upstream).
sc.pl.umap(
    adata,
    color='percent.mt',
    s=10,
    cmap='viridis',
    legend_loc='right margin',
)

In [None]:
# QC overlay: colour cells by the `nFeature_RNA` annotation
# (presumably the number of detected genes per cell — confirm upstream).
sc.pl.umap(
    adata,
    color='nFeature_RNA',
    s=10,
    cmap='viridis',
    legend_loc='right margin',
)

In [None]:
# QC overlay: colour cells by the `nCount_RNA` annotation
# (presumably the total counts per cell — confirm upstream).
sc.pl.umap(
    adata,
    color='nCount_RNA',
    s=10,
    cmap='viridis',
    legend_loc='right margin',
)

In [None]:
# The epithelial compartment includes the otic lineage and needs further
# subclustering and annotation. Judging by the markers plotted here it appears
# to correspond to clusters 26, 13 and 30.
sc.pl.umap(
    adata,
    color=['Epcam', 'Sox3', 'Fbxo2', 'Tbx2'],
    s=4,
    ncols=3,
)


# Subset the epithelial clusters

In [None]:
# Restrict the dataset to the epithelial Leiden clusters 13, 26 and 30.
clusters_of_interest = ["13", "26", "30"]

# Indexing an AnnData with a boolean mask yields a view of the parent object.
# The subset is preprocessed in place afterwards, so take an explicit copy
# here rather than relying on AnnData's implicit copy-on-write of views.
adata_subset_epcam = adata[adata.obs['leiden'].isin(clusters_of_interest)].copy()

In [None]:
# UMAP of the epithelial subset, still using the embedding and cluster labels
# carried over from the full dataset. Warnings are already silenced globally by
# an earlier cell, so the import and filter are not repeated here.
sc.pl.umap(adata_subset_epcam, color=['leiden'], s=10, legend_loc="on data")

In [None]:
# Check Trp63 and Tbx2 expression on the subset before it is re-processed.
sc.pl.umap(
    adata_subset_epcam,
    color=['Trp63', 'Tbx2'],
    s=10,
    ncols=3,
)

In [None]:
# Flag highly variable genes within the epithelial subset; the flag is read
# back from `.var['highly_variable']` when the genes are filtered.
# NOTE(review): these mean/dispersion cutoffs are normally applied to
# log-normalised expression — confirm the state of the matrix at this point.
sc.pp.highly_variable_genes(
    adata_subset_epcam,
    min_mean=0.0125,
    max_mean=3,
    min_disp=0.5,
)

In [None]:
# Keep only the genes flagged as highly variable. Column slicing returns a
# view, and the following steps modify the object in place, so materialise the
# slice as an independent AnnData with `.copy()`.
adata_subset_epcam = adata_subset_epcam[:, adata_subset_epcam.var['highly_variable']].copy()

In [None]:
# In-place preprocessing of the subset:
#   1. normalise every cell to a total of 1e4
#   2. apply the log(1 + x) transform
#   3. scale the genes and clip values at 10
sc.pp.normalize_total(
    adata_subset_epcam,
    target_sum=1e4,
)
sc.pp.log1p(adata_subset_epcam)
sc.pp.scale(
    adata_subset_epcam,
    max_value=10,
)

In [None]:
# Snapshot the current object into `.raw`.
# NOTE(review): by this point the matrix has already been restricted to the
# highly variable genes and scaled, so `.raw` stores that processed matrix —
# not untouched counts. Confirm this is intended, since scanpy plotting and
# ranking functions read `.raw` by default once it is set.
adata_subset_epcam.raw = adata_subset_epcam

# Expression distribution of key genes.
sc.pl.violin(
    adata_subset_epcam,
    keys=['Epcam', 'Trp63'],
    jitter=True,
)


In [None]:
# Principal component analysis using the ARPACK solver, followed by a
# log-scale plot of the variance ratio per component.
sc.tl.pca(
    adata_subset_epcam,
    svd_solver="arpack",
)
sc.pl.pca_variance_ratio(
    adata_subset_epcam,
    log=True,
)

In [None]:
# Rebuild the neighbourhood graph on the subset (30 neighbours, 50 PCs).
sc.pp.neighbors(
    adata_subset_epcam,
    n_neighbors=30,
    n_pcs=50,
)

# Re-cluster at resolution 1.0 and compute a fresh embedding for the subset.
# NOTE(review): the plot below reads the new labels from `obs['leiden']`, the
# same key that held the parent clustering used to select this subset, so the
# parent labels appear to be overwritten here — confirm that is acceptable.
sc.tl.leiden(adata_subset_epcam, resolution=1.0)
sc.tl.umap(adata_subset_epcam)

# New clusters drawn on the new embedding.
sc.pl.umap(
    adata_subset_epcam,
    color=['leiden'],
    s=10,
    legend_loc="on data",
)


In [None]:
# Epidermal markers (plus Tbx2 and Odam) on the re-clustered epithelial subset.
# This cell used to plot from `adata_copy`, which no preceding cell defines, so
# it raised NameError on a fresh kernel. It now uses the subset object that the
# neighbouring cells plot from.
epidermal_markers = ['Krt14', 'Krt5', 'Itga6', 'Trp63', 'Tbx2', 'Odam']

# The subset only retains highly variable genes, so a marker may have been
# filtered out. Report and skip such markers instead of failing the whole
# figure on a missing key.
present_markers = [gene for gene in epidermal_markers if gene in adata_subset_epcam.var_names]
absent_markers = [gene for gene in epidermal_markers if gene not in present_markers]
if absent_markers:
    print(f"Markers not present in adata_subset_epcam, skipped: {absent_markers}")

if present_markers:
    sc.pl.umap(adata_subset_epcam, color=present_markers, s=4, ncols=3)

In [None]:
# Violin plots of Trp63 and Ripply3, one violin per Leiden cluster.
sc.pl.violin(
    adata_subset_epcam,
    keys=['Trp63', 'Ripply3'],
    groupby='leiden',
    jitter=True,
    show=True,
)

In [None]:
# Otic markers — expression appears to be concentrated in cluster 9.
sc.pl.umap(
    adata_subset_epcam,
    color=['Fbxo2', 'Tbx2'],
    s=4,
    ncols=3,
)

In [None]:
# Epidermal markers on the re-clustered subset.
sc.pl.umap(
    adata_subset_epcam,
    color=['Krt14', 'Krt5', 'Itga6', 'Trp63'],
    s=4,
    ncols=3,
)

In [None]:
# Save the re-clustered epithelial subset to the current working directory.
adata_subset_epcam.write_h5ad(
    "adata_subset_epcam.h5ad",
)

# Let's make a differential expression (DE) gene list

In [None]:
# Rank genes for each Leiden cluster with the Wilcoxon test, compute the
# cluster dendrogram, then show the top 5 genes per cluster as a dot plot with
# swapped axes. Warnings are already silenced globally by an earlier cell, so
# the import and filter are not repeated here.
# NOTE(review): scanpy's ranking reads `.raw` by default when it is set;
# confirm that `.raw` holds log-normalised expression rather than scaled
# values, otherwise the reported log fold changes are hard to interpret.
sc.tl.rank_genes_groups(adata_subset_epcam, groupby='leiden', method='wilcoxon')
sc.tl.dendrogram(adata_subset_epcam, groupby='leiden')

# Pass save='deg.pdf' to export the figure.
sc.pl.rank_genes_groups_dotplot(adata_subset_epcam, n_genes=5, swap_axes=True)

In [None]:
# Differential expression per Leiden cluster (Wilcoxon). The results are read
# from `.uns['rank_genes_groups']` by the cells that build the DE table.
# NOTE(review): the dot-plot cell already runs this exact call, so this cell
# recomputes the same result.
sc.tl.rank_genes_groups(
    adata_subset_epcam,
    groupby='leiden',
    method='wilcoxon',
)

In [None]:
# Grab the stored ranking results; the cells below read its 'names',
# 'logfoldchanges', 'pvals' and 'pvals_adj' entries, each indexed by cluster.
de_results = adata_subset_epcam.uns['rank_genes_groups']

In [None]:
# Output column label -> key inside the ranking results.
column_sources = {
    'Gene': 'names',
    'Log Fold Change': 'logfoldchanges',
    'P-Value': 'pvals',
    'Adjusted P-Value': 'pvals_adj',
}

# One table per cluster: the field names of the 'names' record array enumerate
# the clusters, and every result entry is indexed by that same cluster name.
# A trailing 'Cluster' column tags each row with its cluster for later reference.
de_genes_list = [
    pd.DataFrame(
        {label: de_results[key][cluster] for label, key in column_sources.items()}
    ).assign(Cluster=cluster)
    for cluster in de_results['names'].dtype.names
]


In [None]:
# Stack the per-cluster tables into one long table, cluster after cluster.
# Each table keeps its own row index, so index labels repeat across clusters.
de_genes_df = pd.concat(objs=de_genes_list)

In [None]:
# Export the combined DE table to an Excel workbook, leaving out the row index.
de_genes_df.to_excel(
    "DE_genes.xlsx",
    index=False,
)