In [None]:
import igraph
import leidenalg
import numpy as np
import pandas as pd
import scanpy as sc

In [None]:
# Load the full E9/E10 AnnData object from disk
adata = sc.read_h5ad(filename="E9E10_all.h5ad")

In [None]:
# Inspect the data: print the AnnData summary of the object loaded above
print(adata)

In [None]:
# Show the existing 'leiden' labels on the UMAP stored in the loaded object
sc.pl.umap(
    adata,
    color='leiden',
    palette='Set2',                 # colour palette for the clusters
    size=40,                        # marker size
    title='Cluster Visualization',
    legend_loc='on data',           # write cluster labels directly on the embedding
)

# Subset neural and epithelial cells

In [None]:
## Keep only the Leiden clusters selected as neural / epithelial
clusters_of_interest = ["9", "13", "6", "5", "2", "15", "8", "16", "12", "7"]

# Boolean mask over cells: True where the cell's 'leiden' label is one of the selected clusters
in_selected_cluster = adata.obs['leiden'].isin(clusters_of_interest)
adata_subset = adata[in_selected_cluster]

# Persist the subset for further analysis
adata_subset.write_h5ad("E9E10_neural_and_epi.h5ad")


In [None]:
# Further restrict the neural/epithelial subset to the NC and AC samples
samples_of_interest = ["E9NC", "E9AC", "E10NC", "E10AC"]  # Replace with your specific sample IDs

# Keep only cells whose 'orig.ident' is one of the selected samples
in_selected_sample = adata_subset.obs['orig.ident'].isin(samples_of_interest)
adata_subset = adata_subset[in_selected_sample]

# Inspect the result (the author recorded 69296 cells at this point)
print(adata_subset)

# Persist the NC/AC subset for further analysis
adata_subset.write_h5ad("E9E10NC.AC_neural_and_epi.h5ad")


In [None]:
# Print the AnnData summary of the NC/AC subset
print(adata_subset)
# Author's recorded result: 69296 cells

In [None]:
# Reload the NC/AC subset from the file written earlier in this notebook,
# so the following cells can start from disk instead of re-running the subsetting.
adata_subset = sc.read_h5ad("E9E10NC.AC_neural_and_epi.h5ad")

In [None]:
# Number of cells per sample ('orig.ident') in the subset
sample_labels = adata_subset.obs['orig.ident']
orig_ident_counts = sample_labels.value_counts()
print("Counts of each 'orig.ident':")
print(orig_ident_counts)


In [None]:
# UMAP of the subset, coloured by the 'leiden' labels it currently carries
sc.pl.umap(
    adata_subset,
    color='leiden',
    palette='Set2',                  # colour palette for the clusters
    size=10,                         # marker size
    title='E9E10_neural_and_epi',
    legend_loc='on data',            # write cluster labels directly on the embedding
)

# Processing and running UMAP

In [None]:
# Preprocess the subset in place:
# library-size normalisation -> log1p -> HVG selection -> scaling.
# NOTE(review): the input file already carries 'leiden' labels and a UMAP, so X may
# already have been normalised/log-transformed upstream — confirm it holds counts.

# Normalize each cell to a total of 1e4
sc.pp.normalize_total(adata_subset, target_sum=1e4)

# Log-transform the data
sc.pp.log1p(adata_subset)

# Freeze the log-normalised values in .raw before scaling. sc.pp.scale below replaces X
# with clipped z-scores; without .raw, rank_genes_groups and the gene-expression plots
# further down would run on those scaled values instead of log-normalised expression.
# An existing .raw is left untouched.
if adata_subset.raw is None:
    adata_subset.raw = adata_subset.copy()

# Identify highly variable genes
sc.pp.highly_variable_genes(adata_subset, n_top_genes=2000)

# Scale each gene to unit variance / zero mean and clip values at 10
sc.pp.scale(adata_subset, max_value=10)

In [None]:
# Principal component analysis on the preprocessed subset
sc.tl.pca(
    adata_subset,
    svd_solver='arpack',
)

# Variance explained per component on a log scale (optional diagnostic)
sc.pl.pca_variance_ratio(
    adata_subset,
    log=True,
)


In [None]:
# Build the k-nearest-neighbour graph from the first 30 PCs (tune n_pcs from the PCA variance plot)
sc.pp.neighbors(
    adata_subset,
    n_pcs=30,
    n_neighbors=10,
)


In [None]:
# Leiden clustering on the neighbour graph; this overwrites obs['leiden'] (tune resolution as needed)
sc.tl.leiden(
    adata_subset,
    resolution=0.5,
)
# Compute the UMAP embedding (the plotting happens in the following cells)
sc.tl.umap(adata_subset)

In [None]:
# New Leiden clusters on the recomputed UMAP, labels drawn on the embedding
sc.pl.umap(
    adata_subset,
    color='leiden',
    palette='Dark2',
    legend_loc='on data',
)

In [None]:
# Same clusters with the Set2 palette and the default legend placement
sc.pl.umap(
    adata_subset,
    palette='Set2',
    color='leiden',
)

In [None]:
## Save the cluster UMAP as a real TIFF file
# scanpy's `save=` argument only recognises .svg/.pdf/.png suffixes; with a '.tiff' suffix
# it appends its default figure format and writes '...umap.tiff.pdf' instead of a TIFF.
# Take the matplotlib Figure back from scanpy and save it explicitly.
umap_fig = sc.pl.umap(
    adata_subset,
    color='leiden',
    legend_loc='on data',
    palette='Dark2',
    show=False,
    return_fig=True,
)

# Same location and 'umap' filename prefix that scanpy's own `save=` would have used
sc.settings.figdir.mkdir(parents=True, exist_ok=True)
umap_fig.savefig(
    sc.settings.figdir / 'umapE9E10NC.AC_neural_and_epi_umap.tiff',
    dpi=300,               # print-quality raster output
    bbox_inches='tight',
)

In [None]:
# Re-display the Dark2 cluster UMAP inline (same call as the earlier Dark2 plot)
sc.pl.umap(
    adata_subset,
    color='leiden',
    palette='Dark2',
    legend_loc='on data',
)


In [None]:
# Save again: this overwrites the NC/AC subset file written earlier, now including the
# results that the cells above added to the object (PCA, neighbours, Leiden, UMAP).
adata_subset.write_h5ad("E9E10NC.AC_neural_and_epi.h5ad")

# Check different markers for annotation

In [None]:
# NC markers on the UMAP
nc_markers = ['Foxd3', 'Sox10']
sc.pl.umap(adata_subset, color=nc_markers, size=10)

In [None]:
# Violin plots of the NC markers, one violin per Leiden cluster
sc.pl.violin(
    adata_subset,
    keys=['Foxd3', 'Sox10'],
    groupby='leiden',
)


In [None]:
# Neuron markers on the UMAP
neuron_markers = ['Tubb3', 'Elavl3']
sc.pl.umap(adata_subset, color=neuron_markers, size=10)

In [None]:
# Epi markers on the UMAP
epi_markers = ['Epcam', 'Krt8']
sc.pl.umap(adata_subset, color=epi_markers, size=10)

In [None]:
# Otic vesicle markers on the UMAP
otic_markers = ['Oc90', 'Pax2', 'Foxg1']
sc.pl.umap(adata_subset, color=otic_markers, size=10)

In [None]:
# Find marker genes per Leiden cluster, plot the top-ranked genes and export the gene names.
# Uses pandas (`pd`), which must be imported in the notebook's import cell; previously it
# was only imported further down, so this cell failed on a fresh kernel.

# Rank genes for each cluster against the rest with the Wilcoxon rank-sum test
sc.tl.rank_genes_groups(adata_subset, groupby='leiden', method='wilcoxon')

# Top 50 ranked genes per cluster, each panel with its own y-axis
sc.pl.rank_genes_groups(adata_subset, n_genes=50, sharey=False)

# One column per cluster, rows in rank order; the structured array's field names are the cluster labels
ranked_names = adata_subset.uns['rank_genes_groups']['names']
markers_df = pd.DataFrame({
    group: ranked_names[group]
    for group in ranked_names.dtype.names
})
markers_df.to_csv("E9E10NC.AC_neural_and_epi.markers.csv")


In [None]:
# Build a long-format table of all marker statistics (one row per gene per cluster).
result = adata_subset.uns['rank_genes_groups']
groups = result['names'].dtype.names  # Cluster names

# One DataFrame per cluster, concatenated once at the end. Growing a DataFrame with
# pd.concat inside the loop re-copies all previous rows on every iteration and starts
# from an empty frame, which newer pandas versions warn about.
per_cluster_tables = [
    pd.DataFrame({
        'gene': result['names'][group],
        'score': result['scores'][group],
        'logfoldchange': result['logfoldchanges'][group],
        'pval': result['pvals'][group],
        'pval_adj': result['pvals_adj'][group],
    }).assign(cluster=group)  # cluster identifier as the last column
    for group in groups
]
markers_df = pd.concat(per_cluster_tables, ignore_index=True)

# Save the full table to a CSV file
markers_df.to_csv("marker_genes_full.csv", index=False)

# Display the first few rows of the DataFrame
print(markers_df.head())


# Fig1B: only AC is shown in the UMAP, with changed colors

In [None]:
# Load the AC-only AnnData object used for the Fig1B/Fig1C panels
adata_AC=sc.read("adata_AC.h5ad")

In [None]:
# Number of cells per cluster label.
# Assumes 'clusters_2301' is a column in adata_AC.obs (a KeyError is raised otherwise).
print(adata_AC.obs["clusters_2301"].value_counts())

In [None]:
# Colour per cluster label (keys are cluster ids as strings).
# Each key appears exactly once. Earlier versions repeated '22' and '5'; Python keeps only
# the last value of a repeated key, so the effective colours are the ones listed here
# ('22' -> "#1e8449"; the overridden "#66BB6A" entry was dropped).
# Key order is unchanged because list(custom_colors.values()) is consumed positionally.
custom_colors = {
    # Shades of red
    '5':  "#E57373",   # muted coral red
    '23': "#C62828",   # deep crimson red
    '13': "#B71C1C",   # dark brick red
    '30': "#9B111E",   # rich garnet red

    # Pale to golden yellows
    '35': "#fef9e7",   # very pale cream
    '36': "#f7dc6f",   # soft yellow
    '37': "#f4d03f",   # golden yellow

    # Shades of yellow / amber
    '11': "#F9E79F",   # pastel yellow
    '12': "#FFEB3B",   # bright yellow
    '21': "#F4A261",   # warm amber

    # Shades of green
    '8':  "#4CAF50",   # emerald green
    '19': "#82e0aa",   # light mint green
    '22': "#1e8449",   # dark green
    '25': "#81C784",   # soft green
    '6':  "#2E7D32",   # forest green
    '14': "#A5D6A7",   # pastel green

    # Shades of blue
    '7':  "#0D47A1",   # navy blue
    '1':  "#4B7289",   # desaturated steel blue
    '200': "#90CAF9",  # soft sky blue
    '300': "#64B5F6",  # classic blue
    '20': "#5DADE2",   # muted aqua blue
    '10': "#2196F3",   # bright medium blue
    '3':  "#1565C0",   # deep cobalt blue
    '2':  "#1E88E5",   # rich medium blue
    '4':  "#5F9EA0",   # cadet blue
    '16': "#ebf5fb",   # very pale blue

    # Shades of purple
    '100': "#7E57C2",  # medium purple
    '27': "#9575CD",   # soft purple

    # Other populations to be excluded in coupling plots
    '26': "#A1887F",   # light taupe brown
    '29': "#D7CCC8",   # soft beige brown
    '31': "#BCAAA4",   # warm sandy brown
}


In [None]:
# Store the custom colours as a plain list under uns['leiden_colors'].
# The list follows the insertion order of custom_colors, so it matches clusters only by position.
adata_AC.uns['leiden_colors'] = [*custom_colors.values()]

In [None]:
import pandas as pd  # kept so this cell also runs on its own

# Display order of the cluster labels (this becomes the category order of 'clusters_2301')
custom_order = [ '5', '29', '31',  '23',
               '16',  '9', '12', '19', '11', '21',
                '0', '8', '14',
                '22', '25','15',  '200', '300','1', '3', '10', '20', '2', '7', '4','6', '27', '100', '13', '30',
                '26']

# pd.Categorical turns every value that is not listed in `categories` into NaN without
# any message, so report such labels before they disappear from the column.
present_labels = set(adata_AC.obs['clusters_2301'].dropna().unique())
unlisted_labels = sorted(present_labels - set(custom_order), key=str)
if unlisted_labels:
    print(f"WARNING: labels missing from custom_order will become NaN: {unlisted_labels}")

# Reorder the 'clusters_2301' column according to the custom order
adata_AC.obs['clusters_2301'] = pd.Categorical(adata_AC.obs['clusters_2301'], categories=custom_order, ordered=True)

# Verify the new order
print(adata_AC.obs['clusters_2301'].cat.categories)


In [None]:
# Fig1B UMAP coloured by 'clusters_2301'.
# A list palette is applied to the categories by position, and the categories follow
# custom_order rather than the key order of custom_colors, so a list would give clusters
# the wrong colours. Build a label -> colour mapping so each cluster gets the colour
# defined for its own key.
cluster_categories = adata_AC.obs['clusters_2301'].cat.categories
uncolored_clusters = [label for label in cluster_categories if label not in custom_colors]
if uncolored_clusters:
    print(f"WARNING: no entry in custom_colors for {uncolored_clusters}; drawing them in lightgrey")
cluster_palette = {label: custom_colors.get(label, 'lightgrey') for label in cluster_categories}

sc.pl.umap(adata_AC, color='clusters_2301', title='',
           s=10, legend_loc='on data', palette=cluster_palette,
           frameon=False, save='E9E10.1202_legendondata.pdf')

# Fig1C: highlight tdTomato+ cells, cloneID+ cells, and multicellular clones

In [None]:
# tdTomato expression: flag every cell with a positive "Tomato-N" value

# Step 1: Extract the expression values for the "Tomato-N" gene as a flat 1-D array.
# X may be a sparse matrix or a dense array; only sparse matrices have .toarray(),
# so convert conditionally instead of assuming a sparse X.
tomato_matrix = adata_AC[:, "Tomato-N"].X
if hasattr(tomato_matrix, "toarray"):
    tomato_matrix = tomato_matrix.toarray()
tomato_expression = np.asarray(tomato_matrix).ravel()

# Step 2: Positional indices of the cells with positive expression (reported in the next cell)
is_tomato_positive = tomato_expression > 0
pos_ids = np.where(is_tomato_positive)[0]

# Step 3: Boolean obs column; the mask has one entry per cell, in obs order
adata_AC.obs['Tomato_positive'] = is_tomato_positive

In [None]:
# Check how many cells are being highlighted (number of Tomato-N positive cells found above)
print(f"Number of cells to highlight: {len(pos_ids)}")

In [None]:
# UMAP with Tomato-N positive cells in red and all other cells in dark grey
sc.pl.umap(
    adata_AC,
    color='Tomato_positive',
    title='Tomato-N Expression',
    size=10,             # marker size
    frameon=False,       # no axes frame
    legend_loc='none',   # no legend
    palette={'True': '#a93226', 'False': 'darkgrey'},  # keys are strings
)

In [None]:
# Read the clone assignment table
clone_table_path = 'TREX/E9E10NC.AC_neural.epi.clone.csv'
df = pd.read_csv(clone_table_path)

# Preview the first rows to see the table layout
print(df.head())


In [None]:
# Count distinct clone ids (missing values are not counted)
unique_clone_ids = df['cloneid'].nunique(dropna=True)

# Report the count
print(f"Number of unique cloneids: {unique_clone_ids}")

In [None]:
# Collect the 'cellid.old' values of all rows that carry a clone id
has_clone_id = df['cloneid'].notna()
cloneid_cells = df.loc[has_clone_id, 'cellid.old'].tolist()

# Show the first ten entries as a sanity check
print(cloneid_cells[:10])

In [None]:
# Number of cells that carry a clone id
print(len(cloneid_cells))

In [None]:
# Mark the cells whose obs name appears in cloneid_cells in a categorical 'highlight' column.
is_clone_cell = adata_AC.obs.index.isin(cloneid_cells)
if not is_clone_cell.any():
    print("WARNING: none of the cloneID cell ids match adata_AC.obs_names")

# Fix the category order to [False, True]. If the categories are inferred and all or none
# of the cells match, only one category exists and the colour list below no longer lines
# up with False/True.
adata_AC.obs['highlight'] = pd.Categorical(is_clone_cell, categories=[False, True])

# Colours in category order: False -> light grey, True -> green
adata_AC.uns['highlight_colors'] = ['lightgrey', '#117a65']

In [None]:
# UMAP coloured by the 'highlight' column, all cells drawn at the same size.
# show=False makes scanpy hand back the axes instead of calling show; this cell does not
# change the drawing order itself.
sc.pl.umap(
    adata_AC,
    color='highlight',
    title="cloneID+ cells",
    size=5,
    add_outline=False,
    show=False,
)

In [None]:
# Read the table of clones with two or more cells (replaces the previous `df`)
multicell_clone_table_path = 'TREX/E9E10.neural.epi.clone_2_and_more.csv'
df = pd.read_csv(multicell_clone_table_path)

# Preview the first rows to see the table layout
print(df.head())

In [None]:
# Collect the 'cellid.old' values of all rows that carry a clone id (replaces the previous list)
has_clone_id = df['cloneid'].notna()
cloneid_cells = df.loc[has_clone_id, 'cellid.old'].tolist()

# Show the first ten entries as a sanity check
print(cloneid_cells[:10])

In [None]:
# Number of cells that belong to the clones in the currently loaded table
print(len(cloneid_cells))

In [None]:
# Mark the cells whose obs name appears in cloneid_cells; this overwrites the earlier 'highlight' column.
is_clone_cell = adata_AC.obs.index.isin(cloneid_cells)
if not is_clone_cell.any():
    print("WARNING: none of the cloneID cell ids match adata_AC.obs_names")

# Fix the category order to [False, True]. If the categories are inferred and all or none
# of the cells match, only one category exists and the colour list below no longer lines
# up with False/True.
adata_AC.obs['highlight'] = pd.Categorical(is_clone_cell, categories=[False, True])

# Colours in category order: False -> light grey, True -> dark slate
adata_AC.uns['highlight_colors'] = ['lightgrey', '#283747']

In [None]:
# UMAP coloured by the 'highlight' column, all cells drawn at the same size.
# show=False makes scanpy hand back the axes instead of calling show; this cell does not
# change the drawing order itself.
sc.pl.umap(
    adata_AC,
    color='highlight',
    title="multicellular cloneID+ cells",
    size=5,
    add_outline=False,
    show=False,
)