In [None]:
#--------Loading in Packages------#
import scanpy as sc
import anndata as ad
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import scipy
from scipy.io import mmread
from scipy.sparse import csr_matrix
import seaborn as sns
import os
import tacco as tc

In [None]:
#------Loading in 8um bin filtered dataset-------#
# Path of the filtered 8um-bin spatial AnnData file.
spatial_8um_h5ad = "/projects/b1217/Edward/Python_Projects/HHA/ELfullcdata/ELfull_sdata8um-f100.h5ad"
adata = sc.read_h5ad(spatial_8um_h5ad)
# Bare expression so the notebook shows the AnnData summary.
adata

In [None]:
#--------Loading in Matrix scRNA-seq AnnData-------#
matrix_h5ad = "/projects/b1217/HHA/Bulb_Recluster_5_22_AnnData/Matrix_Palantir_6_11_25.h5ad"
Matrix = sc.read_h5ad(matrix_h5ad)
# Use the 'raw' layer as the main expression matrix.
Matrix.X = Matrix.layers['raw']
# Keep the current var index available as a regular column called "names".
Matrix.var["names"] = Matrix.var.index.copy()
# Bare expression so the notebook shows the AnnData summary.
Matrix

In [None]:
#--------Color Dictionaries------#
# Palette keyed by the fine annotation labels.
fine_colors = {
    "Lower COL17": "#89C75F",
    "Upper COL17": "#3BBCA8",
    "LPC": "#208A42",
    "Medulla": "#0C727C",
    "Early_Cortex": "#9ECAE1",
    "Middle_Cortex": "#4292C6",
    "Late_Cortex": "#08306B",
    "Early_Cuticle": "#E6C2DC",
    "Middle_Cuticle": "#C06CAB",
    "Late_Cuticle": "#89288F",
    "Early_IRS_I": "#D8A767",
    "Early_IRS_II": "#F47D2B",
    "IRS_Henle": "#F37B7D",
    "IRS_Huxley": "#7E1416",
    "IRS_Cuticle": "#D24B27",
}
# Palette keyed by the broad annotation labels.
broad_colors = {
    "COL17": "#89C75F",
    "LPC": "#208A42",
    "Cortex": "#4292C6",
    "Cuticle": "#89288F",
    "IRS": "#D24B27",
}

#--------Plotting UMAPs------#
# One UMAP per obs column, in order: fine, broad, general.
# A palette of None leaves scanpy's default colors in place (same as omitting the argument).
umap_panels = [
    ("MatrixAnnotationFine", fine_colors),
    ("MatrixAnnotationBroad", broad_colors),
    ("GeneralAnnotation", None),
]
for obs_column, panel_palette in umap_panels:
    sc.pl.umap(
        Matrix,
        color=obs_column,
        palette=panel_palette,
        frameon=False,
        size=15,
        legend_loc="on data",
    )

In [None]:
#--------Loading in Full scRNA Atlas-----#
hha_atlas_h5ad = "/projects/b1217/Edward/R_Projects/HHA/h5ad/20250516_HHA-SC-Final.h5ad"
HHA = sc.read_h5ad(hha_atlas_h5ad)
# Bare expression so the notebook shows the AnnData summary.
HHA

In [None]:
#--------Plotting UMAPs------#
# Same plot settings for both atlas annotations: general first, then final.
for annotation_key in ("GeneralAnnotation", "FinalAnnotation"):
    sc.pl.umap(HHA, color=annotation_key, frameon=False, legend_loc="on data")

In [None]:
#-----Loading in Mask for QC-------#
# Per-barcode table recording whether a cell was filtered out by QC during
# Matrix subclustering or is part of the matrix.
qc_mask_csv = "/projects/b1217/HHA/Multiome_Seurat/HHA_Full_Matrix_Mask_5_30_25.csv"
QC_Metadata = pd.read_csv(qc_mask_csv)
# Preview the first five rows.
QC_Metadata.head(5)

In [None]:
#------Filtering for Cells retained after Additional Matrix QC-------#
# Barcodes whose QCFiltered flag equals True, as strings.
was_qc_filtered = QC_Metadata["QCFiltered"] == True
QC_Mask = QC_Metadata.loc[was_qc_filtered, "Barcode"].astype("str")
# Keep every HHA cell whose barcode is not in that list.
keep_cell = ~HHA.obs_names.isin(QC_Mask)
HHA = HHA[keep_cell].copy()
HHA

In [None]:
#-------Writing Filtered Anndata to h5ad-----#
# Destination for the QC-filtered atlas.
hha_qc_filtered_h5ad = "/projects/b1042/YiLab/HHA_scRNA_Spatial_Shared/scanpy_data/20250516_HHA-SC-Final-QCFiltered.h5ad"
HHA.write_h5ad(hha_qc_filtered_h5ad)

In [None]:
#-----Crossmapping var_names to EnsemblID--------#
# Lookup from the HHA var table: "names" value -> "ensembl" value.
symbol_to_ensembl = {
    symbol: ensembl_id
    for symbol, ensembl_id in zip(HHA.var["names"], HHA.var["ensembl"])
}


def map_to_ensembl(name):
    """Return the Ensembl ID to use for ``name``.

    Names that already start with "ENSG" are returned unchanged. Anything
    else is looked up in ``symbol_to_ensembl``; names missing from that
    lookup yield the string "Unknown".
    """
    already_ensembl = name.startswith("ENSG")
    return name if already_ensembl else symbol_to_ensembl.get(name, "Unknown")


Matrix.var["ensembl"] = Matrix.var["names"].apply(map_to_ensembl)

#-------Creating EnsemblID column and set as index--------#
# Drop genes that could not be mapped, then index var by the Ensembl column.
has_ensembl = Matrix.var["ensembl"] != "Unknown"
Matrix = Matrix[:, has_ensembl].copy()
new_index = Matrix.var["ensembl"]
Matrix.var.index = new_index
Matrix.var_names = new_index
# Bare expression so the notebook shows the re-indexed var table.
Matrix.var

In [None]:
#-------Mapping Matrix Annotations to Filtered HHA--------#
# Cast both obs indices to str so barcode lookups compare like with like.
for annotated in (HHA, Matrix):
    annotated.obs.index = annotated.obs.index.astype(str)

# Barcode -> fine Matrix annotation.
matrix_mapping = Matrix.obs["MatrixAnnotationFine"].to_dict()

# HHA cells with no entry in the mapping (all non-matrix cells) are labeled "Unknown".
hha_barcodes = HHA.obs.index.to_series()
HHA.obs["MatrixAnnotation"] = hha_barcodes.map(matrix_mapping).fillna("Unknown")

In [None]:
#------Printing Cell Counts for each Annotation------#
# value_counts() returns one count per label; print them as "label: count".
annotation_counts = HHA.obs["MatrixAnnotation"].value_counts()
for label, n_cells in zip(annotation_counts.index, annotation_counts.tolist()):
    print(f"{label}: {n_cells}")

In [None]:
#-------Converting gene expression matrices to float for TACCO------#
# Cast every object later passed to tc.tl.annotate, not just the first pair.
# Matrix is the reference for the second TACCO run, and its X comes from the
# 'raw' layer, so it gets the same float64 cast as HHA and adata.
# NOTE(review): if Matrix.layers['raw'] is already float64 this is a no-op apart
# from one extra copy; astype returns a new matrix, so the layer is not modified.
for counts_object in (HHA, adata, Matrix):
    counts_object.X = counts_object.X.astype(np.float64)

In [None]:
#------Running TACCO on Full Dataset-------#
# Annotate the spatial data against the filtered atlas, using the
# 'MatrixAnnotation' labels; the result is stored under 'matrix_annotation'.
tc.tl.annotate(
    adata,
    HHA,
    annotation_key='MatrixAnnotation',
    result_key='matrix_annotation',
)

In [None]:
#-----Retrieving Best Annotation Mapping for Each Cluster------#
# Reduce the TACCO result under 'matrix_annotation' to its maximum annotation
# and store it under the same key.
# NOTE(review): presumably this yields one label per spatial bin rather than per cluster - confirm against the TACCO docs.
tc.utils.get_maximum_annotation(
    adata,
    'matrix_annotation',
    result_key='matrix_annotation',
)
# Bare expression so the notebook shows the updated AnnData summary.
adata

In [None]:
#-------Plotting Initial TACCO Results-----#
# Derive the spatial palette from fine_colors (defined in the color-dictionary
# cell) instead of repeating all fifteen entries, so the palettes cannot drift apart.
# "Unknown" gets an 8-digit hex with zero alpha, i.e. it is drawn transparent.
matrix_colors_spatial = {**fine_colors, "Unknown": "#00000000"}

#--------Plotting Full Image-------#
# Large canvas for the whole tissue image.
fig, ax = plt.subplots(figsize=(40, 40))
sc.pl.spatial(
    adata,
    color=["matrix_annotation"],
    img_key="hires",
    ax=ax,
    palette=matrix_colors_spatial,
)

In [None]:
#-------Plotting Annotations on Original UMAP--------#
# Derive the palette from fine_colors (defined in the color-dictionary cell)
# instead of repeating all fifteen entries; only "Unknown" is added, in gray.
matrix_colors_gray = {**fine_colors, "Unknown": "#808080"}
sc.pl.umap(HHA, color="MatrixAnnotation", palette=matrix_colors_gray, frameon=False)

In [None]:
#-------Second Iteration TACCO Run on spots mapped to matrix-------#
# Keep only observations whose first-pass label is something other than "Unknown".
is_unknown = adata.obs["matrix_annotation"].isin(["Unknown"])
adata_sub = adata[~is_unknown].copy()
# Bare expression so the notebook shows the subset's summary.
adata_sub

In [None]:
#------Rerunning Annotation using Matrix Anndata Object------#
# Second pass: annotate the matrix-only subset against the Matrix reference,
# reading and writing the 'MatrixAnnotationFine' key.
tc.tl.annotate(
    adata_sub,
    Matrix,
    annotation_key='MatrixAnnotationFine',
    result_key='MatrixAnnotationFine',
)

In [None]:
#-----Retrieving Best Annotation Mapping for Each Cluster------#
# Reduce the second-pass TACCO result to its maximum annotation, stored
# under the same 'MatrixAnnotationFine' key.
tc.utils.get_maximum_annotation(
    adata_sub,
    'MatrixAnnotationFine',
    result_key='MatrixAnnotationFine',
)

In [None]:
#--------Plotting Final TACCO Annotations in Space------#
fig, ax = plt.subplots(figsize=(40, 40))
# show=False stops scanpy from calling plt.show() inside the plotting call.
# Without it the figure is shown (and, under the inline backend, released from
# pyplot) before the layout call runs, so a pyplot-level tight_layout() would
# act on a brand-new empty figure instead of this one.
sc.pl.spatial(
    adata_sub,
    color=["MatrixAnnotationFine"],
    img_key="hires",
    ax=ax,
    palette=matrix_colors_spatial,
    show=False,
)
# Apply the layout to this figure explicitly rather than to pyplot's current figure.
fig.tight_layout()
#Saving as png
fig.savefig("/projects/b1217/HHA/Bulb_Seurat_Plots/Spatial/TACCO_Mapping_Spatial.png",
            dpi=600, pad_inches=0.5, bbox_inches="tight")

In [None]:
#-------Saving Spatial Matrix Subset Anndata to h5ad-------#
# The cell previously wrote the same file twice to an identical path, once
# labeled "Temp for sharing" and once "Personal Storage". The second write only
# overwrote the first, so a single write is kept.
# TODO(review): one of the two destinations was presumably meant to be a
# different directory - add that path to the list once confirmed.
subset_output_paths = [
    "/projects/b1217/HHA/Bulb_Spatial/20250516_HHA_Spatial_TACCO_Matrix_Subset.h5ad",
]
for subset_output_path in subset_output_paths:
    adata_sub.write_h5ad(subset_output_path)