In [None]:
#--------Loading in Packages------#
import scanpy as sc
import anndata as ad
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import scipy
from scipy.io import mmread
from scipy.sparse import csr_matrix
import seaborn as sns
import os
import tacco as tc

In [None]:
#------Loading in 8um bin filtered dataset-------#
# Path kept in its own variable so the read call stays short.
spatial_h5ad_path = "/projects/b1217/HHA/EL_S1_adata/EL_S1_full_sdata8um-f100.h5ad"
adata = sc.read_h5ad(spatial_h5ad_path)
# Bare last expression: displays the AnnData summary as the cell output.
adata

In [None]:
#-------Creating EnsemblID column and set as index--------#
# Duplicate the existing gene_ids column under the name "ensembl".
adata.var['ensembl'] = adata.var['gene_ids'].copy()
# Drop every gene whose ensembl entry is the literal placeholder "Unknown".
has_ensembl_id = adata.var["ensembl"] != "Unknown"
adata = adata[:, has_ensembl_id].copy()
# Re-key the genes by the ensembl column: the same Series is assigned to the
# var DataFrame index and to var_names.
new_index = adata.var["ensembl"]
adata.var.index = new_index
adata.var_names = new_index
# Display the re-indexed var table.
adata.var

In [None]:
#--------Loading in Full scRNA Atlas-----#
atlas_h5ad_path = "/projects/b1217/Edward/R_Projects/HHA/h5ad/20250516_HHA-SC-Final.h5ad"
HHA = sc.read_h5ad(atlas_h5ad_path)
# Display the AnnData summary as the cell output.
HHA

In [None]:
#--------Plotting UMAPs------#
# One UMAP per annotation column, drawn in this order:
#   1. General Annotation
#   2. Final Annotation
# Both use the same display options (no frame, labels drawn on the data).
for annotation_key in ("GeneralAnnotation", "FinalAnnotation"):
    sc.pl.umap(
        HHA,
        color=annotation_key,
        frameon=False,
        legend_loc="on data",
    )

In [None]:
#------Printing Cell Counts for each Annotation------#
# value_counts() yields one (label, count) pair per FinalAnnotation value;
# each pair is printed as "label: count".
final_annotation_counts = HHA.obs["FinalAnnotation"].value_counts()
for cat, n in final_annotation_counts.items():
    print(cat, n, sep=": ")

In [None]:
#--------Loading in Matrix scRNA-seq AnnData-------#
matrix_h5ad_path = "/projects/b1217/HHA/Bulb_Recluster_5_22_AnnData/Matrix_Palantir_6_11_25.h5ad"
Matrix = sc.read_h5ad(matrix_h5ad_path)
# Keep a copy of the current var index in a "symbol" column so the original
# names stay available after the index is replaced in a later cell.
Matrix.var["symbol"] = Matrix.var.index.copy()
# Display the AnnData summary as the cell output.
Matrix

In [None]:
#--------Color Dictionaries------#
# Label -> hex colour lookups used as plotting palettes.
# Pairs are listed in the order the keys should appear in the dict.

#Colors for Fine Annotation
fine_colors = dict([
    # COL17 / LPC / Medulla
    ("Lower COL17", "#89C75F"),
    ("Upper COL17", "#3BBCA8"),
    ("LPC", "#208A42"),
    ("Medulla", "#0C727C"),
    # Cortex
    ("Early_Cortex", "#9ECAE1"),
    ("Middle_Cortex", "#4292C6"),
    ("Late_Cortex", "#08306B"),
    # Cuticle
    ("Early_Cuticle", "#E6C2DC"),
    ("Middle_Cuticle", "#C06CAB"),
    ("Late_Cuticle", "#89288F"),
    # IRS
    ("Early_IRS_I", "#D8A767"),
    ("Early_IRS_II", "#F47D2B"),
    ("IRS_Henle", "#F37B7D"),
    ("IRS_Huxley", "#7E1416"),
    ("IRS_Cuticle", "#D24B27"),
])

#Colors for Broad Annotation
broad_colors = dict([
    ("COL17", "#89C75F"),
    ("LPC", "#208A42"),
    ("Cortex", "#4292C6"),
    ("Cuticle", "#89288F"),
    ("IRS", "#D24B27"),
])

#--------Plotting UMAPs------#
# Each annotation column is paired with its own palette; both plots share the
# same point size, frame and legend settings.
#   1. Fine Annotation  -> fine_colors
#   2. Broad Annotation -> broad_colors
umap_panels = (
    ("MatrixAnnotationFine", fine_colors),
    ("MatrixAnnotationBroad", broad_colors),
)
for annotation_key, annotation_palette in umap_panels:
    sc.pl.umap(
        Matrix,
        color=annotation_key,
        palette=annotation_palette,
        frameon=False,
        size=15,
        legend_loc="on data",
    )

In [None]:
#-----Crossmapping var_names to EnsemblID--------#
# Lookup built row by row from HHA.var: the "names" entry is the key and the
# "ensembl" entry is the value. If a name occurs more than once, the last
# row wins.
symbol_to_ensembl = {
    symbol: ensembl_id
    for symbol, ensembl_id in zip(HHA.var["names"], HHA.var["ensembl"])
}
def map_to_ensembl(name):
    """Translate one gene name into the identifier used as the var index.

    Parameters
    ----------
    name : str
        Gene name to translate.

    Returns
    -------
    str
        ``name`` itself when it starts with "ENSG"; otherwise the value
        stored for ``name`` in the module-level ``symbol_to_ensembl`` dict,
        or the literal "Unknown" when the dict has no such key.
    """
    already_ensembl = name.startswith("ENSG")
    return name if already_ensembl else symbol_to_ensembl.get(name, "Unknown")
# Run map_to_ensembl over every stored symbol; names it cannot resolve come
# back as the literal "Unknown".
Matrix.var["ensembl"] = Matrix.var["symbol"].map(map_to_ensembl)

#-------Creating EnsemblID column and set as index--------#
# Discard the genes that came back as "Unknown" ...
resolved = Matrix.var["ensembl"] != "Unknown"
Matrix = Matrix[:, resolved].copy()
# ... and re-key the remaining genes by the ensembl column (same Series is
# assigned to the var DataFrame index and to var_names).
new_index = Matrix.var["ensembl"]
Matrix.var.index = new_index
Matrix.var_names = new_index
# Display the re-indexed var table.
Matrix.var

In [None]:
#-----Loading in Mask for QC-------#
# Per the original note, this table records for each cell whether it was
# filtered out during QC selection in the Matrix subclustering or is in the
# matrix. The next cell reads its "Barcode" and "QCFiltered" columns.
qc_mask_csv_path = "/projects/b1217/HHA/Multiome_Seurat/HHA_Full_Matrix_Mask_5_30_25.csv"
QC_Metadata = pd.read_csv(qc_mask_csv_path)
# Preview of the first rows.
QC_Metadata.head()

In [None]:
#------Filtering for Cells retained after Additional Matrix QC-------#
# Collect, as strings, the barcodes whose QCFiltered entry equals True ...
flagged_rows = QC_Metadata["QCFiltered"] == True
QC_Mask = QC_Metadata.loc[flagged_rows, "Barcode"].astype("str")
# ... and keep only the HHA cells whose name is NOT among them.
cell_is_retained = ~HHA.obs_names.isin(QC_Mask)
HHA = HHA[cell_is_retained].copy()
# Display the filtered AnnData summary.
HHA

In [None]:
#-------Mapping Matrix Annotations to Filtered HHA--------#
# Cell names on both objects are cast to str so they can be compared.
HHA.obs.index = HHA.obs.index.astype(str)
Matrix.obs.index = Matrix.obs.index.astype(str)

# Cell name -> fine Matrix label, taken from the Matrix object.
Matrix_Mapping = Matrix.obs["MatrixAnnotationFine"].to_dict()

# HHA cell names as a Series, reused for both lookups below.
hha_cell_names = HHA.obs.index.to_series()

# Any HHA cell without an entry in Matrix_Mapping is labeled "Other".
HHA.obs["MatrixAnnotation"] = hha_cell_names.map(Matrix_Mapping).fillna("Other")

#Adding new Annotation with Matrix + Melanocytes
# Cell name -> FinalAnnotation for the two melanocyte populations.
is_melanocyte = HHA.obs["FinalAnnotation"].isin(["Melanocytes I", "Melanocytes II"])
Mel = HHA[is_melanocyte, :].copy()
Mel_Mapping = Mel.obs["FinalAnnotation"].to_dict()

# Combine both lookups; when a cell name appears in both, the melanocyte
# entry replaces the Matrix one.
MelMat_Mapping = {**Matrix_Mapping, **Mel_Mapping}

# Store the combined labels; cells found in neither lookup become "Other".
HHA.obs["MelMatAnnotation"] = hha_cell_names.map(MelMat_Mapping).fillna("Other")

#--------Checking Labels-------#
sc.pl.umap(HHA, color="MelMatAnnotation", frameon=False)

In [None]:
#------Printing Cell Counts for each Annotation------#
# One "label: count" line per MelMatAnnotation value, in value_counts() order.
melmat_annotation_counts = HHA.obs["MelMatAnnotation"].value_counts()
for cat, n in melmat_annotation_counts.items():
    print(cat, n, sep=": ")

In [None]:
#-------Converting gene expression matrices to float for TACCO------#
# Cast .X of the scRNA atlas first, then of the spatial object, to float64.
for dataset in (HHA, adata):
    dataset.X = dataset.X.astype(np.float64)

In [None]:
#------Running TACCO on Full Dataset-------#
# Target: adata (spatial). Reference: HHA, using its 'MelMatAnnotation'
# labels. The result is stored on adata under the same key name.
tc.tl.annotate(
    adata,
    HHA,
    annotation_key='MelMatAnnotation',
    result_key='MelMatAnnotation',
)

In [None]:
#-----Retrieving Best Annotation Mapping for Each Cluster------#
# The same key is used as the input annotation and as the result name;
# later cells read the outcome from adata.obs["MelMatAnnotation"].
melmat_key = 'MelMatAnnotation'
tc.utils.get_maximum_annotation(adata, melmat_key, result_key=melmat_key)
# Display the AnnData summary as the cell output.
adata

In [None]:
#-------Plotting Initial TACCO Results-----#
# Palette for the spatial plots: the fine Matrix labels, the two melanocyte
# labels, and "Other" mapped to "#00000000" (alpha byte 00, i.e. transparent).
matrix_colors_spatial = dict([
    # COL17 / LPC / Medulla
    ("Lower COL17", "#89C75F"),
    ("Upper COL17", "#3BBCA8"),
    ("LPC", "#208A42"),
    ("Medulla", "#0C727C"),
    # Cortex
    ("Early_Cortex", "#9ECAE1"),
    ("Middle_Cortex", "#4292C6"),
    ("Late_Cortex", "#08306B"),
    # Cuticle
    ("Early_Cuticle", "#E6C2DC"),
    ("Middle_Cuticle", "#C06CAB"),
    ("Late_Cuticle", "#89288F"),
    # IRS
    ("Early_IRS_I", "#D8A767"),
    ("Early_IRS_II", "#F47D2B"),
    ("IRS_Henle", "#F37B7D"),
    ("IRS_Huxley", "#7E1416"),
    ("IRS_Cuticle", "#D24B27"),
    # Melanocytes
    ("Melanocytes I", "#916848"),
    ("Melanocytes II", "#722A2D"),
    # Everything else: transparent
    ("Other", "#00000000"),
])

#--------Plotting Full Image-------#
# Single 40x40 inch axes; the spatial plot is drawn onto it over the
# "hires" image, coloured by MelMatAnnotation.
fig, ax = plt.subplots(figsize=(40, 40))
sc.pl.spatial(
    adata,
    color=["MelMatAnnotation"],
    img_key="hires",
    ax=ax,
    palette=matrix_colors_spatial,
)

In [None]:
#-------Second Iteration TACCO Run on spots mapped to Matrix/Melanocytes-------#
#Filtering Down to Matrix/Melanocyte Hits
# Keep only the spatial bins whose first-pass label is not "Other".
adata_sub = adata[~adata.obs["MelMatAnnotation"].isin(["Other"])].copy()
print(adata_sub)

#Filtering scRNA for Matrix, Melanocytes
# Keep only the atlas cells whose label is not "Other".
MelMat = HHA[~HHA.obs["MelMatAnnotation"].isin(["Other"])].copy()

#Remapping Categories to exclude other for TACCO
# The category list is derived from the labels actually present (sorted),
# rather than from a hand-maintained literal: pd.Categorical turns any value
# missing from `categories` into NaN without warning, so a hard-coded list
# would silently drop cells as soon as an annotation is added or renamed.
# Casting to str first also discards a leftover "Other" category if the
# column is already categorical.
melmat_labels = MelMat.obs["MelMatAnnotation"].astype(str)
MelMat.obs["MelMatAnnotationFine"] = pd.Categorical(
    melmat_labels,
    categories=sorted(melmat_labels.unique()),
)
# NOTE(review): no later cell visible in this notebook reads MelMat or
# "MelMatAnnotationFine"; the next annotate call uses Matrix as its
# reference. Confirm which reference was intended.
print(MelMat)

In [None]:
#------Rerunning Annotation: Matrix to all Matrix/Melanocyte Bins------#
# Target: adata_sub. Reference: Matrix, using its 'MatrixAnnotationFine'
# labels. The result is stored on adata_sub under the same key name.
tc.tl.annotate(
    adata_sub,
    Matrix,
    annotation_key='MatrixAnnotationFine',
    result_key='MatrixAnnotationFine',
)

In [None]:
#-----Retrieving Best Annotation Mapping for Each Cluster------#
# Same key for the input annotation and the result name; the plotting cell
# that follows colours adata_sub by "MatrixAnnotationFine".
matrix_fine_key = 'MatrixAnnotationFine'
tc.utils.get_maximum_annotation(adata_sub, matrix_fine_key, result_key=matrix_fine_key)

In [None]:
#--------Plotting Matrix Mappings-------#
# 40x40 inch axes; adata_sub drawn over the "hires" image, coloured by the
# second-pass MatrixAnnotationFine labels.
fig, ax = plt.subplots(figsize=(40, 40))
sc.pl.spatial(
    adata_sub,
    color=["MatrixAnnotationFine"],
    img_key="hires",
    ax=ax,
    palette=matrix_colors_spatial,
)

In [None]:
#---------Mapping COL17 cells along DP border------------#
# First-pass labels to keep: both melanocyte groups plus the COL17, Medulla
# and LPC populations.
col17_bin_labels = [
    "Melanocytes I",
    "Melanocytes II",
    "Upper COL17",
    "Lower COL17",
    "Medulla",
    "LPC",
]
in_col17_subset = adata.obs["MelMatAnnotation"].isin(col17_bin_labels)
adata_col17 = adata[in_col17_subset].copy()

#--------Plotting Full Image-------#
fig, ax = plt.subplots(figsize=(40, 40))
sc.pl.spatial(
    adata_col17,
    color=["MelMatAnnotation"],
    img_key="hires",
    ax=ax,
    palette=matrix_colors_spatial,
)

In [None]:
#-----------Subsetting for COL17A1 Populations-----------#
# One list drives both the row filter and the category order below.
col17_reference_labels = ["Upper COL17", "Lower COL17", "Medulla", "LPC"]
in_col17_reference = Matrix.obs["MatrixAnnotationFine"].isin(col17_reference_labels)
COL17 = Matrix[in_col17_reference].copy()

#Removing other categories for TACCO Mapping
# Rebuild the labels as a categorical restricted to exactly these four
# categories.
COL17.obs["COL17Annotation"] = pd.Categorical(
    COL17.obs["MatrixAnnotationFine"],
    categories=col17_reference_labels,
)
# Display the AnnData summary as the cell output.
COL17

In [None]:
#------Printing Cell Counts for each Annotation------#
# One "label: count" line per COL17Annotation category.
col17_annotation_counts = COL17.obs["COL17Annotation"].value_counts()
for cat, n in col17_annotation_counts.items():
    print(cat, n, sep=": ")

In [None]:
#------Rerunning Annotation: COL17 reference to the COL17/Melanocyte Bins------#
# Target: adata_col17. Reference: COL17, using its 'COL17Annotation' labels.
# The result is stored on adata_col17 under the same key name (a later cell
# reads it from adata_col17.obsm['COL17Annotation']).
tc.tl.annotate(
    adata_col17,
    COL17,
    annotation_key='COL17Annotation',
    result_key='COL17Annotation',
)

In [None]:
#-----Retrieving Best Annotation Mapping for Each Cluster------#
# Same key for the input annotation and the result name; the plotting cell
# that follows colours adata_col17 by "COL17Annotation".
col17_key = 'COL17Annotation'
tc.utils.get_maximum_annotation(adata_col17, col17_key, result_key=col17_key)

In [None]:
#--------Plotting Full Image-------#
# 40x40 inch axes; adata_col17 drawn over the "hires" image, coloured by the
# COL17Annotation labels.
fig, ax = plt.subplots(figsize=(40, 40))
sc.pl.spatial(
    adata_col17,
    color=["COL17Annotation"],
    img_key="hires",
    ax=ax,
    palette=matrix_colors_spatial,
)

In [None]:
#---------------Pulling Scores from Obsm----------------#
# Work on a copy so the table stored in obsm keeps its original column names.
Annot_Scores = adata_col17.obsm['COL17Annotation'].copy()
# Column "Upper COL17" becomes "Upper_COL17_Score", and so on.
Annot_Names = [f'{label.replace(" ", "_")}_Score' for label in Annot_Scores.columns]
Annot_Scores.columns = Annot_Names

#Adding to obs
# Copy each renamed score column into obs under the same name.
for Score in Annot_Names:
    adata_col17.obs[Score] = Annot_Scores[Score]
# Display obs, now including the score columns.
adata_col17.obs

In [None]:
#----------------Saving Object------------#
# Write adata_col17 to the h5ad file below.
col17_output_path = "/projects/b1217/HHA/Bulb_Spatial/HHA_Spatial_EL_S1_TACCO_COL17_Mapping_12_15_25.h5ad"
adata_col17.write_h5ad(col17_output_path)