In [None]:
import os
import warnings

import anndata as ad
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import scipy
import squidpy as sq
import tifffile
from shapely import wkt

# Suppress all future warnings
warnings.simplefilter(action="ignore", category=FutureWarning)

### Resolving directories

In [None]:
# Move the kernel's working directory into the raw-data folder so that the
# data files can be opened by their bare file names.
start_dir = os.getcwd()
print("Previous working directory:", start_dir)

target_path = os.path.join("healthy_skin", "raw")

if os.path.normpath(start_dir).endswith(os.sep + target_path):
    # The cell has been run before and we are already inside the raw-data
    # folder; changing directory again would look for a nested
    # healthy_skin/raw and report a misleading error.
    print("Current working directory:", start_dir)
else:
    # When started from a path containing 'notebooks', step one level up first.
    if 'notebooks' in start_dir:
        os.chdir("..")

    if os.path.isdir(target_path):
        os.chdir(target_path)
        print("Current working directory:", os.getcwd())
    else:
        searched_from = os.getcwd()
        # Undo the optional step up so that re-running the cell after fixing
        # the layout starts from the same place.
        os.chdir(start_dir)
        # Fail here instead of printing and continuing: otherwise the failure
        # only surfaces later as an unrelated-looking file read error.
        raise FileNotFoundError(
            f"The directory '{target_path}' does not exist "
            f"(looked inside '{searched_from}')."
        )


Previous working directory: c:\Users\sevco\Documents\dimplomka\gitRepo\Keratoacanthoma_spatial_transcriptomics\data
Current working directory: c:\Users\sevco\Documents\dimplomka\gitRepo\Keratoacanthoma_spatial_transcriptomics\data\healthy_skin\raw


### Load Data

In [None]:
# 1. Load cell metadata
cells = pd.read_parquet("cells.parquet")
print(cells.head())

# 2. Load transcript coordinates (spot-level)
transcripts = pd.read_parquet("transcripts.parquet")
print(transcripts.head())

# 3. Load gene × cell matrix
adata = sc.read_10x_h5("cell_feature_matrix.h5")
adata.var_names_make_unique()

# Add cell metadata to AnnData.
# DataFrame.join matches on index labels, so the metadata index has to hold
# the same string labels as adata.obs_names. Ids that come back from parquet
# as bytes or integers would otherwise join to nothing and silently leave
# every metadata column NaN.
cells_by_id = cells.set_index("cell_id")
cells_by_id.index = cells_by_id.index.map(
    lambda cid: cid.decode() if isinstance(cid, bytes) else str(cid)
)
n_matched = int(adata.obs_names.isin(cells_by_id.index).sum())
if n_matched == 0:
    raise ValueError(
        "None of the cell ids in cells.parquet match the barcodes in "
        "cell_feature_matrix.h5; cell metadata cannot be attached."
    )
if n_matched < adata.n_obs:
    print(f"Warning: only {n_matched} of {adata.n_obs} cells have metadata in cells.parquet")
adata.obs = adata.obs.join(cells_by_id)

# 4. Load cell boundaries (if needed for plotting)
cell_boundaries = pd.read_parquet("cell_boundaries.parquet")
if "polygon" in cell_boundaries.columns:
    # Boundaries are stored as WKT polygons
    cell_boundaries['geometry'] = cell_boundaries['polygon'].apply(wkt.loads)
else:
    # NOTE(review): Xenium bundles normally store boundaries as one row per
    # vertex (cell_id, vertex_x, vertex_y) rather than as WKT strings —
    # confirm which layout this dataset uses. The table is left untouched
    # here so the optional step does not abort the whole cell.
    print(
        "cell_boundaries has no 'polygon' column; skipping WKT parsing. "
        f"Available columns: {list(cell_boundaries.columns)}"
    )

# 5. Load morphology image
# NOTE(review): imread returns the image as an in-memory array; whole-slide
# morphology images can be very large — confirm this fits in RAM.
morph_img = tifffile.imread("morphology.ome.tif")
print(morph_img.shape)  # usually (channels, height, width) or (height, width, channels)


# QC and filtering

In [None]:
# Tunable QC / preprocessing parameters, collected in one place.
# NOTE(review): min_genes=200 is the usual droplet scRNA-seq default. Targeted
# imaging panels often measure only a few hundred genes in total, in which
# case this threshold removes most or all cells — confirm it suits this panel.
MIN_GENES_PER_CELL = 200
MIN_CELLS_PER_GENE = 3
NORMALIZE_TARGET_SUM = 1e4
SCALE_MAX_VALUE = 10

# 6. QC filtering example
sc.pp.filter_cells(adata, min_genes=MIN_GENES_PER_CELL)
sc.pp.filter_genes(adata, min_cells=MIN_CELLS_PER_GENE)

# Stop early with a clear message instead of failing somewhere inside
# HVG selection / PCA when the thresholds removed everything.
if adata.n_obs == 0 or adata.n_vars == 0:
    raise ValueError(
        f"QC filtering left {adata.n_obs} cells and {adata.n_vars} genes "
        f"(min_genes={MIN_GENES_PER_CELL}, min_cells={MIN_CELLS_PER_GENE}); "
        "relax the thresholds."
    )

# 7. Normalization & log-transform
sc.pp.normalize_total(adata, target_sum=NORMALIZE_TARGET_SUM)
sc.pp.log1p(adata)

# Keep the full log-normalized matrix: the subsetting below keeps only the
# highly variable genes and scaling overwrites X, so without this snapshot
# the remaining genes would be missing from the saved object.
adata.raw = adata.copy()

# 8. Basic clustering (optional)
sc.pp.highly_variable_genes(adata)
# .copy() turns the subset into a standalone AnnData rather than a view of
# the full object, so the in-place scaling below acts on real data.
adata = adata[:, adata.var.highly_variable].copy()
sc.pp.scale(adata, max_value=SCALE_MAX_VALUE)
sc.tl.pca(adata)
sc.pp.neighbors(adata)
sc.tl.leiden(adata)
sc.tl.umap(adata)

In [None]:
# 9. Persist the processed AnnData object as an .h5ad file in the current
#    working directory
processed_path = "xenium_skin_processed.h5ad"
adata.write_h5ad(processed_path)