In [3]:
import scanpy as sc
import pandas as pd
import infercnvpy as cnv
import numpy as np 

import sys
import os 
sys.path.append(os.path.expanduser(f"~/SSS_mount/insituCNV/InSituCNV"))
import insitucnv as icv

### Functions used 
______

In [None]:
def add_metadata_to_CosMx_samples(adata, SID):
    """Attach cluster, ROI and feature metadata to a CosMx AnnData object.

    Three CSV files are read from
    ``~/SSS_mount/insituCNV/data/WTx-CosMx_TVA/round2``:
    ``{SID}/cluster_metadata.csv``, ``{SID}/{SID}_ROI.csv`` and
    ``features_all.csv``. In each file the first column is used as the index.

    Parameters
    ----------
    adata
        AnnData object. Its ``obs`` index is matched against the index of the
        cluster and ROI tables, and its ``var_names`` against the
        ``features_all`` column of the feature table. Note that
        ``obs["add_sub"]`` is written on this input object before filtering.
    SID
        Sample identifier used to build the per-sample file paths.

    Returns
    -------
    AnnData
        A copy of ``adata`` restricted to cells that received a cluster label,
        with ``obs["add_sub"]``, ``obs["ROI_status"]``, ``obs["healthy_ROI"]``
        and ``var["features_all"]`` added.
    """
    # Resolve the data location relative to the user's home directory, as the
    # rest of the notebook does, instead of hard-coding one user's home.
    data_dir = os.path.expanduser("~/SSS_mount/insituCNV/data/WTx-CosMx_TVA/round2")
    sample_dir = os.path.join(data_dir, f"{SID}")

    # Add cluster metadata
    clusters = pd.read_csv(os.path.join(sample_dir, "cluster_metadata.csv"), index_col=0)
    cluster_column = clusters.columns[0]
    # Take the first data column explicitly so extra columns in the CSV do not
    # break the cell -> cluster mapping.
    cell2clusters = clusters.iloc[:, 0].to_dict()
    adata.obs["add_sub"] = adata.obs.index.map(cell2clusters)
    print(f'Adding {cluster_column} as adata.obs["add_sub"].')

    unassigned = adata.obs["add_sub"].isna()
    print(f"Removing {unassigned.sum()} cells that were not assigned a cluster.")
    # Subsetting an AnnData yields a view; copy it so the assignments below
    # write to a real object rather than triggering an implicit copy.
    adata = adata[~unassigned].copy()

    # Add region-of-interest (ROI) metadata
    ROI = pd.read_csv(os.path.join(sample_dir, f"{SID}_ROI.csv"), index_col=0)
    cell2ROI = ROI['ROI'].to_dict()
    adata.obs['ROI_status'] = adata.obs.index.map(cell2ROI)
    # Cells without an ROI entry compare unequal to 'REF' and are flagged False.
    adata.obs['healthy_ROI'] = (adata.obs['ROI_status'] == 'REF').to_numpy()
    print('Adding ROI status!')

    # Add feature metadata
    features_all = pd.read_csv(os.path.join(data_dir, "features_all.csv"), index_col=0)
    adata.var['features_all'] = adata.var_names.isin(features_all['features_all'].values)
    # Currently disabled: epithelial feature list.
    # features_epi = pd.read_csv(os.path.join(data_dir, "features_epi.csv"), index_col=0)
    # adata.var['features_epi'] = adata.var_names.isin(features_epi['features_epi'].values)

    return adata

In [9]:
def process_CosMx_samples(adata):
    """Store spatial coordinates and log-normalized counts on ``adata``.

    If both ``CenterX_global_px`` and ``CenterY_global_px`` are present in
    ``adata.obs``, they are stacked into ``adata.obsm["spatial"]``. The current
    ``adata.X`` is saved as ``layers['raw']``; ``adata.X`` is then passed
    through ``sc.pp.normalize_total`` and ``sc.pp.log1p`` and the result is
    saved as ``layers['lognorm']``.

    ``adata`` is modified in place and also returned.
    """
    coord_columns = ["CenterX_global_px", "CenterY_global_px"]

    # Spatial coordinates (only when both columns exist in adata.obs)
    print('Adding spatial information...')
    if not all(column in adata.obs for column in coord_columns):
        print('No spatial information found!')
    else:
        coords = adata.obs[coord_columns].copy()
        adata.obsm["spatial"] = coords.to_numpy()

    # Keep the untouched counts, then normalize and log-transform adata.X
    print('Log-normalizing the counts...')
    raw_counts = adata.X.copy()
    adata.layers['raw'] = raw_counts

    sc.pp.normalize_total(adata)
    sc.pp.log1p(adata)

    lognorm_counts = adata.X.copy()
    adata.layers['lognorm'] = lognorm_counts

    return adata

In [11]:
import infercnvpy as cnv
import scanpy as sc
import numpy as np

def DR_smoothing_CosMx_samples(adata, smooth_neigh = 100, n_pcs = 40):
    """Run PCA and neighbor search, then smooth the data for CNV inference.

    Parameters
    ----------
    adata
        AnnData object with a ``var['features_all']`` column marking the genes
        to use for PCA.
    smooth_neigh
        Passed as ``n_neighbors`` to ``icv.tl.smooth_data_for_cnv``.
    n_pcs
        Number of principal components passed to ``sc.pp.neighbors``
        (default 40, the value previously hard-coded here).

    Returns
    -------
    The same ``adata`` object, modified in place.
    """
    # Perform PCA using only the genes marked as 'features_all' == True
    print('Computing PCA and nearest neighbors...')
    sc.pp.pca(adata, mask_var=adata.var['features_all'].eq(True))
    sc.pp.neighbors(adata, n_pcs=n_pcs)

    # Smoothing
    icv.tl.smooth_data_for_cnv(adata, n_neighbors=smooth_neigh)

    # The PCA params recorded in adata.uns may hold pandas Series (the mask was
    # passed as a Series); convert them to plain dicts.
    # NOTE(review): presumably needed so adata can be written to .h5ad — confirm.
    # .get() tolerates scanpy versions that do not record one of these keys.
    pca_params = adata.uns["pca"]["params"]
    for key in ("use_highly_variable", "mask_var"):
        if isinstance(pca_params.get(key), pd.Series):
            pca_params[key] = pca_params[key].to_dict()

    return adata

### Processing all CosMx datasets
_________

In [None]:
# Raw input .h5ad file for each sample ID (SID).
dataset_paths = {
    '221': os.path.expanduser("~/SSS_mount/insituCNV/data/WTx-CosMx_TVA/round2/221/adata_221_raw.h5ad")
}

# Samples to process in this run; each must be a key of dataset_paths.
SIDs = ['221']

for SID in SIDs:

    adata = sc.read_h5ad(dataset_paths[SID])
    adata = add_metadata_to_CosMx_samples(adata, SID)
    adata = icv.pp.add_genomic_positions(adata)
    adata = process_CosMx_samples(adata)
    adata = DR_smoothing_CosMx_samples(adata, smooth_neigh = 100)

    # The path must use the loop variable `SID`; the previous `{sid}` was an
    # undefined name and raised NameError before anything was saved.
    output_path = os.path.expanduser(f"~/SSS_mount/insituCNV/data/WTx-CosMx_TVA/round2/{SID}/adata_{SID}_processed_1.h5ad")
    adata.write(output_path, compression='gzip')
    print(SID, 'saved!')