In [1]:
import scanpy as sc
import spatialdata as sd
import anndata as ad
import pandas as pd
import numpy as np
import os
import scanorama
import re

In [None]:
# Let scanpy use all available cores for parallel steps.
# Set the value on the public `sc.settings` instance: assigning to the private
# `ScanpyConfig` class replaces the `n_jobs` property itself and skips its setter.
sc.settings.n_jobs = -1

In [None]:
# Inspecting scanorama outputs.
# The QC directory defaults to the original local path but can be overridden with
# the MYELOMA_QC_DIR environment variable, so the notebook also runs on machines
# where the data lives elsewhere.
qc_dir = os.environ.get(
    'MYELOMA_QC_DIR',
    '/Users/lukashat/Documents/PhD_Schapiro/Projects/Myeloma_Standal/QC/standard',
)
adata = ad.read_h5ad(os.path.join(qc_dir, 'cells_scanorama.h5ad'))

In [3]:
# Display the AnnData summary (n_obs × n_vars plus the obs / obsm keys).
adata


AnnData object with n_obs × n_vars = 1083943 × 42
    obs: 'Image', 'area', 'centroid-0', 'centroid-1', 'axis_major_length', 'axis_minor_length', 'eccentricity', 'image', 'image_width_px', 'image_height_px', 'image_num_channels', 'image_source_file', 'image_recovery_file', 'image_recovered', 'image_acquisition_id', 'image_acquisition_description', 'image_acquisition_start_x_um', 'image_acquisition_start_y_um', 'image_acquisition_end_x_um', 'image_acquisition_end_y_um', 'image_acquisition_width_um', 'image_acquisition_height_um', 'patient_id'
    obsm: 'X_pca', 'X_scanorama', 'X_umap'

In [4]:
# Patient ID is the second "_"-separated token of the image file name
# (e.g. 'TS-373_IMC01_UB_001.tiff' -> 'IMC01').
adata.obs['patient_id'] = adata.obs['image'].str.split('_').str[1]

# Object number, kept as a string: the first standalone run of digits in each obs name
# (e.g. 'Object 1 in TS-373_IMC01_UB_001.tiff' -> '1').
adata.obs['Object'] = [re.search(r'\b\d+\b', name).group() for name in adata.obs_names]

# Acquisition / image bookkeeping columns that are not needed downstream.
obs_to_remove = ['image_num_channels', 'image_recovery_file', 'image_source_file', 'image_recovered','image_acquisition_id','image_acquisition_start_x_um','image_acquisition_start_y_um','image_acquisition_end_x_um', 'image_acquisition_end_y_um', 'image_acquisition_width_um', 'image_acquisition_height_um', 'Image', 'image_width_px', 'image_height_px', 'image_acquisition_description']

# errors='ignore' keeps this cell re-runnable: on a second execution the columns
# are already gone and a plain drop() would raise KeyError.
adata.obs = adata.obs.drop(columns=obs_to_remove, errors='ignore')

In [5]:
# Show the per-cell obs table (the rendered output is truncated to the first and last rows).
adata.obs

Unnamed: 0,area,centroid-0,centroid-1,axis_major_length,axis_minor_length,eccentricity,image,patient_id,Object
Object 1 in TS-373_IMC01_UB_001.tiff,22,1.363636,463.500000,6.466698,4.271760,0.750757,TS-373_IMC01_UB_001.tiff,IMC01,1
Object 2 in TS-373_IMC01_UB_001.tiff,21,1.285714,469.714286,6.861462,3.978932,0.814691,TS-373_IMC01_UB_001.tiff,IMC01,2
Object 3 in TS-373_IMC01_UB_001.tiff,21,1.285714,508.428571,6.493536,4.102096,0.775197,TS-373_IMC01_UB_001.tiff,IMC01,3
Object 4 in TS-373_IMC01_UB_001.tiff,23,1.304348,588.347826,6.963455,4.284087,0.788352,TS-373_IMC01_UB_001.tiff,IMC01,4
Object 5 in TS-373_IMC01_UB_001.tiff,15,1.400000,322.600000,4.618802,4.026578,0.489898,TS-373_IMC01_UB_001.tiff,IMC01,5
...,...,...,...,...,...,...,...,...,...
Object 8756 in TS-373_IMC96_B_002.tiff,17,997.941176,477.176471,6.502679,3.427793,0.849781,TS-373_IMC96_B_002.tiff,IMC96,8756
Object 8757 in TS-373_IMC96_B_002.tiff,35,997.600000,510.857143,10.305207,4.296399,0.908945,TS-373_IMC96_B_002.tiff,IMC96,8757
Object 8758 in TS-373_IMC96_B_002.tiff,5,998.400000,531.200000,3.098387,1.788854,0.816497,TS-373_IMC96_B_002.tiff,IMC96,8758
Object 8759 in TS-373_IMC96_B_002.tiff,19,997.684211,572.157895,5.845028,4.110209,0.710995,TS-373_IMC96_B_002.tiff,IMC96,8759


In [None]:
# Export the batch-corrected expression values as one CSV per image.
# Each file contains the 'Object' id, the expression columns and the morphology
# columns, in the fixed order given by `desired_order`.

# The QC directory defaults to the original local path but can be overridden with
# the MYELOMA_QC_DIR environment variable.
qc_dir = os.environ.get(
    'MYELOMA_QC_DIR',
    '/Users/lukashat/Documents/PhD_Schapiro/Projects/Myeloma_Standal/QC/standard',
)
output_dir = os.path.join(qc_dir, 'batch_corrected_scanorama')
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(output_dir, exist_ok=True)

# Column layout of every exported CSV (loop-invariant, so defined once).
# Note that some channel names start with a space (' 1' ... ' 6').
desired_order = [
    'Object',
    ' 1', ' 2', ' 3', ' 4', ' 5',
    'CD38', 'Perilipin', 'Vimentin', 'B4GALT1', 'MPO', 'CathepsinK', 'ATP5A',
    'RUNX2', 'HIF1A', 'CD11b', 'CD45', 'CS', 'CD11c', 'CD36', 'CD4', 'CD34',
    'CD68', 'IL32', 'IDO', 'CD8', 'GranzymeK', 'PKM2', 'IRF4', 'GLUT1',
    'GranzymeB', 'Ki67', 'CollagenTypeI', 'CD3', 'HistoneH3', 'CPT1A', 'CD98',
    'HLA-DR', 'ST6GAL1', 'CD138', '191Ir', '193Ir',
    ' 6',
    'area', 'centroid-0', 'centroid-1', 'axis_major_length', 'axis_minor_length',
    'eccentricity',
]  # optional extra column, currently disabled: 'distance_to_bone'

# obs columns to attach to the expression matrix; 'patient_id' and 'image' are excluded.
obs_cols = [col for col in adata.obs.columns if col not in ['patient_id', 'image']]

# Fail before writing any file if a requested column is absent, instead of
# hitting a KeyError part-way through the export.
available_cols = set(obs_cols) | set(adata.var_names)
missing_cols = [col for col in desired_order if col not in available_cols]
if missing_cols:
    raise KeyError(f"Columns missing from adata (obs or var_names): {missing_cols}")

unique_images = adata.obs['image'].unique()

for image in unique_images:
    subset_adata = adata[adata.obs['image'] == image]

    # Expression matrix as a DataFrame, joined column-wise with the obs metadata
    # (both share the obs_names index), then put into the export order.
    subset_df = pd.concat([subset_adata.obs[obs_cols], subset_adata.to_df()], axis=1)
    subset_df = subset_df[desired_order]

    # File name is the image name with everything from '.tiff' onwards removed.
    csv_filename = f"{image.split('.tiff')[0]}.csv"
    csv_path = os.path.join(output_dir, csv_filename)
    subset_df.to_csv(csv_path, index=False)