In [None]:
import os
import sys
import scanpy as sc
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Input and output locations, all derived from a single data root so the
# notebook can be pointed at another dataset by editing one line.
data_dir = '/path/to/data'

# Load the combined AnnData object (file name: soupx_combined.h5ad).
adata_path = os.path.join(data_dir, 'soupx_combined.h5ad')
adata = sc.read_h5ad(adata_path)

# Location of the SOLO predictions table. The table itself is read in the
# cell that adds the SOLO results to `adata`, so it is not read a second
# time here.
csv_path = os.path.join(data_dir, 'solo_output', 'solo_predictions.csv')

# Directory that receives the exported .h5ad files; created if missing.
output_dir = os.path.join(data_dir, 'h5ad')
os.makedirs(output_dir, exist_ok=True)

In [None]:
# Per-sample experimental design, keyed by sample ID written as a string.
# Note: 'group_id' is defined here but is not copied into adata.obs below.
metadata = {
    '1':  {'group': 'Sham-GFP', 'group_id': 'A', 'condition': 'Sham', 'treatment': 'GFP',   'side': 'Ipsilateral'},
    '3':  {'group': 'Sham-VEGFC', 'group_id': 'B', 'condition': 'Sham', 'treatment': 'VEGFC', 'side': 'Ipsilateral'},
    '5':  {'group': 'TBI-GFP', 'group_id': 'C', 'condition': 'TBI',  'treatment': 'GFP',   'side': 'Ipsilateral'},
    '6':  {'group': 'TBI-GFP', 'group_id': 'D', 'condition': 'TBI',  'treatment': 'GFP',   'side': 'Contralateral'},
    '7':  {'group': 'TBI-VEGFC', 'group_id': 'E', 'condition': 'TBI',  'treatment': 'VEGFC', 'side': 'Ipsilateral'},
    '8':  {'group': 'TBI-VEGFC', 'group_id': 'F', 'condition': 'TBI',  'treatment': 'VEGFC', 'side': 'Contralateral'},
}

# The metadata keys are strings, so sample_id is compared as a string.
# Without this cast, a numeric (or numeric-categorical) sample_id column
# matches none of the keys and every annotation silently becomes NaN.
sample_ids = adata.obs['sample_id'].astype(str)

# Annotate each cell with the design fields of its sample. Cells whose
# sample ID is absent from `metadata` receive NaN in these columns.
for key in ['group', 'condition', 'treatment', 'side']:
    adata.obs[key] = sample_ids.map({k: v[key] for k, v in metadata.items()})

In [None]:
# Add SOLO results to Anndata

## Formatting
# Append "-<sample_id>" to every cell name so that names can be matched
# against the prediction table, which is keyed the same way below.
# NOTE: obs_names is rewritten in place, so running this cell a second time
# appends the suffix again; reload `adata` before re-running.
adata.obs['sample_id'] = adata.obs['sample_id'].astype(str)
# .to_numpy() makes the concatenation purely positional instead of relying
# on how pandas resolves Index + Series arithmetic.
adata.obs_names = adata.obs_names + '-' + adata.obs['sample_id'].to_numpy()

## Load predictions
csv_path = '/path/to/data/solo_output/solo_predictions.csv'
df = pd.read_csv(csv_path, index_col=0)

# Key the prediction table by the same "<name>-<sample_id>" scheme.
df['sample_id'] = df['sample_id'].astype(str)
df.index = df.index + '-' + df['sample_id'].to_numpy()

## Align
# Fail early, with an actionable message, if any cell has no prediction row.
missing = adata.obs_names.difference(df.index)
if len(missing) > 0:
    raise KeyError(
        f"{len(missing)} cell(s) in adata have no SOLO prediction, "
        f"e.g. {list(missing[:5])}"
    )

solo_calls = df.loc[adata.obs_names, 'prediction']
# Duplicated names in the prediction table make .loc return extra rows,
# which would otherwise surface as an obscure length-mismatch error.
if len(solo_calls) != adata.n_obs:
    raise ValueError(
        f"Expected {adata.n_obs} SOLO predictions but selected {len(solo_calls)}; "
        "the prediction table contains duplicated cell names."
    )
adata.obs['solo_prediction'] = solo_calls.to_numpy()

# Filtering is currently disabled; uncomment to keep only cells whose
# prediction is 'singlet'.
# adata = adata[adata.obs['solo_prediction'] == 'singlet'].copy()
adata.obs.solo_prediction.value_counts()

In [None]:
# Keep only cells with non-zero Ptprc expression (Ptprc encodes CD45, hence
# the variable name).
ptprc_col = adata[:, 'Ptprc'].X
# X may be stored as a sparse matrix or as a dense array; only sparse
# matrices expose .toarray(), so densify only when it is available.
if hasattr(ptprc_col, 'toarray'):
    ptprc_col = ptprc_col.toarray()
ptprc_expr = np.asarray(ptprc_col).flatten()
adata_cd45 = adata[ptprc_expr > 0].copy()

In [None]:
# Export
# Write the full object and the Ptprc-positive subset to output_dir as
# gzip-compressed .h5ad files.
exports = (
    ('01_adata_all.h5ad', adata),
    ('01_adata_cd45.h5ad', adata_cd45),
)
for filename, obj in exports:
    obj.write_h5ad(os.path.join(output_dir, filename), compression='gzip')