In [None]:
import anndata as ad
import cv2
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import requests
import scanpy as sc
import squidpy as sq
from IPython.display import display
from PIL import Image
from scipy import sparse
from urllib.parse import quote

# Override Pillow's pixel-count safety limit before any image is opened, so
# very large images are not rejected as potential decompression bombs.
# NOTE(review): presumably sized for the largest H&E image — confirm the origin
# of this exact number.
Image.MAX_IMAGE_PIXELS = 699_408_640

In [None]:
# Read the MERFISH AnnData object from disk and display its summary.
file = 'merfish_not_binned.h5ad'
adata = ad.read_h5ad(filename=file)
adata

In [None]:
# Count cells per (sample, replicate) pair; missing values are kept as their own group.
adata.obs.loc[:, ['sample', 'replicate']].value_counts(dropna=False)

In [None]:
# Build {sample: replicate} from the (sample, replicate) pairs found in obs.
pair_counts = (
    adata.obs[['sample', 'replicate']]
    .value_counts(dropna=False)
    .reset_index(name='n_cells')
)
# value_counts may list combinations that never occur (count 0) when the columns
# are categorical; those must not take part in the mapping.
pair_counts = pair_counts[pair_counts['n_cells'] > 0]
# value_counts is sorted by descending count, so keeping the first row per
# sample selects its most frequent replicate instead of silently letting the
# last (rarest) row win when a sample appears more than once.
map_replicate = (
    pair_counts
    .drop_duplicates(subset='sample', keep='first')
    .set_index('sample')['replicate']
    .to_dict()
)
# Manual override for this one sample.
# NOTE(review): the reason for forcing replicate 1 is not recorded here — confirm.
map_replicate['HTAPP-982-SMP-7629'] = 1
# H&E identifier per sample: "<sample>_HE_<replicate>".
final_replicate_map = {key: key + '_HE_' + str(value) for key, value in map_replicate.items()}
final_replicate_map

In [None]:
# Tag every cell with the H&E identifier of its sample, then show cells per identifier.
hande_names = adata.obs['sample'].map(final_replicate_map)
adata.obs['hande_mapping'] = hande_names
adata.obs['hande_mapping'].value_counts()

In [None]:
# Names of all entries in the H&E image directory (os.listdir already returns a list).
jpgs = os.listdir('./h_and_e_jpgs/')
jpgs

In [None]:
# Sanity check: cells per (sample, H&E identifier) pair.
adata.obs.loc[:, ['sample', 'hande_mapping']].value_counts()

In [None]:
# Turn each H&E identifier into its image file name ("<id>_processed.jpg").
# The value is derived from `sample` rather than from the current
# `hande_mapping` column so that re-running this cell cannot append the
# suffix a second time.
adata.obs['hande_mapping'] = (
    adata.obs['sample'].map(final_replicate_map).astype(str) + '_processed.jpg'
)

In [None]:
# Cells per H&E image file name, including any missing values.
hande_file_names = adata.obs['hande_mapping']
hande_file_names.value_counts(dropna=False)

In [None]:
# Write one .h5ad per H&E image: the cells mapped to that image, their x/y
# coordinates in obsm['spatial'], and the image pixels in uns['image'].
# Note: `sample` holds the image file name ("<id>_processed.jpg") at this point.
for sample in adata.obs['hande_mapping'].unique():
    print(sample)
    if sample not in jpgs:
        # Nothing is written without an image, so skip the costly subset copy.
        continue
    print(f"{sample} found in jpgs list")
    adata_subset = adata[adata.obs['hande_mapping'] == sample].copy()
    adata_subset.obsm['spatial'] = adata_subset.obs[['x', 'y']].to_numpy()
    # The context manager closes the image file once the pixels are in memory.
    with Image.open(f'h_and_e_jpgs/{sample}') as he_image:
        img_array = np.asarray(he_image)
    adata_subset.uns['image'] = img_array
    adata_subset.write(filename=sample.replace('_processed.jpg', '.h5ad'), compression='gzip')

In [None]:
# Figure size passed to scanpy's global figure settings for the plots that follow.
sc.set_figure_params(
    figsize=(15, 40),
)

In [None]:
# Per-sample AnnData files in the working directory (names containing '_HE_').
# Restricting to '.h5ad' keeps unrelated files that contain '_HE_' out of the
# later read_h5ad loops; sorting makes the processing order reproducible.
merfish_spatial = sorted(
    f for f in os.listdir() if '_HE_' in f and f.endswith('.h5ad')
)

In [None]:
# Create gene lists based off of Cords et al. that were found in MERFISH
# Keys are CAF subtype labels; values are the marker genes scored for each.
merfish_markers = {
    'mCAF': ['MMP11', 'CSTB'],
    'iCAF': ['APOC1', 'CD34', 'CLU', 'CTSL', 'EGFR'],
    'vCAF': ['CD9', 'FOXC1', 'ACTA2'],
    'ifnCAF': ['BLVRA'],
    'apCAF': ['HLA-DRA', 'HLA-DRB1', 'CD74'],
    'rCAF': ['CALCRL', 'EIF3E'],
    # 'CCNB1' was listed twice; the duplicate has been removed.
    # NOTE(review): confirm the second entry was not meant to be a different gene.
    'dCAF': ['AKT1', 'MKI67', 'ANLN', 'AURKA', 'BIRC5', 'CCNB1', 'CDC20', 'CENPF', 'CEP55'],
}

In [None]:
# Generate gene list scores using score_genes()
# Each per-sample file is read, scored once per CAF subtype, and written back
# to the same path.
for file in merfish_spatial:
    adata = ad.read_h5ad(file)
    for caf, markers in merfish_markers.items():
        # Write straight into '<caf>_score'. Scoring into 'score' and renaming
        # afterwards produced duplicate '<caf>_score' columns when this cell
        # was re-run on files that had already been scored.
        sc.tl.score_genes(adata, markers, score_name=caf + '_score')
    adata.write(filename=file, compression='gzip')

In [None]:
# For every per-sample file, plot each obs column whose name contains 'CAF'
# for the cells labelled 'Fibroblast' in obs['OT'], over the stored image.
for file in merfish_spatial:
    adata = ad.read_h5ad(file)
    print(file)
    caf_cols = [col for col in adata.obs.columns if 'CAF' in col]
    # The fibroblast subset and the image do not depend on the column being
    # plotted, so they are computed once per file rather than once per plot.
    fibroblasts = adata[adata.obs['OT'] == 'Fibroblast']
    he_image = adata.uns['image']
    for col in caf_cols:
        sc.pl.spatial(
            fibroblasts,
            color=col,
            img=he_image,
            scale_factor=1,
            spot_size=20,
            alpha_img=0.5
        )

In [None]:
# Disabled cell, kept for reference: for each per-sample file it would plot the
# 'OT' label of the cells where obs['OT'] == 'Fibroblast' over the stored image.
# for file in merfish_spatial:
#     adata = ad.read_h5ad(file)
#     sc.pl.spatial(
#         adata[adata.obs['OT'] == 'Fibroblast'],
#         color='OT',
#         img=adata.uns['image'],
#         scale_factor=1,
#         spot_size=20,
#     )