In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported automatically before each cell executes.
%load_ext autoreload
%autoreload 2

In [2]:
import warnings
warnings.filterwarnings('ignore')
import os
import sys
from pathlib import Path
from tqdm import tqdm

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Notebook-wide matplotlib defaults, collected in one mapping and applied in
# a single call.
_rc_overrides = {
    'pgf.texsystem': 'xelatex',
    'font.family': 'serif',
    'text.usetex': False,
    'pgf.rcfonts': False,
    'figure.dpi': 300,
}
plt.rcParams.update(_rc_overrides)
import seaborn as sns

In [6]:
# Working directories.
# BASE_DIR is the single root; every other location is derived from it so
# that relocating the data only requires editing this one line.
BASE_DIR = Path(r'G:\spatial_data')
# Input tables read by the following cells (expression matrix, spot metadata).
src_dir = BASE_DIR / 'reference' / 'XW_STARMAP_mCNS_article'

# Analysis directory: outputs of this run go under <BASE_DIR>/analysis/<RUN_ID>.
RUN_ID = '20250513_STARMAP_mCNS_compare'
analysis_dir = BASE_DIR / 'analysis' / RUN_ID

In [32]:
# Reference expression matrix; in the visible cells only its column names are
# used, as the reference gene panel. The path is derived from BASE_DIR rather
# than repeated as a second hard-coded absolute path.
ref_matrix_csv = (
    BASE_DIR
    / 'processed'
    / '20230705_PRISM3D_mousebrain_HT_confocal'
    / 'segmented'
    / 'expression_matrix.csv'
)
exp_mtx_ref = pd.read_csv(ref_matrix_csv, index_col=0)
# Normalise case (first letter upper, rest lower) so the names can be compared
# with the equally capitalised names of the other dataset.
gene_list_ref = [gene.capitalize() for gene in exp_mtx_ref.columns]

In [20]:
# Raw expression table for well01brain; the first CSV column becomes the
# index. Later cells look gene names up from this index by position.
raw_expression_csv = src_dir / 'well01brainraw_expression_pd.csv'
exp_mtx_raw = pd.read_csv(raw_expression_csv, index_col=0)
exp_mtx_raw.head()

Unnamed: 0_level_0,well01brain_0,well01brain_1,well01brain_2,well01brain_3,well01brain_4,well01brain_5,well01brain_6,well01brain_7,well01brain_8,well01brain_9,...,well01brain_29613,well01brain_29614,well01brain_29615,well01brain_29616,well01brain_29617,well01brain_29618,well01brain_29619,well01brain_29620,well01brain_29621,well01brain_29622
GENE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A2M,0,0,1,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
ABCC9,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ABI3BP,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ACBD7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ACTA2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [25]:
mapped_genes_raw = pd.read_csv(src_dir / 'well01brain_spot_meta.csv', index_col=0)
# Build the position -> gene-name lookup once. Previously the index was
# converted to a list inside the lambda, i.e. once per spot row.
gene_names = [name.capitalize() for name in exp_mtx_raw.index]
# `geneid - 1` is used as a position into the expression-matrix index
# (presumably geneid is 1-based -- TODO confirm against the data source).
mapped_genes_raw['gene'] = mapped_genes_raw['geneid'].map(lambda gene_id: gene_names[gene_id - 1])
mapped_genes_raw.head()

Unnamed: 0,spot_merged_1,spot_merged_2,spot_merged_3,geneid,cellid,gene
0,23020,15263,1,580,31934,Neurod1
1,23034,15232,1,823,31934,Sema4d
2,23087,14864,1,68,31872,Bhlhe22
3,23089,14860,1,68,31872,Bhlhe22
4,23094,15906,1,930,31850,Tgfb2


In [28]:
# Keep only spots whose gene is also in the reference panel. The vectorised
# .str.capitalize() replaces the per-row Python lambda, and .copy() makes the
# result an independent frame so columns can be added to it later without a
# SettingWithCopyWarning (warnings are silenced at the top of the notebook).
in_reference_panel = mapped_genes_raw['gene'].str.capitalize().isin(gene_list_ref)
mapped_genes = mapped_genes_raw[in_reference_panel].copy()
overlap_genes = mapped_genes['gene'].unique()
print(len(overlap_genes))
print(overlap_genes)

21
['Cck' 'Slc1a3' 'Slc17a7' 'Lamp5' 'Rgs4' 'Gfap' 'Sst' 'Gad1' 'Pcp4' 'Aqp4'
 'Vip' 'Gad2' 'Mbp' 'Plcxd2' 'Rorb' 'Prox1' 'Apod' 'Pvalb' 'Rprm' 'Nr4a2'
 'Pmch']


In [29]:
# Expose the spot coordinates under the image-axis names used by the density
# code: spot_merged_2 becomes the row axis (Y), spot_merged_1 the column axis (X).
# assign() returns a new frame, so re-running this cell is idempotent and no
# column is written into a filtered slice.
mapped_genes = mapped_genes.assign(
    Y=mapped_genes['spot_merged_2'],
    X=mapped_genes['spot_merged_1'],
)

In [31]:
from tifffile import imwrite

# analysis_dir is already a Path, so no extra Path() wrapper is needed.
density_dir = analysis_dir / 'density'
# parents=True: on a fresh run analysis_dir itself does not exist yet, and a
# plain mkdir() would raise FileNotFoundError.
density_dir.mkdir(parents=True, exist_ok=True)

# Full-resolution image extent as (rows, cols): largest coordinate plus 2 on
# each axis.
im_shape = int(mapped_genes['Y'].max()) + 2, int(mapped_genes['X'].max()) + 2
print(im_shape)

def plot_density_downsample(df, fac=100, shape=None):
    """Count occupied pixels in every ``fac`` x ``fac`` tile of the image.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs integer-valued ``'Y'`` (row) and ``'X'`` (column) columns.
    fac : int, default 100
        Edge length, in pixels, of one tile of the downsampled image.
    shape : tuple of int, optional
        ``(rows, cols)`` of the full-resolution image. When omitted the
        notebook-level ``im_shape`` is used, which is what the function always
        did before this parameter existed.

    Returns
    -------
    numpy.ndarray
        Unsigned-integer array of shape
        ``(shape[0] // fac + 1, shape[1] // fac + 1)``. Each entry is the
        number of distinct ``(Y, X)`` pixels that fall in that tile; several
        spots on the same pixel count once.

    Raises
    ------
    IndexError
        If a coordinate lies outside the padded image extent.
    """
    if shape is None:
        shape = im_shape
    n_rows = shape[0] // fac + 1
    n_cols = shape[1] // fac + 1
    # np.uint is the dtype numpy uses when summing a uint16 array, so the
    # result type matches the earlier canvas-and-sum implementation.
    density = np.zeros((n_rows, n_cols), dtype=np.uint)

    coordinates = df[['Y', 'X']].to_numpy()
    if len(coordinates) == 0:
        return density

    # Bin the coordinates directly instead of painting a full-resolution
    # canvas and summing it; the canvas cost hundreds of MB per call.
    # De-duplicating first reproduces the "pixel set to 1" semantics.
    pixels = np.unique(coordinates, axis=0)
    np.add.at(density, (pixels[:, 0] // fac, pixels[:, 1] // fac), 1)
    return density

# Write one downsampled density image per gene. A single groupby pass replaces
# re-filtering the whole spot table for every gene; sort=False keeps the
# order of first appearance, the same order unique() produced.
for gene, gene_spots in tqdm(mapped_genes.groupby('gene', sort=False)):
    image = plot_density_downsample(gene_spots[['Y', 'X']].astype(int))
    imwrite(density_dir / f'{gene}.tif', image.astype(np.uint16))

(16279, 26497)


100%|██████████| 21/21 [00:13<00:00,  1.61it/s]


In [42]:
from tifffile import imwrite

density_dir = analysis_dir / 'density'
# parents=True so the cell also works when analysis_dir does not exist yet.
density_dir.mkdir(parents=True, exist_ok=True)
# Entries already present in the output folder. A well whose sub-folder
# exists is skipped below.
# NOTE(review): a run interrupted half-way leaves the folder behind, so that
# well is skipped next time even though it is incomplete.
processed = os.listdir(density_dir)

# Position -> capitalised gene name, built once for all wells and all rows.
# The lookup uses the well01brain expression matrix for every well
# (presumably all wells share one gene ordering -- TODO confirm).
gene_names = [name.capitalize() for name in exp_mtx_raw.index]


def plot_density_downsample(df, fac=50, shape=None):
    """Count occupied pixels in every ``fac`` x ``fac`` tile of the image.

    This definition replaces any earlier ``plot_density_downsample`` in the
    kernel; its default ``fac`` is 50.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs integer-valued ``'Y'`` (row) and ``'X'`` (column) columns.
    fac : int, default 50
        Edge length, in pixels, of one tile of the downsampled image.
    shape : tuple of int, optional
        ``(rows, cols)`` of the full-resolution image; falls back to the
        notebook-level ``im_shape`` when omitted.

    Returns
    -------
    numpy.ndarray
        Unsigned-integer array of shape
        ``(shape[0] // fac + 1, shape[1] // fac + 1)`` holding, per tile, the
        number of distinct ``(Y, X)`` pixels (spots sharing a pixel count once).

    Raises
    ------
    IndexError
        If a coordinate lies outside the padded image extent.
    """
    if shape is None:
        shape = im_shape
    n_rows = shape[0] // fac + 1
    n_cols = shape[1] // fac + 1
    # np.uint matches the dtype numpy produces when summing a uint16 canvas.
    density = np.zeros((n_rows, n_cols), dtype=np.uint)
    coordinates = df[['Y', 'X']].to_numpy()
    if len(coordinates) == 0:
        return density
    # Bin directly instead of allocating a full-resolution canvas per gene.
    pixels = np.unique(coordinates, axis=0)
    np.add.at(density, (pixels[:, 0] // fac, pixels[:, 1] // fac), 1)
    return density


for file in os.listdir(src_dir):
    # A well is identified by its '<feature>_spatial.csv' file.
    if not file.endswith('_spatial.csv'):
        continue
    feature = '_'.join(file.split('_')[:-1])
    if feature in processed:
        continue

    # Read the spot table and attach gene names.
    print(feature)
    print('reading spots...')
    mapped_genes_raw = pd.read_csv(src_dir / f'{feature}_spot_meta.csv', index_col=0)
    mapped_genes_raw['gene'] = mapped_genes_raw['geneid'].map(lambda gene_id: gene_names[gene_id - 1])

    # Restrict to genes shared with the reference panel. 'gene' was
    # capitalised just above, so it can be compared directly. .copy() and
    # assign() avoid writing columns into a filtered slice.
    print('preprossing...')
    mapped_genes = mapped_genes_raw[mapped_genes_raw['gene'].isin(gene_list_ref)].copy()
    overlap_genes = mapped_genes['gene'].unique()
    mapped_genes = mapped_genes.assign(
        Y=mapped_genes['spot_merged_2'],
        X=mapped_genes['spot_merged_1'],
    )
    if mapped_genes.empty:
        # max() of an empty column is NaN and int(NaN) raises; skip instead.
        print(f'{feature}: no spots for overlapping genes, skipped')
        continue

    # One density image per gene, written to a per-well sub-folder.
    density_dir_tmp = density_dir / feature
    density_dir_tmp.mkdir(exist_ok=True)
    im_shape = int(mapped_genes['Y'].max()) + 2, int(mapped_genes['X'].max()) + 2
    for gene, gene_spots in tqdm(mapped_genes.groupby('gene', sort=False), desc=f'{feature}'):
        image = plot_density_downsample(gene_spots[['Y', 'X']].astype(int), shape=im_shape)
        imwrite(density_dir_tmp / f'{gene}.tif', image.astype(np.uint16))

well1_5
reading spots...
preprossing...


well1_5: 100%|██████████| 21/21 [00:04<00:00,  4.73it/s]


well2_5
reading spots...
preprossing...


well2_5: 100%|██████████| 21/21 [00:13<00:00,  1.61it/s]


well3_5
reading spots...
preprossing...


well3_5: 100%|██████████| 21/21 [00:18<00:00,  1.14it/s]


well7_5
reading spots...
preprossing...


well7_5: 100%|██████████| 21/21 [00:18<00:00,  1.14it/s]
