In [1]:
import scanpy as sc
import anndata as ad
import pandas as pd

# R code to download data from R using package spatialLIBD

`library("spatialLIBD")`


`spe <- fetch_data(type = "spe") # Download the spot-level data`

`write.csv(as.data.frame(spe@colData), 'Human_DLPFC_cell_meta.csv')`

`writeMM(spe@assays@data$counts, 'Human_DLPFC_cell.mtx')`

`gene_df <- data.frame(row.names=row.names(spe@assays@data$counts), ensID = row.names(spe@assays@data$counts))`

`write.csv(gene_df, 'Human_DLPFC_gene_meta.csv')`

`write.csv(spe@int_colData@listData$spatialData, 'Human_DLPFC_cell_spatialData.csv')`

In [8]:
gene_df = pd.read_csv("Human_DLPFC_gene_meta.csv", index_col=0)
gene_df.iloc[:3, :]

Unnamed: 0,ensID
ENSG00000243485,ENSG00000243485
ENSG00000237613,ENSG00000237613
ENSG00000186092,ENSG00000186092


In [9]:
cell_df = pd.read_csv("Human_DLPFC_cell_meta.csv", index_col=0)
cell_df.iloc[:3, :]

Unnamed: 0,sample_id,Cluster,sum_umi,sum_gene,subject,position,replicate,subject_position,discard,key,...,HVG_PCA_spatial,pseudobulk_PCA_spatial,markers_PCA_spatial,SpatialDE_UMAP_spatial,SpatialDE_pool_UMAP_spatial,HVG_UMAP_spatial,pseudobulk_UMAP_spatial,markers_UMAP_spatial,spatialLIBD,ManualAnnotation
AAACAACGAATAGTTC-1,151507,6,948,727,Br5292,0,1,Br5292_pos0,False,151507_AAACAACGAATAGTTC-1,...,5,4,3,1,1,1,1,1,L1,
AAACAAGTATCTCCCA-1,151507,3,4261,2170,Br5292,0,1,Br5292_pos0,False,151507_AAACAAGTATCTCCCA-1,...,2,3,1,2,2,1,2,1,L3,
AAACAATCTACTAGCA-1,151507,2,1969,1093,Br5292,0,1,Br5292_pos0,False,151507_AAACAATCTACTAGCA-1,...,3,5,7,2,1,4,1,6,L1,


In [10]:
cell_df = cell_df[['sample_id', 'layer_guess_reordered']]
cell_df

Unnamed: 0,sample_id,layer_guess_reordered
AAACAACGAATAGTTC-1,151507,Layer1
AAACAAGTATCTCCCA-1,151507,Layer3
AAACAATCTACTAGCA-1,151507,Layer1
AAACACCAATAACTGC-1,151507,WM
AAACAGCTTTCAGAAG-1,151507,Layer6
...,...,...
TTGTTGTGTGTCAAGA-1.11,151676,Layer6
TTGTTTCACATCCAGG-1.11,151676,WM
TTGTTTCATTAGTCTA-1.11,151676,WM
TTGTTTCCATACAACT-1.11,151676,Layer6


In [11]:
cell_spatial = pd.read_csv("Human_DLPFC_cell_spatialData.csv", index_col=0)
(cell_spatial.index == cell_df.index).all()

True

In [12]:
cell_meta = pd.concat([cell_df, cell_spatial], axis=1)
cell_meta.iloc[:10,:]

Unnamed: 0,sample_id,layer_guess_reordered,in_tissue,array_row,array_col
AAACAACGAATAGTTC-1,151507,Layer1,True,0,16
AAACAAGTATCTCCCA-1,151507,Layer3,True,50,102
AAACAATCTACTAGCA-1,151507,Layer1,True,3,43
AAACACCAATAACTGC-1,151507,WM,True,59,19
AAACAGCTTTCAGAAG-1,151507,Layer6,True,43,9
AAACAGGGTCTATATT-1,151507,Layer6,True,47,13
AAACAGTGTTCCTGGG-1,151507,WM,True,73,43
AAACATTTCCCGGATT-1,151507,Layer5,True,61,97
AAACCACTACACAGAT-1,151507,Layer3,True,3,117
AAACCCGAACGAAATC-1,151507,Layer3,True,45,115


In [18]:
img_coords = pd.read_csv("Human_DLPFC_img_coords.csv", index_col=0)
(img_coords.index == cell_meta.index).all()

False

In [19]:
inds = [i.split(".")[0] for i in cell_meta.index.tolist()]
img_coords.index.tolist() == inds

True

In [20]:
cell_meta.index = inds

In [21]:
cell_meta = pd.concat([cell_meta, img_coords], axis=1)

In [23]:
data = sc.read_mtx("Human_DLPFC_cell.mtx")
data

AnnData object with n_obs × n_vars = 33538 × 47681

In [24]:
data = data.transpose()

In [25]:
data.obs = cell_meta
data.var = gene_df

In [26]:
data.obs

Unnamed: 0,sample_id,layer_guess_reordered,in_tissue,array_row,array_col,pxl_col_in_fullres,pxl_row_in_fullres
AAACAACGAATAGTTC-1,151507,Layer1,True,0,16,2514,3276
AAACAAGTATCTCCCA-1,151507,Layer3,True,50,102,8520,9178
AAACAATCTACTAGCA-1,151507,Layer1,True,3,43,2878,5133
AAACACCAATAACTGC-1,151507,WM,True,59,19,9581,3462
AAACAGCTTTCAGAAG-1,151507,Layer6,True,43,9,7663,2779
...,...,...,...,...,...,...,...
TTGTTGTGTGTCAAGA-1,151676,Layer6,True,31,77,6378,7946
TTGTTTCACATCCAGG-1,151676,WM,True,58,42,9594,5512
TTGTTTCATTAGTCTA-1,151676,WM,True,60,30,9827,4684
TTGTTTCCATACAACT-1,151676,Layer6,True,45,27,8029,4491


In [27]:
sample_ids = list(set(data.obs['sample_id']))
sample_ids

[151671,
 151676,
 151507,
 151508,
 151669,
 151509,
 151510,
 151670,
 151673,
 151674,
 151675,
 151672]

In [30]:
from tqdm.notebook import tqdm
import os

for i, sid in enumerate(tqdm(sample_ids)):
    sample_data = data[data.obs.sample_id == sid,:]
    folder = f'tissue_{sid}'
    if not os.path.exists(folder):
        os.mkdir(folder)
    sample_data.write(f"{folder}/data.h5ad")
    sample_data.obs.to_csv(f"{folder}/cell_meta.csv")

  0%|          | 0/12 [00:00<?, ?it/s]

  df[key] = c


In [29]:
data.obs

Unnamed: 0,sample_id,layer_guess_reordered,in_tissue,array_row,array_col,pxl_col_in_fullres,pxl_row_in_fullres
AAACAACGAATAGTTC-1,151507,Layer1,True,0,16,2514,3276
AAACAAGTATCTCCCA-1,151507,Layer3,True,50,102,8520,9178
AAACAATCTACTAGCA-1,151507,Layer1,True,3,43,2878,5133
AAACACCAATAACTGC-1,151507,WM,True,59,19,9581,3462
AAACAGCTTTCAGAAG-1,151507,Layer6,True,43,9,7663,2779
...,...,...,...,...,...,...,...
TTGTTGTGTGTCAAGA-1,151676,Layer6,True,31,77,6378,7946
TTGTTTCACATCCAGG-1,151676,WM,True,58,42,9594,5512
TTGTTTCATTAGTCTA-1,151676,WM,True,60,30,9827,4684
TTGTTTCCATACAACT-1,151676,Layer6,True,45,27,8029,4491
