In [1]:
import os
import argparse
import anndata as ad
import numpy as np
import pandas as pd
import scanpy as sc
from config import Config
from utils import *


def load_ST_file(dataset, highly_genes, k, radius, data_root="../DLPFC/"):
    """Load one DLPFC Visium slice, keep its annotated spots and attach graphs.

    Reads ``metadata.tsv`` and ``filtered_feature_bc_matrix.h5`` from
    ``<data_root>/<dataset>/``, drops every spot that has no
    ``layer_guess_reordered`` annotation, and builds a fresh AnnData carrying
    the labels, spatial coordinates and the feature / spatial graphs.

    Args:
        dataset: Name of the slice sub-directory (e.g. ``'151507'``).
        highly_genes: Forwarded to ``normalize(adata, highly_genes=...)``.
        k: Forwarded to ``features_construct_graph(adata.X, k=...)``.
        radius: Forwarded to ``spatial_construct_graph1(adata, radius=...)``.
        data_root: Directory that contains one sub-directory per slice.

    Returns:
        The AnnData returned by ``normalize`` with, in addition:
        ``obs['ground_truth']`` (original layer names), ``obs['ground']``
        (layer names recoded as the strings ``'0'``..``'6'``),
        ``obs['array_row']``, ``obs['array_col']``, ``obsm['spatial']``,
        ``uns['spatial']``, ``obsm['fadj']``, ``obsm['sadj']``,
        ``obsm['graph_nei']`` and ``obsm['graph_neg']``.

    Raises:
        ValueError: If no barcode of the count matrix has an annotation in
            ``metadata.tsv``.

    Note:
        ``metadata.tsv`` must be indexed by spot barcode once parsed by
        ``pd.read_table``; the label columns are attached to ``adata.obs`` by
        index, so they only line up under that condition.
    """
    # The trailing "" keeps the final path separator, as the original string did.
    path = os.path.join(data_root, dataset, "")  # data path
    labels_path = os.path.join(path, "metadata.tsv")  # the meta of your data

    metadata = pd.read_table(labels_path, sep='\t')
    # Only spots with a manual layer annotation are kept.
    labels = metadata["layer_guess_reordered"].dropna()
    layer_codes = {
        'WM': '0',
        'Layer1': '1',
        'Layer2': '2',
        'Layer3': '3',
        'Layer4': '4',
        'Layer5': '5',
        'Layer6': '6',
    }
    # Values missing from the mapping are left unchanged.
    ground = labels.replace(layer_codes)

    adata_h5 = sc.read_visium(path, count_file='filtered_feature_bc_matrix.h5', load_images=True)  # read the data
    adata_h5.var_names_make_unique()

    # Select spots by barcode rather than by row position in metadata.tsv:
    # everything assigned to ``adata.obs`` below is aligned by barcode, so a
    # positional filter would drop the wrong spots whenever the two files are
    # ordered differently.
    keep = np.asarray(adata_h5.obs_names.isin(labels.index))
    if not keep.any():
        raise ValueError(
            "No annotated barcode of " + labels_path
            + " matches the count matrix; is the metadata indexed by barcode?"
        )

    data = adata_h5.X.toarray()[keep]
    obs_names = np.array(adata_h5.obs.index)[keep]
    positions = adata_h5.obsm['spatial'][keep]

    # Gene names were made unique above, so the new var index is unique too.
    adata = ad.AnnData(pd.DataFrame(data, index=obs_names, columns=np.array(adata_h5.var.index), dtype=np.float32))

    # Series assignments are aligned on the barcode / gene index.
    adata.obs['ground_truth'] = labels
    adata.obs['ground'] = ground
    adata.obsm['spatial'] = positions
    adata.obs['array_row'] = adata_h5.obs['array_row']
    adata.obs['array_col'] = adata_h5.obs['array_col']
    adata.uns['spatial'] = adata_h5.uns['spatial']
    adata.var['gene_ids'] = adata_h5.var['gene_ids']
    adata.var['feature_types'] = adata_h5.var['feature_types']
    adata.var['genome'] = adata_h5.var['genome']

    # normalize / *_construct_graph* / normalize_sparse_matrix come from the
    # star-import of ``utils``; their internals are not visible here.
    adata = normalize(adata, highly_genes=highly_genes)
    fadj = features_construct_graph(adata.X, k=k)
    sadj, graph_nei, graph_neg = spatial_construct_graph1(adata, radius=radius)

    # Add self-loops (identity) before normalising each adjacency matrix.
    nfadj = normalize_sparse_matrix(fadj + sp.eye(fadj.shape[0]))
    nsadj = normalize_sparse_matrix(sadj + sp.eye(sadj.shape[0]))
    adata.obsm["fadj"] = nfadj
    adata.obsm["sadj"] = nsadj
    # graph_nei / graph_neg expose ``.numpy()`` (presumably torch tensors).
    adata.obsm["graph_nei"] = graph_nei.numpy()
    adata.obsm["graph_neg"] = graph_neg.numpy()
    # normalize() is opaque from here; re-assert unique gene names on its result.
    adata.var_names_make_unique()
    return adata


if __name__ == "__main__":
    # Pre-process every DLPFC slice and cache the result as
    # ../generate_data/<slice>/DMGCN.h5ad.
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    datasets = ['151507', '151508', '151509', '151510', '151669', '151670',
                '151671', '151672', '151673', '151674', '151675', '151676']

    # The same configuration file is used for every slice, so read it once.
    config_file = '../ini/DLPFC.ini'
    config = Config(config_file)

    for dataset in datasets:
        print(dataset)
        savepath = "../generate_data/" + dataset + "/"
        # Creates ../generate_data/ as well when it is missing and does not
        # fail when the directory already exists.
        os.makedirs(savepath, exist_ok=True)

        adata = load_ST_file(dataset, config.fdim, config.k, config.radius)
        print("saving")
        adata.write(savepath + 'DMGCN.h5ad')
        print("done\n")


  from .autonotebook import tqdm as notebook_tqdm


151507


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")


start select HVGs
start features construct graph
The graph contains 243063 edges, 4221 cells.
57.5842 neighbors per cell on average.
saving
done

151508


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")


start select HVGs
start features construct graph
The graph contains 252209 edges, 4381 cells.
57.5688 neighbors per cell on average.
saving
done

151509


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")


start select HVGs
start features construct graph
The graph contains 276920 edges, 4788 cells.
57.8363 neighbors per cell on average.
saving
done

151510


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")


start select HVGs
start features construct graph
The graph contains 265275 edges, 4595 cells.
57.7312 neighbors per cell on average.
saving
done

151669


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")


start select HVGs
start features construct graph
The graph contains 206026 edges, 3636 cells.
56.6628 neighbors per cell on average.
saving
done

151670


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")


start select HVGs
start features construct graph
The graph contains 198488 edges, 3484 cells.
56.9713 neighbors per cell on average.
saving
done

151671


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")


start select HVGs
start features construct graph
The graph contains 234933 edges, 4093 cells.
57.3987 neighbors per cell on average.
saving
done

151672


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")


start select HVGs
start features construct graph
The graph contains 222830 edges, 3888 cells.
57.3122 neighbors per cell on average.
saving
done

151673


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")


start select HVGs
start features construct graph
The graph contains 205255 edges, 3611 cells.
56.8416 neighbors per cell on average.
saving
done

151674


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")


start select HVGs
start features construct graph
The graph contains 206205 edges, 3635 cells.
56.7276 neighbors per cell on average.
saving
done

151675


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")


start select HVGs
start features construct graph
The graph contains 201112 edges, 3566 cells.
56.3971 neighbors per cell on average.
saving
done

151676


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")


start select HVGs
start features construct graph
The graph contains 194279 edges, 3431 cells.
56.6246 neighbors per cell on average.
saving
done

