In [None]:
import sys
import scanpy as sc
import anndata
import pandas as pd
import numpy as np
import os
data_type = 'float32'

import matplotlib as mpl
from matplotlib import rcParams
import matplotlib.pyplot as plt
import seaborn as sns

import anndata2ri
import rpy2.rinterface_lib.callbacks as rcb
import rpy2.robjects as ro

import warnings
import logging

import math
from scipy.sparse import issparse
from scipy.sparse import csr_matrix
from scipy.stats import median_abs_deviation
import numba
from numba.core.errors import NumbaDeprecationWarning, NumbaPendingDeprecationWarning

# Runtime setup: raise the recursion limit, quieten known-noisy warnings and
# prepare the rpy2 bridge used by the R-based steps further down.
sys.setrecursionlimit(100_000)

# Ignore these warning categories globally (registered in this order).
for _ignored_category in (
    DeprecationWarning,
    NumbaDeprecationWarning,
    pd.errors.PerformanceWarning,
):
    warnings.simplefilter("ignore", category=_ignored_category)

# Only surface errors from the R console callbacks.
rcb.logger.setLevel(logging.ERROR)

# Enable automatic pandas <-> R and AnnData <-> R conversion.
ro.pandas2ri.activate()
anndata2ri.activate()

# Attach the R packages the notebook calls through `ro.r(...)`.
ro.r('library(scry); library(Biobase); library(NMF); library(scran); library(BiocParallel)')

import json
import pandas as pd
import numpy as np
from PIL import Image
import anndata as ad

# Input tables and working directories.
# `clusters` is indexed by the unnamed first CSV column; `markers` is loaded as-is.
clusters = pd.read_csv("C:/Users/woloo/Desktop/cluster_mouse_spatial.csv").set_index("Unnamed: 0")
markers = pd.read_csv("C:/Users/woloo/Desktop/markers_mouse_spatial.csv")

# Paths are joined by plain string concatenation below, so both end with "/".
main_path = "C:/Users/woloo/Desktop/ST/mouse/raw_h5ad_with_figure/"
location_to_save_csv = "C:/Users/woloo/华为云盘/Inte-Inte-Inte-Inte-Inte-Inte/idCHD/spatial/processed_csv/"

In [None]:
# with .h5 file, and with spatial/ folder
# Read one sample folder with `sc.read_visium` and store it as .h5ad in the
# sibling `raw_h5ad_with_figure/` directory.
# NOTE(review): `spatial_data_path` is not defined in the visible cells — confirm
# it is set (with a trailing "/") before running this cell.
# Only the top-level sub-directories are needed, so take the first item that
# os.walk yields instead of materialising a walk over the whole tree.
# NOTE(review): directory listing order is OS-dependent, so index 21 may not
# point at the same sample on another machine.
sample_dirs = next(os.walk(spatial_data_path))[1]
file = sample_dirs[21]
adata = sc.read_visium(spatial_data_path + file)
adata.write_h5ad(spatial_data_path + "../raw_h5ad_with_figure/"+file+".h5ad")

In [None]:
# with prefix, but without spatial/ folder
# Load one sample whose matrix, positions list and image all sit directly in
# `spatial_data_path` and share the "<sample>_" file-name prefix.
# NOTE(review): `spatial_data_path` and `tifffile` are not defined/imported in the
# visible cells — confirm both are available before running this cell.
# The sample is named explicitly; the previous directory lookup was immediately
# overwritten by this assignment and has been dropped.
file = "GSM5943199_Csrp3OE_MI_day14"
adata = sc.read_10x_mtx(spatial_data_path, prefix=file+"_")

# The positions list is read without a header row, so name its six columns here.
meta = pd.read_csv(spatial_data_path+file+'_tissue_positions_list.csv', header=None)
meta.columns = ['barcode', 'in_tissue', 'array_row', 'array_col',"x","y"]
meta = meta.set_index('barcode')
# Put the position rows in the same barcode order as the count matrix.
meta = meta.loc[adata.obs.index, :]
adata.obs = meta[['in_tissue', 'array_row', 'array_col']]
# Spot coordinates are stored in (y, x) column order.
adata.obsm['spatial'] = meta[['y', 'x']].values

hires_img = tifffile.imread(spatial_data_path+"/"+file+".tif")
# Rescale pixel values by 1/255 (assumes an 8-bit image — TODO confirm).
hires_img = hires_img.astype(np.float32) / 255.0

In [None]:
# without prefix, but has spatial/ folder
# Load one sample folder that contains un-prefixed matrix files plus a
# `spatial/` sub-folder (positions list, low-res image, scale factors).
# NOTE(review): `spatial_data_path` and `tifffile` are not defined/imported in the
# visible cells — confirm both are available before running this cell.
# Only the top-level sub-directories are needed, so take the first item that
# os.walk yields instead of walking the whole tree.
# NOTE(review): directory listing order is OS-dependent, so index 3 may not
# point at the same sample on another machine.
file = next(os.walk(spatial_data_path))[1][3]
adata = sc.read_10x_mtx(spatial_data_path+file)

# The positions list is read without a header row, so name its six columns here.
meta = pd.read_csv(spatial_data_path+file+'/spatial/tissue_positions_list.csv', header=None)
meta.columns = ['barcode', 'in_tissue', 'array_row', 'array_col',"x","y"]
meta = meta.set_index('barcode')
# Put the position rows in the same barcode order as the count matrix.
meta = meta.loc[adata.obs.index, :]
# Same obs column order as the prefixed-file loader cell (row before col).
adata.obs = meta[['in_tissue', 'array_row', 'array_col']]
# Spot coordinates are stored in (y, x) column order.
adata.obsm['spatial'] = meta[['y', 'x']].values

lowres_img = np.array(Image.open(spatial_data_path+file+"/spatial/tissue_lowres_image.png"))
# NOTE(review): the TIFF name is hard-coded while `file` is chosen by index —
# verify that they refer to the same sample.
hires_img = tifffile.imread(spatial_data_path+file+"/GSM5355668_WT_MI_day14_1.tif")
# Rescale pixel values by 1/255 (assumes 8-bit images — TODO confirm).
lowres_img = lowres_img.astype(np.float32) / 255.0
hires_img = hires_img.astype(np.float32) / 255.0

with open(spatial_data_path+file+"/spatial/scalefactors_json.json", "r") as f:
    scalefactors = json.load(f)

In [None]:
# Directory holding the filtered .h5ad files and directory receiving the CSV/PNG
# exports. Both are empty placeholders here (this replaces the earlier value of
# `location_to_save_csv`); the cells below join paths by string concatenation,
# so any value filled in should end with a path separator.
spatial_h5ad_path = location_to_save_csv = ""

In [None]:
# Save data slot
# For every "filtered_*.h5ad" file: normalise each spot to a total of 1e5,
# log1p-transform, and write the resulting matrix to "<id>_data.csv".
# (The former `i += 1` counter was removed: `i` is not initialised before this
# cell, so it raised NameError on a fresh kernel, and its value was never used.)
for file in os.listdir(spatial_h5ad_path):
    if file.endswith(".h5ad") and file.startswith('filtered_'):
        adata = sc.read_h5ad(spatial_h5ad_path + file)
        sc.pp.normalize_total(adata, target_sum=1e5)
        sc.pp.log1p(adata)
        # file[9:19] is the 10 characters right after the "filtered_" prefix
        # (presumably the GSM accession — verify against the file names).
        adata.to_df().to_csv(location_to_save_csv+file[9:19]+"_data.csv", index=True, header=True)

In [None]:
# Save image
# Export the 'hires' image stored in each "filtered_*.h5ad" file as a PNG and
# print its shape together with the sample's scale factors.
i = 0  # running count of matching .h5ad files
for file in os.listdir(spatial_h5ad_path):
    if not (file.endswith(".h5ad") and file.startswith('filtered_')):
        continue
    i += 1
    adata = sc.read_h5ad(spatial_h5ad_path + file)

    # Use the first library entry stored under uns['spatial'].
    library_entry = adata.uns['spatial'][list(adata.uns['spatial'].keys())[0]]
    if 'hires' not in library_entry['images']:
        print(f"No hires image found in {file}, skipping.")
        continue

    arr = library_entry["images"]["hires"]
    # Scale to the 0-255 range and cast to 8-bit integers.
    arr_uint8 = (arr * 255).astype(np.uint8)
    img = Image.fromarray(arr_uint8)

    # Save as PNG, named by the 10 characters after the "filtered_" prefix.
    img.save(location_to_save_csv+file[9:19]+".png", format="PNG", compress_level=3)

    scale = library_entry['scalefactors']
    print(f"{file} {arr.shape}")
    print(f"Processed and saved hires image for {i} : {file} as PNG.")
    print(f"scalefactors:{scale['fiducial_diameter_fullres']} {scale['spot_diameter_fullres']} {scale['tissue_hires_scalef']}")

In [None]:
# DE analysis
# For every "GSM*" file in `main_path`: filter spots/genes, compute PCA,
# neighbours and UMAP, attach the precomputed cluster labels, and export
#   <id>_50.csv   log1p expression of the `deg_50` genes
#   <id>_app.csv  log1p expression of the `deg_300` genes
#   <id>_id.csv   barcode, cluster label, spatial and UMAP coordinates
#   <id>_hv.csv   gene / cluster / avg_log2FC / p_val_adj from `deg_300`
# NOTE(review): `deg_50` and `deg_300` are not defined in the visible cells —
# confirm they are created before this cell runs.


def _dense_float64(matrix):
    """Return `matrix` (scipy-sparse or dense) as a dense float64 ndarray.

    The cast to float64 matters for the text output: rounding float32 values
    and printing them with "%s" would expose float32 representation noise.
    """
    dense = matrix.toarray() if issparse(matrix) else np.asarray(matrix)
    return dense.astype(np.float64)


def _save_csv_with_header(path, table, header):
    """Write `table` as comma-separated text with `header` as its first line."""
    # comments="" stops numpy from prefixing the header line with "# ".
    np.savetxt(path, table, delimiter=",", fmt="%s", header=header,
               comments="", encoding="utf-8")


def _save_expression_csv(path, barcodes, expression, gene_names):
    """Write a barcode column followed by `expression` rounded to 2 decimals."""
    table = np.concatenate(
        (np.array(barcodes)[:, None],              # cell barcode
         np.round(expression, decimals=2)),        # log1p data
        axis=1)
    _save_csv_with_header(path, table, "cell," + ",".join(gene_names))


whole = [i for i in os.listdir(main_path) if i.startswith("GSM")]
# `sample_number` is the 1-based position of the file in `whole`; it forms the
# "_<n>" suffix of the row labels looked up in `clusters`.
for sample_number, file in enumerate(whole, start=1):
    print(file)
    adata = sc.read(main_path+file)
    sc.pp.filter_cells(adata, min_genes=200)
    sc.pp.filter_genes(adata, min_cells=10)
    adata.var_names_make_unique()
    print("____________")
    # log1p is computed on a copy, so adata.X itself stays untransformed.
    adata.layers["log1p"] = sc.pp.log1p(adata, copy=True).X
    # NOTE(review): PCA therefore runs on the untransformed adata.X — confirm
    # this is intended.
    sc.pp.pca(adata, n_comps=30, use_highly_variable=False)
    sc.pp.neighbors(adata, n_neighbors=10, n_pcs=30)
    sc.tl.umap(adata, min_dist=0.5, spread=1.0)
    adata.obs["leiden"] = clusters.loc[adata.obs_names + "_" + str(sample_number), "x"].to_list()

    adata_deg = adata[:,deg_50["gene"]]
    adata_append = adata[:,deg_300["gene"]]
    # Explicit sparse/dense handling (previously a bare `except:` fallback,
    # which also hid unrelated errors).
    expr_deg50 = _dense_float64(adata_deg.layers["log1p"])
    expr_deg_append = _dense_float64(adata_append.layers["log1p"])

    # file[:10] is the first 10 characters of the file name, used as sample id.
    out_prefix = location_to_save_csv+file[:10]

    _save_expression_csv(out_prefix+"_50.csv", adata.obs_names,
                         expr_deg50, adata_deg.var_names.to_list())
    _save_expression_csv(out_prefix+"_app.csv", adata.obs_names,
                         expr_deg_append, adata_append.var_names.to_list())

    basic = pd.DataFrame(np.concatenate(
                (np.array(adata.obs_names)[:,None],    # cell barcode
                np.array(adata.obs["leiden"])[:,None],    # leiden group numbers
                np.array(adata.obsm["spatial"]),    # spatial
                np.multiply(adata.obsm["X_umap"],100).astype(int),    # umap (x,y) => (100x,100y)
                ), axis=1))
    basic.columns = pd.Series(["index","leiden","spatialx","spatialy","umapx","umapy",])
    basic.to_csv(out_prefix+"_id.csv", index = False, lineterminator = "\r\n")

    hv_info = np.concatenate((
        np.array(deg_300["gene"])[:,None],
        np.array(deg_300["cluster"])[:,None],
        np.array(deg_300["avg_log2FC"])[:,None],
        np.array(deg_300["p_val_adj"])[:,None],
    ), axis=1)
    _save_csv_with_header(out_prefix+"_hv.csv", hv_info, "gene,cell_type,logFC,pval_adj")

In [None]:
# NMF
# For every "filtered_*.h5ad" file: select the top-deviance genes (scry), run
# NMF (R package NMF) on their log1p values, and export
#   <id>_module.csv    per-spot program loadings (basis matrix)
#   <id>_programs.csv  per-gene program weights (transposed coefficient matrix)
N_TOP_GENES = 3000   # number of highest-deviance genes kept
N_PROGRAMS = 30      # NMF rank, i.e. number of programs/columns exported

# NOTE(review): the slice skips the first 106 directory entries, but
# os.listdir order is not guaranteed — confirm this still selects the intended
# files on the machine where it is run.
for file in os.listdir(spatial_h5ad_path)[106:]:
    if file.endswith(".h5ad") and file.startswith("filtered_"):
        adata = sc.read_h5ad(spatial_h5ad_path + file)
        print(file)
        adata.layers["counts"] = adata.X.copy()  # Store raw counts
        adata.layers["log1p"] = adata.X.copy()
        # Fixed: `10^4` is bitwise XOR in Python (== 14), not ten thousand.
        sc.pp.normalize_total(adata, target_sum=1e4)
        sc.pp.log1p(adata)
        # The "log1p" layer is log1p of the un-normalised copy taken above.
        sc.pp.log1p(adata, layer="log1p")

        # NOTE(review): adata.X is normalised and log-transformed at this
        # point; devianceFeatureSelection is documented for count data, so
        # check whether layers["counts"] should be passed instead.
        ro.globalenv["adata"] = adata.X.T
        ro.r('sce = devianceFeatureSelection(adata)')
        binomial_deviance = ro.r("sce").T
        mask = np.zeros(adata.var_names.shape, dtype=bool)
        mask[binomial_deviance.argsort()[-N_TOP_GENES:]] = True
        adata.var["highly_variable"] = mask

        # Spots x selected-genes slice of the log1p layer.
        highly_variable_matrix = adata.layers["log1p"][:, mask]
        ro.globalenv["highly_variable_matrix"] = highly_variable_matrix
        ro.r('data <- as.matrix(highly_variable_matrix)')

        print("\033[33mNMF...\033[0m")
        ro.r(f'res <- nmf(data, {N_PROGRAMS}, method="snmf/r", seed="nndsvd")')
        module_components = ro.r("t(coef(res))")
        cell_embeddings = ro.r("basis(res)")
        # Reuse the matrix fetched above instead of calling into R a second time.
        adata.obsm["NMF_embeddings"] = cell_embeddings

        # file name is "filtered_<id>_..."; the second "_"-separated token is the id.
        out_prefix = location_to_save_csv+file.split("_")[1]

        # NMF embeddings
        table = np.concatenate(
            (np.array(adata.obs_names)[:,None],    # cell barcode
            np.round(cell_embeddings,decimals=3),    # programs expression level
            ), axis=1)
        # Write the header together with the data; comments="" stops numpy
        # from prefixing the header line with "# ".
        np.savetxt(out_prefix+"_module.csv", table, delimiter=",", fmt="%s",
                   header="cell,"+",".join(["nmf_"+str(j) for j in range(1, N_PROGRAMS+1)]),
                   comments="", encoding = "utf-8")

        # NMF top genes
        # Rows are indexed by the selected gene names, one column per program.
        # NOTE(review): the header labels the gene-name column "program_id" and
        # the program columns "gene_<n>"; kept unchanged for downstream
        # compatibility — confirm the labels are intended.
        gene_programs = pd.DataFrame(np.array(module_components, dtype=object), index=adata.var_names[mask])
        gene_programs.to_csv(out_prefix+"_programs.csv",
                             header=["gene_"+str(j) for j in range(1, N_PROGRAMS+1)],
                             index=True, index_label="program_id")
