In [None]:
# Make sure kernel is set to rapids
import time
import pandas as pd
import numpy as np
import scanpy as sc
import anndata
import pickle
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from natsort import natsorted

import cupy as cp
import cupyx as cpx
import cupyx
import cugraph

import cudf
import math

import h5py
import scipy

import dask
import dask_cudf
import rmm

from dask_cuda import initialize, LocalCUDACluster
from dask import delayed, dataframe
from dask.dataframe.utils import make_meta
from dask.distributed import Client, default_client
from dask_cuda.local_cuda_cluster import cuda_visible_devices
import dask.dataframe as dd

from cuml.manifold import TSNE, UMAP
from cuml.linear_model import LinearRegression

from sklearn.decomposition import PCA
from cuml.dask.cluster import KMeans as cu_dask_KMeans
from cuml.dask.manifold import UMAP as cu_dask_UMAP
from cuml.dask.linear_model import LinearRegression as cu_dask_LinearRegression
from cuml.dask.neighbors import NearestNeighbors as cu_dask_NearestNeighbors

#These need to be present as .py files in the directory.
import rapids_scanpy_funcs as rsc
import utils as utils

# Warning suppression, included on the advice of the RAPIDS manual.
# filterwarnings ignores warnings whose message starts with 'Expected '; simplefilter('ignore')
# then ignores every warning, which makes the first, narrower filter redundant.
import warnings
warnings.filterwarnings('ignore', 'Expected ')
warnings.simplefilter('ignore')

# Re-initialise the RAPIDS Memory Manager (RMM) and route CuPy allocations through it.
# NOTE(review): managed_memory=False turns managed (unified) memory OFF, so GPU memory cannot be
# oversubscribed with this setting; managed_memory=True is what allows oversubscription — confirm intent.
rmm.reinitialize(managed_memory=False)
cp.cuda.set_allocator(rmm.rmm_cupy_allocator)

In [None]:
# Load the AnnData object that the previous script saved as a pickle.
# NOTE: pickle.load can execute arbitrary code, so only open files you produced yourself.
with open('merged_spatial_filtered', 'rb') as pickle_file:
    adata = pickle.load(pickle_file)

In [None]:
#Storing raw counts separately.
# Store a copy rather than `adata` itself: assigning the same object makes .raw hold a
# reference to the same X, so the in-place log1p that follows could also transform the
# matrix kept in .raw. adata.raw.X is log-transformed again later in this notebook, so it
# must still contain the untransformed counts at that point.
adata.raw = adata.copy()

In [None]:
# Log-transform the expression matrix in place: X = log(X + 1).
# NOTE(review): no library-size normalisation is visible in this notebook — presumably done upstream; confirm.
sc.pp.log1p(adata)

In [None]:
# Scale each gene to unit variance and zero mean (scanpy defaults), updating adata.X.
sc.pp.scale(adata)

In [None]:
#Run PCA, keeping 100 components. The neighbour search below reads the result as 'X_pca'.
sc.tl.pca(adata, n_comps=100)

In [None]:
#Find neighbours. Make sure method is set to rapids or it won't run on the GPU.
# Uses the PCA representation with 20 neighbours per cell; n_pcs=20 restricts the search to
# 20 of the 100 components computed above.
sc.pp.neighbors(adata, use_rep='X_pca', n_neighbors=20, n_pcs=20, method='rapids')

In [None]:
#Leiden clustering over a sweep of resolutions. Keys are added by hand because rsc.leiden has no key_added argument. Note that this leiden is the function from rapids_scanpy_funcs.py, NOT the one from scanpy.
# The float's string form is the column suffix, e.g. 0.75 -> "leiden_res0.75" and 1.0 -> "leiden_res1.0".
for resolution in (0.1, 0.2, 0.3, 0.4, 0.5, 0.7, 0.75, 0.8, 0.9, 1.0, 1.5, 2.0):
    adata.obs[f"leiden_res{resolution}"] = rsc.leiden(adata, resolution=resolution)

In [None]:
#Run UMAP. Make sure method is set to rapids or it won't run on the GPU.
# random_state=42 fixes the seed passed to the embedding; the plot is coloured by the
# resolution-0.7 Leiden labels computed above.
sc.tl.umap(adata, random_state=42, method='rapids')
sc.pl.umap(adata, color='leiden_res0.7')
#sc.pl.umap(adata, color='leiden_res0.4')

In [None]:
# Convert the resolution-0.7 labels to strings, then to a categorical column.
adata.obs["leiden_res0.7"] = (
    adata.obs["leiden_res0.7"]
    .astype('str')
    .astype('category')
)

In [None]:
# Subclustering for all applicable tmp-objects.
# Only the last, uncommented call in this cell is active. The commented-out calls are kept as a
# record — presumably the restrict_to chains used for the other objects named below; confirm.
# NOTE(review): `brl` is not imported in this notebook's import cell (only `rsc` and `utils` are
# imported as local modules), so this cell raises NameError on a fresh kernel unless `brl` is
# provided elsewhere — confirm which module it refers to and add the import.

# Sub-cluster - PASS1
# Endothelial: clustered_obj_endo_pass1_NC100_NN20_PC20_2024_06_10.rds
# 1, 3 from 0.3
#brl.leiden(adata, key_added="leiden_res0.3_c1_0.3", resolution=0.3, restrict_to = ("leiden_res0.3", ["1"]))
#brl.leiden(adata, key_added="subcluster", resolution=0.2, restrict_to = ("leiden_res0.3_c1_0.3", ["3"]))
# brl.leiden(adata, key_added="leiden_res0.3_c1_0.3", resolution=0.3, restrict_to = ("leiden_res0.3", ["1"]))
# brl.leiden(adata, key_added="leiden_res0.3_c1_0.3_c2_0.2", resolution=0.15, restrict_to = ("leiden_res0.3_c1_0.3", ["2"]))
# brl.leiden(adata, key_added="subcluster2", resolution=0.2, restrict_to = ("leiden_res0.3_c1_0.3_c2_0.2", ["3"]))

# Sub-cluster - PASS2
# Immune: clustered_obj_imm_pass2_NC100_NN20_PC20_2024_06_13.rds
# 11 from res 0.7
# brl.leiden(adata, key_added="subcluster", resolution=0.2, restrict_to = ("leiden_res0.7", ["11"]))
# 7, 11
# brl.leiden(adata, key_added="leiden_res0.7_c11_0.2", resolution=0.2, restrict_to = ("leiden_res0.7", ["11"]))
# brl.leiden(adata, key_added="subcluster2", resolution=0.2, restrict_to = ("leiden_res0.7_c11_0.2", ["7"]))

# Myeloid: clustered_obj_meyloid_NC100_NN20_PC20_2024_06_13.rds
# brl.leiden(adata, key_added="subcluster", resolution=0.2, restrict_to = ("leiden_res0.7", ["6"]))

# Lymphoid: clustered_obj_lymphoid_NC100_NN20_PC20_2024_06_13.rds
# brl.leiden(adata, key_added="subcluster", resolution=0.3, restrict_to = ("leiden_res0.7", ["7"]))

# T cells: clustered_obj_T_NC100_NN20_PC20_2024_06_13.rds
# 1, 5
# brl.leiden(adata, key_added="leiden_res0.7_c1", resolution=0.5, restrict_to = ("leiden_res0.7", ["1"]))
# brl.leiden(adata, key_added="subcluster", resolution=0.3, restrict_to = ("leiden_res0.7_c1", ["5"]))

# Mesenchymal: clustered_obj_mes_pass2_NC100_NN20_PC20_2024_06_13.rds
# 2, 3, 4
# brl.leiden(adata, key_added="leiden_res0.4_c2", resolution=0.4, restrict_to = ("leiden_res0.4", ["2"]))
# brl.leiden(adata, key_added="leiden_res0.4_c2_c3", resolution=0.2, restrict_to = ("leiden_res0.4_c2", ["3"]))
# brl.leiden(adata, key_added="subcluster", resolution=0.3, restrict_to = ("leiden_res0.4_c2_c3", ["4"]))

# Endothelial: clustered_obj_endo_pass2_NC100_NN20_PC20_2024_06_13.rds
# 0, 1
# brl.leiden(adata, key_added="leiden_res0.5_c0", resolution=0.4, restrict_to = ("leiden_res0.5", ["0"]))
# brl.leiden(adata, key_added="subcluster", resolution=0.4, restrict_to = ("leiden_res0.5_c0", ["1"]))

# Macrophages: clustered_obj_macro_pass3_NC100_NN20_PC20_2024_06_18.rds
# 6
#brl.leiden(adata, key_added="leiden_res0.7_c5", resolution=0.3, restrict_to = ("leiden_res0.7", ["5"]))
# Active call: re-cluster only cluster "6" of leiden_res0.7 at resolution 0.3; labels are requested under "subcluster".
brl.leiden(adata, key_added="subcluster", resolution=0.3, restrict_to = ("leiden_res0.7", ["6"]))


In [None]:
# Plot the UMAP coloured by the "subcluster" column requested from the restricted Leiden call above.
sc.pl.umap(adata, color="subcluster")
#sc.pl.umap(adata, color="subcluster2")

In [None]:
# Marker genes overlaid on the UMAP; use_raw=False reads expression from adata.X, not adata.raw.
markers_overview = [
    "PTPRC",
    "VWF",
    "EPCAM",   # Epithelial
    "PECAM1",  # Endothelial
]
sc.pl.umap(adata, color=markers_overview, use_raw=False)

In [None]:
# Broad lineage markers overlaid on the UMAP, one panel per gene, in the order listed.
# use_raw=False reads expression from adata.X, not adata.raw.
markers_lineage = [
    "PTPRC", "CD3E", "CD3D", "TRAC",                       # Immune
    "EPCAM",                                               # Epithelial
    "PECAM1",                                              # Endothelial
    "TTN", "FHL2",                                         # Cardiomyocyte
    "DCN", "C7", "FBLN1", "LTBP2", "OGN", "PDGFRA",        # Fibroblasts
    "PDGFRB", "ACTA2",                                     # Pericytes
    "ADIPOQ",                                              # Adipocytes
    # Currently not plotted: "ACTA2", "MYH11"  -> vascular smooth muscle cells (VSMCs)
    # Currently not plotted: "VWF", "BMX"      -> endocardial cells
]
sc.pl.umap(adata, color=markers_lineage, use_raw=False)

In [None]:
# NK / T / B / plasma / proliferation / pDC markers overlaid on the UMAP, in the order listed.
# use_raw=False reads expression from adata.X, not adata.raw.
markers_nk_t_b = [
    "GNLY", "NKG7", "KLRB1", "KLRD1",            # NK ("KLRC1" is currently left out)
    "CD3E", "CD3D", "CD8A", "CD4", "FOXP3",      # T
    "MS4A1",                                     # B
    "CD79A",                                     # B and plasma
    "TNFRSF17", "DERL3",                         # plasma
    "MKI67",                                     # Proliferating
    "LILRA4", "CCR7",                            # pDCs
]
sc.pl.umap(adata, color=markers_nk_t_b, use_raw=False)

In [None]:
# Macrophage / monocyte / dendritic-cell / mast-cell markers overlaid on the UMAP, in the order listed.
# NOTE(review): "MRC1" appears twice in this list (macrophage row and monocyte-derived DC row).
# use_raw=False reads expression from adata.X, not adata.raw.
markers_macro_mono_dc = [
    "CD163", "MRC1", "MARCO", "FCGR1A",            # Macrophage
    "LYZ", "CD14", "FCGR3A", "MS4A7",              # Monocyte
    "FCER1A",                                      # mo-DCs, cDC1, or pDC
    "CD1A", "CD1C", "MRC1", "CD209", "ITGAM",      # monocyte derived DCs
    "CD8A", "ITGAX", "XCR1",                       # cDC1
    "MS4A2",                                       # mast
    "MKI67",
]
sc.pl.umap(adata, color=markers_macro_mono_dc, use_raw=False)

In [None]:
# Replace X (scaled by sc.pp.scale earlier) with log1p of the matrix stored in adata.raw.
# copy=True makes log1p return a new matrix, so adata.raw.X itself is not modified by this call.
adata.X = sc.pp.log1p(adata.raw.X, copy=True)

In [None]:
#Cast int32 variables to float64 so that the object can be converted back to Seurat. Note that int64 may work as well, but I haven't tested it.
# Each column is listed exactly once and cast in a single loop; a column missing from
# adata.obs raises KeyError.
float_cast_columns = [
    "transcript_counts",
    "control_probe_counts",
    "control_codeword_counts",
    "unassigned_codeword_counts",
    "deprecated_codeword_counts",
    "nCount_Xenium",
    "nFeature_Xenium",
    "total_counts",
]
for column in float_cast_columns:
    adata.obs[column] = adata.obs[column].astype('float64')

In [None]:
# Save the AnnData object as .h5ad so it can be imported into R and converted to a Seurat
# object; the next step is command-line R with sceasy.
# Output paths used for earlier objects:
#   /scratch/aoill/projects/heart_transplant/clustered_obj_imm_mye_subclustered_NC100_NN20_PC20_2024_05_30.h5ad
#   /scratch/aoill/projects/heart_transplant/clustered_obj_mes2_NC100_NN20_PC20_2024_06_03.h5ad
h5ad_file = "/scratch/aoill/projects/heart_transplant/new/clustered_obj_macro_c5_pass3_NC100_NN20_PC20_2024_06_18.h5ad"
adata.write_h5ad(filename=h5ad_file)

In [None]:
# Export the per-cell metadata table (adata.obs) as a CSV file.
meta_csv_file = "/scratch/aoill/projects/heart_transplant/new/tmp_metadata.csv"
adata.obs.to_csv(path_or_buf=meta_csv_file)