In [1]:
import stlearn as st
from pathlib import Path
import scanpy as sc
st.settings.set_figure_params(dpi=300)
import matplotlib.pyplot as plt 
import pandas as pd
import anndata

In [2]:
from anndata import AnnData
from typing import Optional
def merge(
        adata1: AnnData,
        adata2: AnnData,
        copy: bool = True,

) -> Optional[AnnData]:
    """Stack the observations of two AnnData objects into a new AnnData.

    The expression matrices (``to_df()``) and the ``obs`` tables of both
    inputs are concatenated row-wise, ``adata1`` first. Only ``X``, ``obs``
    and ``uns["spatial"]`` are carried over; ``uns["spatial"]`` is taken
    from ``adata1`` alone.

    Parameters
    ----------
    adata1, adata2
        Objects whose observations are stacked. Neither input is modified.
    copy
        If True (default) the merged object is returned. If False the
        function returns None.
        NOTE(review): nothing is updated in place, so ``copy=False``
        discards the merged result; kept for interface compatibility.

    Returns
    -------
    The merged AnnData when ``copy`` is True, otherwise None.

    Raises
    ------
    KeyError
        If ``adata1.uns`` has no ``"spatial"`` entry.
    """
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    merged_df = pd.concat([adata1.to_df(), adata2.to_df()])
    merged_df_obs = pd.concat([adata1.obs, adata2.obs])
    merged_adata = AnnData(merged_df, obs=merged_df_obs)
    merged_adata.uns["spatial"] = adata1.uns["spatial"]

    return merged_adata if copy else None

In [3]:
# Root directory that holds the input datasets (absolute, machine-specific path).
BASE_PATH = Path("/scratch/90days/uqxtan9")
# Directory under which per-sample image tiles are written; created if missing.
TILE_PATH = Path("/tmp/tiles")
TILE_PATH.mkdir(parents=True, exist_ok=True)

In [4]:
# Output directory for the saved .h5ad objects, CSV tables and figures of this
# notebook (absolute, machine-specific path); created if missing.
OUT_PATH = Path("/scratch/90days/uqxtan9/stlearn_revision2/imputation_bc")
OUT_PATH.mkdir(parents=True, exist_ok=True)

In [28]:
# Load the first section under the library id "block1" and attach the
# full-resolution TIFF as the "fulres" image of that library.
# NOTE: SAMPLE is a module-level name that the Sample2 cell overwrites.
SAMPLE = "block1"
Sample1 = st.Read10X(BASE_PATH / "breast_cancer_10x", 
                     library_id=SAMPLE,
                     quality="fulres",)
img = plt.imread(BASE_PATH / "breast_cancer_10x" /"V1_Breast_Cancer_Block_A_Section_1_image.tif", 0)
Sample1.uns["spatial"][SAMPLE]['images']["fulres"] = img

Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


In [29]:
# Load the second section under the library id "block2" and attach the
# full-resolution TIFF as the "fulres" image of that library.
# NOTE: this rebinds SAMPLE and img, which the Sample1 cell also assigns.
SAMPLE = "block2"
Sample2 = st.Read10X(BASE_PATH / "breast_cancer_2_10x", 
                     library_id=SAMPLE,
                     quality="fulres",)
img = plt.imread(BASE_PATH / "breast_cancer_2_10x" /"V1_Breast_Cancer_Block_A_Section_2_image.tif", 0)
Sample2.uns["spatial"][SAMPLE]['images']["fulres"] = img

Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.


In [30]:
# Preprocess both samples in place: gene filtering, total-count normalisation,
# log1p and a 50-component PCA. Re-running this cell applies the steps again
# to the already transformed objects.
# The tiling / feature-extraction / SME_normalize steps are commented out here;
# later cells that read obsm['raw_SME_normalized'] need them to have been run.
for andata in [
    Sample1, 
    Sample2
]:
    st.pp.filter_genes(andata,min_cells=3)  # keep genes with min_cells=3
    st.pp.normalize_total(andata)
    st.pp.log1p(andata)
#     st.pp.scale(andata)
    st.em.run_pca(andata,n_comps=50)
#     TILE_PATH_ = TILE_PATH / list(Sample1.uns["spatial"].keys())[0]
#     st.pp.tiling(andata, TILE_PATH_, crop_size=299)
#     st.pp.extract_feature(andata)
#     st.spatial.SME.SME_normalize(andata, use_data="raw")

Normalization step is finished in adata.X
Log transformation step is finished in adata.X
PCA is done! Generated in adata.obsm['X_pca'], adata.uns['pca'] and adata.varm['PCs']
Normalization step is finished in adata.X
Log transformation step is finished in adata.X
PCA is done! Generated in adata.obsm['X_pca'], adata.uns['pca'] and adata.varm['PCs']


In [23]:
# Build the stSME-normalised view of Sample1: replace X with
# obsm['raw_SME_normalized'], then scale and recompute the PCA.
# NOTE(review): obsm['raw_SME_normalized'] is only present if SME_normalize was
# run on Sample1; that call is commented out in the preprocessing loop.
Sample1_SME = Sample1.copy()
Sample1_SME.X = Sample1_SME.obsm['raw_SME_normalized']
st.pp.scale(Sample1_SME)
st.em.run_pca(Sample1_SME,n_comps=50)

Scale step is finished in adata.X
PCA is done! Generated in adata.obsm['X_pca'], adata.uns['pca'] and adata.varm['PCs']


In [None]:
# Same as the Sample1_SME cell, for Sample2: replace X with
# obsm['raw_SME_normalized'], then scale and recompute the PCA.
# NOTE(review): requires SME_normalize to have been run on Sample2; that call
# is commented out in the preprocessing loop.
Sample2_SME = Sample2.copy()
Sample2_SME.X = Sample2_SME.obsm['raw_SME_normalized']
st.pp.scale(Sample2_SME)
st.em.run_pca(Sample2_SME,n_comps=50)

In [1]:
# # louvain clustering on stSME normalised data
# st.pp.neighbors(Sample1_SME,n_neighbors=35,use_rep='X_pca')
# st.tl.clustering.louvain(Sample1_SME, resolution=1.2)
# st.pl.cluster_plot(Sample1_SME,use_label="louvain")

In [18]:
# Per-spot table (indexed by barcode) that includes a precomputed 'louvain' column.
# NOTE: the name `df` is reused later for the marker-gene table.
df = pd.read_csv("./bc_clusters.csv", index_col=0)

In [19]:
df

Unnamed: 0,in_tissue,array_row,array_col,sum_counts,imagecol,imagerow,tile_path,louvain
AAACAAGTATCTCCCA-1,1,50,102,12072.0,1437.953781,1314.933980,/tmp/tiles/Sampel1/1437.9537810000002-1314.933...,0
AAACACCAATAACTGC-1,1,59,19,54083.0,502.640259,1489.603946,/tmp/tiles/Sampel1/502.640259-1489.6039455-40....,1
AAACAGAGCGACTCCT-1,1,14,94,4237.0,1349.092396,609.158410,/tmp/tiles/Sampel1/1349.0923957500002-609.1584...,4
AAACAGGGTCTATATT-1,1,47,13,22649.0,435.478543,1254.290416,/tmp/tiles/Sampel1/435.4785435-1254.2904165-40...,0
AAACAGTGTTCCTGGG-1,1,73,43,51367.0,772.524745,1764.521435,/tmp/tiles/Sampel1/772.5247447500001-1764.5214...,1
...,...,...,...,...,...,...,...,...
TTGTTGTGTGTCAAGA-1,1,31,77,19123.0,1157.013190,941.996690,/tmp/tiles/Sampel1/1157.01318975-941.99669025-...,9
TTGTTTCACATCCAGG-1,1,58,42,20750.0,761.798672,1470.462032,/tmp/tiles/Sampel1/761.7986722500001-1470.4620...,1
TTGTTTCATTAGTCTA-1,1,60,30,33065.0,626.485142,1509.405926,/tmp/tiles/Sampel1/626.4851422500001-1509.4059...,1
TTGTTTCCATACAACT-1,1,45,27,18630.0,593.234318,1215.346522,/tmp/tiles/Sampel1/593.2343175000001-1215.3465...,0


In [21]:
# Copy the precomputed louvain labels onto Sample1; the assignment aligns on the
# barcode index, so barcodes absent from `df` end up as NaN.
Sample1.obs["louvain"] = df["louvain"]

In [2]:
# st.pl.cluster_plot(Sample1_SME,use_label="louvain_", spot_size=6.5,
#                    output=str(OUT_PATH), name="Sample1_clustering.pdf")

In [17]:
# Build the pseudo-spot object for Sample1 (tiles are written under
# TILE_PATH/<first library id of Sample1>/p_spot), then compute a 50-component PCA.
Sample1_ps = st.spatial.SME.pseudo_spot(Sample1,crop_size=299, 
                                        tile_path=TILE_PATH / list(Sample1.uns["spatial"].keys())[0] / "p_spot")
st.em.run_pca(Sample1_ps,n_comps=50)

Tiling image: 100%|██████████ [ time left: 00:00 ]
Extract feature: 100%|██████████ [ time left: 00:00 ]  


The morphology feature is added to adata.obsm['X_morphology']!


Adjusting data: 100%|██████████ [ time left: 00:00 ]
  if not is_categorical(df_full[k]):


Done


In [None]:
# Build the pseudo-spot object for Sample2, then compute a 50-component PCA.
# The tile directory is derived from Sample2's own library id so that its tiles
# do not land in (and mix with) the Sample1 pseudo-spot tile directory.
Sample2_ps = st.spatial.SME.pseudo_spot(Sample2,crop_size=299, 
                                        tile_path=TILE_PATH / list(Sample2.uns["spatial"].keys())[0] / "p_spot")
st.em.run_pca(Sample2_ps,n_comps=50)

In [3]:
# # K-means clustering on stSME normalised PCA
# st.tl.clustering.kmeans(Sample1_ps,n_clusters=11, use_data="X_pca", 
#                         key_added="X_pca_kmeans")
# st.pl.cluster_plot(Sample1_ps, use_label="X_pca_kmeans", spot_size=3)

In [4]:
# st.pp.neighbors(Sample1_ps,n_neighbors=30,use_rep='X_pca')
# st.tl.clustering.louvain(Sample1_ps, resolution=0.5)
# st.pl.cluster_plot(Sample1_ps,use_label="louvain", spot_size=1.5,
#                    output=str(OUT_PATH), name="ps_impute_clustering_zoom.pdf")

In [20]:
# Stack pseudo-spots and measured spots of Sample1 (pseudo-spots first);
# uns["spatial"] of the result comes from Sample1_ps.
all_adata = merge(Sample1_ps, Sample1)

In [21]:
# 50-component PCA on the merged spots + pseudo-spots object.
st.em.run_pca(all_adata,n_comps=50)

PCA is done! Generated in adata.obsm['X_pca'], adata.uns['pca'] and adata.varm['PCs']


In [5]:
# # K-means clustering on stSME normalised PCA
# st.tl.clustering.kmeans(all_adata,n_clusters=11, use_data="X_pca", 
#                         key_added="X_pca_kmeans")
# st.pl.cluster_plot(all_adata, use_label="X_pca_kmeans", spot_size=3)

In [6]:
# st.pp.neighbors(all_adata,n_neighbors=35,use_rep='X_pca')
# st.tl.clustering.louvain(all_adata, resolution=0.6)
# st.pl.cluster_plot(all_adata,use_label="louvain", spot_size=1.5,
#                    output=str(OUT_PATH), name="ps_all_clustering_zoom.pdf")

In [24]:
# Extract tile-based morphology features for the merged object; per the logged
# output the result is stored in adata.obsm['X_morphology'].
st.pp.extract_feature(all_adata)

Extract feature: 100%|██████████ [ time left: 00:00 ]  


The morphology feature is added to adata.obsm['X_morphology']!


In [7]:
#     st.tl.clustering.kmeans(all_adata,n_clusters=11, use_data="X_morphology", 
#                         key_added="X_tile_feature_kmeans")
#     st.pl.cluster_plot(all_adata, use_label="X_tile_feature_kmeans", spot_size=3)

In [8]:
# # for i in range(2,11):
# #     print(i)
#     # K-means clustering on stSME normalised PCA
# st.tl.clustering.kmeans(all_adata,n_clusters=11, use_data="X_morphology", 
#                         key_added="X_tile_feature_kmeans")
# st.pl.cluster_plot(all_adata, use_label="X_tile_feature_kmeans", spot_size=3)

In [9]:
# # for i in range(10,35,5):
# #     print(i)
# # st.pp.neighbors(all_adata,n_neighbors=35,use_rep='X_morphology')
# # st.tl.clustering.louvain(all_adata, resolution=0.7)
# st.pl.cluster_plot(all_adata,use_label="louvain", spot_size=3, output=str(OUT_PATH), name="ps_image_clustering.pdf")

In [10]:
# st.pp.neighbors(Sample1,n_neighbors=15,use_rep='X_morphology')
# st.tl.clustering.louvain(Sample1, resolution=0.8)
# st.pl.cluster_plot(Sample1,use_label="louvain", spot_size=6.5, 
#                    output=str(OUT_PATH), name="Sample1_image_clustering.pdf")

In [26]:
all_adata

AnnData object with n_obs × n_vars = 11157 × 22240
    obs: 'level_0', 'index', 'array_row', 'array_col', 'imagerow', 'imagecol', 'n_neighbour', 'tile_path', 'X_pca_kmeans', 'in_tissue', 'sum_counts', 'louvain', 'X_tile_feature_kmeans'
    uns: 'spatial', 'pca', 'X_pca_kmeans_colors', 'neighbors', 'louvain', 'louvain_colors', 'X_tile_feature_kmeans_colors'
    obsm: 'X_pca', 'X_tile_feature', 'X_morphology'
    varm: 'PCs'
    obsp: 'distances', 'connectivities'

In [35]:
# Scratch cell: prints the values produced by range(10, 35, 5).
for i in range(10,35,5):
    print(i)

10
15
20
25
30


In [43]:
# Persist the merged object; it is reloaded from this file by a later cell.
all_adata.write_h5ad(OUT_PATH/"all_adata.h5ad")

In [47]:
# Persist the stSME-normalised Sample1 object (the obsm deletions below are disabled).
# del Sample1_SME.obsm["filtered_counts"]
# del Sample1_SME.obsm["normalized_total"]
Sample1_SME.write_h5ad(OUT_PATH/"Sample1_SME.h5ad")

In [49]:
# Persist the Sample1 pseudo-spot object (the obsm deletions below are disabled).
# del Sample1_ps.obsm["filtered_counts"]
# del Sample1_ps.obsm["normalized_total"]
Sample1_ps.write_h5ad(OUT_PATH/"Sample1_ps.h5ad")

... storing 'index' as categorical


In [66]:
# Copy of Sample1 whose X is replaced by obsm["imputed_data"], saved to disk.
# NOTE(review): no visible cell creates obsm["imputed_data"]; the step that
# produced it must have been run on Sample1 beforehand.
Sample1_imputation = Sample1.copy()
Sample1_imputation.X = Sample1_imputation.obsm["imputed_data"]
# del Sample1_imputation.obsm["filtered_counts"]
# del Sample1_imputation.obsm["normalized_total"]
Sample1_imputation.write_h5ad(OUT_PATH/"Sample1_imputation.h5ad")

  if is_string_dtype(df[key]) and not is_categorical(df[key])
... storing 'feature_types' as categorical
... storing 'genome' as categorical


In [108]:
# Copy of Sample2 whose X is replaced by obsm["imputed_data"], saved to disk.
# NOTE(review): no visible cell creates obsm["imputed_data"]; the step that
# produced it must have been run on Sample2 beforehand.
Sample2_imputation = Sample2.copy()
Sample2_imputation.X = Sample2_imputation.obsm["imputed_data"]
# del Sample2_imputation.obsm["filtered_counts"]
# del Sample2_imputation.obsm["normalized_total"]
Sample2_imputation.write_h5ad(OUT_PATH/"Sample2_imputation.h5ad")

  if is_string_dtype(df[key]) and not is_categorical(df[key])
... storing 'feature_types' as categorical
... storing 'genome' as categorical


In [34]:
# Reload the previously saved objects from OUT_PATH, rebinding the in-memory
# names. The Sample2_SME and Sample2_ps loads are disabled.
Sample1_SME = anndata.read_h5ad(OUT_PATH/"Sample1_SME.h5ad")
Sample1_imputation = anndata.read_h5ad(OUT_PATH/"Sample1_imputation.h5ad")
Sample1_ps = anndata.read_h5ad(OUT_PATH/"Sample1_ps.h5ad")

# Sample2_SME = anndata.read_h5ad(OUT_PATH/"Sample2_SME.h5ad")
Sample2_imputation = anndata.read_h5ad(OUT_PATH/"Sample2_imputation.h5ad")
# Sample2_ps = anndata.read_h5ad(OUT_PATH/"Sample2_ps.h5ad")

all_adata = anndata.read_h5ad(OUT_PATH/"all_adata.h5ad")

In [12]:
import scanpy as sc

In [13]:
# Rank marker genes per 'louvain' group of Sample1_SME with the Wilcoxon method;
# the results are read back from uns['rank_genes_groups'] in a later cell.
sc.tl.rank_genes_groups(Sample1_SME, "louvain", method="wilcoxon")

In [14]:
Sample1_SME.obs["louvain"]

AAACAAGTATCTCCCA-1    0
AAACACCAATAACTGC-1    1
AAACAGAGCGACTCCT-1    4
AAACAGGGTCTATATT-1    0
AAACAGTGTTCCTGGG-1    1
                     ..
TTGTTGTGTGTCAAGA-1    9
TTGTTTCACATCCAGG-1    1
TTGTTTCATTAGTCTA-1    1
TTGTTTCCATACAACT-1    0
TTGTTTGTGTAAATTC-1    5
Name: louvain, Length: 3798, dtype: category
Categories (11, object): ['0', '1', '2', '3', ..., '7', '8', '9', '10']

In [15]:
Sample1_SME.obs["louvain"].cat.codes

AAACAAGTATCTCCCA-1    0
AAACACCAATAACTGC-1    1
AAACAGAGCGACTCCT-1    4
AAACAGGGTCTATATT-1    0
AAACAGTGTTCCTGGG-1    1
                     ..
TTGTTGTGTGTCAAGA-1    9
TTGTTTCACATCCAGG-1    1
TTGTTTCATTAGTCTA-1    1
TTGTTTCCATACAACT-1    0
TTGTTTGTGTAAATTC-1    5
Length: 3798, dtype: int8

In [16]:
# First five ranked gene names per group (one column per louvain group).
# NOTE: rebinds `df`, which earlier held the cluster table read from CSV.
df = pd.DataFrame(Sample1_SME.uns['rank_genes_groups']['names']).head(5)

In [17]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,C3,CXCL14,CRISP3,CPB1,MALAT1,SLC39A6,MGP,AC087379.2,RPL18A,LINC00645,MGP
1,TIMP1,CCND1,SLITRK6,IL6ST,C3,COX6C,DSP,PGM5-AS1,ADIRF,MUC5B,LINC00645
2,CYBA,GNG5,S100A13,FCGR3B,IGHG4,SNCG,SERPINA3,HEBP1,KRT19,PVALB,SLC39A6
3,CD74,TTLL12,PSMA6,CFB,RPL36,WFDC2,S100G,S100G,CRIP1,EXOC2,MRPS30
4,HLA-DRA,AGR2,C6orf141,LINC02224,TIMP1,FASN,TFF1,PCED1B,RPS15,SLC30A8,WFDC2


In [11]:
# for i in range(1):
#     gene = df.iloc[i,6]
#     print(gene)
#     st.pl.gene_plot(Sample1,genes=gene, spot_size=1.5,
#                    output=str(OUT_PATH), name="Sample1_gene_{}_6.png".format(gene),dpi=1200)
#     st.pl.gene_plot(Sample1_SME,genes=gene, spot_size=1.5,
#                    output=str(OUT_PATH), name="Sample1_SME_gene_{}_6.png".format(gene),dpi=1200)
#     st.pl.gene_plot(Sample1_ps,genes=gene, spot_size=1.5,
#                    output=str(OUT_PATH), name="Sample1_ps_gene_{}_6.png".format(gene),dpi=1200)
#     st.pl.gene_plot(all_adata,genes=gene, spot_size=1.5,
#                    output=str(OUT_PATH), name="Sample1_all_gene_{}_6.png".format(gene),dpi=1200)

In [12]:
# for i in range(5):
#     gene = df.iloc[i,0]
#     print(gene)
#     st.pl.gene_plot(Sample1,genes=gene, spot_size=1.5,
#                    output=str(OUT_PATH), name="Sample1_gene_{}_0.pdf".format(gene))
#     st.pl.gene_plot(Sample1_ps,genes=gene, spot_size=1.5,
#                    output=str(OUT_PATH), name="Sample1_ps_gene_{}_0.pdf".format(gene))

In [13]:
# for i in range(5):
#     gene = df.iloc[i,2]
#     print(gene)
#     st.pl.gene_plot(Sample1,genes=gene, spot_size=1.5,
#                    output=str(OUT_PATH), name="Sample1_gene_{}_2.pdf".format(gene))
#     st.pl.gene_plot(Sample1_ps,genes=gene, spot_size=1.5,
#                    output=str(OUT_PATH), name="Sample1_ps_gene_{}_2.pdf".format(gene))

In [None]:
# Plot the five genes in column 5 of the marker table `df` on the measured
# spots (Sample1) and on the pseudo-spots (Sample1_ps), one PDF per gene/object.
# `df` must be the top-5 marker table, not the cluster CSV read under the same name.
for rank in range(5):
    gene = df.iloc[rank,5]
    print(gene)
    for adata, name_template in ((Sample1, "Sample1_gene_{}_5.pdf"),
                                 (Sample1_ps, "Sample1_ps_gene_{}_5.pdf")):
        st.pl.gene_plot(adata,genes=gene, spot_size=1.5,
                        output=str(OUT_PATH), name=name_template.format(gene))

In [None]:
# Plot the five genes in column 7 of the marker table `df` on the measured
# spots (Sample1) and on the pseudo-spots (Sample1_ps), one PDF per gene/object.
# `df` must be the top-5 marker table, not the cluster CSV read under the same name.
for rank in range(5):
    gene = df.iloc[rank,7]
    print(gene)
    for adata, name_template in ((Sample1, "Sample1_gene_{}_7.pdf"),
                                 (Sample1_ps, "Sample1_ps_gene_{}_7.pdf")):
        st.pl.gene_plot(adata,genes=gene, spot_size=1.5,
                        output=str(OUT_PATH), name=name_template.format(gene))

In [None]:
# Plot the five genes in column 10 of the marker table `df` on the measured
# spots (Sample1) and on the pseudo-spots (Sample1_ps), one PDF per gene/object.
# `df` must be the top-5 marker table, not the cluster CSV read under the same name.
for rank in range(5):
    gene = df.iloc[rank,10]
    print(gene)
    for adata, name_template in ((Sample1, "Sample1_gene_{}_10.pdf"),
                                 (Sample1_ps, "Sample1_ps_gene_{}_10.pdf")):
        st.pl.gene_plot(adata,genes=gene, spot_size=1.5,
                        output=str(OUT_PATH), name=name_template.format(gene))

In [129]:
import matplotlib.pyplot as plt
from libpysal.weights.contiguity import Queen
from libpysal import examples
import numpy as np
import pandas as pd
import geopandas as gpd
import os
import splot
from splot.esda import moran_scatterplot, lisa_cluster
from esda.moran import Moran, Moran_Local
from esda.moran import Moran_BV, Moran_Local_BV
from splot.esda import plot_moran_bv_simulation, plot_moran_bv, plot_local_autocorrelation

In [19]:
from anndata import AnnData
from typing import Iterable, Union, Optional
import pandas as pd
def spatial_autocorr(adata_true: AnnData,
                     adata_pred: AnnData,
                     model_name: str,
                     p: float = 0.05,
                     save_plots: str = None,
) -> pd.DataFrame:
    """Per-gene bivariate spatial autocorrelation between predicted and observed expression.

    For every gene in ``adata_true.var_names`` the function computes
    ``Moran_BV(y, x, w)`` and ``Moran_Local_BV(y, x, w)``, where ``x`` is the
    gene's values in ``adata_true``, ``y`` its values in ``adata_pred`` and
    ``w`` the Queen contiguity weights built from the spot coordinates
    (``obs.imagecol`` / ``obs.imagerow``) of ``adata_true``.

    Parameters
    ----------
    adata_true
        Reference data. Needs ``obs['imagecol']`` and ``obs['imagerow']``; when
        plots are saved it also needs
        ``uns['spatial'][<first library id>]['images']['fulres']``.
    adata_pred
        Predicted/imputed data for the same spots; genes are looked up by name.
    model_name
        Used to build the ``obsm`` key ``"gpd_<model_name>"``.
    p
        Significance threshold applied to the local statistics, both for the
        ``n_sigs`` count and for the saved LISA cluster maps.
    save_plots
        Optional output directory (``str`` or ``pathlib.Path``). When given,
        one LISA cluster map per gene is saved as ``lisa_cluster_<gene>``.

    Returns
    -------
    pandas.DataFrame indexed by gene with columns

    * ``moran_i`` – the ``I`` attribute of the bivariate Moran statistic,
    * ``n_sigs``  – number of spots labelled ``HH`` or ``LL`` at threshold ``p``,
    * ``n_zero``  – fraction of spots whose value in ``adata_true`` equals 0.

    Side effects
    ------------
    Stores a GeoDataFrame of the spots in ``adata_true.obsm["gpd_<model_name>"]``.

    Notes
    -----
    NOTE(review): the local significance presumably comes from random
    permutations, so ``n_sigs`` may vary between runs unless the RNG is seeded
    — confirm against the esda documentation.
    """
    assert len(adata_true.var_names) == len(adata_pred.var_names)

    gpd_name = "gpd_{}".format(model_name)

    adata_true.obsm[gpd_name] = gpd.GeoDataFrame(adata_true.obs,
                                                 geometry=gpd.points_from_xy(
                                                          adata_true.obs.imagecol,
                                                          adata_true.obs.imagerow))
    spots_gdf = adata_true.obsm[gpd_name]
    w = Queen.from_dataframe(spots_gdf)

    # Accept both str and Path; a plain str would fail on the "/" join below.
    plot_dir = Path(save_plots) if save_plots else None
    tissue_image = None
    if plot_dir is not None:
        # The background image is only needed (and only looked up) when plotting.
        library_id = list(adata_true.uns["spatial"].keys())[0]
        tissue_image = adata_true.uns["spatial"][library_id]["images"]["fulres"]

    # Build the dense frames once instead of once per gene.
    true_df = adata_true.to_df()
    pred_df = adata_pred.to_df()

    moran_i = []
    n_sigs = []
    for gene in adata_true.var_names:
        x = np.array(true_df[gene].values, dtype='float')
        y = np.array(pred_df[gene].values, dtype='float')

        moran_bv = Moran_BV(y, x, w)
        moran_loc_bv = Moran_Local_BV(y, x, w)

        _, n_sig = hot_cold_label(moran_loc_bv, p)
        moran_i.append(moran_bv.I)
        n_sigs.append(n_sig)

        if plot_dir is not None:
            # Use the caller's threshold so the map matches the n_sigs count.
            fig, ax = lisa_cluster(moran_loc_bv, spots_gdf, p=p,
                                   figsize=(9, 9), markersize=12)
            ax.imshow(tissue_image)
            fig.savefig(plot_dir / "lisa_cluster_{}".format(gene))
            # One figure per gene: close it to keep memory bounded.
            plt.close(fig)

    n_zero = (true_df == 0).sum(axis=0) / len(adata_true.obs_names)
    df = pd.DataFrame({
        "moran_i": moran_i,
        "n_sigs": n_sigs,
        "n_zero": n_zero
    }, index=adata_true.var_names)
    return df


def hot_cold_label(moran_loc, p):
    """Label every observation by its significant local-Moran quadrant.

    Parameters
    ----------
    moran_loc
        Local Moran result passed on to ``moran_hot_cold_spots``.
    p
        Significance threshold passed on to ``moran_hot_cold_spots``.

    Returns
    -------
    labels : list of str
        One of ``'ns'``, ``'HH'``, ``'LH'``, ``'LL'``, ``'HL'`` per observation
        (codes 0-4 in that order).
    n_sig : int
        Number of observations labelled ``'HH'`` or ``'LL'``.
    """
    code_to_label = ('ns', 'HH', 'LH', 'LL', 'HL')
    labels = []
    n_sig = 0
    for code in moran_hot_cold_spots(moran_loc, p):
        label = code_to_label[code]
        labels.append(label)
        if label in ('HH', 'LL'):
            n_sig += 1
    return labels, n_sig

In [125]:
from matplotlib import patches, colors
def lisa_cluster(moran_loc, gdf, p=0.05, ax=None,
                 legend=True, legend_kwds=None, **kwargs):
    """
    Draw a LISA cluster map: every geometry of `gdf` coloured by its
    significant local-Moran quadrant label.

    Parameters
    ----------
    moran_loc : local Moran result
        Passed to `mask_local_auto` to obtain the per-observation labels and
        the colour list.
    gdf : geopandas dataframe instance
        Geometries to plot, one row per observation. The frame itself is not
        changed; a copy with an extra `cl` label column is plotted.
    p : float, optional
        The p-value threshold for significance. Default = 0.05
    ax : matplotlib Axes instance, optional
        Axes to draw into. When None a new figure/axes pair is created and a
        `figsize` entry in `kwargs`, if any, is used for it.
        Default = None
    legend : boolean, optional
        Whether the plot shows a legend. Default = True
    legend_kwds : dict, optional
        Legend formatting options forwarded to the plot call. Example:
        ``legend_kwds={'loc': 'upper left', 'bbox_to_anchor': (0.92, 1.05)}``
        Default = None
    **kwargs : keyword arguments, optional
        Forwarded to the ``.plot()`` call on the labelled frame.

    Returns
    -------
    fig : matplotlib Figure instance
        Figure holding the cluster map
    ax : matplotlib Axes instance
        Axes in which the map is plotted

    Examples
    --------
    >>> fig, ax = lisa_cluster(moran_loc, gdf, p=0.05, figsize=(9, 9))
    >>> plt.show()

    """
    # mask_local_auto returns (cluster_labels, colors5, colors, labels);
    # only the colour list and the per-observation labels are needed here.
    mask = mask_local_auto(moran_loc, p=p)
    palette, spot_labels = mask[1], mask[3]
    cluster_cmap = colors.ListedColormap(palette)

    if ax is not None:
        fig = ax.get_figure()
    else:
        # figsize is consumed here only when this function creates the figure.
        fig, ax = plt.subplots(1, figsize=kwargs.pop('figsize', None))

    labelled = gdf.assign(cl=spot_labels)
    labelled.plot(column='cl', categorical=True,
                  k=2, cmap=cluster_cmap, linewidth=0.1, ax=ax,
                  edgecolor='white', legend=legend,
                  legend_kwds=legend_kwds, **kwargs)
    ax.set_axis_off()
    ax.set_aspect('equal')
    return fig, ax


def mask_local_auto(moran_loc, p=0.5):
    '''
    Build labels and colours for a local spatial autocorrelation result.

    Parameters
    ----------
    moran_loc : local Moran result
        Passed to `moran_hot_cold_spots` to obtain one code (0-4) per
        observation.
    p : float
        The p-value threshold for significance. Note the default here is 0.5.

    Returns
    -------
    cluster_labels : list of str
        The five label names in sorted order -
        ['HH', 'HL', 'LH', 'LL', 'ns']
    colors5 : list of str
        One RGBA hex colour per label that actually occurs, ordered like the
        sorted unique labels.
    colors : list of str
        RGBA hex colour of every observation.
    labels : list of str
        Label of every observation.
    '''
    codes = moran_hot_cold_spots(moran_loc, p)

    # Position in these two lists is the numeric cluster code.
    names_by_code = ['ns', 'HH', 'LH', 'LL', 'HL']
    rgba_by_code = ['#ffffff00', '#d7191cff', '#abd9e9ff',
                    '#2c7bb6ff', '#fdae61ff']
    rgba_by_name = dict(zip(names_by_code, rgba_by_code))

    labels = [names_by_code[code] for code in codes]
    colors = [rgba_by_code[code] for code in codes]  # for Bokeh

    # for MPL: only the labels that are present, in sorted order, so the
    # colour list lines up with the categories matplotlib will draw.
    present = np.unique(np.array(labels))
    colors5 = [rgba_by_name[name] for name in present]

    # MPL sorts the labels while Bokeh does not, hence the sorted names.
    cluster_labels = sorted(names_by_code)
    return cluster_labels, colors5, colors, labels


def moran_hot_cold_spots(moran_loc, p=0.05):
    """Return one integer code per observation: its quadrant if significant, else 0.

    An observation is significant when ``moran_loc.p_sim < p``. Significant
    observations keep their quadrant ``moran_loc.q`` when it is 1, 2, 3 or 4;
    every other observation gets 0.
    """
    significant_q = (moran_loc.p_sim < p) * moran_loc.q
    cluster = 0
    for code in (1, 3, 2, 4):
        cluster = cluster + code * (significant_q == code)
    return cluster

In [21]:
from scipy import stats

def plot_correlation(df, attr_1, attr_2):
    """Regression plot of two columns of `df`, annotated with their squared Pearson r.

    Parameters
    ----------
    df : table holding both columns
    attr_1 : column plotted on the x axis
    attr_2 : column plotted on the y axis

    Returns
    -------
    The object returned by ``sns.lmplot``. Requires ``sns`` (seaborn) to be
    available in the notebook namespace when called.
    """
    xs = df[attr_1]
    ys = df[attr_2]
    pearson_r, _ = stats.pearsonr(xs, ys)
    r_squared = pearson_r ** 2

    g = sns.lmplot(data=df, x=attr_1, y=attr_2, height=5, legend=True)
    g.set_axis_labels(attr_1, attr_2)

    # Place the annotation at 90% of each column's maximum.
    anchor = (max(xs)*0.9, max(ys)*0.9)
    plt.annotate(r'$R^2:{0:.2f}$'.format(r_squared), anchor)
    return g

In [22]:
import scanpy as sc

In [26]:
Sample1_imputation

AnnData object with n_obs × n_vars = 3798 × 22240
    obs: 'in_tissue', 'array_row', 'array_col', 'sum_counts', 'imagecol', 'imagerow', 'tile_path'
    var: 'gene_ids', 'feature_types', 'genome', 'n_cells'
    uns: 'gene_expression_correlation', 'morphological_distance', 'pca', 'physical_distance', 'spatial', 'weights_matrix_all', 'weights_matrix_gd_md', 'weights_matrix_pd_gd', 'weights_matrix_pd_md'
    obsm: 'X_morphology', 'X_pca', 'X_tile_feature', 'imputed_data', 'raw_SME_normalized', 'spatial', 'top_weights'
    varm: 'PCs'

In [27]:
Sample1

AnnData object with n_obs × n_vars = 3798 × 36601
    obs: 'in_tissue', 'array_row', 'array_col', 'imagecol', 'imagerow'
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'spatial'
    obsm: 'spatial'

In [107]:
# Single-gene walk-through of the steps performed inside spatial_autocorr,
# run at top level for the gene "COX6C" on copies of Sample1 (observed) and
# Sample1_imputation (predicted). Leaves moran_bv, moran_loc_bv, labels and
# n_sig in the notebook namespace.
gene = "COX6C"
model_name = "stSME"
adata_true = Sample1.copy()
adata_pred = Sample1_imputation.copy()
p = 0.05

gpd_name = "gpd_{}".format(model_name)
library_id = list(adata_true.uns["spatial"].keys())[0]
tissue_image = adata_true.uns["spatial"][library_id]["images"]["fulres"]

# Spot coordinates as point geometries, stored next to the data in obsm.
adata_true.obsm[gpd_name] = gpd.GeoDataFrame(adata_true.obs,
                                                 geometry=gpd.points_from_xy(
                                                          adata_true.obs.imagecol, 
                                                          adata_true.obs.imagerow))
# Queen contiguity weights built from the spot geometries.
w = Queen.from_dataframe(adata_true.obsm[gpd_name])
# x = observed values, y = predicted values of the chosen gene.
x = np.array(adata_true.to_df()[gene].values, dtype='float')
y = np.array(adata_pred.to_df()[gene].values, dtype='float')

adata_true.obsm[gpd_name]["gc_{}".format(gene)] = x
adata_true.obsm[gpd_name]["pred_{}".format(gene)] = y

#         moran = Moran(x,w)
moran_bv = Moran_BV(y, x, w)
#         moran_loc = Moran_Local(x, w)
moran_loc_bv = Moran_Local_BV(y, x, w)

# Per-spot quadrant labels and the count of 'HH' + 'LL' spots at threshold p.
labels, n_sig = hot_cold_label(moran_loc_bv, p)

In [14]:
# lisa_cluster(moran_loc_bv, adata_true.obsm[gpd_name], figsize = (9,9), markersize=12, **{"alpha":0.8})
# plt.imshow(adata_true.uns["spatial"]["block1"]["images"]["fulres"])
# plt.show()

In [35]:
# Restrict both samples and their imputed counterparts to the genes present in
# both Sample1 and Sample2, so the same gene set can be compared across samples.
# NOTE: "commom" is a misspelling of "common"; later cells use these exact names.
common_genes = Sample1.var_names.intersection(Sample2.var_names)
Sample1_commom = Sample1[:,common_genes].copy()
Sample2_commom = Sample2[:,common_genes].copy()
Sample1_imputation_commom = Sample1_imputation[:,common_genes].copy()
Sample2_imputation_commom = Sample2_imputation[:,common_genes].copy()

In [36]:
# Flag the top 2000 highly variable genes of Sample1 (restricted to the common
# genes); the resulting var.highly_variable mask is reused for both samples below.
sc.pp.highly_variable_genes(Sample1_commom, n_top_genes=2000)

In [122]:
# Spatial autocorrelation of observed vs. imputed expression for Sample1 on the
# genes flagged highly variable in Sample1_commom.
df_1_ = spatial_autocorr(Sample1_commom[:,Sample1_commom.var.highly_variable], 
                      Sample1_imputation_commom[:,Sample1_commom.var.highly_variable], "stSME")

  if not is_categorical(df_full[k]):


In [123]:
# Same analysis for Sample2. The gene mask comes from Sample1_commom, so both
# samples are evaluated on the same gene set (all four objects were subset with
# the same common_genes index).
df_2_ = spatial_autocorr(Sample2_commom[:,Sample1_commom.var.highly_variable], 
                      Sample2_imputation_commom[:,Sample1_commom.var.highly_variable], "stSME")

  if not is_categorical(df_full[k]):


In [125]:
# Save the autocorrelation tables.
# NOTE(review): df_1 and df_2 (the "250HVG" results) are not created by any
# visible cell before this point; on a fresh kernel the first two lines raise
# NameError unless those tables are computed or loaded first.
df_1.to_csv(OUT_PATH / "250HVG_autocorr_S1.csv")
df_2.to_csv(OUT_PATH / "250HVG_autocorr_S2.csv")
df_1_.to_csv(OUT_PATH / "2000HVG_autocorr_S1.csv")
df_2_.to_csv(OUT_PATH / "2000HVG_autocorr_S2.csv")

In [40]:
# Reload the saved autocorrelation tables (gene names as index), rebinding the
# in-memory names.
df_1 = pd.read_csv(OUT_PATH / "250HVG_autocorr_S1.csv", index_col=0)
df_2 = pd.read_csv(OUT_PATH / "250HVG_autocorr_S2.csv", index_col=0)
df_1_ = pd.read_csv(OUT_PATH / "2000HVG_autocorr_S1.csv", index_col=0)
df_2_ = pd.read_csv(OUT_PATH / "2000HVG_autocorr_S2.csv", index_col=0)

In [41]:
# Tag each 2000-HVG table with its sample and gene name (adds columns to
# df_1_ / df_2_ in place), then stack them into one long table with a fresh
# integer index.
df_1_["Sample"] = "Sample1"
df_2_["Sample"] = "Sample2"
df_1_["Genes"] = df_1_.index
df_2_["Genes"] = df_2_.index
df_final = pd.concat([df_1_, df_2_], axis=0,ignore_index=True)

In [89]:
# Bin genes by their fraction of zero-valued spots (n_zero) into ten 10%-wide
# bins labelled by their upper edge ("<10%", ..., "<100%").
bins = np.arange(0.0, 1.1, 0.1)
labels = ["<"+str(i)+"%" for i in np.arange(10, 110, 10)]
# include_lowest=True keeps genes with n_zero == 0 in the first bin; with the
# default (left-open first interval) they would be assigned NaN.
df_final["bin"] = pd.cut(df_final["n_zero"], bins=bins, labels=labels,
                         include_lowest=True)

In [90]:
labels

['<10%',
 '<20%',
 '<30%',
 '<40%',
 '<50%',
 '<60%',
 '<70%',
 '<80%',
 '<90%',
 '<100%']

In [91]:
df_final

Unnamed: 0,moran_i,n_sigs,n_zero,Sample,Genes,bin
0,0.240875,575,0.143497,Sample1,MXRA8,<20%
1,0.105307,91,0.899421,Sample1,MMP23B,<90%
2,0.036441,3769,0.998947,Sample1,AL391845.1,<100%
3,0.050185,3656,0.994471,Sample1,PRKCZ-AS1,<100%
4,0.092601,84,0.886519,Sample1,TNFRSF14-AS1,<90%
...,...,...,...,...,...,...
3995,0.404680,963,0.108603,Sample2,FLNA,<20%
3996,0.746509,1886,0.000251,Sample2,MT-ND2,<10%
3997,0.415319,991,0.489842,Sample2,MT-ATP8,<50%
3998,0.400934,1018,0.197893,Sample2,MT-ND4L,<20%


In [92]:
import seaborn as sns
import PIL
PIL.Image.MAX_IMAGE_PIXELS = None

In [15]:
# f, ax = plt.subplots(figsize=(6, 5))
# g = sns.stripplot(x="Sample", y="moran_i", s=2, hue="bin", palette="Reds_r",
#                   data=df_final)
# # sns.boxplot(x="Sample", y="moran_i",
# #                   data=df_final)
# g.legend(loc='center left', bbox_to_anchor=(1, 0.7), ncol=1,
#          title='Percentage of "0" Spots')
# plt.title('Spatial Autocorrelation')
# # Set x-axis label
# plt.ylabel('Moran\'s I')
# # Set y-axis label
# plt.xlabel('Sample')
# # plt.plot([0, 0], [1, 1], linewidth=2)
# plt.tight_layout()
# plt.savefig(OUT_PATH / "plot_2000HVG_spaAuto.pdf", dpi=300)
# # plt.show()

In [16]:
# f, ax = plt.subplots(figsize=(6, 5))
# g = sns.scatterplot(x=df_1_["moran_i"], y=df_2_["moran_i"], s=10, hue = df_1_["n_zero"],palette="Reds_r",)
# g.legend(loc='center left', bbox_to_anchor=(1, 0.85), ncol=1,
#          title='Percentage of "0" Spots')
# plt.title('Spatial Autocorrelation')
# # Set x-axis label
# plt.xlabel('Moran Index of Sample 1')
# # Set y-axis label
# plt.ylabel('Moran Index of Sample 2')
# plt.plot([0, 0], [1, 1], linewidth=2)
# plt.tight_layout()
# # plt.savefig(OUT_PATH / "plot_2000HVG_spaAuto.pdf", dpi=300)
# plt.show()

In [17]:
# f, ax = plt.subplots(figsize=(6, 6))
# sns.scatterplot(x=df_1_["n_sigs"]/max(df_1_["n_sigs"]), y=df_2_["n_sigs"]/max(df_2_["n_sigs"]),
#                 s=10, hue = 1-df_1_["n_zero"],palette="Reds",)
# #                 color=".15")
# # sns.histplot(x=df_1_["n_zero"], y=df_2_["n_zero"], bins=50, pthresh=.1, cmap="mako")
# # sns.kdeplot(x=x, y=y, levels=5, color="w", linewidths=1)
# plt.show()