In [None]:
import sys
import stlearn as st
st.settings.set_figure_params(dpi=300)
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import sys
file = Path("../stimage").resolve()
parent= file.parent
sys.path.append(str(parent))
from PIL import Image
from stimage._utils import gene_plot, Read10X, ReadOldST, tiling
from stimage._model import CNN_NB_multiple_genes
from stimage._data_generator import DataGenerator
import tensorflow as tf
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
import numpy as np

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Input, Dropout, Lambda, LayerNormalization
from tensorflow.keras.models import Model


class PrinterCallback(tf.keras.callbacks.Callback):
    """Keras callback that prints a banner when an epoch starts and the
    training / validation loss when it ends.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Print the losses recorded for ``epoch``.

        ``logs`` can be ``None`` and only carries ``'val_loss'`` when
        validation data was supplied, so missing entries are printed as
        ``None`` instead of raising ``KeyError`` / ``TypeError``.
        """
        logs = logs or {}
        print('EPOCH: {}, Train Loss: {}, Val Loss: {}'.format(epoch,
                                                               logs.get('loss'),
                                                               logs.get('val_loss')))

    def on_epoch_begin(self, epoch, logs=None):
        """Print a separator line followed by the index of the starting epoch."""
        print('-' * 50)
        print('STARTING EPOCH: {}'.format(epoch))


def negative_binomial_layer(x):
    """
    Map the raw output of a ``Dense(2)`` layer onto negative binomial
    parameters ``n`` and ``p``.

    The last axis of ``x`` is split in two: the first half goes through a
    softplus (so ``n`` is positive), the second through a sigmoid (so ``p``
    lies between 0 and 1).  Both are concatenated again on the last axis.

    Usage
    -----
    outputs = Dense(2)(final_layer)
    distribution_outputs = Lambda(negative_binomial_layer)(outputs)

    Parameters
    ----------
    x : tf.Tensor
        Output tensor of the Dense layer; its last axis must have size 2.

    Returns
    -------
    tf.Tensor
        Tensor of the same shape as ``x`` holding ``(n, p)`` on the last axis.
    """
    # Index of the last axis, used as the concatenation axis.
    last_axis = len(x.get_shape()) - 1

    raw_n, raw_p = tf.unstack(x, num=2, axis=-1)

    # Restore the axis removed by unstack, then constrain each parameter.
    n = tf.keras.activations.softplus(tf.expand_dims(raw_n, -1))
    p = tf.keras.activations.sigmoid(tf.expand_dims(raw_p, -1))

    return tf.concat((n, p), axis=last_axis)


def negative_binomial_loss(y_true, y_pred):
    """
    Negative binomial loss function.
    Assumes tensorflow backend.

    Parameters
    ----------
    y_true : tf.Tensor
        Ground truth values of predicted variable.
    y_pred : tf.Tensor
        n and p values of predicted distribution, stacked on the last axis.

    Returns
    -------
    nll : tf.Tensor
        Negative log likelihood of ``y_true`` under NB(n, p).
    """
    # Targets may arrive with a different dtype than the predictions.
    y_true = tf.cast(y_true, y_pred.dtype)

    # Separate the parameters
    n, p = tf.unstack(y_pred, num=2, axis=-1)

    # Add one dimension to make the right shape
    n = tf.expand_dims(n, -1)
    p = tf.expand_dims(p, -1)

    # Keep the parameters strictly inside their domain: a saturated
    # softplus/sigmoid would otherwise give lgamma(0), log(0) and a NaN loss.
    eps = tf.keras.backend.epsilon()
    n = tf.maximum(n, eps)
    p = tf.clip_by_value(p, eps, 1.0 - eps)

    # Calculate the negative log likelihood
    nll = (
            tf.math.lgamma(n)
            + tf.math.lgamma(y_true + 1)
            - tf.math.lgamma(n + y_true)
            - n * tf.math.log(p)
            - y_true * tf.math.log(1 - p)
    )

    return nll



def CNN_NB_multiple_genes_feature_z(tile_shape, n_genes):
    """Build and compile a frozen-ResNet50 model with one negative binomial
    head per gene, applying LayerNormalization to the pooled features.

    Parameters
    ----------
    tile_shape : tuple
        Shape of one input tile, passed to ``Input(shape=...)``.
    n_genes : int
        Number of output heads; head ``i`` is named ``"gene_i"``.

    Returns
    -------
    Model
        Keras model compiled with ``negative_binomial_loss`` and Adam(1e-4).
    """
    image_in = Input(shape=tile_shape, name="tile_input")
    backbone = ResNet50(input_tensor=image_in, weights='imagenet', include_top=False)

    # The ImageNet backbone is used as a fixed feature extractor.
    for layer in backbone.layers:
        layer.trainable = False

    pooled = GlobalAveragePooling2D()(backbone.output)
    features = LayerNormalization()(pooled)

    # One Dense(2) + (n, p) activation head per gene.
    gene_outputs = []
    for gene_idx in range(n_genes):
        raw_params = Dense(2)(features)
        nb_head = Lambda(negative_binomial_layer, name="gene_{}".format(gene_idx))
        gene_outputs.append(nb_head(raw_params))

    model = Model(inputs=image_in, outputs=gene_outputs)
    model.compile(loss=negative_binomial_loss,
                  optimizer=tf.keras.optimizers.Adam(0.0001))
    return model


def CNN_NB_multiple_genes_feature_minmax(tile_shape, n_genes):
    """Build and compile a frozen-ResNet50 model with one negative binomial
    head per gene, min-max scaling the pooled features of each tile to [0, 1].

    Parameters
    ----------
    tile_shape : tuple
        Shape of one input tile, passed to ``Input(shape=...)``.
    n_genes : int
        Number of output heads; head ``i`` is named ``"gene_i"``.

    Returns
    -------
    Model
        Keras model compiled with ``negative_binomial_loss`` and Adam(1e-4).
    """
    tile_input = Input(shape=tile_shape, name="tile_input")
    resnet_base = ResNet50(input_tensor=tile_input, weights='imagenet', include_top=False)

    # Freeze the whole backbone; only the heads are trained.
    for layer in resnet_base.layers:
        layer.trainable = False
    cnn = resnet_base.output
    cnn = GlobalAveragePooling2D()(cnn)

    def _minmax_per_tile(x):
        # Reduce over the feature axis only.  Reducing over the whole tensor
        # (as before) made a tile's features depend on the other tiles in the
        # batch and on the batch size, which differs between fit and predict.
        lo = tf.math.reduce_min(x, axis=-1, keepdims=True)
        hi = tf.math.reduce_max(x, axis=-1, keepdims=True)
        # divide_no_nan yields 0 instead of NaN for a constant feature vector.
        return tf.math.divide_no_nan(x - lo, hi - lo)

    cnn = Lambda(_minmax_per_tile)(cnn)
    output_layers = []
    for i in range(n_genes):
        output = Dense(2)(cnn)
        output_layers.append(Lambda(negative_binomial_layer, name="gene_{}".format(i))(output))

    model = Model(inputs=tile_input, outputs=output_layers)
    optimizer = tf.keras.optimizers.Adam(0.0001)
    model.compile(loss=negative_binomial_loss,
                  optimizer=optimizer)
    return model


def CNN_NB_multiple_genes_feature_global(n_genes, n_features=2048):
    """Build and compile a model that maps a pre-extracted feature vector
    straight to one negative binomial head per gene.

    Parameters
    ----------
    n_genes : int
        Number of output heads; head ``i`` is named ``"gene_i"``.
    n_features : int, optional
        Length of the input feature vector.  Defaults to 2048, the width that
        was previously hard-coded.

    Returns
    -------
    Model
        Keras model compiled with ``negative_binomial_loss`` and Adam(1e-4).
    """
    inputs = Input(shape=(n_features,))
    output_layers = []
    for i in range(n_genes):
        output = Dense(2)(inputs)
        output_layers.append(Lambda(negative_binomial_layer, name="gene_{}".format(i))(output))

    model = Model(inputs=inputs, outputs=output_layers)
    optimizer = tf.keras.optimizers.Adam(0.0001)
    model.compile(loss=negative_binomial_loss,
                  optimizer=optimizer)
    return model

In [None]:
BASE_PATH = Path("/clusterdata/uqxtan9/Xiao/STimage/dataset/breast_cancer_10x_visium")
TILE_PATH = Path("/tmp") / "tiles"
TILE_PATH.mkdir(parents=True, exist_ok=True)


def _load_block(sample, count_file, image_file):
    """Read one sample folder under BASE_PATH and attach its TIFF image.

    Returns the object produced by ``st.Read10X`` together with the image
    array that was stored under the "fulres" key of its spatial metadata.
    """
    sample_dir = BASE_PATH / sample
    adata = st.Read10X(sample_dir,
                       library_id=sample,
                       count_file=count_file,
                       quality="fulres")
    full_image = plt.imread(sample_dir / image_file, 0)
    adata.uns["spatial"][sample]['images']["fulres"] = full_image
    return adata, full_image


SAMPLE = "block1"
Sample1, img = _load_block(
    SAMPLE,
    "V1_Breast_Cancer_Block_A_Section_1_filtered_feature_bc_matrix.h5",
    "V1_Breast_Cancer_Block_A_Section_1_image.tif",
)

SAMPLE = "block2"
Sample2, img = _load_block(
    SAMPLE,
    "V1_Breast_Cancer_Block_A_Section_2_filtered_feature_bc_matrix.h5",
    "V1_Breast_Cancer_Block_A_Section_2_image.tif",
)

In [None]:
# gene_list = ["ACTA2", "CNN1", "COL1A1", "MYLK", "MME", "MYH11", "KRT5", "ITGB6"]

In [None]:
# gene_plot(Sample1, genes=gene_list, spot_size=8)

In [None]:
# gene_plot(Sample2, genes=gene_list, spot_size=8)

In [None]:
from anndata import AnnData
from typing import Iterable, Union, Optional
import pandas as pd
def enrich_group(adata: AnnData,
                 gene_list: Iterable,
                 enrich_name: Union[pd.Index, list],
                 ) -> Optional[AnnData]:
    """Collapse a set of genes into a single summed feature.

    Parameters
    ----------
    adata
        Source object; it is not modified (a subset copy is taken).
    gene_list
        Genes to sum.  Names absent from ``adata.var_names`` are ignored.
    enrich_name
        Variable name(s) assigned to the result; since the result has exactly
        one column this should contain a single name.

    Returns
    -------
    AnnData
        New object with one variable per observation holding the row-wise sum
        over the selected genes, carrying over ``obs``, ``uns`` and ``obsm``.
    """
    # list() so that any iterable (including a generator) works with isin.
    adata_ = adata[:, adata.var_names.isin(list(gene_list))].copy()
    # A sparse X sums to an (n, 1) np.matrix, a dense X to a 1-D array;
    # normalise both to a plain (n_obs, 1) ndarray.
    summed = np.asarray(adata_.X.sum(axis=1)).reshape(-1, 1)
    adata_enrich = AnnData(X=summed,
                           obs=adata_.obs,
                           uns=adata_.uns,
                           obsm=adata_.obsm)
    adata_enrich.var_names = enrich_name
    return adata_enrich

In [None]:
# gene_list=["SLITRK6", "PGM5", "LINC00645", 
#            "TTLL12", "COX6C", "CPB1",
#            "KRT5", "MALAT1"]
# gene_list

In [None]:
# Gene filtering, log1p transform and tiling, applied in place to both sections.
for adata in (Sample1, Sample2):
    st.pp.filter_genes(adata, min_cells=3)
    st.pp.log1p(adata)

    # pre-processing for spot image: tiles go to TILE_PATH/<first key of uns["spatial"]>
    library_id = list(adata.uns["spatial"].keys())[0]
    TILE_PATH_ = TILE_PATH / library_id
    TILE_PATH_.mkdir(parents=True, exist_ok=True)
    tiling(adata, TILE_PATH_, crop_size=299)

In [None]:
# Genes whose name contains "KRT" in each section.  The names are read
# straight from var_names (same regex-search semantics as
# DataFrame.filter(regex=...)) instead of densifying the whole expression
# matrix with to_df() just to look at its column labels.
gene_list_1 = Sample1.var_names[Sample1.var_names.str.contains("KRT.*")]
gene_list_2 = Sample2.var_names[Sample2.var_names.str.contains("KRT.*")]
# Keep only the genes present in both sections.
gene_list_share = gene_list_1.intersection(gene_list_2)
len(gene_list_share)

In [None]:
# Name of the single feature used from here on; passed to enrich_group as enrich_name below.
gene_list=pd.Index(["KRT_enrich"])

In [None]:
# Replace each section by its one-feature version: the sum over the shared KRT* genes.
# NOTE(review): Sample1 / Sample2 are overwritten, so this cell is not idempotent —
# re-running it without reloading the data would select from the already collapsed object.
Sample1 = enrich_group(Sample1, 
                       gene_list_share, 
                       gene_list)
Sample2 = enrich_group(Sample2, 
                       gene_list_share, 
                       gene_list)

In [None]:
import matplotlib.pyplot as plt
from libpysal.weights.contiguity import Queen
from libpysal import examples
import numpy as np
import pandas as pd
import geopandas as gpd
import os
import splot
from splot.esda import moran_scatterplot, lisa_cluster
from esda.moran import Moran, Moran_Local
from esda.moran import Moran_BV, Moran_Local_BV
from splot.esda import plot_moran_bv_simulation, plot_moran_bv, plot_local_autocorrelation

In [None]:
# One point geometry per spot, built from the imagecol / imagerow columns of obs.
spot_points = gpd.points_from_xy(Sample1.obs.imagecol, Sample1.obs.imagerow)
Sample1.obsm["gpd"] = gpd.GeoDataFrame(Sample1.obs, geometry=spot_points)

In [None]:
# Preview the first rows only instead of dumping the whole table into the notebook.
Sample1.obsm["gpd"].head()

In [None]:
# x and y are both the expression values of gene_list for Sample1 (the same
# variable is used on both sides of the bivariate statistics below).
x = Sample1.to_df()[gene_list].values
y = Sample1.to_df()[gene_list].values
# Spatial weights built from the spot geometries.
# NOTE(review): Queen contiguity is defined for polygons; the geometries here are
# points — confirm the resulting weights are what is intended.
w = Queen.from_dataframe(Sample1.obsm["gpd"])

In [None]:
# Store the expression values next to the spot geometries.  The column is named
# after the gene ("gc_KRT_enrich"); gene_list.names is the Index's level names
# ([None]) and produced the meaningless label "gc_[None]".
# x has shape (n_spots, 1), so flatten it to a plain column.
Sample1.obsm["gpd"]["gc_{}".format(gene_list[0])] = np.ravel(x)
tissue_image = Sample1.uns["spatial"]["block1"]["images"]["fulres"]

In [None]:
# Global and local Moran statistics, univariate (y) and bivariate (y against x).
moran = Moran(y,w)
moran_bv = Moran_BV(y, x, w)
# NOTE(review): permutations=0 presumably disables the permutation-based
# pseudo p-values for the local statistics — confirm before relying on p_sim.
moran_loc = Moran_Local(y, w, permutations=0)
moran_loc_bv = Moran_Local_BV(y, x, w, permutations=0)

In [None]:
# Moran scatter plot of the local statistic.
fig, ax = moran_scatterplot(moran_loc, aspect_equal=True)
plt.show()

In [None]:
# LISA cluster map of the spots (p=0.05), followed by the full-resolution
# tissue image drawn on the current axes.
lisa_cluster(
    moran_loc,
    Sample1.obsm["gpd"],
    p=0.05,
    figsize=(9, 9),
    markersize=12,
    alpha=1,
)
plt.imshow(Sample1.uns["spatial"]["block1"]["images"]["fulres"])
plt.show()

In [None]:
moran_loc.p_sim =mask

In [None]:
p_sim = moran_loc.y

In [None]:
mask = (p_sim>=20) & (p_sim<=32) & (lag >=20) & (lag<=32)

In [None]:
mask *1

In [None]:
# Work on a copy so the thresholding in the next cell leaves Sample1 untouched.
Sample1_ = Sample1.copy()

In [None]:
# Binarise the copy: 1 where the value exceeds 18, otherwise 0.
# NOTE(review): 18 is an unexplained threshold — consider naming it.
Sample1_.X = (Sample1_.X >18)*1

In [None]:
# Spatial plot of gene_list for Sample1 (the unthresholded object, not Sample1_).
gene_plot(Sample1, genes=gene_list, spot_size=8)

In [None]:
from libpysal.weights.spatial_lag import lag_spatial
# Spatial lag of the observed values under the weights used for moran_loc.
lag = lag_spatial(moran_loc.w, moran_loc.y)
# Degree-1 fit of lag against the values: b is the slope, a the intercept.
b, a = np.polyfit(moran_loc.y, lag, 1)

In [None]:
# Values against their spatial lag, with the fitted line a + b*y on top.
plt.scatter(moran_loc.y, lag)
plt.plot(moran_loc.y, a + b*moran_loc.y)
plt.show()

In [None]:
# Display the intercept of the fitted line.
a

In [None]:
n_genes = len(gene_list)

# 70 % of the Sample1 spots (fixed seed) are used for training and the
# remaining spots for validation; Sample2 is the held-out test section.
training_index = Sample1.obs.sample(frac=0.7, random_state=1).index
training_dataset = Sample1[training_index,].copy()

# valid_index is True for the *training* spots, hence the negation below.
valid_index = Sample1.obs.index.isin(training_index)
valid_dataset = Sample1[~valid_index,].copy()

test_dataset = Sample2.copy()

train_gen = tf.data.Dataset.from_generator(
            lambda:DataGenerator(adata=training_dataset, 
                          genes=gene_list, aug=False),
            output_types=(tf.float32, tuple([tf.float32]*n_genes)), 
            output_shapes=([299,299,3], tuple([1]*n_genes))
)
# cache() comes first so the generator output is stored once; placed after
# shuffle/repeat (as before) it froze the shuffle order of the first pass and
# kept three copies of the data in memory.
train_gen_ = train_gen.cache().shuffle(buffer_size=500).batch(128).repeat(3).prefetch(tf.data.experimental.AUTOTUNE)
valid_gen = tf.data.Dataset.from_generator(
            lambda:DataGenerator(adata=valid_dataset, 
                          genes=gene_list), 
            output_types=(tf.float32, tuple([tf.float32]*n_genes)), 
            output_shapes=([299,299,3], tuple([1]*n_genes))
)
valid_gen_ = valid_gen.cache().shuffle(buffer_size=500).batch(128).repeat(3).prefetch(tf.data.experimental.AUTOTUNE)
test_gen = tf.data.Dataset.from_generator(
            lambda:DataGenerator(adata=test_dataset, 
                          genes=gene_list), 
            output_types=(tf.float32, tuple([tf.float32]*n_genes)), 
            output_shapes=([299,299,3], tuple([1]*n_genes))
)
# The test tiles are fed one at a time, unshuffled, so predictions stay in spot order.
test_gen_ = test_gen.batch(1)

In [None]:
# Baseline model imported from stimage._model, plus an early-stopping callback
# that monitors val_loss with a patience of 20 epochs.
# NOTE(review): restore_best_weights=False — presumably the weights of the last
# epoch are kept rather than the best ones; confirm that is intended.
model = CNN_NB_multiple_genes((299, 299, 3), n_genes)
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20,
                                            restore_best_weights=False)

In [None]:
# Train for at most 100 epochs; the early-stopping callback may end training sooner.
train_history = model.fit(train_gen_,
                          epochs=100,
                          validation_data=valid_gen_,
                          callbacks=[callback]
                          )

In [None]:
test_predictions = model.predict(test_gen_)
from scipy.stats import nbinom

# predict() returns a list with one (n_spots, 2) array per gene for a
# multi-output model but a bare array for a single output; treat both alike.
per_gene_params = (test_predictions
                   if isinstance(test_predictions, (list, tuple))
                   else [test_predictions])
y_preds = []
for gene_params in per_gene_params:
    n = gene_params[:, 0]
    p = gene_params[:, 1]
    # Expected value of NB(n, p) is the point prediction.
    y_pred = nbinom.mean(n, p)
    y_preds.append(y_pred)
# Always an (n_spots, n_genes) matrix, also when there is a single gene
# (the single-gene branch used to store a 1-D vector).
test_dataset.obsm["predicted_gene"] = np.array(y_preds).transpose()

In [None]:
# model.save("./CNN_NB_8genes_model.h5")

In [None]:
# Copy of the test section restricted to gene_list, with its expression matrix
# replaced by the predicted values so it can be drawn with gene_plot.
test_dataset_ = test_dataset[:,gene_list].copy()
test_dataset_.X = test_dataset_.obsm["predicted_gene"]

In [None]:
# For each gene, plot Sample1 followed by the prediction stored in test_dataset_.
# NOTE(review): test_dataset_ derives from Sample2 while the first panel shows
# Sample1 — confirm this pairing is intended.
for i in gene_list:
    print(i)
    gene_plot(Sample1, genes=i, spot_size=8)
    gene_plot(test_dataset_, genes=i, spot_size=8)

# z score transformed features layer

In [None]:
# LayerNormalization variant of the model, plus an early-stopping callback
# that monitors val_loss with a patience of 20 epochs.
# NOTE(review): restore_best_weights=False — presumably the weights of the last
# epoch are kept rather than the best ones; confirm that is intended.
model = CNN_NB_multiple_genes_feature_z((299, 299, 3), n_genes)
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20,
                                            restore_best_weights=False)

In [None]:
# Train for at most 100 epochs; the early-stopping callback may end training sooner.
train_history = model.fit(train_gen_,
                          epochs=100,
                          validation_data=valid_gen_,
                          callbacks=[callback]
                          )

In [None]:
test_predictions = model.predict(test_gen_)
from scipy.stats import nbinom

# predict() returns a list with one (n_spots, 2) array per gene for a
# multi-output model but a bare array for a single output; treat both alike.
per_gene_params = (test_predictions
                   if isinstance(test_predictions, (list, tuple))
                   else [test_predictions])
y_preds = []
for gene_params in per_gene_params:
    n = gene_params[:, 0]
    p = gene_params[:, 1]
    # Expected value of NB(n, p) is the point prediction.
    y_pred = nbinom.mean(n, p)
    y_preds.append(y_pred)
# Always an (n_spots, n_genes) matrix, also when there is a single gene
# (the single-gene branch used to store a 1-D vector).
test_dataset.obsm["predicted_gene"] = np.array(y_preds).transpose()

In [None]:
# Copy of the test section restricted to gene_list, with its expression matrix
# replaced by the predicted values so it can be drawn with gene_plot.
test_dataset_ = test_dataset[:,gene_list].copy()
test_dataset_.X = test_dataset_.obsm["predicted_gene"]

In [None]:
# For each gene, plot Sample1 followed by the prediction stored in test_dataset_.
# NOTE(review): test_dataset_ derives from Sample2 while the first panel shows
# Sample1 — confirm this pairing is intended.
for i in gene_list:
    print(i)
    gene_plot(Sample1, genes=i, spot_size=8)
    gene_plot(test_dataset_, genes=i, spot_size=8)

# minmax scaler transformed features layer

In [None]:
# Min-max variant of the model, plus an early-stopping callback that monitors
# val_loss with a patience of 20 epochs.
# NOTE(review): restore_best_weights=False — presumably the weights of the last
# epoch are kept rather than the best ones; confirm that is intended.
model = CNN_NB_multiple_genes_feature_minmax((299, 299, 3), n_genes)
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20,
                                            restore_best_weights=False)

In [None]:
# Train for at most 100 epochs; the early-stopping callback may end training sooner.
train_history = model.fit(train_gen_,
                          epochs=100,
                          validation_data=valid_gen_,
                          callbacks=[callback]
                          )

In [None]:
test_predictions = model.predict(test_gen_)
from scipy.stats import nbinom

# predict() returns a list with one (n_spots, 2) array per gene for a
# multi-output model but a bare array for a single output; treat both alike.
per_gene_params = (test_predictions
                   if isinstance(test_predictions, (list, tuple))
                   else [test_predictions])
y_preds = []
for gene_params in per_gene_params:
    n = gene_params[:, 0]
    p = gene_params[:, 1]
    # Expected value of NB(n, p) is the point prediction.
    y_pred = nbinom.mean(n, p)
    y_preds.append(y_pred)
# Always an (n_spots, n_genes) matrix, also when there is a single gene
# (the single-gene branch used to store a 1-D vector).
test_dataset.obsm["predicted_gene"] = np.array(y_preds).transpose()

In [None]:
# Copy of the test section restricted to gene_list, with its expression matrix
# replaced by the predicted values so it can be drawn with gene_plot.
test_dataset_ = test_dataset[:,gene_list].copy()
test_dataset_.X = test_dataset_.obsm["predicted_gene"]

In [None]:
# For each gene, plot Sample1 followed by the prediction stored in test_dataset_.
# NOTE(review): test_dataset_ derives from Sample2 while the first panel shows
# Sample1 — confirm this pairing is intended.
for i in gene_list:
    print(i)
    gene_plot(Sample1, genes=i, spot_size=8)
    gene_plot(test_dataset_, genes=i, spot_size=8)

# z score transformed features

In [None]:
# Run stlearn's feature extraction on each split; the cells below read the
# result from obsm["X_tile_feature"].
for adata in [
    training_dataset,
    valid_dataset,
    test_dataset
]:
    st.pp.extract_feature(adata)

In [None]:
# Head-only model working on pre-extracted feature vectors, plus an
# early-stopping callback that monitors val_loss with a patience of 20 epochs.
# NOTE(review): restore_best_weights=False — presumably the weights of the last
# epoch are kept rather than the best ones; confirm that is intended.
model = CNN_NB_multiple_genes_feature_global(n_genes)
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20,
                                            restore_best_weights=False)

In [None]:
from sklearn.preprocessing import StandardScaler
# Scaler used for the tile features in the next cell.
scaler = StandardScaler()

In [None]:
# Fit the scaler on the training features only and reuse those statistics for
# the validation and test features.  Re-fitting on every split (as before)
# scales each split differently from what the model was trained on.
train_X = scaler.fit_transform(training_dataset.obsm["X_tile_feature"])
train_Y = training_dataset.to_df()[gene_list]

valid_X = scaler.transform(valid_dataset.obsm["X_tile_feature"])
valid_Y = valid_dataset.to_df()[gene_list]

test_X = scaler.transform(test_dataset.obsm["X_tile_feature"])
test_Y = test_dataset.to_df()[gene_list]

In [None]:
# Train on the scaled feature vectors for at most 100 epochs; the
# early-stopping callback may end training sooner.
train_history = model.fit(x=train_X,
                          y=train_Y,
                          epochs=100,
                          validation_data=(valid_X, valid_Y),
                          callbacks=[callback]
                          )

In [None]:
test_predictions = model.predict(test_X)
from scipy.stats import nbinom

# predict() returns a list with one (n_spots, 2) array per gene for a
# multi-output model but a bare array for a single output; treat both alike.
per_gene_params = (test_predictions
                   if isinstance(test_predictions, (list, tuple))
                   else [test_predictions])
y_preds = []
for gene_params in per_gene_params:
    n = gene_params[:, 0]
    p = gene_params[:, 1]
    # Expected value of NB(n, p) is the point prediction.
    y_pred = nbinom.mean(n, p)
    y_preds.append(y_pred)
# Always an (n_spots, n_genes) matrix, also when there is a single gene
# (the single-gene branch used to store a 1-D vector).
test_dataset.obsm["predicted_gene"] = np.array(y_preds).transpose()

In [None]:
# Copy of the test section restricted to gene_list, with its expression matrix
# replaced by the predicted values so it can be drawn with gene_plot.
test_dataset_ = test_dataset[:,gene_list].copy()
test_dataset_.X = test_dataset_.obsm["predicted_gene"]

In [None]:
# For each gene, plot Sample1 followed by the prediction stored in test_dataset_.
# NOTE(review): test_dataset_ derives from Sample2 while the first panel shows
# Sample1 — confirm this pairing is intended.
for i in gene_list:
    print(i)
    gene_plot(Sample1, genes=i, spot_size=8)
    gene_plot(test_dataset_, genes=i, spot_size=8)

# minmax scaler transformed features

In [None]:
# Fresh head-only model for the min-max scaled features, plus an
# early-stopping callback that monitors val_loss with a patience of 20 epochs.
# NOTE(review): restore_best_weights=False — presumably the weights of the last
# epoch are kept rather than the best ones; confirm that is intended.
model = CNN_NB_multiple_genes_feature_global(n_genes)
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20,
                                            restore_best_weights=False)

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Scaler with target range (0, 1), used for the tile features in the next cell.
scaler = MinMaxScaler((0, 1))

In [None]:
# Fit the scaler on the training features only and reuse that range for the
# validation and test features.  Re-fitting on every split (as before) scales
# each split differently from what the model was trained on.
train_X = scaler.fit_transform(training_dataset.obsm["X_tile_feature"])
train_Y = training_dataset.to_df()[gene_list]

valid_X = scaler.transform(valid_dataset.obsm["X_tile_feature"])
valid_Y = valid_dataset.to_df()[gene_list]

test_X = scaler.transform(test_dataset.obsm["X_tile_feature"])
test_Y = test_dataset.to_df()[gene_list]

In [None]:
# Train on the scaled feature vectors for at most 100 epochs; the
# early-stopping callback may end training sooner.
train_history = model.fit(x=train_X,
                          y=train_Y,
                          epochs=100,
                          validation_data=(valid_X, valid_Y),
                          callbacks=[callback]
                          )

In [None]:
test_predictions = model.predict(test_X)
from scipy.stats import nbinom

# predict() returns a list with one (n_spots, 2) array per gene for a
# multi-output model but a bare array for a single output; treat both alike.
per_gene_params = (test_predictions
                   if isinstance(test_predictions, (list, tuple))
                   else [test_predictions])
y_preds = []
for gene_params in per_gene_params:
    n = gene_params[:, 0]
    p = gene_params[:, 1]
    # Expected value of NB(n, p) is the point prediction.
    y_pred = nbinom.mean(n, p)
    y_preds.append(y_pred)
# Always an (n_spots, n_genes) matrix, also when there is a single gene
# (the single-gene branch used to store a 1-D vector).
test_dataset.obsm["predicted_gene"] = np.array(y_preds).transpose()

In [None]:
# Copy of the test section restricted to gene_list, with its expression matrix
# replaced by the predicted values so it can be drawn with gene_plot.
test_dataset_ = test_dataset[:,gene_list].copy()
test_dataset_.X = test_dataset_.obsm["predicted_gene"]

In [None]:
# For each gene, plot Sample1 followed by the prediction stored in test_dataset_.
# NOTE(review): test_dataset_ derives from Sample2 while the first panel shows
# Sample1 — confirm this pairing is intended.
for i in gene_list:
    print(i)
    gene_plot(Sample1, genes=i, spot_size=8)
    gene_plot(test_dataset_, genes=i, spot_size=8)