In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_addons as tfa
import pandas as pd
import os
from PIL import Image
from sklearn import preprocessing

In [None]:
num_classes = 2
input_shape = (299, 299, 3)


def _read_spot_metadata(csv_path):
    """Load the per-spot metadata table and key it by tile identifier.

    The identifier is the fourth "/"-separated component of the
    ``tile_tissue_mask_path`` column with its last five characters removed.
    ``set_index`` then moves that column into the index, so afterwards the
    identifiers are reachable through ``.index`` rather than as a column.
    """
    meta = pd.read_csv(csv_path, index_col=0)
    path_parts = meta['tile_tissue_mask_path'].str.split('/', expand=True)
    meta['tile_tissue_mask_path'] = path_parts[3].str[:-5]
    return meta.set_index(['tile_tissue_mask_path'])


def _read_target_expression(csv_path, meta):
    """Load the expression table and keep the COX6C column plus a ``dataset`` tag.

    Rows are relabelled positionally with ``meta``'s index and tagged with
    ``meta['library_id']``.
    NOTE(review): this assumes both CSV files list the spots in the same
    order -- confirm against how the files were exported.
    """
    expr = pd.read_csv(csv_path, index_col=0)
    expr.index = meta.index
    expr['dataset'] = list(meta['library_id'])
    return expr[["COX6C","dataset"]]


h5ad = _read_spot_metadata("/home/uqomulay/90days/h5ad_obs.csv")
gene_exp = _read_target_expression("/home/uqomulay/90days/gene_exp_2.csv", h5ad)

# Expression rows: the "block1" library is used for training, "FFPE" for testing.
Y = gene_exp.loc[gene_exp['dataset'] == "block1"]
test_Y_FFPE = gene_exp.loc[gene_exp['dataset'] == "FFPE"]

# Matching metadata rows for the same two libraries.
xtrain = h5ad.loc[h5ad['library_id'] == 'block1']
X_test_FFPE = h5ad.loc[h5ad['library_id'] == "FFPE"]

In [None]:
# Tile file names below are resolved relative to this directory.
os.chdir('/home/uqomulay/90days/tiles/')


def _load_tiles(tile_ids):
    """Read ``<tile_id>.jpeg`` for every id and return the pixels as one array.

    Args:
        tile_ids: iterable of tile identifier strings (file names without the
            ``.jpeg`` suffix), resolved against the current working directory.

    Returns:
        ``np.array`` built from the per-image pixel arrays, in input order.
    """
    tiles = []
    for tile_id in tile_ids:
        # The context manager releases the file handle as soon as the pixel
        # data has been copied into the array.
        with Image.open(tile_id + '.jpeg') as tile:
            tiles.append(np.array(tile))
    return np.array(tiles)


# `h5ad` was re-indexed with set_index(['tile_tissue_mask_path']), which moves
# that column into the index; looking it up as a column raises KeyError, so the
# identifiers are taken from the index instead.
x_train = _load_tiles(xtrain.index)

# The test ids are recomputed from `h5ad` rather than read from `X_test_FFPE`,
# because that name is rebound to the image array on the next line; this keeps
# the cell re-runnable.
X_test_FFPE = _load_tiles(h5ad.loc[h5ad['library_id'] == "FFPE"].index)


def normalise(set_name):
    """Turn an expression table into binary low/high labels.

    Every column except the last is transformed with ``log(2 * x + 1)``,
    standardised per column by a ``StandardScaler`` fitted on the data passed
    in, and then thresholded: entries that are ``<= 0`` after scaling become
    0, everything else becomes 1.

    Args:
        set_name: DataFrame whose last column is excluded from the
            computation (the callers pass frames ending in ``dataset``).

    Returns:
        An ``int64`` DataFrame of 0/1 labels with default integer row and
        column labels (the input's index is not carried over).
    """
    log_expression = np.log(2 * set_name.iloc[:, :-1] + 1)
    standardised = preprocessing.StandardScaler().fit_transform(log_expression)
    # "Not (<= 0)" rather than "> 0": values that fail the comparison (NaN)
    # are labelled 1, exactly like an element-wise `0 if y <= 0 else 1`.
    is_high = ~(standardised <= 0)
    return pd.DataFrame(data=is_high).astype('int64')

# Replace the raw expression frames with their binarised 0/1 labels.
# NOTE(review): both names are overwritten with the result, so this cell is
# not idempotent -- on a re-run the label frame's only column would be removed
# by the `iloc[:, :-1]` slice inside `normalise`.
Y = normalise(Y)
test_Y_FFPE = normalise(test_Y_FFPE)


# Hyper-parameters read as module-level globals by the model-building and
# training functions defined further down.
weight_decay = 0.0001
batch_size = 128
num_epochs = 50
dropout_rate = 0.2
image_size = 299  # We'll resize input images to this size.
patch_size = 8  # Size of the patches to be extracted from the input images.
# Floor division: 299 // 8 == 37 patches per side, i.e. 37 ** 2 == 1369 in total.
num_patches = (image_size // patch_size) ** 2  # Size of the data array.
embedding_dim = 256  # Number of hidden units.
num_blocks = 4  # Number of blocks.

# Echo the derived patch geometry so it can be checked at a glance.
print(f"Image size: {image_size} X {image_size} = {image_size ** 2}")
print(f"Patch size: {patch_size} X {patch_size} = {patch_size ** 2} ")
print(f"Patches per image: {num_patches}")
print(f"Elements per patch (3 channels): {(patch_size ** 2) * 3}")

In [None]:
def build_classifier(blocks, positional_encoding=False):
    """Assemble the tile classifier around a stack of processing blocks.

    Pipeline: augmentation -> patch extraction -> dense patch embedding
    (optionally plus a learned position embedding) -> ``blocks`` -> average
    over the patch axis -> dropout -> dense layer with ``num_classes`` units.

    Relies on the module-level ``input_shape``, ``data_augmentation``,
    ``Patches``, ``patch_size``, ``num_patches``, ``embedding_dim``,
    ``dropout_rate`` and ``num_classes``.

    Args:
        blocks: callable (e.g. a Keras model) applied to the embedded patches.
        positional_encoding: when True, a learned embedding of the patch
            positions ``0 .. num_patches - 1`` is added to the patch embedding.

    Returns:
        A ``keras.Model`` mapping an image batch to unnormalised logits.
    """
    image_input = layers.Input(shape=input_shape)

    augmented_images = data_augmentation(image_input)

    patch_sequence = Patches(patch_size, num_patches)(augmented_images)
    tokens = layers.Dense(units=embedding_dim)(patch_sequence)
    if positional_encoding:
        patch_index = tf.range(start=0, limit=num_patches, delta=1)
        embed_positions = layers.Embedding(
            input_dim=num_patches, output_dim=embedding_dim
        )
        tokens = tokens + embed_positions(patch_index)
    # Run the embedded patches through the supplied block stack.
    tokens = blocks(tokens)
    pooled = layers.GlobalAveragePooling1D()(tokens)
    pooled = layers.Dropout(rate=dropout_rate)(pooled)
    class_logits = layers.Dense(num_classes)(pooled)
    return keras.Model(inputs=image_input, outputs=class_logits)


def run_experiment(model):
    """Compile, train and evaluate ``model``, then persist predictions and weights.

    Training uses ``x_train`` / ``Y`` with 15% of the data held out for
    validation; the validation loss drives both the learning-rate reduction
    and early stopping (best weights are restored). The model is then
    evaluated on ``X_test_FFPE`` / ``test_Y_FFPE``, its raw outputs for the
    test tiles are written to CSV, and its weights are saved.

    Reads the module-level ``learning_rate``, ``weight_decay``,
    ``batch_size`` and ``num_epochs``.

    Args:
        model: the uncompiled Keras model to train.

    Returns:
        The value returned by ``model.fit``.
    """
    model.compile(
        optimizer=tfa.optimizers.AdamW(
            learning_rate=learning_rate, weight_decay=weight_decay,
        ),
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")],
    )

    # Halve the learning rate after 5 stagnant epochs of validation loss.
    lr_schedule = keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss", factor=0.5, patience=5
    )
    # Give up after 10 stagnant epochs and roll back to the best weights.
    stop_early = tf.keras.callbacks.EarlyStopping(
        monitor="val_loss", patience=10, restore_best_weights=True
    )

    fit_history = model.fit(
        x=x_train,
        y=Y,
        batch_size=batch_size,
        epochs=num_epochs,
        validation_split=0.15,
        callbacks=[stop_early, lr_schedule],
    )

    # The unpacked values are not used afterwards.
    _test_loss, _test_accuracy = model.evaluate(X_test_FFPE, test_Y_FFPE)

    test_outputs = model.predict(X_test_FFPE)
    pd.DataFrame(test_outputs).to_csv("/home/uqomulay/90days/COX6C_pred_gexp.csv")
    model.save_weights('/home/uqomulay/90days/COX6C_pickle.h5')
    return fit_history


def _make_augmentation_pipeline():
    """Build the preprocessing/augmentation model applied to the input images.

    Stages, in order: normalisation, resizing to ``image_size`` x
    ``image_size``, random horizontal flip, random zoom (factor 0.2 on both
    height and width).
    """
    stages = [
        layers.Normalization(),
        layers.Resizing(image_size, image_size),
        layers.RandomFlip("horizontal"),
        layers.RandomZoom(height_factor=0.2, width_factor=0.2),
    ]
    return keras.Sequential(stages, name="data_augmentation")


data_augmentation = _make_augmentation_pipeline()
# Fit the normalisation stage (first layer) to the training images so it
# learns their mean and variance.
data_augmentation.layers[0].adapt(x_train)


In [None]:
class Patches(layers.Layer):
    """Cut a batch of images into non-overlapping square patches.

    Args:
        patch_size: side length, in pixels, of each square patch.
        num_patches: number of patches per image; used as the middle
            dimension of the reshaped output.
        **kwargs: standard Keras layer arguments (e.g. ``name``), forwarded
            to ``layers.Layer``.
    """

    def __init__(self, patch_size, num_patches, **kwargs):
        # Forward Keras keyword arguments, as gMLPLayer does, so the layer can
        # be named and rebuilt from its config.
        super().__init__(**kwargs)
        self.patch_size = patch_size
        self.num_patches = num_patches

    def call(self, images):
        """Return the flattened patches, reshaped to [batch, num_patches, patch_dims]."""
        batch_size = tf.shape(images)[0]
        # Window and stride are both patch_size, so windows do not overlap.
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        patch_dims = patches.shape[-1]
        # NOTE(review): the reshape only succeeds when num_patches matches the
        # number of windows actually extracted -- confirm against image size.
        patches = tf.reshape(patches, [batch_size, self.num_patches, patch_dims])
        return patches

    def get_config(self):
        """Return the constructor arguments so the layer can be serialised."""
        config = super().get_config()
        config.update(
            {"patch_size": self.patch_size, "num_patches": self.num_patches}
        )
        return config



class gMLPLayer(layers.Layer):
    """One gMLP block: layer norm, channel expansion, spatial gating, channel projection.

    Args:
        num_patches: number of patches; output width of the spatial projection.
        embedding_dim: number of channels in the block's output.
        dropout_rate: dropout applied after the first channel projection.
        *args, **kwargs: forwarded to ``layers.Layer``.
    """

    def __init__(self, num_patches, embedding_dim, dropout_rate, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Expands the channels to twice embedding_dim so the result can later
        # be split into two halves.
        self.channel_projection1 = keras.Sequential(
            [
                layers.Dense(units=embedding_dim * 2),
                tfa.layers.GELU(),
                layers.Dropout(rate=dropout_rate),
            ]
        )

        # Maps the gated tensor back to embedding_dim channels.
        self.channel_projection2 = layers.Dense(units=embedding_dim)

        # Dense layer applied along the patch axis; its bias starts at one.
        self.spatial_projection = layers.Dense(
            units=num_patches, bias_initializer="Ones"
        )

        self.normalize1 = layers.LayerNormalization(epsilon=1e-6)
        self.normalize2 = layers.LayerNormalization(epsilon=1e-6)

    def spatial_gating_unit(self, x):
        """Gate one half of the channels with a patch-wise projection of the other half."""
        # Halve x along the channel axis; each half has the shape
        # [batch_size, num_patches, embedding_dim].
        passthrough, gate = tf.split(x, num_or_size_splits=2, axis=2)
        gate = self.normalize2(gate)
        # Swap the last two axes so the dense layer acts across patches,
        # project, then swap back.
        gate = tf.linalg.matrix_transpose(
            self.spatial_projection(tf.linalg.matrix_transpose(gate))
        )
        # Element-wise gating.
        return passthrough * gate

    def call(self, inputs):
        """Apply the block to ``inputs`` and return the residual sum."""
        normalised = self.normalize1(inputs)
        # Shape after expansion: [batch_size, num_patches, embedding_dim * 2].
        expanded = self.channel_projection1(normalised)
        # Shape after gating: [batch_size, num_patches, embedding_dim].
        gated = self.spatial_gating_unit(expanded)
        # NOTE(review): the skip connection adds the layer-normalised tensor,
        # not the raw `inputs` -- confirm this is intended.
        return normalised + self.channel_projection2(gated)

In [None]:
# Stack `num_blocks` gMLP layers into a single sequential module.
gmlp_blocks = keras.Sequential(
    [gMLPLayer(num_patches, embedding_dim, dropout_rate) for _ in range(num_blocks)]
)
# `run_experiment` reads `learning_rate` from module scope, so it has to be
# assigned before the call below.
learning_rate = 0.003
# Positional encoding is left at its default (off).
gmlp_classifier = build_classifier(gmlp_blocks)
# Trains and evaluates the model and writes predictions and weights to disk;
# `history` is whatever `model.fit` returned.
history = run_experiment(gmlp_classifier)