<div class="alert alert-block alert-success">
    <h1 align="center">Spearman Rank Correlation Callback</h1>
    <h2 align="center">Keras custom callback</h2>
    <h3 align="center"> Tabular Time Series</h3>
    <h5 align="center">Github: (https://github.com/MTisMT)</h5>
</div>

Spearman rank correlation is a popular metric for evaluating the results of trend prediction in quantitative finance models. It is a non-differentiable function (sorting is not differentiable), so it can't be used directly as the loss function of the model. However, there are some [models](https://arxiv.org/abs/1904.04272) that can learn approximations of such non-differentiable objective functions.

The custom callback implemented here monitors the Spearman rank correlation on the validation data and stops training early when the model has not improved on this metric after a given number of epochs.

# Import libraries

In [1]:
import numpy as np
import pandas as pd
from scipy.stats.mstats import spearmanr
 
import tensorflow as tf
import keras.backend as K
from keras import optimizers
from keras import regularizers
from keras.models import Sequential
from keras.models import Model
from keras.callbacks import Callback
from keras.layers.merge import concatenate
from keras.utils.vis_utils import plot_model
from keras import callbacks 
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.layers import Dense, Dropout, Input, BatchNormalization

# Define Custom Spearman Callback
Can be used for single  or multiple outputs

In [2]:
class SpearmanRhoCallback(Callback):
    """Report validation Spearman rho after each epoch and stop training early.

    After every epoch the model predicts on the validation inputs, the
    Spearman rank correlation is computed separately for each output column,
    and the column-wise correlations are averaged. Training is stopped once
    the averaged correlation has failed to reach the best value seen so far
    for ``patience`` consecutive epochs.

    Args:
        training_data: ``(x, y)`` pair; stored on the instance but not read
            by any hook of this class.
        validation_data: ``(x_val, y_val)`` pair used to compute the metric.
            ``y_val`` may be a NumPy array or anything ``np.asarray`` can
            convert (for example a pandas DataFrame or Series).
        patience: number of consecutive non-improving epochs tolerated
            before ``model.stop_training`` is set.
        model_name: stored on the instance; only referenced by the disabled
            ``save_weights`` call in ``on_epoch_end``.
    """

    def __init__(self, training_data, validation_data, patience, model_name):
        # Run the base-class initialiser so the callback starts from the
        # state Keras sets up for every Callback.
        super().__init__()
        self.x = training_data[0]
        self.y = training_data[1]
        self.x_val = validation_data[0]
        self.y_val = validation_data[1]

        self.patience = patience
        self.value = -1  # best rho seen so far; -1 is the metric's lower bound
        self.bad_epochs = 0  # consecutive epochs without reaching the best rho
        self.model_name = model_name

    @staticmethod
    def _as_columns(values):
        """Return *values* as a 2-D array of shape (n_samples, n_outputs)."""
        arr = np.asarray(values)
        if arr.ndim < 2:
            # A flat vector is a single output column.
            return arr.reshape(-1, 1)
        # Keep the sample axis and fold any remaining axes into columns.
        return arr.reshape(arr.shape[0], -1)

    def on_train_begin(self, logs=None):
        return

    def on_train_end(self, logs=None):
        return

    def on_epoch_begin(self, epoch, logs=None):
        return

    def on_epoch_end(self, epoch, logs=None):
        """Compute the mean per-column Spearman rho and apply early stopping.

        Returns:
            The averaged validation Spearman correlation for this epoch.
        """
        y_true_val = self._as_columns(self.y_val)
        y_pred_val = self._as_columns(self.model.predict(self.x_val))

        # One correlation per output column, then averaged. Flattening every
        # output into a single column would instead pool ranks across
        # unrelated targets.
        rho_val = np.mean([
            spearmanr(y_true_val[:, ind], y_pred_val[:, ind]).correlation
            for ind in range(y_pred_val.shape[1])
        ])

        # If rho is NaN the comparison below is False, so the epoch is
        # counted as non-improving.
        if rho_val >= self.value:
            self.value = rho_val
            # Patience counts consecutive bad epochs: restart on improvement.
            self.bad_epochs = 0
        else:
            self.bad_epochs += 1
        if self.bad_epochs >= self.patience:
            print("Epoch %05d: early stopping Threshold" % epoch)
            self.model.stop_training = True
            #self.model.save_weights(self.model_name)
        print('\rval_spearman-rho: %s' % (str(round(rho_val, 4))), end=100*' '+'\n')
        return rho_val

    def on_batch_begin(self, batch, logs=None):
        return

    def on_batch_end(self, batch, logs=None):
        return

# The model 
Set `Spearman_patience` to stop training early after a specific number of epochs without improvement in the validation Spearman correlation.


In [3]:
def ANN_model(X,y,X_t,y_t,lr=0.005,bs=64, ep=10, actv='relu',
                  min_lr=0.00005, f_lr=0.7, reg=0,h_layers=2,
                  multitask=False, nodes = 512, drpout=0.2,
                  Spearman_patience = 7):
    """Build, compile and fit a fully connected network with Spearman early stopping.

    The network is a ``Sequential`` stack: an input layer, a fixed 0.2
    dropout, one dense layer of ``nodes*2`` units, then ``int(h_layers)+1``
    dense layers of ``nodes`` units, each dense layer followed by dropout
    ``drpout``, and finally a sigmoid output layer.

    Args:
        X, y: training inputs and targets passed to ``model.fit``.
        X_t, y_t: validation inputs and targets, used both as Keras
            ``validation_data`` and by ``SpearmanRhoCallback``.
        lr: learning rate for the Adam optimizer.
        bs: batch size.
        ep: number of epochs.
        actv: activation of the hidden dense layers.
        min_lr, f_lr: currently unused; only referenced by the disabled
            ``ReduceLROnPlateau`` snippet below.
        reg: coefficient used for the L1/L2 kernel and L2 bias regularizers.
        h_layers: controls the number of ``nodes``-wide hidden layers
            (``int(h_layers)+1`` of them are added).
        multitask: if true the output layer has ``y.shape[1]`` units,
            otherwise a single unit.
        nodes: base width of the hidden layers.
        drpout: dropout rate applied after every hidden dense layer.
        Spearman_patience: patience handed to ``SpearmanRhoCallback``.

    Returns:
        The fitted model; the object returned by ``fit`` is attached as
        ``model.hist``.
    """
    def regularized_dense(units):
        # Hidden layer with fresh regularizer instances, as each layer had
        # in the inline form.
        return Dense(units, activation=actv,
                     kernel_regularizer=regularizers.l1_l2(l1=reg, l2=reg),
                     bias_regularizer=regularizers.l2(reg))

    n_features = X.shape[1]
    # First hidden layer is double width, followed by int(h_layers)+1
    # layers of the base width.
    hidden_widths = [nodes * 2] + [nodes] * (int(h_layers) + 1)

    model = Sequential()
    model.add(Input(shape=(n_features,)))
    model.add(Dropout(0.2))  # input dropout is fixed, independent of drpout
    for width in hidden_widths:
        model.add(regularized_dense(width))
        model.add(Dropout(drpout))

    # One sigmoid unit per target column in multitask mode, else one unit.
    n_outputs = y.shape[1] if multitask else 1
    model.add(Dense(n_outputs, activation='sigmoid'))

    adam = tf.keras.optimizers.Adam(learning_rate=lr)
    model.compile(optimizer=adam, loss='mean_squared_logarithmic_error')

    rho_monitor = SpearmanRhoCallback(
        training_data=(X, y),
        validation_data=(X_t, y_t),
        patience=Spearman_patience,
        model_name='best_model_batch.h5',
    )
    # Disabled alternatives kept for reference:
    #ES = EarlyStopping(monitor='val_loss', restore_best_weights=True, patience=20)
    #reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=f_lr,
    #                          patience=3,verbose=1, min_lr=min_lr)
    print("Epoch LR: ", K.eval(model.optimizer.lr))

    model.hist = model.fit(
        X, y,
        epochs=ep,
        callbacks=[rho_monitor],
        validation_data=(X_t, y_t),
        shuffle=True,
        batch_size=bs,
        verbose=1,
    )
    return model