<a href="https://colab.research.google.com/github/FrederikKober/Thesis/blob/main/NN_Func_App1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
lst = []  # module-level debug log: NNGridSearchWrapper appends every `no` it is called with


# Libraries
import numpy as np
from sklearn.model_selection import ParameterGrid



#######################
## GridSearchWrapper ##

def NNGridSearchWrapper(NNfunc, X, Y, no, params=None, refit=None,
                        dumploc=None, **kwargs):
    """
    Grid-search ``NNfunc`` over ``params`` and refit the winning combination.

    Every combination produced by ``ParameterGrid(params)`` is fitted once;
    the one with the smallest validation loss is then fitted a second time
    and the result of that final fit is returned.

    Parameters
    ----------
    NNfunc : callable
        Model function called as ``NNfunc(X, Y, no, dropout_u=..., l1l2penal=...,
        refit=True, dumploc=..., **kwargs)``; must return ``(Ypred, val_loss)``.
    X, Y, no :
        Forwarded unchanged to ``NNfunc``. ``no`` is also printed and appended
        to the module-level ``lst``.
    params : dict
        Parameter ranges; each grid point must provide the keys ``'Dropout'``
        and ``'l1l2'``.
    refit :
        Accepted but not read here; ``NNfunc`` is always called with
        ``refit=True``.
    dumploc :
        Forwarded unchanged to ``NNfunc``.
    **kwargs :
        Forwarded unchanged to ``NNfunc``.

    Returns
    -------
    tuple
        ``(Ypred, val_loss)`` from the final fit of the best combination.
    """
    # Debug trace: echo the run identifier and record it in the module-level list.
    print(no)
    lst.append(no)

    def fit_with(combo):
        # One full fit of NNfunc for a single grid point.
        return NNfunc(X, Y, no,
                      dropout_u=combo['Dropout'],
                      l1l2penal=combo['l1l2'],
                      refit=True, dumploc=dumploc,
                      **kwargs)

    # Expand the dictionary of parameter ranges into all combinations.
    candidates = list(ParameterGrid(params))

    # Fit every candidate in grid order, keeping only its validation loss.
    losses = [loss for _, loss in map(fit_with, candidates)]

    # Fit the combination with the smallest validation loss once more.
    winner = candidates[np.argmin(losses)]
    Ypred, best_loss = fit_with(winner)

    return Ypred, best_loss



#######################
## NNForwardFunction ##

def NNForwardFunction(X, Y, no, dropout_u = None, l1l2penal = None,
                      refit = None, dumploc = None, **kwargs):
    """
    This model fits a vanilla neural network on the forward rates with a
    flexible architecture. Simple case # 0 in the main paper.

    All rows of ``Xexog``/``Y`` except the last are used for fitting (15% of
    them are held out through ``validation_split``); the last row of
    ``Xexog`` is the single out-of-sample observation that is predicted.

    Parameters
    ----------
    X :
        Not used by this function; kept because NNGridSearchWrapper passes it
        positionally.
    Y : 2-D array
        Targets; ``Y[:-1, :]`` is used for training.
    no :
        Run identifier. Used as the numpy/TensorFlow seed and as the suffix
        of the files written to / read from ``dumploc``.
    dropout_u :
        Dropout rate applied before every hidden layer and before the batch
        normalisation (only used when ``refit`` is truthy).
    l1l2penal :
        Value handed to ``regularizers.l1_l2`` for each hidden layer (only
        used when ``refit`` is truthy).
    refit :
        Truthy: build a new network from ``archi``, train it and save it to
        ``dumploc``. Falsy: load the model previously saved in ``dumploc``
        and train it further.
    dumploc : str
        Directory for ``BestModelWeights_<no>.hdf5`` and ``BestModel_<no>.hdf5``.
    **kwargs :
        Must contain ``Xexog`` (2-D predictor array) and ``archi`` (sequence
        of hidden-layer widths, one Dense layer per entry).

    Returns
    -------
    tuple
        ``(Ypred, val_loss)``: the prediction for the last row of ``Xexog``
        and the minimum validation loss seen during training.

    Raises
    ------
    ValueError
        If ``dumploc`` is None.
    KeyError
        If ``Xexog`` or ``archi`` is missing from ``kwargs``.
    """

    if dumploc is None:
        raise ValueError('Missing Dumploc argument')

    # The unused `keras.layers.merge.concatenate` import was dropped: nothing
    # in this function references it and the module is missing from newer
    # Keras releases, where it made the whole function fail on import.
    import tensorflow as tf
    from sklearn.preprocessing import MinMaxScaler
    from keras.models import Model
    from keras.layers import Input, Dense, Dropout, BatchNormalization
    from keras.callbacks import EarlyStopping, ModelCheckpoint
    from tensorflow.keras.optimizers import SGD
    from keras.models import load_model
    from keras import regularizers

    Xexog = kwargs["Xexog"]
    archi = kwargs["archi"]

    # Files shared by the checkpoint callback and the save/load calls
    weights_path = dumploc+'/BestModelWeights_'+str(no)+'.hdf5'
    model_path = dumploc+'/BestModel_'+str(no)+'.hdf5'

    # Split Data for Test and Training
    Xexog_train = Xexog[:-1,:]
    Y_train = Y[:-1,:]
    Xexog_test = Xexog[-1,:].reshape(1,-1)

    # Scale the predictors; the scaler is fitted on the training rows only
    Xexog_scaler_train = MinMaxScaler(feature_range=(-1,1))
    Xexog_scaled_train = Xexog_scaler_train.fit_transform(Xexog_train)
    Xexog_scaled_test = Xexog_scaler_train.transform(Xexog_test)

    # seed numpy and tf
    tf.compat.v1.set_random_seed(no)
    np.random.seed(no)

    # Define Model Architecture
    if refit:
        # input -> [dropout -> dense(relu)] for each entry of archi
        #       -> dropout -> batch norm -> linear output.
        # Chaining the layers directly replaces the former index-based
        # if/elif ladder, whose `i > 1 & i <= n` test mixed a bitwise `&`
        # into a comparison and only selected the right branch by accident.
        inputs = Input(shape = (Xexog_scaled_train.shape[1],))
        hidden = inputs
        for units in archi:
            hidden = Dropout(dropout_u)(hidden)
            # NOTE(review): l1_l2 is called with one positional argument, so
            # only its first parameter (l1) receives l1l2penal and l2 keeps
            # the library default -- confirm this is intended.
            hidden = Dense(units, kernel_regularizer = regularizers.l1_l2(l1l2penal),
                           bias_initializer = 'he_normal', kernel_initializer = 'he_normal',
                           activation='relu')(hidden)

        hidden = Dropout(dropout_u)(hidden)
        hidden = BatchNormalization()(hidden)
        outputs = Dense(Y_train.shape[1], bias_initializer = "he_normal",
                        kernel_initializer = "he_normal")(hidden)

        model = Model(inputs = inputs, outputs = outputs)
    else:
        # Retrieve the previously saved model and retrain it
        model = load_model(model_path)

    # Compile and train -- identical for a fresh and a reloaded model
    sgd_fine = SGD(learning_rate=0.01, momentum=0.9, decay=0.01, nesterov=True)
    earlystopping = EarlyStopping(monitor='val_loss', min_delta=1e-6,
                                  patience=20, verbose=0, mode='auto')
    mcp = ModelCheckpoint(weights_path,
                          monitor='val_loss', save_best_only=True)

    model.compile(loss='mean_squared_error', optimizer=sgd_fine)
    history = model.fit(Xexog_scaled_train, Y_train, epochs=500,
                        callbacks=[earlystopping, mcp], validation_split=0.15,
                        batch_size=32, shuffle=True, verbose=0)

    # Retrieve the best weights written by the checkpoint callback
    model.load_weights(weights_path)
    if refit:
        # Only a freshly built model is saved; a reloaded one is not written back
        model.save(model_path)

    # Make out-of-sample prediction on the unseen observations
    Ypred = model.predict(Xexog_scaled_test)

    return Ypred, np.min(history.history['val_loss'])


def NNForwardRun(X, Xexog, Y, no, params = None, refit = None, dumploc = None, archi = None):
    """
    Entry point for the feed-forward network.

    When ``refit`` is truthy, NNForwardFunction is tuned through
    NNGridSearchWrapper over ``params``; otherwise NNForwardFunction is
    called once with ``refit = False`` so that it reuses the stored model.

    ``archi`` holds the number of nodes of each hidden layer and is supplied
    by the caller.

    Returns the ``(Ypred, val_loss)`` pair produced by the chosen path.
    """
    # Keyword arguments common to both paths
    shared = dict(dumploc = dumploc, archi = archi, Xexog = Xexog)

    if not refit:
        # Use existing model
        Ypred, val_loss = NNForwardFunction(X, Y, no, refit = False, **shared)
        return Ypred, val_loss

    # Perform grid-search over params
    Ypred, val_loss = NNGridSearchWrapper(NNForwardFunction, X, Y, no,
                                          params = params, refit = True,
                                          **shared)
    return Ypred, val_loss




##################
## LSTM_Generic ##

def LSTM_Generic(X, Y, no, dropout_u = None, l1l2penal = None, refit = None,
                 dumploc = None, other_gen = None, **kwargs):
    """
    This model fits a basic LSTM model for model 0
        --> only forward rates as input

    All rows of ``Xexog``/``Y`` except the last are turned into rolling
    windows of ``_len`` rows; the first 85% form the training generator and
    the remainder the validation generator. The prediction is made from the
    last ``_len`` rows of ``Xexog``.

    Parameters
    ----------
    X :
        Not used by this function; kept because NNGridSearchWrapper passes it
        positionally.
    Y : 2-D array
        Targets; ``Y[:-1]`` is used for training/validation.
    no :
        Run identifier. Used as the numpy/TensorFlow seed, printed, and used
        as the suffix of the files written to / read from ``dumploc``.
    dropout_u :
        Dropout rate applied after every hidden layer (only used when
        ``refit`` is truthy).
    l1l2penal :
        Value handed to ``regularizers.l1_l2`` for each hidden layer (only
        used when ``refit`` is truthy).
    refit :
        Truthy: build a new network from ``archi``, train it and save it to
        ``dumploc``. Falsy: load the model previously saved in ``dumploc``
        and train it further.
    dumploc : str
        Directory for ``BestModelWeights_<no>.hdf5`` and ``BestModel_<no>.hdf5``.
    other_gen : dict
        Must contain ``"_len"`` (window length of the generators and of the
        network input), ``"_batch_top"`` (batch size of both generators) and
        ``"_batch_bot"`` (``batch_size`` passed to ``model.fit``).
    **kwargs :
        Must contain ``Xexog`` (2-D predictor array) and ``archi`` (sequence
        of layer widths: first entry for the LSTM layer, each further entry
        for one Dense layer).

    Returns
    -------
    tuple
        ``(Ypred, val_loss)``: the prediction from the last window and the
        minimum validation loss seen during training.

    Raises
    ------
    ValueError
        If ``dumploc`` is None.
    KeyError
        If a required key is missing from ``other_gen`` or ``kwargs``.
    """

    # Unpack dictionary
    _len = other_gen["_len"]
    _batch_top = other_gen["_batch_top"]
    _batch_bot = other_gen["_batch_bot"]

    if dumploc is None:
        raise ValueError('Missing Dumploc argument')

    import tensorflow as tf
    from sklearn.preprocessing import MinMaxScaler
    from keras.models import Model
    from keras.layers import Input, Dense, Dropout, BatchNormalization
    from keras.callbacks import EarlyStopping, ModelCheckpoint
    from tensorflow.keras.optimizers import SGD
    from keras.models import load_model
    from keras import regularizers
    from keras.preprocessing.sequence import TimeseriesGenerator
    # NOTE(review): keras.layers.recurrent only exists in older Keras
    # releases; kept as-is so the same LSTM class as before is used.
    from keras.layers.recurrent import LSTM

    Xexog = kwargs['Xexog']
    archi = kwargs['archi']

    # Files shared by the checkpoint callback and the save/load calls
    weights_path = dumploc+'/BestModelWeights_'+str(no)+'.hdf5'
    model_path = dumploc+'/BestModel_'+str(no)+'.hdf5'

    # Data Prep --> important to not get mixed up with _Xexog and _Y
    _Xexog = Xexog[:-1,:]
    _Y = Y[:-1]

    # Xexog_test (1 observation - but trace back _len)
    Xexog_test = Xexog[-_len:,:]

    # Scale predictors for data set containing train and validation;
    # possible leakage, but minimal --> procedure akin to main code (bianchi)
    _xexog_scaler_train = MinMaxScaler(feature_range = (-1,1))
    Xexog_scaled = _xexog_scaler_train.fit_transform(_Xexog)
    Xexog_scaled_test = _xexog_scaler_train.transform(Xexog_test)

    # Need to expand dimensions for prediction: (1, _len, n_features)
    Xexog_scaled_test = np.expand_dims(Xexog_scaled_test, axis = 0)

    # Row index separating the training part (first 85%) from validation
    _split = int(np.round(_Xexog.shape[0] * (1 - 0.15)))

    # seed numpy and tf
    tf.compat.v1.set_random_seed(no)
    np.random.seed(no)
    print(no)

    # TimeseriesGenerator
    Xexog_scaled_train_gen = TimeseriesGenerator(data = Xexog_scaled[:_split,:], targets = _Y[:_split,:], length = _len,
                                                 sampling_rate = 1, stride = 1,
                                                 shuffle = False, batch_size = _batch_top)
    # The validation data starts _len-1 rows before the split so that its
    # first windows reach back into the training rows.
    # NOTE(review): TimeseriesGenerator appears to pair each window with the
    # target one row after the window; if so, the target at row _split is in
    # neither generator -- confirm the intended alignment.
    Xexog_scaled_val_gen = TimeseriesGenerator(data = Xexog_scaled[_split-_len+1:,:], targets = _Y[_split-_len+1:,:], length = _len,
                                               sampling_rate = 1, stride = 1,
                                               shuffle = False, batch_size = _batch_top)

    ## Begin LSTM ##

    # Define Model Architecture
    if refit:
        # input -> LSTM(archi[0]) -> dropout
        #       -> [dense(relu) -> dropout] for each further entry of archi
        #       -> batch norm -> linear output.
        # Chaining the layers directly replaces the former index-based
        # if/elif ladder, whose `i > 1 & i <= n` test mixed a bitwise `&`
        # into a comparison and only selected the right branch by accident.
        inputs = Input(shape = (_len, Xexog.shape[1]))
        # NOTE(review): l1_l2 is called with one positional argument, so only
        # its first parameter (l1) receives l1l2penal and l2 keeps the
        # library default -- confirm this is intended.
        hidden = LSTM(units = archi[0], kernel_regularizer = regularizers.l1_l2(l1l2penal),
                      bias_initializer = "he_normal", kernel_initializer = "he_normal",
                      activation = "relu")(inputs)
        hidden = Dropout(dropout_u)(hidden)
        # Each dense layer takes its own width from archi. Previously every
        # dense layer used archi[1], so entries beyond the second were
        # silently ignored.
        for units in archi[1:]:
            hidden = Dense(units, kernel_regularizer = regularizers.l1_l2(l1l2penal),
                           bias_initializer = "he_normal", kernel_initializer = "he_normal",
                           activation = "relu")(hidden)
            hidden = Dropout(dropout_u)(hidden)

        hidden = BatchNormalization()(hidden)
        outputs = Dense(Y.shape[1], bias_initializer = "he_normal",
                        kernel_initializer = "he_normal")(hidden)

        model = Model(inputs = inputs, outputs = outputs)
    else:
        # Retrieve the previously saved model and retrain it
        model = load_model(model_path)

    # Compile and train -- identical for a fresh and a reloaded model
    sgd_fine = SGD(learning_rate=0.01, momentum=0.9, decay=0.01, nesterov=True)
    earlystopping = EarlyStopping(monitor='val_loss', min_delta=1e-6,
                                  patience=20, verbose=0, mode='auto')
    mcp = ModelCheckpoint(weights_path,
                          monitor='val_loss', save_best_only=True)

    model.compile(loss='mean_squared_error', optimizer=sgd_fine)
    # NOTE(review): the generators already batch with _batch_top; some
    # tf.keras versions reject a non-None batch_size for generator input --
    # confirm what callers pass as _batch_bot.
    history = model.fit(Xexog_scaled_train_gen, epochs = 500, verbose = 0,
                        validation_data= Xexog_scaled_val_gen, callbacks = [earlystopping, mcp],
                        shuffle = False, batch_size = _batch_bot)

    # Retrieve the best weights written by the checkpoint callback
    model.load_weights(weights_path)
    if refit:
        # Only a freshly built model is saved; a reloaded one is not written back
        model.save(model_path)

    # make out-of-sample prediction on unseen observations
    Ypred = model.predict(Xexog_scaled_test)
    print(Ypred)

    # return prediction and minimum validation loss
    return Ypred, np.min(history.history['val_loss'])


def Run_LSTM_Generic(X, Xexog, Y, no, params=None, refit=None, dumploc=None, archi = None, other_gen = None):
    """
    Entry point for the generic LSTM model.

    When ``refit`` is truthy, LSTM_Generic is tuned through
    NNGridSearchWrapper over ``params``; otherwise LSTM_Generic is called
    once with ``refit=False`` so that it reuses the stored model.

    ``archi`` holds the number of nodes of each layer and ``other_gen`` the
    generator settings; both are supplied by the caller and passed through.

    Returns the ``(Ypred, val_loss)`` pair produced by the chosen path.
    """
    # Keyword arguments common to both paths
    shared = dict(dumploc=dumploc, archi=archi, Xexog=Xexog, other_gen = other_gen)

    if not refit:
        # Use existing model
        Ypred, val_loss = LSTM_Generic(X, Y, no, refit=False, **shared)
        return Ypred, val_loss

    # Perform grid-search over params
    Ypred, val_loss = NNGridSearchWrapper(LSTM_Generic, X, Y, no,
                                          params=params, refit=True,
                                          **shared)
    return Ypred, val_loss


