In [None]:
# This notebook runs the MixNet algorithm on simulated data, using nested cross-validation

In [None]:
# import packages
import sys
sys.path.append('../lib/')
import WDL as wp
from datetime import datetime
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras.layers as layers
import matplotlib.pyplot as plt
from tensorflow.keras import initializers
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

In [None]:
# load data
id_setting = 4 # \omega = [0.1, 0.2, 0.5, 1, 2]
# The three CSV files of one simulation setting sit in the same folder,
# so build that folder path once and append the file names to it.
dir_setting = '../../../data/simulation/setting_' + str(id_setting)
X = pd.read_csv(dir_setting + '/dat_X.csv').to_numpy()
Y = pd.read_csv(dir_setting + '/dat_Y.csv').to_numpy()
# dat_CV.csv is flattened to a 1-D vector (used below as the fold label of each row).
loc_cv = pd.read_csv(dir_setting + '/dat_CV.csv').to_numpy().flatten()

In [None]:
# replicate data to create points
# Every entry of Y becomes one scalar observation. Row r of X and entry r of
# loc_cv are repeated n_pts times so they stay aligned with the row-major
# flattening of Y.
n_pts = Y.shape[1]
Y_full = Y.flatten()
X_full = X.repeat(n_pts, axis=0)
loc_full = loc_cv.repeat(n_pts)

In [None]:
# function definition
def get_mixture_coeff(output, KMIX):
    """Convert the raw network output into mixture parameters.

    `output` is split along axis 1 into three consecutive groups of `KMIX`
    columns. The first group is passed through a softmax, the second through
    an exponential, and the third is returned unchanged.

    Returns
    -------
    (out_pi, out_sigma, out_mu)
        Mixture weights (each row sums to one), positive scales, and locations.
    """
    logits, log_sigma, mu = tf.split(output, num_or_size_splits=[KMIX, KMIX, KMIX], axis=1)
    # Subtract the row-wise maximum first so that exp() cannot overflow.
    shifted = tf.subtract(logits, tf.reduce_max(logits, 1, keepdims=True))
    unnormalised = tf.exp(shifted)
    inv_total = tf.math.reciprocal(tf.reduce_sum(unnormalised, 1, keepdims=True))
    pi = tf.multiply(inv_total, unnormalised)
    sigma = tf.exp(log_sigma)
    return pi, sigma, mu

def tf_normal(y, mu, sigma):
    """Evaluate an unnormalised Gaussian kernel element-wise.

    Computes ``exp(-((y - mu) / sigma)**2 / 2) / sigma``. The constant factor
    ``1 / sqrt(2 * pi)`` of the normal density is not applied.
    """
    inv_sigma = tf.math.reciprocal(sigma)
    # Standardised residual (y - mu) / sigma.
    z = tf.multiply(tf.subtract(y, mu), inv_sigma)
    return tf.multiply(tf.exp(-tf.square(z)/2), inv_sigma)


def get_lossfunc(y, output, eps=1e-10):
    """Mean negative log of the mixture value built from `tf_normal` kernels.

    Parameters
    ----------
    y : targets, broadcast against the mixture locations.
    output : tuple ``(out_pi, out_sigma, out_mu)`` of mixture weights, scales
        and locations.
    eps : small constant added to the mixture value before taking the log.
        Without it a kernel that underflows to zero yields ``log(0)`` and an
        infinite loss; the default matches the constant used in `KLloss`.

    Returns
    -------
    Scalar: the mean over rows of ``-log(sum_k pi_k * kernel_k + eps)``.
    """
    out_pi, out_sigma, out_mu = output
    density = tf_normal(y, out_mu, out_sigma)
    # Weight each component kernel and sum over the components (axis 1).
    mixture = tf.reduce_sum(tf.multiply(density, out_pi), 1, keepdims=True)
    return tf.reduce_mean(-tf.math.log(mixture + eps))

def KLloss(y, output):
    """Mean negative log of the (unnormalised) Gaussian-mixture value at `y`.

    `output` is the triple ``(out_pi, out_sigma, out_mu)``. For every row the
    kernels ``exp(-z**2 / 2) / sigma`` with ``z = (y - mu) / sigma`` are
    weighted by ``out_pi`` and summed over axis 1; ``1e-10`` is added before
    the log so a vanishing mixture value cannot produce ``log(0)``.
    """
    mix_weight, mix_scale, mix_loc = output
    z = (y - mix_loc) / mix_scale
    kernel = tf.exp(-z**2/2) / mix_scale
    mixture = tf.reduce_sum(kernel*mix_weight, 1, keepdims=True)
    neg_log = -tf.math.log(mixture + 1e-10)
    return tf.reduce_mean(neg_log)
    
def MixNet(KMIX, n_feature):
    """Build the Keras model that maps features to mixture parameters.

    The network takes `n_feature` inputs, applies three ReLU dense layers
    (8, 16 and 8 units) and a linear dense layer with ``3*KMIX`` units whose
    output is converted by `get_mixture_coeff`.

    Returns a `tf.keras.Model` with outputs ``[out_pi, out_sigma, out_mu]``.
    """
    inputs = tf.keras.Input(shape=(n_feature,))
    hidden = inputs
    # Hidden stack: small random weights, zero biases. A new initializer
    # object is created for every layer.
    for n_units in (8, 16, 8):
        hidden = tf.keras.layers.Dense(
            n_units,
            activation=tf.nn.relu,
            kernel_initializer=initializers.RandomNormal(stddev=0.01),
            bias_initializer=initializers.Zeros(),
        )(hidden)
    # Linear head: weights and biases are both drawn with unit standard deviation.
    raw = tf.keras.layers.Dense(
        3*KMIX,
        kernel_initializer=initializers.RandomNormal(stddev=1),
        bias_initializer=initializers.RandomNormal(stddev=1),
    )(hidden)
    mixture_params = get_mixture_coeff(raw, KMIX)
    model = tf.keras.Model(inputs=inputs, outputs=list(mixture_params))
    return model

def train(X_train, Y_train, X_val, Y_val, lr=1e-1, K = 3,
          max_iter=300, early_stop=True, patience=5):
    """Fit a MixNet with full-batch RMSprop steps while tracking the losses.

    Parameters
    ----------
    X_train, Y_train : inputs and targets used for the gradient steps.
    X_val, Y_val : inputs and targets used only to monitor the loss.
    lr : learning rate of the RMSprop optimizer.
    K : number of mixture components passed to `MixNet`.
    max_iter : maximum number of gradient steps.
    early_stop : if True, stop once the validation loss has failed to improve
        on the previously recorded value `patience` times in a row.
    patience : number of consecutive non-improving steps tolerated
        (ignored when `early_stop` is False).

    Returns
    -------
    dict with keys
        'iter_best' : with early stopping, the index in 'val_loss' of the
            lowest non-NaN validation loss (0 if every entry is NaN);
            without early stopping, `max_iter`.
        'model' : the model after the last gradient step (it is not rolled
            back to 'iter_best').
        'train_loss', 'val_loss' : loss histories; entry 0 is the loss before
            training, entry t the loss after step t.
    """
    model = MixNet(K, X_train.shape[1])
    model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=lr), loss=KLloss)
    if not early_stop:
        patience = max_iter
    tol = 0 ## number of consecutive steps without improvement
    # verbose=0 keeps predict() from printing a progress bar on every call.
    loss_train_ = [KLloss(Y_train, model.predict(X_train, verbose=0)).numpy()]
    loss_val_ = [KLloss(Y_val, model.predict(X_val, verbose=0)).numpy()]
    ## start training
    for _ in range(max_iter):
        with tf.GradientTape() as tape:
            y_pred = model(X_train, training=True)
            loss = KLloss(Y_train, y_pred)
        trainable_vars = model.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        model.optimizer.apply_gradients(zip(gradients, trainable_vars))
        ## evaluate both sets after the update
        loss_train = KLloss(Y_train, model.predict(X_train, verbose=0)).numpy()
        loss_val = KLloss(Y_val, model.predict(X_val, verbose=0)).numpy()
        ## early stopping: compare with the previously recorded validation loss
        ## (a NaN loss never compares as smaller, so it counts as no improvement)
        if loss_val < loss_val_[-1]:
            tol = 0
        elif early_stop:
            tol += 1
        if tol >= patience:
            break
        loss_train_.append(loss_train)
        loss_val_.append(loss_val)

    if early_stop:
        # Pick the best recorded step whether or not the loop stopped early.
        # np.argmin would return the position of the first NaN, so use the
        # NaN-aware variant and fall back to step 0 when nothing is finite.
        val_hist = np.asarray(loss_val_, dtype=float)
        if np.all(np.isnan(val_hist)):
            iter_best = 0
        else:
            iter_best = int(np.nanargmin(val_hist))
    else:
        iter_best = max_iter

    ## return outputs
    outputs = {'iter_best': iter_best, 'model': model,
               'train_loss': loss_train_, 'val_loss': loss_val_}
    return outputs

In [None]:
# nested cross validation
n_levs = 100
n_fold = np.max(loc_full) + 1
q_vec = np.arange(1, n_levs) / n_levs
## transform Y
# Row i of Q_mat holds the empirical quantiles of Y[i] at the levels in q_vec.
Q_mat = np.array([np.quantile(Y[i], q_vec) for i in range(Y.shape[0])])
Q_test = np.zeros(Q_mat.shape)
K_list = [2, 3, 5]
lr_list = [1e-1, 1e-2]
n_iter = 300
## outer loop
time_start = datetime.now()
print('Start training:', time_start)
for i in range(n_fold):
    print('This is fold', str(i+1))
    X_train = X_full[loc_full != i]
    Y_train = Y_full[loc_full != i]
    X_test = X_full[loc_full == i]
    Y_test = Y_full[loc_full == i]
    
    ## inner parameter selection
    X_t_in, X_v_in, Y_t_in, Y_v_in = train_test_split(X_train, Y_train, test_size=0.25, random_state=2022)
    par_combo = [(K, lr) for K in K_list for lr in lr_list]
    loss_ = []
    iters_ = []
    for K_mix, lr in par_combo:
        print(K_mix, lr)
        # The loss compares the target with every mixture component, so the
        # target column is copied K_mix times.
        y_t_in = np.tile(np.reshape(Y_t_in, (-1, 1)), (1, K_mix))
        y_v_in = np.tile(np.reshape(Y_v_in, (-1, 1)), (1, K_mix))
        res_init = train(X_t_in, y_t_in, X_v_in, y_v_in, lr=lr, K = K_mix, 
                         max_iter=n_iter, early_stop=True, patience=10)
        iters_.append(res_init['iter_best'])
        loss_.append(res_init['val_loss'][res_init['iter_best']])
    ## choose the best params
    # np.argmin returns the position of the first NaN, which would select a
    # diverged configuration; ignore NaN losses unless every loss is NaN.
    loss_arr = np.array(loss_, dtype=float)
    if np.all(np.isnan(loss_arr)):
        print('Warning: all validation losses are NaN; using the first parameter setting')
        id_best = 0
    else:
        id_best = int(np.nanargmin(loss_arr))
    K_best, lr_best = par_combo[id_best]
    iter_best = iters_[id_best]
    print('Loss:', loss_)
    print('Best:', K_best, lr_best, iter_best)
    ## retrain the model over the training set
    y_train = np.tile(np.reshape(Y_train, (-1, 1)), (1, K_best))
    y_test = np.tile(np.reshape(Y_test, (-1, 1)), (1, K_best))
    # early_stop=False: run exactly iter_best steps; the test fold is only
    # passed so that its loss is recorded, it does not influence training.
    res = train(X_train, y_train, X_test, y_test, lr=lr_best, K = K_best, 
                max_iter=iter_best, early_stop=False, patience=10)
    # Predict once per held-out row of X (not per replicated point).
    X_dist_test = X[loc_cv == i]
    pi_test, sigma_test, mu_test = res['model'].predict(X_dist_test)
    # NOTE(review): wp.qgmm1d is defined in ../lib/WDL; presumably it returns the
    # quantiles of a 1-D Gaussian mixture at the levels q_vec - confirm.
    Q_test[loc_cv == i] = [wp.qgmm1d(q_vec, mu_test[j], sigma_test[j], pi_test[j]) for j in range(X_dist_test.shape[0])]
print('Done!')
print('Time:', datetime.now() - time_start)

In [None]:
# evaluate the results
# NOTE: despite its name, RMSE holds the mean squared error (no square root is
# taken); R_sq below relies on that, so the value must stay unrooted.
sq_err = (Q_mat - Q_test)**2
# Squared deviation of every quantile from its column mean over all rows.
sq_dev = (Q_mat - np.mean(Q_mat, axis=0))**2
RMSE = np.mean(sq_err)
var_y = np.mean(sq_dev)
R_sq = 1 - RMSE / var_y
print('Test loss:', RMSE)
print('Test R-squared:', R_sq)