# Training of Bishop's Mixture Density Network

## Prepare Training Data

In [None]:
# Initializations #
#-----------------#
## Count Number of Centers
# Number of mixture components: the requested number of quantizers, capped at
# (number of columns of Y_train) - 1.
N_GMM_clusters = int(np.minimum(N_Quantizers_to_parameterize,Y_train.shape[1]-1))
## Timer: Start
timer_GMM_data_preparation = time.time()

# Get Training Data #
#-------------------#
# One (1, N_GMM_clusters) row per training input is collected in these lists and
# stacked once after the loop (avoids re-copying the full target arrays with
# np.append on every iteration).
_means_rows = []
_sds_rows = []
_weights_rows = []
for i in tqdm(range(X_train.shape[0])):
    # Train GMM on the i-th row of Y_train, viewed as one-dimensional samples
    gmm_loop = GaussianMixture(n_components=N_GMM_clusters)
    gmm_loop.fit(Y_train[i,].reshape(-1,1))
    # Get Fit Parameter(s)
    # (read from the model fitted in THIS iteration, i.e. gmm_loop)
    means_GMM_loop = gmm_loop.means_.reshape(1,-1)
    # NOTE(review): covariances_ holds the fitted component covariances (variances for
    # these 1-D samples), not standard deviations - confirm whether a square root is
    # expected by the code that consumes the "sd" targets.
    sds_GMM_loop = gmm_loop.covariances_.reshape(1,-1)
    mixture_coefficients = gmm_loop.weights_.reshape(1,-1)

    # Update Targets #
    #----------------#
    _means_rows.append(means_GMM_loop)
    _sds_rows.append(sds_GMM_loop)
    _weights_rows.append(mixture_coefficients)

# Stack the per-sample rows: one row per training input, one column per component.
# (np.concatenate raises ValueError if X_train has no rows.)
Y_MDN_targets_train_mean = np.concatenate(_means_rows,axis=0)
Y_MDN_targets_train_sd = np.concatenate(_sds_rows,axis=0)
Y_MDN_targets_train_mixture_weights = np.concatenate(_weights_rows,axis=0)

# Timer: Stop
# (the variable now holds the elapsed wall-clock time in seconds)
timer_GMM_data_preparation = time.time() - timer_GMM_data_preparation

## Train Model

### Train Means Network

### Train SDs Network

#### Define Architecture and Network Builder

In [2]:
# Affine Readout post-composed with UAP-preserving readout map to G_d
class SD_output(tf.keras.layers.Layer):
    """Affine readout followed by an element-wise exponential.

    The layer computes ``exp(inputs @ w + b)``, so every output entry is
    strictly positive.

    Args:
        units: Number of output features (columns of ``w``, length of ``b``).
        input_dim: Unused; kept only so existing callers that pass it keep
            working. The input width is read from ``input_shape`` in ``build``.
        **kwargs: Standard Keras ``Layer`` keyword arguments (``name``,
            ``dtype``, ``trainable``, ...), forwarded to the base class.
    """

    def __init__(self, units=16, input_dim=32, **kwargs):
        # Forward base-layer options so the layer can be named and rebuilt from its config.
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        """Create the weight matrix and bias once the input width is known."""
        self.w = self.add_weight(name='Weights_ffNN',
                                 shape=(input_shape[-1], self.units),
                                 initializer='random_normal',
                                 trainable=True)
        self.b = self.add_weight(name='bias_ffNN',
                                 shape=(self.units,),
                                 initializer='random_normal',
                                 trainable=True)

    def call(self, inputs):
        """Return ``exp(inputs @ w + b)``."""
        parameters = tf.matmul(inputs, self.w) + self.b
        # The exponential maps the unconstrained affine output to positive values.
        sd_out = tf.math.exp(parameters)
        return sd_out

    def get_config(self):
        """Return the constructor arguments needed to re-create this layer."""
        config = super().get_config()
        config.update({'units': self.units})
        return config

NameError: name 'tf' is not defined

In [None]:
def get_MDN_Means_SubNetwork(height, depth, learning_rate, input_dim, output_dim):
    """Build and compile a feed-forward network with an ``SD_output`` readout.

    The network is a stack of ``fullyConnected_Dense(height)`` layers, each
    followed by a swish activation, and finished by ``SD_output(output_dim)``.

    Args:
        height: Width passed to every ``fullyConnected_Dense`` hidden layer.
        depth: Number of hidden layers; any value not greater than 1 yields
            a single hidden layer.
        learning_rate: Learning rate handed to the Adam optimizer.
        input_dim: Number of input features.
        output_dim: Number of units of the ``SD_output`` readout layer.

    Returns:
        The compiled ``tf.keras.Model`` (loss ``"mae"``; metrics
        ``"mse"``, ``"mae"``, ``"mape"``).
    """
    #------------------#
    #   Input Layer    #
    #------------------#
    # Initialize Inputs
    input_layer = tf.keras.Input(shape=(input_dim,))

    #------------------#
    #   Core Layers    #
    #------------------#
    # At least one hidden layer is always built; depth > 1 adds the remaining ones.
    n_hidden_layers = depth if depth > 1 else 1
    core_layers = input_layer
    for _ in range(n_hidden_layers):
        core_layers = fullyConnected_Dense(height)(core_layers)
        # Activation
        core_layers = tf.nn.swish(core_layers)

    #------------------#
    #  Readout Layers  #
    #------------------#
    # Affine readout followed by an exponential (see SD_output)
    output_layers = SD_output(output_dim)(core_layers)
    # Define Input/Output Relationship (Arch.)
    trainable_layers_model = tf.keras.Model(input_layer, output_layers)

    #----------------------------------#
    # Define Optimizer & Compile Archs.
    #----------------------------------#
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    opt = Adam(learning_rate=learning_rate)
    trainable_layers_model.compile(optimizer=opt, loss="mae", metrics=["mse", "mae", "mape"])

    return trainable_layers_model



def build_MDN_Means_SubNetwork(n_folds , n_jobs, n_iter, param_grid_in, X_train, y_train,X_test):
    """Tune, fit and evaluate the sub-network by randomized K-fold search.

    Args:
        n_folds: Number of folds of the shuffled ``KFold`` splitter.
        n_jobs: Number of parallel jobs used by ``RandomizedSearchCV``.
        n_iter: Number of parameter settings sampled by the search.
        param_grid_in: Dict of parameter distributions for the search. It must
            supply every argument of ``get_MDN_Means_SubNetwork`` other than
            ``input_dim``, which is filled in here from ``X_train``. The dict
            itself is not modified.
        X_train: Training inputs; ``X_train.shape[1]`` is used as ``input_dim``.
        y_train: Training targets.
        X_test: Test inputs.

    Returns:
        Tuple ``(y_hat_train, y_hat_test, N_params_best_ffNN, eval_time_ffNN)``:
        predictions on the training and test inputs, the number of trainable
        parameters of the best model, and the wall-clock time in seconds spent
        predicting on ``X_test``.
    """
    # Update Dictionary
    # Work on a shallow copy so the caller's grid is not altered as a side effect.
    param_grid_in_internal = dict(param_grid_in)
    param_grid_in_internal['input_dim'] = [(X_train.shape[1])]

    # Deep Feature Network
    ffNN_CV = tf.keras.wrappers.scikit_learn.KerasRegressor(build_fn=get_MDN_Means_SubNetwork,
                                                            verbose=True)

    # Randomized CV
    ffNN_CVer = RandomizedSearchCV(estimator=ffNN_CV,
                                    n_jobs=n_jobs,
                                    cv=KFold(n_folds, random_state=2020, shuffle=True),
                                    param_distributions=param_grid_in_internal,
                                    n_iter=n_iter,
                                    return_train_score=True,
                                    random_state=2020,
                                    verbose=10)

    # Fit Model #
    #-----------#
    ffNN_CVer.fit(X_train,y_train)

    # Write Predictions #
    #-------------------#
    y_hat_train = ffNN_CVer.predict(X_train)

    # Only the test-set prediction is timed
    eval_time_ffNN = time.time()
    y_hat_test = ffNN_CVer.predict(X_test)
    eval_time_ffNN = time.time() - eval_time_ffNN

    # Count number of parameters #
    #----------------------------#
    # Extract Best Model
    best_model = ffNN_CVer.best_estimator_
    # Count Number of Parameters: sum of the element counts of all trainable variables
    N_params_best_ffNN = np.sum([np.prod(v.get_shape().as_list()) for v in best_model.model.trainable_variables])

    # Return Values #
    #---------------#
    return y_hat_train, y_hat_test, N_params_best_ffNN, eval_time_ffNN

# Update User
#-------------#
# Printed once this cell has run, i.e. after the builder functions have been defined.
print('Deep Feature Builder - Ready')

### Train Mixture Coefficients Network

# Get Performance Metric(s)

## Predictive Performance Metrics

## Model Complexity Metrics