In [1]:
from time import perf_counter 
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as sopt
import tensorflow as tf

In [2]:
# Code to use the scipy optimizers

# use float64 by default
tf.keras.backend.set_floatx("float64")

# Reshape 1D arrays to 2D arrays
def reshape_2d(x):
    """Reshape `x` into a single column of shape (x.shape[0], 1)."""
    n_rows = x.shape[0]
    column_shape = (n_rows, 1)
    return tf.reshape(x, column_shape)

def function_factory(model, x_train, y_train, validation_data=None,
                     iprint=-1):
    """A factory to create the objective function required by scipy.optimize.

    The returned function takes the model's trainable parameters flattened
    into one 1D array, writes them into `model`, and returns the total loss
    (prediction loss + regularization losses) together with its gradient
    flattened in the same order.

    Args:
        model [in]: an instance of `tf.keras.Model` or its subclasses. Its
            `loss` attribute is called as `model.loss(y_true, y_pred)`, so it
            must be a callable.
        x_train [in]: input for training data.
        y_train [in]: output for training data.
        validation_data [in]: tuple (x_val, y_val) with validation data, or
            None to skip the validation loss.
        iprint [in]: sets the frequency with which the loss info is printed
            out. A line is printed every `iprint` loss evaluations; nothing
            is printed when `iprint` is zero or negative.
    Returns:
        A function that has a signature of:
            loss_value, gradients = f(model_parameters)
        The function also carries the attributes `iter`, `idx`, `part`,
        `shapes`, `assign_new_model_parameters`, `hist_loss` and
        `hist_loss_val`.
    """

    # obtain the shapes of all trainable parameters in the model
    shapes = tf.shape_n(model.trainable_variables)
    n_tensors = len(shapes)

    # we'll use tf.dynamic_stitch and tf.dynamic_partition later, so we need to
    # prepare required information first
    count = 0
    idx = []  # stitch indices
    part = []  # partition indices

    for i, tensor in enumerate(model.trainable_variables):
        # np.product was removed in NumPy 2.0, np.prod is the supported name.
        # int() is needed because np.prod of an empty shape (scalar variable)
        # is the float 1.0, which cannot be used to repeat a list.
        n = int(np.prod(tensor.shape))
        idx.append(tf.reshape(
            tf.range(count, count+n, dtype=tf.int32), tensor.shape))
        part.extend([i]*n)
        count += n

    part = tf.constant(part)

    @tf.function
    def assign_new_model_parameters(params_1d):
        """A function updating the model's parameters with a 1D tf.Tensor.
        Args:
            params_1d [in]: a 1D tf.Tensor representing the model's trainable parameters.
        """
        params = tf.dynamic_partition(params_1d, part, n_tensors)
        for i, (shape, param) in enumerate(zip(shapes, params)):
            model.trainable_variables[i].assign(tf.reshape(param, shape))

    # function to calculate loss value and gradient
    @tf.function
    def tf_tape_grad(params_1d):
        """Set the model parameters and return (total loss, flat gradient)."""

        # update the parameters in the model
        assign_new_model_parameters(params_1d)

        if validation_data is not None:
            # compute validation loss
            loss_value = model.loss(validation_data[1], model(validation_data[0]))
            # store validation value so we can retrieve later
            tf.py_function(value_and_grad.hist_loss_val.append,
                           inp=[loss_value], Tout=[])

        # use GradientTape so that we can calculate the gradient of loss w.r.t. parameters
        with tf.GradientTape() as tape:
            tape.watch(model.trainable_variables)
            # run the model
            y_model = model(x_train, training=True)
            # loss value
            pred_loss = model.loss(y_train, y_model)
            # regularization loss
            regularization_loss = tf.cast(tf.reduce_sum(model.losses), tf.float64)
            # total loss
            total_value = pred_loss + regularization_loss

        # calculate gradients and convert to 1D tf.Tensor
        grads = tape.gradient(total_value, model.trainable_variables)
        grads = tf.dynamic_stitch(idx, grads)

        # increment the counter of loss evaluations
        value_and_grad.iter.assign_add(1)

        # print out iteration & loss; the guard is strictly positive so that
        # iprint == 0 never reaches the modulo (integer division by zero)
        if (iprint > 0 and value_and_grad.iter % iprint == 0):
            tf.print("Loss function eval:", value_and_grad.iter,
                     "loss:", pred_loss,
                     "loss_reg:", regularization_loss,
                     "total:", total_value)

        # store loss value so we can retrieve later
        tf.py_function(value_and_grad.hist_loss.append,
                       inp=[total_value], Tout=[])

        return total_value, grads

    # create function that will be returned by this factory
    def value_and_grad(params_1d):
        """A function that can be used by optimizer.
        This function is created by function_factory.
        Args:
           params_1d [in]: a 1D array-like with the flattened parameters.
        Returns:
            A list [loss, gradients] of float64 NumPy values, the gradients
            being taken w.r.t. `params_1d`.
        """
        return [vv.numpy().astype(np.float64) for vv in tf_tape_grad(tf.constant(params_1d, dtype=tf.float64))]

    # store this information as members so we can use it outside the scope
    value_and_grad.iter = tf.Variable(0)
    value_and_grad.idx = idx
    value_and_grad.part = part
    value_and_grad.shapes = shapes
    value_and_grad.assign_new_model_parameters = assign_new_model_parameters
    value_and_grad.hist_loss = []
    value_and_grad.hist_loss_val = []

    return value_and_grad


def model_fit(model, x_t, y_t, validation_data=None, epochs=1000, iprint=-1,
              figname=None):
    """
    Fit a DNN model using the scipy L-BFGS-B optimizer.

    The optimized parameters are written back into `model`, convergence
    information is printed, and the loss history is plotted.

    Parameters:
    model: tensorflow DNN model
    x_t: input training data
    y_t: output training data
    validation_data: tuple (x_val,y_val) with validation data
    epochs: maximum number of iterations in optimizer
    iprint: frequency for printing the loss function information.
            Do not print anything if negative. Otherwise, print
            a line every iprint iteration.
    figname [str]: file name to save the figure of the training loss

    Returns:
    The result object returned by scipy.optimize.minimize.
    """

    value_and_grad = function_factory(model, x_t, y_t, validation_data,
                                      iprint)

    # convert initial model parameters to a 1D tf.Tensor
    init_params = tf.dynamic_stitch(
        value_and_grad.idx, model.trainable_variables)

    if (iprint>=0):
        print()

    # train the model; jac=True because value_and_grad returns both the loss
    # and its gradient
    method = 'L-BFGS-B'
    results = sopt.minimize(fun=value_and_grad, x0=init_params,
                            jac=True, method=method,
                            options={'maxiter': epochs})

    print("\nConvergence information:")
    print('loss:', results.fun)
    print('number function evaluations:', results.nfev)
    print('number iterations:', results.nit)
    print('success flag:', results.success)
    print('convergence message:', results.message)

    # leave the model holding the parameters found by the optimizer
    value_and_grad.assign_new_model_parameters(results.x)

    # Plot history of loss (one point per loss evaluation)
    plt.figure()
    plt.plot(value_and_grad.hist_loss, label='loss')
    if validation_data is not None:
        plt.plot(value_and_grad.hist_loss_val, label='validation')
    plt.legend()
    plt.xlabel('epoch')
    plt.yscale('log')
    # decide the title from the argument, not from a notebook-level global
    if validation_data is None:
        plt.title('Training loss')
    else:
        plt.title('Training and validation losses')

    if figname is not None:
        plt.savefig(figname,dpi=300)

    return results

In [3]:
# Function we are trying to learn
def f(x, u):
    """Target function: exp(-(x.u)^2), evaluated for the product x.dot(u)."""
    projection = x.dot(u)
    squared = projection ** 2
    return np.exp(-squared)

In [4]:
# Number of dimensions of each input sample
d = 300
# Load the u vector from "u.npy" (read from the current working directory)
# and normalize it in place by its norm along axis 0.
# NOTE(review): later cells use np.transpose(u) and np.squeeze(u), so u is
# presumably stored with shape (d, 1) -- TODO confirm against the file.
u = np.load("u.npy")
u /= np.linalg.norm(u, axis=0)

# Code to generate the training and regular validation sets. DO NOT modify this cell.
# No random seed is set in this cell, so the samples differ between runs.

# Number of training and validation samples
N_train = 10000
N_val = 1000

# Training set: N_train rows of d standard-normal values, with targets f(x, u)
x_train = np.asarray([np.random.normal(size=d) for _ in range(N_train)])
y_train = f(x_train,u)

# Validation set: N_val rows drawn the same way, with targets f(x, u)
x_val = np.asarray([np.random.normal(size=d) for _ in range(N_val)])
y_val = f(x_val,u)

# Q1

In [5]:
# Build your model and train it using the function model_fit() provided above 

Written answers for Q1

# Q2

Written answers for Q2

# Q3

Written answers for Q3

In [6]:
# Code to generate data along different lines parallel and orthogonal to u

n_lines = 4
n_sample_plot = 128

# dir_otg holds one random direction per column. Each column has its component
# along u removed, x <- x - (u.x) * u, and is then rescaled to unit length.
dir_otg = np.random.rand(d, n_lines)
dir_otg -= u * np.squeeze(u.T.dot(dir_otg))
dir_otg /= np.linalg.norm(dir_otg, axis=0)

# One shift (bias) per line, drawn uniformly from [-1, 1)
bias = 2 * np.random.uniform(size=n_lines) - 1

def uniform_distribution(j,n,scale):
    """Map index j in 0..n onto an evenly spaced grid over [-scale, scale].

    j = 0 gives -scale and j = n gives +scale; the result is deterministic
    (an evenly spaced grid position, not a random draw).
    """
    fraction = 2 * j / n
    centered = fraction - 1
    return centered * scale

# Build two families of lines, each with n_sample_plot points per line:
#   x_val_otg: the points vary along a direction orthogonal to u, shifted by bias[i] along u
#   x_val_prl: the points vary along u, shifted by bias[i] along the orthogonal direction
x_val_otg = np.zeros( (n_lines, n_sample_plot, d) )
x_val_prl = np.zeros( (n_lines, n_sample_plot, d) )
for i, j in np.ndindex(n_lines, n_sample_plot):
    x_val_otg[i,j,:] = bias[i] * np.squeeze(u) + uniform_distribution(j,n_sample_plot,1) * dir_otg[:,i]
    x_val_prl[i,j,:] = bias[i] * dir_otg[:,i] + uniform_distribution(j,n_sample_plot,1) * np.squeeze(u)

In [7]:
# Plot the predicted and ground-truth values for all lines
# color = ['r','b','g','k']
# x_index = np.linspace(-1,1,n_sample_plot)
# y_pred = model.predict(x_val_otg[i,:,:])
# plt.plot(x_index, y_pred, color[i])
# y_true = f(x_val_otg[i,:,:], u)
# plt.plot(x_index, y_true, color[i] + '--')

# Q4

In [8]:
# Plot the predicted and ground-truth values for all lines

Written answers for Q4

# Q5

In [9]:
# Repeat the experiment of Q4 with a new choice for x_val_prl[i,j,:]

Written answers for Q5