In [3]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # avoid tensorflow warning
import tensorflow as tf
import numpy as np
import re
import sys

# Locate the repository root (main_dir) and make scripts/GPA_NN the working
# directory, whether the kernel was started in <repo>/notebooks or in
# <repo>/scripts/GPA_NN (e.g. when this cell is re-run after the chdir).
_cwd = os.getcwd()
if os.path.basename(_cwd) == "notebooks":
    # Match on the last path component only: a substring test would also
    # trigger for unrelated ancestor directories containing "notebooks".
    main_dir = os.path.dirname(_cwd)
    os.chdir("../scripts/GPA_NN")
elif _cwd.replace(os.sep, "/").endswith("/scripts/GPA_NN"):
    main_dir = _cwd[:-len("/scripts/GPA_NN")]
else:
    # Fail here with a clear message instead of a NameError on main_dir below.
    raise RuntimeError(
        "Run this notebook from <repo>/notebooks or <repo>/scripts/GPA_NN "
        "(current directory: %s)" % _cwd)
# Avoid stacking duplicate entries when the cell is executed more than once.
if main_dir not in sys.path:
    sys.path.append(main_dir)

# input parameters --------------------------------------
import argparse

# Both options are plain strings; together they select the YAML config file.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--dataset',
    type=str,
    default='Learning_gaussian',
)
parser.add_argument(
    '--generative_model',
    type=str,
    default="GPA_NN",
)

# parse_known_args tolerates the extra arguments a Jupyter kernel is launched
# with; the list of unrecognised arguments is discarded.
p, _ = parser.parse_known_args()

import yaml
from yaml.loader import SafeLoader

# Config file name is "<dataset>-<generative_model>.yaml"; the path is
# relative to the current working directory.
yaml_file = f"../../configs/{p.dataset}-{p.generative_model}.yaml"

# safe_load parses with SafeLoader, i.e. plain YAML types only.
with open(yaml_file, 'r') as f:
    param = yaml.safe_load(f)
    #print(param)
    
# Command-line values take precedence over the YAML configuration; options
# that are None are skipped so they do not erase a configured value.
updated_param = vars(p)
for param_key, param_val in updated_param.items():
    if param_val is None:
        continue
    param[param_key] = param_val

# Experiment type tag: "<f>=<alpha>-<Gamma>" when alpha is set, else "<f>-<Gamma>".
# NOTE(review): this is a truthiness test, so alpha == 0 is treated the same
# as "no alpha" — confirm that is intended.
if param['alpha']:
    par = [param['alpha']]
    param['exptype'] = '%s=%05.2f-%s' % (param['f'], param['alpha'], param['Gamma'])
else:
    par = []
    param['exptype'] = '%s-%s' % (param['f'], param['Gamma'])

# Experiment name appends L, written as 'inf' when L is not set.
if param['L'] is None:
    param['expname'] = '%s_%s' % (param['exptype'], 'inf')
else:
    param['expname'] = '%s_%.4f' % (param['exptype'], param['L'])

In [4]:
# Data generation ----------------------------------------
from scripts.util.generate_data import generate_data

# generate_data hands back the parameter dict together with the two sample
# sets and their labels.
param, X_, Y_, X_label, Y_label = generate_data(param)

# Q stays fixed (constant tensor); P is a tf.Variable because the particles
# are updated during training.
Q = tf.constant(X_, dtype=tf.float32)
P = tf.Variable(Y_, dtype=tf.float32)

# No labels are passed on to the training routines in this notebook.
Q_label = None
P_label = None

# Label and batch/sample-size settings bundled for the training routines.
data_par = {
    'P_label': P_label,
    'Q_label': Q_label,
    **{key: param[key]
       for key in ('mb_size_P', 'mb_size_Q', 'N_samples_P', 'N_samples_Q')},
}

print("Data prepared.")

#from scripts.util.plot_result import plot_initial_data
#plot_initial_data(X_ = X_, Y_ = Y_, proj_axes = [0,1], x_lim = [None, None], y_lim = [None, None], show = True)

Metal device set to: Apple M2
Data prepared.


In [5]:
# Discriminator learning  -----------------------------------------
# Discriminator construction using Neural Network
from scripts.GPA_NN.lib.construct_NN import check_nn_topology, initialize_NN, model

# check_nn_topology returns the layer settings and activation to use; the
# activation is written back into param.
N_fnn_layers, N_cnn_layers, param['activation_ftn'] = check_nn_topology(
    param['NN_model'],
    param['N_fnn_layers'],
    param['N_cnn_layers'],
    param['N_dim'],
    param['activation_ftn'],
)

# Network description shared by initialize_NN, model and the training routines.
NN_par = dict(
    NN_model=param['NN_model'],
    activation_ftn=param['activation_ftn'],
    N_dim=param['N_dim'],
    N_cnn_layers=N_cnn_layers,
    N_fnn_layers=N_fnn_layers,
    N_conditions=param['N_conditions'],
    constraint=param['constraint'],
    L=param['L'],
    eps=param['eps'],
)

W, b = initialize_NN(NN_par)
phi = model(NN_par)  # discriminator

# scalar optimal value optimization for f-divergence
nu = tf.Variable(0.0, dtype=tf.float32)

# Learnable parameters for the discriminator phi
parameters = dict(W=W, b=b, nu=nu)

# Train setting
from scripts.GPA_NN.lib.train_NN import train_disc

# lr for training a discriminator function (non-trainable variable)
lr_phi = tf.Variable(param['lr_phi'], trainable=False)

# (Discriminator) Loss ----------------------------------------------
loss_par = dict(
    f=param['f'],
    formulation=param['formulation'],
    par=par,
    reverse=param['reverse'],
    lamda=param['lamda'],
)


In [6]:
# Transporting particles --------------------------------------------
# ODE solver setting
# Imported through the fully qualified package path, like the other
# scripts.GPA_NN.lib modules in this notebook, so the import does not depend
# on the current working directory being on sys.path.
from scripts.GPA_NN.lib.transport_particles import calc_vectorfield, solve_ode

# History of vector fields; the training loop appends to it and passes it to solve_ode.
dPs = []

# The solvers listed here get no auxiliary parameters. Any other solver
# receives everything needed to train the discriminator again
# (presumably for schemes that re-evaluate the vector field — confirm in solve_ode).
if param['ode_solver'] in ['forward_euler', 'AB2', 'AB3', 'AB4', 'AB5']:
    aux_params = []
else:
    aux_params = {
        'parameters': parameters,
        'phi': phi,
        'Q': Q,
        'lr_phi': lr_phi,
        'epochs_phi': param['epochs_phi'],
        'loss_par': loss_par,
        'NN_par': NN_par,
        'data_par': data_par,
        'optimizer': param['optimizer'],
    }

# Train setting
lr_P_init = param['lr_P'] # Assume that deltat = deltat(t)
lr_P = tf.Variable(lr_P_init, trainable=False)
lr_Ps = []  # step size recorded at every iteration of the training loop


In [7]:
# Evaluating Wasserstein-1 metric ----------------------------------
# A second discriminator with the same architecture settings but L fixed to
# 1.0 is built only when the Wasserstein-1 evaluation is requested.
if param['calc_Wasserstein1'] == True:
    NN_par2 = {'NN_model':param['NN_model'], 'activation_ftn':param['activation_ftn'], 
               'N_dim': param['N_dim'], 'N_cnn_layers':N_cnn_layers, 'N_fnn_layers':N_fnn_layers, 
               'N_conditions': param['N_conditions'], 'constraint': param['constraint'], 
               'L': 1.0, 'eps': param['eps']}

    W2, b2 = initialize_NN(NN_par2)
    phi2 = model(NN_par2)  # discriminator for Wasserstein 1 metric
    parameters2 = {'W':W2, 'b':b2} # Learnable parameters for the discriminator phi2
    
    # Train setting
    # Use the same fully qualified module path as the train_disc import so
    # train_NN is not loaded a second time under a different module name.
    from scripts.GPA_NN.lib.train_NN import train_wasserstein1

In [8]:
# Save & plot settings -----------------------------------------------
# Metrics to calculate
from scripts.util.evaluate_metric import calc_ke, calc_grad_phi
if np.prod(param['N_dim']) <= 12:
    from scripts.util.evaluate_metric import calc_sinkhorn

# Histories filled in by the training loop.
trajectories = []
vectorfields = []
divergences = []
wasserstein1s = []
KE_Ps = []
FIDs = []

# saving/plotting parameters
# A save interval that is not shorter than the run is reset to 1.
if param['save_iter'] >= param['epochs']:
    param['save_iter'] = 1

# The training loop calls plot_result when plot_intermediate_result is set,
# so the import must not depend on plot_result alone.
if param['plot_result'] == True or param['plot_intermediate_result'] == True:
    from scripts.util.plot_result import plot_result

# exist_ok avoids the separate existence check.
assets_dir = main_dir + '/assets/' + param['dataset']
os.makedirs(assets_dir, exist_ok=True)

# Append the sample-size / seed / experiment-number suffix only once, so
# re-running this cell does not keep extending the experiment name.
expname_suffix = '_%04d_%04d_%02d_%s' % (param['N_samples_Q'], param['N_samples_P'],
                                         param['random_seed'], param['exp_no'])
if not param['expname'].endswith(expname_suffix):
    param['expname'] = param['expname'] + expname_suffix
filename = assets_dir + '/%s.pickle' % (param['expname'])

# Dataset-specific parameter handed to the intermediate plots (None when the
# dataset has none or intermediate plotting is disabled).
r_param = None
if param['plot_intermediate_result'] == True:
    if 'gaussian' in param['dataset'] and 'Extension' not in param['dataset']:
        r_param = param['sigma_Q']
    elif 'student_t' in param['dataset']:
        r_param = param['nu']
    elif param['dataset'] == 'Extension_of_gaussian':
        r_param = param['a']
    else:
        r_param = None
    

In [9]:
# Train ---------------------------------------------------------------
import matplotlib.pyplot as plt
import time 

# Total data dimension; decides which per-iteration outputs are recorded.
n_dim_total = np.prod(param['N_dim'])

# calc_fid is called for image-sized data below but was never imported.
# NOTE(review): assumed to live next to calc_ke / calc_grad_phi — confirm.
if n_dim_total >= 784 and 'calc_fid' not in globals():
    from scripts.util.evaluate_metric import calc_fid

# Progress is reported ten times per run. An integer interval is used because
# a float modulo (epochs / 10) can miss iterations through rounding whenever
# epochs is not a multiple of 10.
report_every = max(1, param['epochs'] // 10)

t0 = time.time()

for it in range(1, param['epochs']+1): # Loop for updating particles P
    # Train the discriminator phi for the current particles P.
    parameters, current_loss, dW_norm = train_disc(parameters, phi, P, Q, lr_phi, param['epochs_phi'],
                                                   loss_par, NN_par, data_par, param['optimizer'], 
                                                   print_vals=True)
    
    if param['calc_Wasserstein1'] == True:
        parameters2, current_wass1, _ = train_wasserstein1(parameters2, phi2, P, Q, lr_phi, 
                                                           param['epochs_phi'], NN_par2, data_par, 
                                                           param['optimizer'], print_vals=True)
    
    # Vector field from the trained discriminator, then one ODE step for P.
    dPs.append( calc_vectorfield(phi, P, parameters, NN_par, loss_par, data_par) )
    P, dPs, dP = solve_ode(P, lr_P, dPs, param['ode_solver'], aux_params) # update P

    lr_Ps.append(lr_P.numpy())
    
    # save results
    divergences.append(current_loss)
    KE_P = calc_ke(dP, param['N_samples_P'])
    KE_Ps.append(KE_P)
    grad_phi = calc_grad_phi(dP)
    if param['calc_Wasserstein1'] == True:
        wasserstein1s.append(current_wass1)
    
    # Snapshots: every iteration for short runs, otherwise every save_iter.
    if param['epochs']<=100 or it%param['save_iter'] == 0:
        trajectories.append(P.numpy())
        if n_dim_total < 500:
            vectorfields.append(dP.numpy())
        elif n_dim_total >= 784:  # image data
            FIDs.append( calc_fid(pred=P.numpy(), real=Q.numpy()) )
    
    
    if it % report_every == 0:
        display_msg = 'iter %6d: loss = %.10f, norm of dW = %.2f, ' % (it, current_loss, dW_norm)
        display_msg = display_msg + 'kinetic energy of P = %.10f, ' % KE_P
        display_msg = display_msg + 'average learning rate for P = %.6f' % tf.math.reduce_mean(lr_P).numpy()
        if len(FIDs) > 0 :
            display_msg = display_msg + ', FID = %.3f' % FIDs[-1]   
        print(display_msg)
        print("grad", grad_phi)
        
        if param['plot_intermediate_result'] == True:
            data = {'trajectories': trajectories, 'divergences': divergences, 'wasserstein1s':wasserstein1s, 
                    'KE_Ps': KE_Ps, 'FIDs':FIDs, 'X_':X_, 'Y_':Y_, 'X_label':X_label, 'Y_label':Y_label,
                    'dt': lr_Ps, 'dataset': param['dataset'], 'r_param': r_param, 'vectorfields': vectorfields, 
                    'save_iter':param['save_iter']}
            # The discriminator is passed along only when N_dim equals 2.
            if param['N_dim'] ==2:
                data.update({'phi': phi, 'W':W, 'b':b, 'NN_par':NN_par})
            plot_result(filename, intermediate=True, epochs = it, iter_nos = None, data = data, show=False)
            

total_time = time.time() - t0
print(f'total time {total_time:.3f}s')


iter     50: loss = -0.0000367761, norm of dW = 0.00, kinetic energy of P = 0.0000027737, average learning rate for P = 1.000000
grad 0.0021760582
iter    100: loss = 0.0000112057, norm of dW = 0.00, kinetic energy of P = 0.0000138084, average learning rate for P = 1.000000
grad 0.0052533494
iter    150: loss = 0.0005307198, norm of dW = 0.00, kinetic energy of P = 0.0000257846, average learning rate for P = 1.000000
grad 0.00671273
iter    200: loss = -0.0001094341, norm of dW = 0.01, kinetic energy of P = 0.0001318022, average learning rate for P = 1.000000
grad 0.010807219
iter    250: loss = -0.0001478195, norm of dW = 0.00, kinetic energy of P = 0.0000672681, average learning rate for P = 1.000000
grad 0.011518562
iter    300: loss = 0.0000367165, norm of dW = 0.00, kinetic energy of P = 0.0000173353, average learning rate for P = 1.000000
grad 0.0054884893
iter    350: loss = 0.0000181198, norm of dW = 0.00, kinetic energy of P = 0.0000034126, average learning rate for P = 1.0000

In [10]:
# Save result ------------------------------------------------------
import pickle


def _pad_with_zero_column(arr):
    """Append a zero column to a single-column 2-D array.

    Arrays of any other shape are returned unchanged, so re-running this
    cell does not pad the same data again.
    """
    if len(arr.shape) == 2 and arr.shape[1] == 1:
        return np.concatenate((arr, np.zeros(shape=arr.shape)), axis=1)
    return arr


# 1-D data is stored with an extra zero column.
if param['N_dim'] == 1:
    X_ = _pad_with_zero_column(X_)
    Y_ = _pad_with_zero_column(Y_)

    trajectories = [_pad_with_zero_column(x) for x in trajectories]
    vectorfields = [_pad_with_zero_column(x) for x in vectorfields]

# An unset L is saved as the string 'inf', as in the experiment name.
if param['L'] is None:
    param['L'] = 'inf'
param.update({'X_': X_, 'Y_': Y_, 'lr_Ps':lr_Ps,})
result = {
    'trajectories': trajectories,
    'vectorfields': vectorfields,
    'divergences': divergences,
    'KE_Ps': KE_Ps,
    'FIDs': FIDs,
    'wasserstein1s': wasserstein1s,
}

# Save trained data
with open(filename,"wb") as fw:
    pickle.dump([param, result] , fw)
print("Results saved at:", filename)


Results saved at: /Users/hyemin/Documents/source_code/Lipschitz-regularized-GPA-github/assets/Learning_gaussian/KL-Lipschitz_1.0000_1.00_0200_0200_00_a_trial.pickle


In [None]:
# Plot final result ------------------------------------------------------
from scripts.util.plot_result import plot_result