# In [4]:

# TensorFlow 1.x compatibility
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import numpy as np
import os
import matplotlib.pyplot as plt
from tensorflow.keras import regularizers 
import pandas as pd
from sklearn.preprocessing import StandardScaler

# --- FX/EQ data preparation -------------------------------------------------
# Read the raw table, remove the "Date" column when it is present, and discard
# every row that contains a missing value.
raw_frame = pd.read_csv("raw (FX + EQ).csv")
df = raw_frame.drop(columns=["Date"], errors="ignore").dropna()
data = df.values

# Fit a StandardScaler on the full table and transform it column-wise.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(data)

# Cut the scaled series into overlapping windows of `window_size` rows.
# Start indices cover range(len(scaled_data) - window_size).
# NOTE(review): that range leaves out the window ending on the final row --
# confirm whether this is intentional.
window_size = 30
num_series = data.shape[1]
X = np.array([
    scaled_data[start:start + window_size]
    for start in range(len(scaled_data) - window_size)
])
# Append a singleton channel axis: (num_windows, window_size, num_series, 1).
X = X.reshape(-1, window_size, num_series, 1)

# Persist the windows to disk for FlowGAN.
os.makedirs("data", exist_ok=True)
np.save("data/fxeq_train.npy", X)

def construct_model_spec(scale_init=2, no_of_layers=8, add_scaling=True):
  """Append the multi-scale coupling-layer stack to the global `layers` list.

  For every scale except the last, the stack is: three checkerboard coupling
  layers, a squeeze, three channel coupling layers and a factor-out layer.
  The last scale uses four checkerboard coupling layers followed by a
  factor-out layer.

  Args:
    scale_init: number of scales to build.
    no_of_layers: forwarded as `num_residual_blocks` to each CouplingLayer.
    add_scaling: forwarded as `scaling` to each CouplingLayer.
  """
  global layers

  def add_couplings(mask_types, label, scale):
    # One CouplingLayer per mask, named '<label><scale>_<1-based position>'.
    for position, mask_type in enumerate(mask_types, start=1):
      layers.append(CouplingLayer(
          mask_type, name='%s%d_%d' % (label, scale, position),
          num_residual_blocks=no_of_layers, scaling=add_scaling))

  last_scale = scale_init - 1
  for scale in range(last_scale):
    add_couplings(('checkerboard0', 'checkerboard1', 'checkerboard0'),
                  'Checkerboard', scale)
    layers.append(SqueezingLayer(name='Squeeze%d' % scale))
    add_couplings(('channel0', 'channel1', 'channel0'), 'Channel', scale)
    layers.append(FactorOutLayer(scale, name='FactorOut%d' % scale))

  # final scale: checkerboard couplings only, then factor out
  add_couplings(
      ('checkerboard0', 'checkerboard1', 'checkerboard0', 'checkerboard1'),
      'Checkerboard', last_scale)
  layers.append(FactorOutLayer(last_scale, name='FactorOut%d' % last_scale))

def construct_nice_spec(init_type="uniform", hidden_layers=1000, no_of_layers=1):
  """Append the NICE layer stack to the global `layers` list.

  Four NICECouplingLayer instances with alternating checkerboard masks
  (seeds 0..3, names 'Checkerboard_1'..'Checkerboard_4') are followed by a
  single NICEScaling layer (seed 4).

  Args:
    init_type: weight initializer name forwarded to each coupling layer.
    hidden_layers: forwarded to each coupling layer as `hidden_layers`.
    no_of_layers: forwarded to each coupling layer as `no_of_layers`.
  """
  global layers

  mask_cycle = ('checkerboard0', 'checkerboard1', 'checkerboard0', 'checkerboard1')
  for seed, mask_type in enumerate(mask_cycle):
    layers.append(NICECouplingLayer(
        mask_type, name='Checkerboard_%d' % (seed + 1), seed=seed,
        init_type=init_type, hidden_layers=hidden_layers,
        no_of_layers=no_of_layers))

  # The scaling layer takes the next seed after the coupling layers.
  layers.append(NICEScaling(name='Scaling', seed=len(mask_cycle)))


class Config:
    # Run configuration for the FX/EQ experiment, exposed below as FLAGS.
    # Attribute meanings beyond their names are not established in this
    # section -- verify against the training code before relying on them.

    # --- dataset and output locations ---
    dataset = "fxeq"
    input_file = "data/fxeq_train.npy"
    checkpoint_dir = "checkpoint_fxeq"
    sample_dir = "samples_fxeq"
    flow_log_dir = "logs"

    # --- input geometry ---
    input_height = 30          # same value as window_size used to build X
    input_width = X.shape[2]   # number of series in the windowed array
    c_dim = 1

    # --- optimisation ---
    batch_size = 64
    epoch = 20
    learning_rate = 0.0001
    beta1 = 0.5
    lr_decay = 1.0
    min_lr = 0.0
    n_critic = 5

    # --- objective ---
    f_div = "wgan"
    prior = "gaussian"
    reg = 10.0
    like_reg = 1.0

    # --- flow / network architecture ---
    model_type = "real_nvp"
    init_type = "normal"
    alpha = 1e-7
    batch_norm_adaptive = 1
    no_of_layers = 8
    hidden_layers = 1000
    df_dim = 64

FLAGS = Config()

layers = []
final_latent_dimension = []
def model_spec(x, reuse=True, model_type="nice", train=False, 
               alpha=1e-7, init_type="uniform", hidden_layers=1000, 
               no_of_layers=1, batch_norm_adaptive=0):
    """Build the forward flow: map `x` to latent `z` plus summed log-det Jacobian.

    The input is dequantized, squashed into (alpha, 1 - alpha) and passed
    through a logit before running every layer in the global `layers` list.
    The list is (re)built on demand when it is empty. The static shape of the
    resulting latent tensor is stored in the global `final_latent_dimension`.

    Args:
      x: input tensor; its static shape must be fully known (see int_shape).
      reuse: forwarded to every layer; a falsy value also clears `layers`.
      model_type: "nice" selects the NICE stack, anything else the
        coupling-layer (RealNVP-style) stack.
      train: forwarded to every layer.
      alpha: squashing constant applied before the logit.
      init_type, hidden_layers: used only when building the NICE stack.
      no_of_layers: depth parameter forwarded to the stack constructor.
      batch_norm_adaptive: non-zero enables `add_scaling` in the
        coupling-layer stack.

    Returns:
      (z, jac): latent tensor and accumulated log-det Jacobian.
    """
    global layers, final_latent_dimension

    # A non-reusing call starts from an empty layer list so it is rebuilt below.
    if not reuse:
        layers = []

    is_nice = (model_type == "nice")
    xs = int_shape(x)
    sum_log_det_jacobians = tf.zeros(xs[0])

    # corrupt data (Tapani Raiko's dequantization)
    corruption_level = 1.0
    y = x * 255.0
    y = y + corruption_level * tf.random_uniform(xs)
    y = y / (255.0 + corruption_level)

    # Squash into (alpha, 1 - alpha), account for the logit's log-det, then
    # apply the logit itself.
    y = y * (1 - 2 * alpha) + alpha
    reduce_axes = [1] if is_nice else [1, 2, 3]
    sum_log_det_jacobians += tf.reduce_sum(
        -tf.log(y) - tf.log(1 - y) + tf.log(1 - 2 * alpha), reduce_axes)
    y = tf.log(y) - tf.log(1 - y)

    # Lazily construct the layer stack the first time it is needed.
    if len(layers) == 0:
        if is_nice:
            construct_nice_spec(init_type=init_type, hidden_layers=hidden_layers,
                                no_of_layers=no_of_layers)
        else:
            construct_model_spec(no_of_layers=no_of_layers,
                                 add_scaling=(batch_norm_adaptive != 0))

    z = None
    jac = sum_log_det_jacobians
    for layer in layers:
        y, jac, z = layer.forward_and_jacobian(y, jac, z, reuse=reuse, train=train)

    # NICE keeps everything in y; otherwise the factored-out part is joined
    # with the remainder along the channel axis.
    z = y if is_nice else tf.concat(axis=3, values=[z, y])

    final_latent_dimension = int_shape(z)

    return z, jac


def inv_model_spec(y, reuse=False, model_type="nice", train=False, alpha=1e-7):
  """Build the inverse flow used for sampling: latent `y` back to data space.

  For "nice" the latent is fed through the reversed layers directly. For any
  other model type it is first reshaped to the dimensions recorded in the
  global `final_latent_dimension`. The result is passed through a sigmoid and
  the alpha squashing is undone.

  Args:
    y: latent tensor.
    reuse: forwarded to every layer's `backward`.
    model_type: "nice" or anything else (coupling-layer stack).
    train: forwarded to every layer's `backward`.
    alpha: squashing constant to undo after the sigmoid.

  Returns:
    Tensor in data space.
  """
  if model_type == "nice":
    z = y
  else:
    latent_dims = [final_latent_dimension[axis] for axis in (1, 2, 3)]
    z = tf.reshape(y, [-1] + latent_dims)
    y = None

  # Undo the layers in the opposite order to the forward pass.
  for layer in reversed(layers):
    y, z = layer.backward(y, z, reuse=reuse, train=train)

  # inverse logit, then undo the (alpha, 1 - alpha) squashing
  squashed = tf.sigmoid(y)
  return (squashed - alpha) / (1 - 2 * alpha)
    

# Negated, batch-summed log density of the network's output.
def log_likelihood(z, sum_log_det_jacobians, prior):
  """Return minus the sum over the batch of compute_log_density_x(...)."""
  per_example_log_density = compute_log_density_x(z, sum_log_det_jacobians, prior)
  return -tf.reduce_sum(per_example_log_density)

def int_shape(x):
    """Return the static shape of `x` as a list of Python ints.

    Every entry of `x.get_shape()` is converted with `int`, so each dimension
    must be convertible (i.e. statically known).
    """
    return [int(dim) for dim in x.get_shape()]

# Abstract class that can propagate both forward/backward,
# along with jacobians.
class Layer():
  """Interface shared by all flow layers.

  Subclasses implement `forward_and_jacobian` and `backward`. Callers in this
  file always pass `reuse=` and `train=` keywords, so the abstract methods
  accept them too: a subclass that forgets to override a method then fails
  with NotImplementedError rather than a TypeError about unexpected keywords.
  """

  def __init__(self, mask_type, name='Coupling'):
    # Reset both the TensorFlow and NumPy seeds on construction.
    tf.set_random_seed(0)
    np.random.seed(0)
  
  def forward_and_jacobian(self, x, sum_log_det_jacobians, z, reuse=False, train=False):
    """Forward pass; must return (y, sum_log_det_jacobians, z) in subclasses."""
    raise NotImplementedError(str(type(self)))

  def backward(self, y, z, reuse=False, train=False):
    """Inverse pass; must return (x, z) in subclasses."""
    raise NotImplementedError(str(type(self)))

def batch_norm(input_,
                name,
                train=True,
                epsilon=1e-6, 
                decay=.1,
                axes=[0, 1],
                reuse=None,
                bn_lag=0.,
                dim=[],
                scaling = True):
  """Normalize `input_` with batch or moving statistics.

  Only the normalized tensor is returned; no log-determinant term is computed
  or returned by this function.

  NOTE(review): other code in this file calls `batch_norm(name=..., epsilon=...)`
  without `input_` and then calls the result -- that usage does not match this
  signature; confirm which `batch_norm` definition is meant to be in scope.
  NOTE(review): `ops` and `stable_var` are not defined in the visible part of
  the file -- confirm they are imported/defined elsewhere.

  Args:
    input_: tensor to normalize.
    name: variable scope (and name scope) under which the statistics live.
    train: if True, use the current batch moments and update the moving
      statistics; otherwise use the stored `mean` / `var` variables.
    epsilon: added to the variance before the square root.
    decay: step size of the moving-statistics update
      (`stat -= decay * (stat - current)`).
    axes: axes passed to `tf.moments`.
    reuse: whether to reuse existing variables; defaults to `not train`.
    bn_lag: when > 0, blends batch moments with the stored statistics and
      applies a `1 - bn_lag**(step + 1)` correction.
    dim: shape of the statistic / scale / shift variables.
    scaling: if True, apply the `g_scale` / `g_shift` variables to the output.

  Returns:
    The normalized (and optionally scaled and shifted) tensor.
  """
  if reuse is None:
      reuse = not train
  # create variables
  with tf.variable_scope(name) as scope:
      if reuse:
          scope.reuse_variables()
      # Moving statistics and step counter are non-trainable state.
      var = tf.get_variable(
          "var", dim, tf.float32, tf.constant_initializer(1.), trainable=False)
      mean = tf.get_variable(
          "mean", dim, tf.float32, tf.constant_initializer(0.), trainable=False)
      step = tf.get_variable("step", [], tf.float32, tf.constant_initializer(0.), trainable=False)
      if scaling:
        # Scale and shift applied after normalization.
        scale_g = tf.get_variable("g_scale", dim, tf.float32, tf.constant_initializer(1.))
        shift_b = tf.get_variable("g_shift", dim, tf.float32, tf.constant_initializer(0.))
  # choose the appropriate moments
  if train:
      used_mean, used_var = tf.moments(input_, axes, name="batch_norm")
      cur_mean, cur_var = used_mean, used_var
      if bn_lag > 0.:
          # Blend the batch moments with the stored statistics (gradients do
          # not flow into the stored values) and rescale by the step count.
          used_var = stable_var(input_=input_, mean=used_mean, axes=axes)
          cur_var = used_var
          used_mean -= (1 - bn_lag) * (used_mean - tf.stop_gradient(mean))
          used_mean /= (1. - bn_lag**(step + 1))
          used_var -= (1 - bn_lag) * (used_var - tf.stop_gradient(var))
          used_var /= (1. - bn_lag**(step + 1))
  else:
      used_mean, used_var = mean, var
      cur_mean, cur_var = used_mean, used_var

  # update variables
  if train:
      with tf.name_scope(name, "AssignMovingAvg", [mean, cur_mean, decay]):
          with ops.colocate_with(mean):
              new_mean = tf.assign_sub(
                  mean,
                  tf.check_numerics(
                      decay * (mean - cur_mean), "NaN in moving mean."))
      with tf.name_scope(name, "AssignMovingAvg", [var, cur_var, decay]):
          with ops.colocate_with(var):
              new_var = tf.assign_sub(
                  var,
                  tf.check_numerics(decay * (var - cur_var),
                                    "NaN in moving variance."))
      with tf.name_scope(name, "IncrementTime", [step]):
          with ops.colocate_with(step):
              new_step = tf.assign_add(step, 1.)
      # The assign ops are referenced here, scaled by 0., so the returned
      # tensor depends on them without its value being altered.
      used_var += 0. * new_mean * new_var * new_step
  used_var += epsilon
  if scaling:
    return ((input_- used_mean)/tf.sqrt(used_var)) * scale_g + shift_b
  else:
    return ((input_- used_mean)/tf.sqrt(used_var))

def get_weight(name, weights_shape, init_type):
  """Create (or fetch) a float32 weight variable with the named initializer.

  Args:
    name: variable name passed to `tf.get_variable`.
    weights_shape: shape of the variable.
    init_type: one of "uniform", "normal", "orthogonal", "xavier".

  Returns:
    The variable returned by `tf.get_variable`.

  Raises:
    ValueError: if `init_type` is not one of the supported names. Previously
      this fell through and failed with an UnboundLocalError.
  """
  if init_type == "uniform":
    initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=0)
  elif init_type == "normal":
    initializer = tf.truncated_normal_initializer(stddev=0.02, seed=0)
  elif init_type == "orthogonal":
    initializer = tf.orthogonal_initializer(seed=0)
  elif init_type == "xavier":
    initializer = tf.keras.initializers.GlorotUniform(seed=0)
  else:
    raise ValueError(
        "Unknown init_type %r; expected 'uniform', 'normal', 'orthogonal' "
        "or 'xavier'" % (init_type,))
  return tf.get_variable(name, weights_shape, tf.float32, initializer)

#The NICE coupling layer
#Only for MNIST; checkerboard masking is done by reordering
class NICECouplingLayer(Layer):
  """Additive coupling layer operating on flat (batch, features) tensors.

  The feature axis is split in half; one half is passed through a
  fully connected network (`function_l_m`) whose output is added to
  (forward) or subtracted from (backward) the other half. The log-det
  Jacobian is passed through unchanged.
  """

  # |mask_type| can be 'checkerboard0', 'checkerboard1', 'channel0', 'channel1'
  def __init__(self, mask_type, name='Coupling', seed=0, init_type= "uniform",
    hidden_layers=1000, no_of_layers=1):
    self.mask_type = mask_type
    self.name = name
    self.seed = seed
    self.init_type = init_type
    self.hidden_states = hidden_layers
    self.no_of_layers = no_of_layers
    tf.set_random_seed(seed)
    np.random.seed(seed)

  # performs the operation described in the NICE paper
  def function_l_m(self, x, mask=None, name='function_l_m', reuse=False, train=False):
    """Fully connected network mapping one half of the features to a shift.

    Args:
      x: (batch, features) tensor; the feature size must be static.
      mask: unused; optional so that callers may omit it (the callers in this
        class do not pass it).
      name: variable scope for the network's variables.
      reuse: forwarded to `tf.variable_scope`.
      train: forwarded to the batch-norm calls.

    Returns:
      Tensor with the same feature size as `x`.
    """
    with tf.variable_scope(name, reuse=reuse):
      hidden_states = self.hidden_states
      inp_shape = int((x.get_shape())[1])

      y = x
      # input layer
      # The scope names below are plain literals: the previous '"g_bn_in" % r'
      # had no format specifier and referenced `r` before the loop defined it.
      bn = batch_norm(name="g_bn_in", epsilon=1e-4)
      y = bn(y, train=train)
      weights_shape = [inp_shape, hidden_states]
      weights = get_weight("g_weights_input", weights_shape, self.init_type)
      bias = tf.get_variable("g_bias_input", [hidden_states], tf.float32, tf.constant_initializer(0.0))
      y = tf.nn.relu(tf.matmul(y, weights) + bias)

      # hidden layers
      for r in range(self.no_of_layers):
        bn = batch_norm(name="g_bn_%d" % r, epsilon=1e-4)
        y = bn(y, train=train)

        weights_shape = [hidden_states, hidden_states]
        weights = get_weight("g_weights_%d" % r, weights_shape, self.init_type)
        bias = tf.get_variable("g_bias_%d" % r, [hidden_states], tf.float32, tf.constant_initializer(0.0))
        y = tf.nn.relu(tf.matmul(y, weights) + bias)

      # output layer (linear, back to the input width)
      bn = batch_norm(name="g_bn_out", epsilon=1e-4)
      y = bn(y, train=train)
      weights_shape = [hidden_states, inp_shape]
      weights = get_weight("g_weights_out", weights_shape, self.init_type)
      bias = tf.get_variable("g_bias_out", [inp_shape], tf.float32, tf.constant_initializer(0.0))
      y = tf.matmul(y, weights) + bias

      return y

  # corresponds to the coupling layer of the NICE paper
  def forward_and_jacobian(self, x, sum_log_det_jacobians, z, reuse, train):
    """Add the network output of one half to the other half of `x`.

    Returns:
      (y, sum_log_det_jacobians, z) with the Jacobian term and `z` unchanged.
    """
    tf.set_random_seed(self.seed)
    np.random.seed(self.seed)
    with tf.variable_scope(self.name, reuse=reuse):

      split_value = int(int((x.get_shape())[1])/2)

      y = x
      x1 = x[:,:split_value]
      x2 = x[:,split_value:]
      if self.mask_type == 'checkerboard0':
        # second half is shifted by a function of the first half
        mx1 = self.function_l_m(x1, reuse=reuse, train=train)
        delta = tf.concat([tf.zeros_like(x1),mx1], axis=1)
      else:
        # first half is shifted by a function of the second half
        mx2 = self.function_l_m(x2, reuse=reuse, train=train)
        delta = tf.concat([mx2, tf.zeros_like(x2)], axis=1)
      y += delta

      return y, sum_log_det_jacobians, z

  def backward(self, y, z, reuse, train):
    """Inverse of `forward_and_jacobian`: subtract the same shift.

    Returns:
      (x, z) with `z` unchanged.
    """
    tf.set_random_seed(self.seed)
    np.random.seed(self.seed)
    with tf.variable_scope(self.name, reuse=reuse):

      split_value = int(int((y.get_shape())[1])/2)

      x = y
      y1 = y[:,:split_value]
      y2 = y[:,split_value:]
      if self.mask_type == 'checkerboard0':
        my1 = self.function_l_m(y1, reuse=reuse, train=train)
        delta = tf.concat([tf.zeros_like(y1), my1], axis=1)
      else:
        my2 = self.function_l_m(y2, reuse=reuse, train=train)
        delta = tf.concat([my2, tf.zeros_like(y2)], axis=1)
      x -= delta

      return x, z

# The NICE scaling layer: a learned per-feature log-scale.
class NICEScaling(Layer):
  """Multiply each feature by exp(g_scale) (forward) or exp(-g_scale) (backward).

  The forward pass adds sum(g_scale) to the running log-det Jacobian.
  """

  def __init__(self, name='scaling', seed=0):
    self.name = name
    # Keep the seed that was passed in; it was previously overwritten with 0,
    # so forward/backward re-seeded with a different value than __init__.
    self.seed = seed
    tf.set_random_seed(seed)
    np.random.seed(seed)

  def forward_and_jacobian(self, x, sum_log_det_jacobians, z, reuse, train=None):
    """Scale `x` and update the log-det Jacobian.

    Returns:
      (y, sum_log_det_jacobians + sum(g_scale), z).
    """
    tf.set_random_seed(self.seed)
    np.random.seed(self.seed)
    with tf.variable_scope(self.name, reuse=reuse):

      no_of_factors = int((x.get_shape())[1]) #xs[1]
      scale_factor = tf.get_variable("g_scale", [1, no_of_factors], tf.float32, tf.constant_initializer(0.0))

      y = tf.multiply(x, tf.exp(scale_factor))

      return y, sum_log_det_jacobians + tf.reduce_sum(scale_factor), z

  def backward(self, y, z,reuse, train=None):
    """Undo the scaling applied by `forward_and_jacobian`.

    Returns:
      (x, z) with `z` unchanged.
    """
    # Use the stored seed, consistent with forward_and_jacobian.
    tf.set_random_seed(self.seed)
    np.random.seed(self.seed)
    with tf.variable_scope(self.name, reuse=reuse):

      no_of_factors = int((y.get_shape())[1]) #ys[1]
      scale_factor = tf.get_variable("g_scale", [1, no_of_factors], tf.float32, tf.constant_initializer(0.0))

      x = tf.multiply(y, tf.exp(-scale_factor))

      return x, z

def simple_batch_norm(x):
    """Standardize `x` using the mean and variance over all of its elements.

    A constant 1.0e-6 is added to the variance before taking the square root.
    """
    global_mean = tf.reduce_mean(x)
    global_variance = tf.reduce_mean(tf.square(x - global_mean))
    return (x - global_mean) / tf.sqrt(global_variance + 1.0e-6)

# The coupling layer.
# Contains code for both checkerboard and channelwise masking.
class CouplingLayer(Layer):
  """Affine coupling layer with checkerboard or channel-wise masking.

  The masked part of the input (mask == 1) passes through unchanged and
  conditions a residual convolutional network that produces a log-scale `l`
  and a shift `m` for the remaining part (mask == 0).
  """

  # Bound applied to the log-scale `l`. It MUST be applied identically in the
  # forward and backward passes, otherwise `backward` is not the inverse of
  # `forward_and_jacobian` whenever the network output exceeds the bound.
  _LOG_SCALE_CLIP = 5.0

  # |mask_type| can be 'checkerboard0', 'checkerboard1', 'channel0', 'channel1'
  def __init__(self, mask_type, name='Coupling', num_residual_blocks=8, scaling=True):
    self.mask_type = mask_type
    self.name = name
    self.num_residual_blocks = num_residual_blocks
    self.scaling = scaling
    if self.scaling == False:
      print("No scaling")

    tf.set_random_seed(0)
    np.random.seed(0)

  # Weight normalization technique
  def get_normalized_weights(self, name, weights_shape):
    """Return a weight tensor normalized per output channel and rescaled.

    Creates `name` (the raw weights) and `name + "_scale"` (one learned scale
    per output channel, L2-regularized). The raw weights are divided by their
    norm over axes [0, 1, 2] and multiplied by the scale.
    """
    weights = tf.get_variable(name, weights_shape, tf.float32,
                              tf.glorot_uniform_initializer(seed=0))
    scale = tf.get_variable(name + "_scale", [weights_shape[-1]], tf.float32,
                            tf.glorot_uniform_initializer(seed=0),
                            regularizer=tf.keras.regularizers.l2(5e-5))
    norm = tf.sqrt(tf.reduce_sum(tf.square(weights), [0, 1, 2]))
    return weights / norm * scale

  # corresponds to the function m and l in the RealNVP paper
  # (Function m and l became s and t in the new version of the paper)
  def function_l_m(self,x,mask,name='function_l_m', reuse=False, train=False):
    """Residual conv network producing the log-scale `l` and shift `m`.

    Args:
      x: masked input, rank 4 with a static shape (see int_shape).
      mask: mask tensor; the outputs are multiplied by (1 - mask).
      name: variable scope for the network's variables.
      reuse: forwarded to `tf.variable_scope`.
      train: forwarded to the batch-norm calls.

    Returns:
      (l, m), each with the same channel count as `x`.
    """
    with tf.variable_scope(name, reuse=reuse):
      channel = 64
      padding = 'SAME'
      xs = int_shape(x)
      kernel_h = 3
      kernel_w = 3
      input_channel = xs[3]
      y = x

      # Input normalization followed by a 1x1 convolution up to `channel`.
      if not self.scaling:
        y = simple_batch_norm(y)
      else:
        bn = batch_norm(epsilon=1e-4, name="g_bn_in")
        y = bn(y, train=train)
      weights_shape = [1, 1, input_channel, channel]
      weights = self.get_normalized_weights("g_weights_input", weights_shape)

      y = tf.nn.conv2d(y, weights, [1, 1, 1, 1], padding=padding)
      if not self.scaling:
        y = simple_batch_norm(y)
      else:
        biases = tf.get_variable('g_biases_input', [channel], initializer=tf.constant_initializer(0.0))
        y = tf.reshape(tf.nn.bias_add(y, biases), y.get_shape())
      y = tf.nn.relu(y)
      if self.scaling:
        bn = batch_norm(name="g_bn_in2", epsilon=1e-4)
        y = bn(y, train=train)

      skip = y
      # Residual blocks
      num_residual_blocks = self.num_residual_blocks
      for r in range(num_residual_blocks):
        weights_shape = [kernel_h, kernel_w, channel, channel]
        weights = self.get_normalized_weights("g_weights%d_1" % r, weights_shape)
        y = tf.nn.conv2d(y, weights, [1, 1, 1, 1], padding=padding)
        if not self.scaling:
          y = simple_batch_norm(y)
        else:
          biases = tf.get_variable('g_biases_%d_1' % r, [channel], initializer=tf.constant_initializer(0.0))
          y = tf.reshape(tf.nn.bias_add(y, biases), y.get_shape())
        y = tf.nn.relu(y)
        if self.scaling:
          bn = batch_norm(name="g_bn%d_1" % r, epsilon=1e-4)
          y = bn(y, train=train)

        weights_shape = [kernel_h, kernel_w, channel, channel]
        weights = self.get_normalized_weights("g_weights%d_2" % r, weights_shape)
        y = tf.nn.conv2d(y, weights, [1, 1, 1, 1], padding=padding)

        if not self.scaling:
          y = simple_batch_norm(y)
        else:
          biases = tf.get_variable('g_biases_%d_2' % r, [channel], initializer=tf.constant_initializer(0.0))
          y = tf.reshape(tf.nn.bias_add(y, biases), y.get_shape())

        y += skip
        y = tf.nn.relu(y)
        if self.scaling:
          bn = batch_norm(name="g_bn%d_2" % r, epsilon=1e-4)
          y = bn(y, train=train)

        skip = y

      # 1x1 convolution for reducing dimension
      weights = self.get_normalized_weights("g_weights_output",
                                            [1, 1, channel, input_channel*2])
      y = tf.nn.conv2d(y, weights, [1, 1, 1, 1], padding=padding)
      biases = tf.get_variable('g_biases_output', [input_channel*2], initializer=tf.constant_initializer(0.0))
      y = tf.reshape(tf.nn.bias_add(y, biases), y.get_shape())
      # For numerical stability, apply tanh and then scale
      y = tf.tanh(y)

      if 'checkerboard' in self.mask_type:
        scale_factor = tf.get_variable("g_weights_tanh_scale", [1], tf.float32,
            initializer=tf.constant_initializer(0.),
            regularizer=tf.keras.regularizers.l2(5e-5))
      else:
        scale_factor = tf.get_variable("g_weights_tanh_scale", [1], tf.float32, \
          tf.constant_initializer(1.))
      scale_shift = tf.get_variable("g_weights_scale_shift", [1], tf.float32, \
          tf.constant_initializer(0.))

      # The first half defines the l function
      # The second half defines the m function
      l = (y[:,:,:,:input_channel] * scale_factor + scale_shift) * (-mask+1)
      m = y[:,:,:,input_channel:] * (-mask+1)

      return l,m

  # returns a tensor of masks with the same shape as |x|
  # |mask_type| can be 'checkerboard0', 'checkerboard1', 'channel0', 'channel1'
  def get_mask(self, x, mask_type):
    """Build the binary mask for `x` using its dynamic shape.

    Checkerboard masks tile a 2x2 unit over the even part of the height and
    width and zero-pad any odd remainder. Channel masks set the first
    ('channel0') or second ('channel1') half of the channels to one.

    Raises:
      ValueError: if `mask_type` names neither a checkerboard nor a channel mask.
    """
    shape = tf.shape(x)
    batch_size = shape[0]
    height = shape[1]
    width = shape[2]
    channels = shape[3]

    even_height = height - tf.math.mod(height, 2)
    even_width = width - tf.math.mod(width, 2)

    if 'checkerboard' in mask_type:
        unit0 = tf.constant([[0.0, 1.0], [1.0, 0.0]], dtype=tf.float32)
        unit1 = 1.0 - unit0
        unit = unit0 if mask_type == 'checkerboard0' else unit1
        unit = tf.reshape(unit, [1, 2, 2, 1])

        b = tf.tile(unit, [batch_size, even_height // 2, even_width // 2, channels])

        # Pad mask back to original height and width
        pad_h = height - even_height
        pad_w = width - even_width
        paddings = [[0, 0], [0, pad_h], [0, pad_w], [0, 0]]
        b = tf.pad(b, paddings, "CONSTANT")

    elif 'channel' in mask_type:
        half_channels = tf.cast(channels // 2, tf.int32)
        full_shape = tf.stack([batch_size, height, width, half_channels])
        white = tf.ones(full_shape, dtype=tf.float32)
        black = tf.zeros(full_shape, dtype=tf.float32)
        b = tf.concat([white, black], axis=3) if mask_type == 'channel0' else tf.concat([black, white], axis=3)
    else:
        raise ValueError(f"Unknown mask_type {mask_type}")

    return b

  # corresponds to the coupling layer of the RealNVP paper
  # |mask_type| can be 'checkerboard0', 'checkerboard1', 'channel0', 'channel1'
  # log_det_jacobian is a 1D tensor of size (batch_size)
  def forward_and_jacobian(self, x, sum_log_det_jacobians, z, reuse=False, train=False):
    """Apply the affine coupling and accumulate its log-det Jacobian.

    Returns:
      (y, sum_log_det_jacobians, z) with `z` passed through unchanged.
    """
    with tf.variable_scope(self.name, reuse=reuse):
      b = self.get_mask(x, self.mask_type)

      # masked half of x
      x1 = x * b
      l,m = self.function_l_m(x1, b, reuse=reuse, train=train)
      # Avoid large exponentials; backward applies the same bound.
      l = tf.clip_by_value(l, -self._LOG_SCALE_CLIP, self._LOG_SCALE_CLIP)
      y = x1 + tf.multiply(-b+1.0, x * tf.exp(l) + m)
      log_det_jacobian = tf.reduce_sum(l, [1,2,3])
      sum_log_det_jacobians += log_det_jacobian

      return y,sum_log_det_jacobians, z

  def backward(self, y, z, reuse=False, train=False):
    """Invert `forward_and_jacobian`.

    The outer variable scope is always opened with reuse=True; `reuse` is
    forwarded only to `function_l_m`.

    Returns:
      (x, z) with `z` passed through unchanged.
    """
    with tf.variable_scope(self.name, reuse=True):
        b = self.get_mask(y, self.mask_type)

        y1 = y * b
        l, m = self.function_l_m(y1, b, reuse=reuse, train=train)
        # Apply the same clipping as the forward pass so that this really is
        # its inverse (the forward pass scales by exp(clip(l))).
        l = tf.clip_by_value(l, -self._LOG_SCALE_CLIP, self._LOG_SCALE_CLIP)
        x = y1 + tf.multiply(y * (-b + 1.0) - m, tf.check_numerics(tf.exp(-l), "exp has NaN"))
        return x, z


# Squeezing layer: trades spatial resolution for channels (block size 2).
# It only rearranges dimensions, so the Jacobian sum is handed on untouched.
class SqueezingLayer(Layer):
  """Space-to-depth on the way forward, depth-to-space on the way back."""

  def __init__(self, name="Squeeze"):
    self.name = name

  def forward_and_jacobian(self, x, sum_log_det_jacobians, z, reuse=False, train=False):
    """Squeeze `x` (and `z` when present); height and width must be even."""
    dims = int_shape(x)
    assert all(dims[axis] % 2 == 0 for axis in (1, 2))

    squeezed = tf.space_to_depth(x, 2)
    if z is not None:
      z = tf.space_to_depth(z, 2)
    return squeezed, sum_log_det_jacobians, z

  def backward(self, y, z, reuse=False, train=False):
    """Unsqueeze `y` (and `z` when present); channels must be a multiple of 4."""
    channel_count = int_shape(y)[3]
    assert channel_count % 4 == 0

    unsqueezed = tf.depth_to_space(y, 2)
    if z is not None:
      z = tf.depth_to_space(z, 2)
    return unsqueezed, z

# Factor-out layer: sends half of the channels straight to the latent
# tensor and lets the other half continue through the flow.
class FactorOutLayer(Layer):
  """Split channels between the latent accumulator `z` and the ongoing flow."""

  def __init__(self, scale, name='FactorOut'):
    self.scale = scale
    self.name = name

  def forward_and_jacobian(self, x, sum_log_det_jacobians, z, reuse=False, train=False):
    """Move the first half of `x`'s channels into `z`.

    Returns:
      (remaining half of x, unchanged Jacobian sum, updated z).
    """
    half = int_shape(x)[3] // 2

    # The split is along the channel axis: first half goes to the latent.
    factored_out = x[:, :, :, :half]
    remaining = x[:, :, :, half:]

    latent = factored_out if z is None else tf.concat(axis=3, values=[z, factored_out])
    return remaining, sum_log_det_jacobians, latent

  def backward(self, y, z, reuse=False, train=False):
    """Take channels back from the tail of `z` and prepend them to `y`.

    At scale s, (1/2)^(s+1) of the original dimensions were factored out, so
    when no `y` exists yet the last channels // 2**scale of `z` are taken;
    otherwise as many channels as `y` currently has.

    Returns:
      (x, remaining z).
    """
    latent_dims = int_shape(z)
    split = latent_dims[3] // (2**self.scale) if y is None else int_shape(y)[3]

    recovered = z[:, :, :, -split:]
    z = z[:, :, :, :-split]

    assert (int_shape(recovered)[3] == split)

    x = recovered if y is None else tf.concat(axis=3, values=[recovered, y])
    return x, z


# Given the output of the network and all jacobians, 
# compute the log probability.
def compute_log_density_x(z, sum_log_det_jacobians, prior):
  """Return the per-example log density: log p(z) + sum of log-det Jacobians.

  Args:
    z: latent tensor, rank 4 (flattened here per example) or rank 2, with a
      static shape.
    sum_log_det_jacobians: accumulated log-det Jacobian.
    prior: "gaussian", "logistic" or "uniform".

  Returns:
    log_density_z + sum_log_det_jacobians.

  Raises:
    ValueError: if `prior` is not one of the supported names. Previously an
      unknown prior surfaced as an UnboundLocalError.
  """
  # Validate up front so a typo fails with a clear message before any graph
  # ops are created.
  if prior not in ("gaussian", "logistic", "uniform"):
    raise ValueError(
        "Unknown prior %r; expected 'gaussian', 'logistic' or 'uniform'" % (prior,))

  zs = int_shape(z)
  if len(zs) == 4:
    K = zs[1]*zs[2]*zs[3] #dimension of the Gaussian distribution
    z = tf.reshape(z, (-1, K))
  else:
    K = zs[1]

  if prior == "gaussian":
    log_density_z = -0.5*tf.reduce_sum(tf.square(z), [1]) -0.5*K*np.log(2*np.pi)
  elif prior == "logistic":
    # softplus is taken from tf.nn, its documented public location.
    log_density_z = -tf.reduce_sum(-z + 2*tf.nn.softplus(z),[1])
  else:  # "uniform"
    log_density_z = 0

  return log_density_z + sum_log_det_jacobians



class DCGAN(object):
  def __init__(self, sess, input_height=32, input_width=32,
         batch_size=64, sample_num = 64, z_dim=100, gf_dim=64, df_dim=64,
         gfc_dim=1024, dfc_dim=1024, c_dim=3, dataset_name='default', checkpoint_dir=None,
         f_div='cross-ent', prior="logistic", min_lr=0.0, lr_decay=1.0,
         model_type="nice", alpha=1e-7, log_dir=None,
         init_type="uniform",reg=0.5, n_critic=1.0, hidden_layers=1000,
         no_of_layers= 8, like_reg=0.1, just_sample=False, batch_norm_adaptive=1, flags = None):
    """Store the configuration on the instance and build the graph.

    Every argument listed below is stored as an attribute of the same name
    unless noted otherwise.

    Args:
      sess: TensorFlow session
      input_height, input_width: spatial size of the inputs.
      batch_size: The size of batch. Also stored as `sample_num`.
      sample_num: accepted but not used; `self.sample_num` is `batch_size`.
      z_dim: (optional) Dimension of dim for Z. [100]
      gf_dim, df_dim, gfc_dim, dfc_dim: stored network-size settings.
      c_dim: (optional) Dimension of image color. `is_grayscale` is set when
        this equals 1. [3]
      dataset_name: when not 'mnist', a third batch-norm (`d_bn3`) is created.
      checkpoint_dir, log_dir: stored directories.
      f_div, prior, model_type, alpha, init_type, reg, n_critic,
      hidden_layers, no_of_layers, like_reg, min_lr, lr_decay: stored settings.
      just_sample, batch_norm_adaptive: accepted but not used in this method.
      flags: stored configuration object.
    """
    # Session and input geometry.
    self.sess = sess
    self.input_height = input_height
    self.input_width = input_width
    self.c_dim = c_dim
    self.is_grayscale = (c_dim == 1)

    # Batch sizes: the sample batch always matches the training batch.
    self.batch_size = batch_size
    self.sample_num = batch_size

    # Latent space and prior.
    self.z_dim = z_dim
    self.prior = prior
    self.flags = flags 

    # Network widths.
    self.gf_dim = gf_dim
    self.df_dim = df_dim
    self.gfc_dim = gfc_dim
    self.dfc_dim = dfc_dim

    # Learning-rate schedule, flow model and training settings.
    self.lr_decay = lr_decay
    self.min_lr = min_lr
    self.model_type = model_type
    self.log_dir = log_dir
    self.alpha = alpha
    self.init_type = init_type
    self.reg = reg
    self.n_critic = n_critic
    self.hidden_layers = hidden_layers
    self.no_of_layers = no_of_layers
    self.like_reg = like_reg
    self.dataset_name = dataset_name
    self.checkpoint_dir = checkpoint_dir
    self.f_div = f_div

    # batch normalization : deals with poor initialization helps gradient flow
    self.d_bn1 = batch_norm(name='d_bn1')
    self.d_bn2 = batch_norm(name='d_bn2')
    if self.dataset_name != 'mnist':
      self.d_bn3 = batch_norm(name='d_bn3')

    # Fix both seeds immediately before the graph is constructed.
    seed = 0
    np.random.seed(seed)
    tf.set_random_seed(seed)

    self.build_model()

  def build_model(self):
    seed =0
    np.random.seed(seed)
    tf.set_random_seed(seed)

    image_dims = [self.input_height, self.input_width, self.c_dim]

    self.inputs = tf.placeholder(
      tf.float32, [self.batch_size] + image_dims, name='real_images')
    self.sample_inputs = tf.placeholder(
      tf.float32, [self.sample_num] + image_dims, name='sample_inputs')
    self.image_size = np.prod(image_dims)
    self.image_dims = image_dims
    if self.dataset_name == "cifar":
      inputs = tf.map_fn(lambda img: tf.image.random_flip_left_right(img), self.inputs)
    else:
      inputs = self.inputs

    sample_inputs = self.sample_inputs

    self.z = tf.placeholder(
      tf.float32, [self.batch_size, self.z_dim], name='z')
    self.z_sum = histogram_summary("z", self.z)

    #### f: Image Space to Latent space #########
    batch_norm_adaptive = self.flags.batch_norm_adaptive
    # Forward flow model: shared template for both training and inference
    self.flow_model_template = tf.make_template(
        'flow_model',
        lambda x, train: model_spec(
            x,
            reuse=tf.AUTO_REUSE,
            model_type=self.model_type,
            train=train,
            alpha=self.alpha,
            init_type=self.init_type,
            hidden_layers=self.hidden_layers,
            no_of_layers=self.no_of_layers,
            batch_norm_adaptive=self.flags.batch_norm_adaptive
        )
    )
    # self.flow_model = tf.make_template('model', 
    #   lambda x: model_spec(x, reuse=False, model_type=self.model_type, train=False, 
    #     alpha=self.alpha, init_type=self.init_type, hidden_layers=self.hidden_layers,
    #     no_of_layers=self.no_of_layers, batch_norm_adaptive=batch_norm_adaptive), unique_name_='model')

    #### f: Image Space to Latent space for training #########
    def trainable_flow_model_fn(x):
        return model_spec(
            x,
            reuse=tf.AUTO_REUSE,
            model_type=self.model_type,
            train=True,
            alpha=self.alpha,
            init_type=self.init_type,
            hidden_layers=self.hidden_layers,
            no_of_layers=self.no_of_layers,
            batch_norm_adaptive=self.flags.batch_norm_adaptive
        )


    self.trainable_flow_model = trainable_flow_model_fn

    # Use flow_model_template for both:
    self.flow_model = lambda x: self.flow_model_template(x, train=False)
    self.trainable_flow_model = lambda x: self.flow_model_template(x, train=True)


    # ##### f^-1: Latent to image (trainable)#######
    
    # self.flow_inv_model = tf.make_template('model', 
    #   lambda x: inv_model_spec(x, reuse=True, model_type=self.model_type,
    #    train=True,alpha=self.alpha), unique_name_='model')
    # Inverse flow model (used for generation)
    self.flow_inv_template = tf.make_template(
        'flow_inv_model',
        lambda z, train: inv_model_spec(
            z,
            reuse=tf.AUTO_REUSE,
            model_type=self.model_type,
            train=train,
            alpha=self.alpha
        )
    )
    
    
    # ##### f^-1: Latent to image (not-trainable just for sampling)#######
    self.sampler_function = tf.make_template('model', 
      lambda x: inv_model_spec(x, reuse=True, model_type=self.model_type, 
        alpha=self.alpha,train=False), unique_name_='model')


    self.flow_inv_model = lambda z: self.flow_inv_template(z, train=True)
    self.sampler_function = lambda z: self.flow_inv_template(z, train=False)
    self.generator_train_batch = self.flow_inv_model
    
    ############### SET SIZE FOR TEST BATCH DEPENDING ON WHETHER WE USE Linear or Conv arch##########
    if self.model_type == "nice":
      self.log_like_batch = tf.placeholder(\
        tf.float32, [self.batch_size, self.image_size], name='log_like_batch')
    elif self.model_type == "real_nvp":
      self.log_like_batch = tf.placeholder(\
        tf.float32, [self.batch_size] + self.image_dims, name='log_like_batch')
    ###############################################

    # Normalize input to prevent NaNs
    mean, variance = tf.nn.moments(self.log_like_batch, axes=[0])
    normalized_input = (self.log_like_batch - mean) / (tf.sqrt(variance) + 1e-8)

    # Now use normalized_input instead
    gen_para, jac = self.flow_model_template(normalized_input, train=False)

    if self.dataset_name == "mnist":
      self.log_likelihood = log_likelihood(gen_para, jac, self.prior)/(self.batch_size)
    else:
      # to calculate values in bits per dim we need to
      # multiply the density by the width of the 
      # discrete probability area, which is 1/256.0, per dimension.
      # The calculation is performed in the log space.
      self.log_likelihood = log_likelihood(gen_para, jac, self.prior)/(self.batch_size)
      self.log_likelihood = 8. + self.log_likelihood / (np.log(2)*self.image_size)

    self.G_before_postprocessing = self.flow_inv_template(self.z, train=True)
    self.sampler_before_postprocessing = self.flow_inv_template(self.z, train=False)


    if self.model_type == "real_nvp":
      ##For data dependent init (not completely implemented)
      self.x_init = tf.placeholder(tf.float32, shape=[self.batch_size] + image_dims)
      # run once for data dependent initialization of parameters
      self.trainable_flow_model(self.x_init)
    
    inputs_tr_flow = inputs
    if self.model_type == "nice":
      split_val = int(self.image_size /2)
      self.permutation = np.arange(self.image_size)
      tmp = self.permutation.copy()
      self.permutation[:split_val] = tmp[::2]
      self.permutation[split_val:] = tmp[1::2]
      self.for_perm = np.identity(self.image_size)
      self.for_perm = tf.constant(self.for_perm[:,self.permutation], tf.float32)
      self.rev_perm = np.identity(self.image_size)
      self.rev_perm = tf.constant(self.rev_perm[:,np.argsort(self.permutation)], tf.float32)
      self.G_before_postprocessing \
      = tf.matmul(self.G_before_postprocessing,self.rev_perm)
      self.sampler_before_postprocessing \
      = tf.clip_by_value(tf.matmul(self.sampler_before_postprocessing, self.rev_perm) , 0., 1.)
      inputs_tr_flow = tf.matmul(tf.reshape(inputs, [self.batch_size, self.image_size]), self.for_perm)

    train_gen_para, train_jac = self.flow_model_template(inputs_tr_flow, train=True)
    self.train_log_likelihood = log_likelihood(train_gen_para, train_jac, self.prior) / self.batch_size
    
    self.sampler = tf.reshape(self.sampler_before_postprocessing, [self.batch_size] + image_dims)
    self.G = tf.reshape(self.G_before_postprocessing, [self.batch_size] + image_dims)

    inputs = inputs*255.0
    corruption_level = 1.0
    inputs = inputs + corruption_level * tf.random_uniform([self.batch_size] + image_dims)
    inputs = inputs/(255.0 + corruption_level)

    self.D, self.D_logits = self.discriminator(inputs, reuse=False)

    self.D_, self.D_logits_ = self.discriminator(self.G, reuse=True)

    self.d_sum = histogram_summary("d", self.D)
    self.d__sum = histogram_summary("d_", self.D_)
    self.G_sum = image_summary("G", self.G)

    def sigmoid_cross_entropy_with_logits(x, y):
      try:
        return tf.sigmoid_cross_entropy_with_logits(logits=x, labels=y)
      except:
        return tf.sigmoid_cross_entropy_with_logits(logits=x, targets=y)

    ### Vanilla gan loss
    if self.f_div == 'ce':
      self.d_loss_real = tf.reduce_mean(
        sigmoid_cross_entropy_with_logits(self.D_logits, tf.ones_like(self.D)))
      self.d_loss_fake = tf.reduce_mean(
        sigmoid_cross_entropy_with_logits(self.D_logits_, tf.zeros_like(self.D_)))
      self.g_loss = tf.reduce_mean(
        sigmoid_cross_entropy_with_logits(self.D_logits_, tf.ones_like(self.D_)))
    else:
    ### other gan losses
      if self.f_div == 'hellinger':
        self.d_loss_real = tf.reduce_mean(tf.exp(-self.D_logits))
        self.d_loss_fake = tf.reduce_mean(tf.exp(self.D_logits_) - 2.)
        self.g_loss = tf.reduce_mean(tf.exp(-self.D_logits_))
      elif self.f_div == 'rkl':
        self.d_loss_real = tf.reduce_mean(tf.exp(self.D_logits))
        self.d_loss_fake = tf.reduce_mean(-self.D_logits_ - 1.)
        self.g_loss = -tf.reduce_mean(-self.D_logits_ - 1.)
      elif self.f_div == 'kl':
        self.d_loss_real = tf.reduce_mean(-self.D_logits)
        self.d_loss_fake = tf.reduce_mean(tf.exp(self.D_logits_ - 1.))
        self.g_loss = tf.reduce_mean(-self.D_logits_)
      elif self.f_div == 'tv':
        self.d_loss_real = tf.reduce_mean(-0.5 * tf.tanh(self.D_logits))
        self.d_loss_fake = tf.reduce_mean(0.5 * tf.tanh(self.D_logits_))
        self.g_loss = tf.reduce_mean(-0.5 * tf.tanh(self.D_logits_))
      elif self.f_div == 'lsgan':
        self.d_loss_real = 0.5 * tf.reduce_mean((self.D_logits-1)**2)
        self.d_loss_fake = 0.5 * tf.reduce_mean(self.D_logits_**2)
        self.g_loss = 0.5 * tf.reduce_mean((self.D_logits_-1)**2)
      elif self.f_div == "wgan":
        self.g_loss = -tf.reduce_mean(self.D_logits_)
        self.d_loss_real = -tf.reduce_mean(self.D_logits)
        self.d_loss_fake = tf.reduce_mean(self.D_logits_)
        alpha = tf.random_uniform(
            shape=[1, self.batch_size], 
            minval=0.,
            maxval=1.
        )
        fake_data = self.G
        real_data = inputs
        differences = fake_data - real_data
        interpolates = real_data + \
        tf.transpose(alpha*tf.transpose(differences, perm=[1,2,3,0]), [3,0,1,2])
        _, d_inter = self.discriminator(interpolates, reuse=True) 
        gradients = tf.gradients(d_inter, [interpolates])[0]
        slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
        self.gradient_penalty = tf.reduce_mean((slopes-1.)**2)
      else:
        print("ERROR: Unrecognized f-divergence...exiting")
        exit(-1)

    self.d_loss_real_sum = scalar_summary("d_loss_real", self.d_loss_real)
    self.d_loss_fake_sum = scalar_summary("d_loss_fake", self.d_loss_fake)
                          
    if self.f_div == "wgan":
      self.d_loss = self.d_loss_real + self.d_loss_fake + self.reg * self.gradient_penalty
    else:
      self.d_loss = self.d_loss_real + self.d_loss_fake

    self.g_loss_sum = scalar_summary("g_loss", self.g_loss)
    self.d_loss_sum = scalar_summary("d_loss", self.d_loss)

    t_vars = tf.trainable_variables()

    self.d_vars = [var for var in t_vars if '/d_' in var.name]
    self.g_vars = [var for var in t_vars if '/g_' in var.name]
    print("gen_vars:")
    for var in self.g_vars:
      print(var.name)

    print("disc_vars:")
    for var in self.d_vars:
      print(var.name)
    
    self.saver = tf.train.Saver(max_to_keep=0)

  def evaluate_neg_loglikelihood(self, data, config):
    """Average `self.log_likelihood` over every full batch of `data`.

    Args:
      data: indexable collection of examples. It is cut into consecutive
        slices of `config.batch_size`; a trailing partial slice is ignored.
      config: object exposing `batch_size`.

    Returns:
      `np.mean` of the per-batch values fetched from `self.log_likelihood`.
      When `data` holds less than one full batch nothing is evaluated and
      the mean of an empty list (NaN) is returned.
    """
    num_batches = len(data) // config.batch_size
    batch_values = []
    for idx in range(num_batches):
      batch_images = data[idx*config.batch_size:(idx+1)*config.batch_size]
      # np.asarray replaces np.cast, which newer NumPy releases removed.
      batch_images = np.asarray(batch_images, dtype=np.float32)

      if self.model_type == "nice":
        # Reorder the feature columns with the model's stored permutation.
        batch_images = batch_images[:, self.permutation]

      batch_values.append(self.sess.run(
        [self.log_likelihood],
        feed_dict={self.log_like_batch: batch_images}))

    return np.mean(batch_values)

  def train(self, config, train_data=None, val_data=None, test_data=None):
    """Train DCGAN.

    Builds the discriminator/generator Adam updates, initialises (or
    restores) the variables, then alternates `self.n_critic` discriminator
    steps with one generator step per batch. Every 703 steps the model is
    checkpointed, evaluated on `val_data`, sampled, and the bookkeeping
    lists are written to `<config.sample_dir>/graph_data.npy`.

    Args:
      config: run configuration; this method reads `dataset`, `batch_size`,
        `learning_rate`, `beta1`, `epoch`, `checkpoint_dir` and `sample_dir`.
      train_data: training examples. Ignored when `config.dataset` is
        "mnist" or "cifar", which load their own splits. Shuffled in place.
      val_data: validation examples (same override rule as `train_data`).
      test_data: optional held-out examples. When no test split is
        available the final test-set evaluation is skipped.
    """
    data_X = train_data
    seed = 0
    np.random.seed(seed)
    tf.set_random_seed(seed)
    if config.dataset == "mnist":
      data_X, val_data, test_data, _ = mnist_data.load_mnist()
    elif config.dataset == "cifar":
      data_X, val_data, test_data = cifar_data.load_cifar()

    if self.model_type == "nice":
      val_data = np.reshape(val_data, (-1, self.image_size))
      if test_data is not None:
        test_data = np.reshape(test_data, (-1, self.image_size))

    lr = config.learning_rate
    self.learning_rate = tf.placeholder(tf.float32, [], name='lr')

    d_optim_ = tf.train.AdamOptimizer(self.learning_rate, beta1=config.beta1, beta2=0.9)
    d_grad = d_optim_.compute_gradients(self.d_loss, var_list=self.d_vars)
    # compute_gradients yields (gradient, variable) pairs; the norm is taken
    # over the gradients only so the logged magnitude excludes the weights.
    d_grad_mag = tf.global_norm([grad for grad, _ in d_grad])
    d_optim = d_optim_.apply_gradients(d_grad)

    g_optim_ = tf.train.AdamOptimizer(self.learning_rate, beta1=config.beta1, beta2=0.9)
    if self.n_critic <= 0:
      # No critic steps: the generator is driven by the likelihood term alone.
      g_grad = g_optim_.compute_gradients(self.train_log_likelihood,
          var_list=self.g_vars)
    elif self.like_reg > 0:
      if self.model_type == "real_nvp":
        # Gradients of the two terms are computed separately and summed.
        g_grad_1 = g_optim_.compute_gradients(self.g_loss / self.like_reg, var_list=self.g_vars)
        g_grad_2 = g_optim_.compute_gradients(self.train_log_likelihood, var_list=self.g_vars)
        g_grad = [(g1 + g2, var)
                  for (g1, var), (g2, _) in zip(g_grad_1, g_grad_2)]
      else:
        g_grad = g_optim_.compute_gradients(
            self.g_loss/self.like_reg + self.train_log_likelihood, var_list=self.g_vars)
    else:
      g_grad = g_optim_.compute_gradients(self.g_loss, var_list=self.g_vars)

    g_grad_mag = tf.global_norm([grad for grad, _ in g_grad])
    g_optim = g_optim_.apply_gradients(g_grad)

    # real_nvp feeds the first training batch to the x_init placeholder
    # while initialising (data-dependent init, not fully implemented).
    if self.model_type == "real_nvp":
      self.sess.run(tf.global_variables_initializer(),
        {self.x_init: data_X[0:config.batch_size]})
    else:
      self.sess.run(tf.global_variables_initializer())

    self.g_sum = merge_summary([self.z_sum, self.d__sum,
      self.G_sum, self.d_loss_fake_sum, self.g_loss_sum])
    self.d_sum = merge_summary(
        [self.z_sum, self.d_sum, self.d_loss_real_sum, self.d_loss_sum])
    log_dir = "./" + self.log_dir
    if os.path.exists(log_dir):
        shutil.rmtree(log_dir)
    os.makedirs(log_dir)
    # Summary writing is currently disabled; the merged summaries are still
    # fetched during training but not written anywhere.

    counter = 1
    start_time = time.time()
    could_load, checkpoint_counter = self.load(self.checkpoint_dir)
    if could_load:
      counter = checkpoint_counter
      print(" [*] Load SUCCESS")
    else:
      print(" [!] Load failed...")

    ############## A FIXED BATCH OF Zs FOR GENERATING SAMPLES ######################
    sample_z = self._draw_prior_batch(self.sample_num)

    ################################ Evaluate initial model lli ########################
    val_nlli = self.evaluate_neg_loglikelihood(val_data, config)

    # Needed by the periodic sampling step on both the fresh and resumed path.
    sample_inputs = data_X[0:config.batch_size]

    if counter > 1:
      # Resuming: restore the bookkeeping written by _save_graph_data.
      # NOTE: allow_pickle unpickles arbitrary objects; only point this at
      # graph_data files produced by this trainer.
      old_data = np.load("./"+config.sample_dir+'/graph_data.npy', allow_pickle=True)
      self.samples_list = old_data[0]
      self.val_nlli_list = old_data[1]
      self.best_val_nlli = old_data[2]
      self.best_model_counter = old_data[3]
      self.best_model_path = old_data[4]
      self.counter_list = old_data[5]
      manifold_h, manifold_w = old_data[6]
      self.batch_train_nlli_list = old_data[7]
      self.inception_list = old_data[8]
      self.loss_list = old_data[9]
    else:
      # Fresh run: the initial model is the best one seen so far.
      self.best_val_nlli = val_nlli
      self.best_model_counter = counter
      self.best_model_path = self.save(config.checkpoint_dir, counter)
      self.val_nlli_list = [val_nlli]
      self.counter_list = [1]
      self.batch_train_nlli_list = []
      self.inception_list = [None]
      self.samples_list = self.sess.run([self.sampler],
              feed_dict={
                  self.z: sample_z,
              }
            )
      samples = self.samples_list[0]
      manifold_h = int(np.ceil(np.sqrt(samples.shape[0])))
      manifold_w = int(np.floor(np.sqrt(samples.shape[0])))
      self.loss_list = self.sess.run(
              [self.d_loss_real, self.d_loss_fake],
              feed_dict={
                  self.z: sample_z,
                  self.inputs: sample_inputs,
              })
    ##################################################################################

    eval_every = 703  # steps between checkpoint/validation/sampling passes

    for epoch in range(config.epoch):
      np.random.shuffle(data_X)
      batch_idxs = len(data_X) // config.batch_size

      for idx in range(batch_idxs):
        sys.stdout.flush()
        batch_images = data_X[idx*config.batch_size:(idx+1)*config.batch_size]
        batch_z = self._draw_prior_batch(config.batch_size)

        # Update D network n_critic times on the same batch.
        for r in range(self.n_critic):
          _, d_g_mag, errD_fake, errD_real, summary_str = self.sess.run([d_optim, d_grad_mag,
            self.d_loss_fake, self.d_loss_real, self.d_sum],
            feed_dict={
              self.inputs: batch_images,
              self.z: batch_z,
              self.learning_rate: lr,
            })

        # Update G network. Real images are only fed when the likelihood
        # term is part of the generator objective.
        g_feed = {self.z: batch_z, self.learning_rate: lr}
        if self.like_reg > 0 or self.n_critic <= 0:
          g_feed[self.inputs] = batch_images
        _, g_g_mag, errG, summary_str = self.sess.run(
          [g_optim, g_grad_mag, self.g_loss, self.g_sum], feed_dict=g_feed)

        batch_images_nl = batch_images
        if self.model_type == "nice":
          batch_images_nl = np.reshape(batch_images_nl,(self.batch_size, -1))[:,self.permutation]
        b_train_nlli = self.sess.run([self.log_likelihood], feed_dict={
          self.log_like_batch: batch_images_nl,
          })
        b_train_nlli = b_train_nlli[0]

        self.batch_train_nlli_list.append(b_train_nlli)
        counter += 1

        lr = max(lr * self.lr_decay, self.min_lr)

        if np.mod(counter, eval_every) == 1:
          if self.n_critic > 0:
            print("Epoch: [%2d] [%4d/%4d] time: %4.4f, d_loss: %.8f, g_loss: %.8f, d_grad_mag: %.8f, g_grad_mag: %.8f, lr: %.8f" \
          % (epoch, idx, batch_idxs,
            time.time() - start_time, errD_fake+errD_real, errG, d_g_mag, g_g_mag, lr))
          else:
            print("Epoch: [%2d] [%4d/%4d] time: %4.4f, g_loss: %.8f, g_grad_mag: %.8f, lr: %.8f" \
          % (epoch, idx, batch_idxs,
            time.time() - start_time, errG, g_g_mag, lr))
          curr_model_path = self.save(config.checkpoint_dir, counter)

          val_nlli = self.evaluate_neg_loglikelihood(val_data, config)
          curr_inception_score = self.calculate_inception_and_mode_score()

          print("[LogLi (%d,%d)]: val neg logli: %.8f, ince: %.8f, train lli: %.8f" % (epoch, idx,val_nlli,\
           curr_inception_score[0], np.mean(self.batch_train_nlli_list[-700:])))

          if val_nlli < self.best_val_nlli:
            self.best_val_nlli = val_nlli
            self.best_model_counter = counter
            self.best_model_path = curr_model_path
          self.val_nlli_list.append(val_nlli)
          self.counter_list.append(counter)

          samples, d_loss, g_loss = self.sess.run(
            [self.sampler, self.d_loss, self.g_loss],
            feed_dict={
                self.z: sample_z,
                self.inputs: sample_inputs,
            }
          )
          self.samples_list.append(samples)
          manifold_h = int(np.ceil(np.sqrt(samples.shape[0])))
          manifold_w = int(np.floor(np.sqrt(samples.shape[0])))
          self.inception_list.append(curr_inception_score)
          save_images(samples, [manifold_h, manifold_w],
                './{}/train_{:02d}_{:04d}.png'.format(config.sample_dir, epoch, idx))
          print("[Sample] d_loss: %.8f, g_loss: %.8f" % (d_loss, g_loss))

          self._save_graph_data(config, manifold_h, manifold_w)

    self._save_graph_data(config, manifold_h, manifold_w)
    if test_data is None:
      print(" [!] No test data supplied; skipping test-set evaluation")
    else:
      self.test_model(test_data, config)

  def _draw_prior_batch(self, num_samples):
    """Draw `num_samples` latent rows of width `self.z_dim` from `self.prior`."""
    shape = (num_samples, self.z_dim)
    if self.prior == "uniform":
      draws = np.random.uniform(-1, 1, size=shape)
    elif self.prior == "logistic":
      draws = np.random.logistic(loc=0., scale=1., size=shape)
    elif self.prior == "gaussian":
      draws = np.random.normal(0.0, 1.0, size=shape)
    else:
      print("ERROR: Unrecognized prior...exiting")
      sys.exit(-1)
    return draws.astype(np.float32)

  def _save_graph_data(self, config, manifold_h, manifold_w):
    """Write the training bookkeeping lists to `<sample_dir>/graph_data.npy`."""
    fields = [self.samples_list, self.val_nlli_list, self.best_val_nlli,
              self.best_model_counter, self.best_model_path, self.counter_list,
              [manifold_h, manifold_w], self.batch_train_nlli_list,
              self.inception_list, self.loss_list]
    # The entries have mixed types and lengths, so they are stored one per
    # slot in an explicit object array instead of relying on NumPy to infer
    # a shape from the ragged list.
    packed = np.empty(len(fields), dtype=object)
    for slot, item in enumerate(fields):
      packed[slot] = item
    np.save("./"+config.sample_dir+'/graph_data', packed)

  def test_model(self, test_data, config):
    """Restore the best recorded checkpoint and print its score on `test_data`.

    Args:
      test_data: examples forwarded to `evaluate_neg_loglikelihood`.
      config: run configuration forwarded to `evaluate_neg_loglikelihood`.
    """
    best_summary = (self.best_model_counter, self.best_val_nlli)
    print("[*] Restoring best model counter: %d, val neg lli: %.8f"
      % best_summary)

    checkpoint_path = self.best_model_path
    self.saver.restore(self.sess, checkpoint_path)
    print("[*] Best model restore from: " + checkpoint_path)

    print("[*] Evaluating on the test set")
    score = self.evaluate_neg_loglikelihood(test_data, config)
    print("[*] Test negative log likelihood: %.8f" % (score))

  def calculate_inception_and_mode_score(self):
    """Score freshly generated samples with `inception_score`.

    For the "mnist" dataset a list of four zeros is returned without
    sampling (mode scores would need an MNIST classifier wired into
    inception_score.py). Otherwise 18 batches are drawn from the prior,
    pushed through `self.sampler`, scaled by 255 and cast to int32 before
    being handed to `inception_score.get_inception_and_mode_score`.
    """
    if self.dataset_name == "mnist":
      return [0.0, 0.0, 0.0, 0.0]

    session = self.sess
    generated = []
    for _ in range(18):
      if self.prior == "uniform":
        latent = np.random.uniform(-1, 1, [self.batch_size, self.z_dim])
        latent = latent.astype(np.float32)
      elif self.prior == "logistic":
        latent = np.random.logistic(loc=0., scale=1.0, size=[self.batch_size, self.z_dim])
        latent = latent.astype(np.float32)
      elif self.prior == "gaussian":
        latent = np.random.normal(0.0, 1.0, size=(self.batch_size, self.z_dim))
      else:
        print("ERROR: Unrecognized prior...exiting")
        exit(-1)

      fetched = self.sess.run([self.sampler], feed_dict={self.z: latent})
      generated.append(fetched[0])

    stacked = np.concatenate(generated, axis=0)
    as_int = (stacked*255.).astype('int32')

    return inception_score.get_inception_and_mode_score(list(as_int), sess=session)
  
  def discriminator(self, image, y=None, reuse=False):
    """Build the discriminator graph under the "discriminator" scope.

    Args:
      image: input tensor fed to the first convolution.
      y: unused.
      reuse: when true, variables already created in the scope are reused.

    Returns:
      A pair `(tf.sigmoid(logits), logits)` from the final linear layer.
      With `f_div == "wgan"` the batch-norm layers are left out (layer
      norm is used instead for non-mnist data).
    """
    with tf.variable_scope("discriminator") as scope:
      tf.set_random_seed(0)
      np.random.seed(0)
      if reuse:
        scope.reuse_variables()

      use_wgan = self.f_div == "wgan"

      if self.dataset_name != "mnist":
        # Four conv layers whose widths double from df_dim to 8*df_dim.
        widths = (self.df_dim, self.df_dim*2, self.df_dim*4, self.df_dim*8)
        conv_names = ('d_h0_conv', 'd_h1_conv', 'd_h2_conv', 'd_h3_conv')
        feat = image
        if use_wgan:
          ln_names = ('d_ln_1', 'd_ln_2', 'd_ln_3', 'd_ln_4')
          for ln_name, conv_name, width in zip(ln_names, conv_names, widths):
            feat = Layernorm(ln_name, [1,2,3],
              lrelu(conv2d(feat, width, name=conv_name)))
        else:
          # The first conv has no normalisation; the rest use batch norm.
          norms = (None, self.d_bn1, self.d_bn2, self.d_bn3)
          for norm, conv_name, width in zip(norms, conv_names, widths):
            conv_out = conv2d(feat, width, name=conv_name)
            feat = lrelu(conv_out if norm is None else norm(conv_out))

        logits = linear(tf.reshape(feat, [self.batch_size, -1]), 1, 'd_h3_lin')
        return tf.sigmoid(logits), logits

      # mnist: two conv layers followed by two linear layers.
      first = lrelu(conv2d(image, self.c_dim, name='d_h0_conv'))

      second = conv2d(first, self.df_dim , name='d_h1_conv')
      if not use_wgan:
        second = self.d_bn1(second)
      flat = tf.reshape(lrelu(second), [self.batch_size, -1])

      hidden = linear(flat, self.dfc_dim, 'd_h2_lin')
      if not use_wgan:
        hidden = self.d_bn2(hidden)

      logits = linear(lrelu(hidden), 1, 'd_h3_lin')
      return tf.sigmoid(logits), logits


  @property
  def model_dir(self):
    """Sub-directory name built from dataset name, batch size and input size."""
    parts = (self.dataset_name, self.batch_size,
             self.input_height, self.input_width)
    return "{}_{}_{}_{}".format(*parts)
      
  def save(self, checkpoint_dir, step):
    """Write a checkpoint of the current session.

    Args:
      checkpoint_dir: root directory; the checkpoint goes into the
        `self.model_dir` sub-directory, which is created if missing.
      step: value passed to the saver as `global_step`.

    Returns:
      Whatever `self.saver.save` returns for the written checkpoint.
    """
    model_name = "DCGAN.model"
    checkpoint_dir = os.path.join(checkpoint_dir, self.model_dir)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(checkpoint_dir, exist_ok=True)

    return self.saver.save(self.sess,
            os.path.join(checkpoint_dir, model_name),
            global_step=step)

  def load(self, checkpoint_dir):
    """Restore the latest checkpoint found under `checkpoint_dir`.

    The checkpoint is looked up in the `self.model_dir` sub-directory.

    Returns:
      `(True, counter)` after a successful restore, where `counter` is the
      last run of digits in the checkpoint file name; `(False, 0)` when no
      checkpoint state is found.
    """
    import re
    print(" [*] Reading checkpoints...")
    model_ckpt_dir = os.path.join(checkpoint_dir, self.model_dir)

    state = tf.train.get_checkpoint_state(model_ckpt_dir)
    if not (state and state.model_checkpoint_path):
      print(" [*] Failed to find a checkpoint")
      return False, 0

    ckpt_name = os.path.basename(state.model_checkpoint_path)
    self.saver.restore(self.sess, os.path.join(model_ckpt_dir, ckpt_name))
    # The pattern matches the final group of digits in the file name.
    last_digits = next(re.finditer(r"(\d+)(?!.*\d)", ckpt_name))
    counter = int(last_digits.group(0))
    print(" [*] Success to read {}".format(ckpt_name))
    return True, counter





class batch_norm(object):
    """Callable wrapper around a single `tf.keras.layers.BatchNormalization`."""

    def __init__(self, epsilon=1e-5, momentum=0.9, name="batch_norm"):
        """Create the underlying layer with the given epsilon, momentum and name."""
        self.epsilon, self.momentum, self.name = epsilon, momentum, name
        layer_options = dict(
            momentum=self.momentum,
            epsilon=self.epsilon,
            name=self.name,
        )
        self.bn_layer = tf.keras.layers.BatchNormalization(**layer_options)

    def __call__(self, x, train=True):
        """Apply the layer to `x`; `train` is forwarded as `training`."""
        normalized = self.bn_layer(x, training=train)
        return normalized






# Summary API aliases. The legacy top-level names are used when the imported
# TensorFlow module still exposes them; a missing attribute falls back to the
# tf.summary equivalents. Only AttributeError is caught so that unrelated
# failures are not silently masked.
try:
  image_summary = tf.image_summary
  scalar_summary = tf.scalar_summary
  histogram_summary = tf.histogram_summary
  merge_summary = tf.merge_summary
  SummaryWriter = tf.train.SummaryWriter
  tf.set_random_seed(0)
except AttributeError:
  image_summary = tf.summary.image
  scalar_summary = tf.summary.scalar
  histogram_summary = tf.summary.histogram
  merge_summary = tf.summary.merge
  SummaryWriter = tf.summary.FileWriter

# Start from an empty default graph and open the session the model will use.
tf.reset_default_graph()
sess = tf.Session()

# Load the saved array and view it as (N, input_height, input_width, c_dim).
data_X = np.load(FLAGS.input_file).reshape(
    [-1, FLAGS.input_height, FLAGS.input_width, FLAGS.c_dim])
# The last 500 entries are held out for validation.
train_data, val_data = data_X[:-500], data_X[-500:]

# The latent vector has one entry per input element.
latent_dim = FLAGS.input_height * FLAGS.input_width * FLAGS.c_dim

model_options = dict(
    input_width=FLAGS.input_width,
    input_height=FLAGS.input_height,
    batch_size=FLAGS.batch_size,
    sample_num=FLAGS.batch_size,
    c_dim=FLAGS.c_dim,
    z_dim=latent_dim,
    dataset_name=FLAGS.dataset,
    checkpoint_dir=FLAGS.checkpoint_dir,
    f_div=FLAGS.f_div,
    prior=FLAGS.prior,
    lr_decay=FLAGS.lr_decay,
    min_lr=FLAGS.min_lr,
    model_type=FLAGS.model_type,
    log_dir=FLAGS.flow_log_dir,
    alpha=FLAGS.alpha,
    batch_norm_adaptive=FLAGS.batch_norm_adaptive,
    init_type=FLAGS.init_type,
    reg=FLAGS.reg,
    n_critic=FLAGS.n_critic,
    hidden_layers=FLAGS.hidden_layers,
    no_of_layers=FLAGS.no_of_layers,
    like_reg=FLAGS.like_reg,
    df_dim=FLAGS.df_dim,
    flags=FLAGS,
)
dcgan = DCGAN(sess, **model_options)

dcgan.train(FLAGS, train_data=train_data, val_data=val_data)


# Draw one batch of Gaussian latents and run them through the sampler.
# NOTE(review): the latents are always Gaussian here, whatever FLAGS.prior
# is set to -- confirm that is intended.
z_sample = np.random.normal(0.0, 1.0, size=(FLAGS.batch_size, latent_dim))
synth_returns = sess.run(dcgan.sampler, feed_dict={dcgan.z: z_sample}).squeeze()
print("Synthetic Returns Shape:", synth_returns.shape)


from postprocess import reconstruct_prices

# NOTE(review): the anchor is the row at index window_size - 1 of df, not the
# final row -- confirm this is the intended starting price.
anchor_row = window_size - 1
last_real_price = df.iloc[anchor_row].values
synthetic_prices = reconstruct_prices(synth_returns, last_real_price)

# Plot samples
for path_idx in range(5):
    plt.plot(synthetic_prices[path_idx])
plt.title("Synthetic Price Paths (sample)")
plt.grid(True)
plt.show()
