In [1]:
import os
import pickle
import shutil
import time
from datetime import datetime

import numpy as np
import pandas as pd
import tensorflow as tf
import yaml
from tensorflow import keras

import keras_to_numpy as ktnp
from cdwiener import array_fptd
from kde_training_utilities import kde_load_data
from kde_training_utilities import kde_make_train_test_split

In [2]:
# CHOOSE ---------
method = "weibull_cdf_ndt" # ddm, linear_collapse, ornstein, full, lba
machine = 'x7'
# ----------------

# INITIALIZATIONS ----------------------------------------------------------------
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only point this at a kde_stats.pickle you trust.
with open("kde_stats.pickle", "rb") as stats_file:
    stats = pickle.load(stats_file)[method]

# yaml.load without an explicit Loader is deprecated (PyYAML 5.1) and rejected
# (PyYAML 6); safe_load works across versions and only builds plain Python types.
with open("hyperparameters.yaml") as hyperparameter_file:
    dnn_params = yaml.safe_load(hyperparameter_file)

# Machine-specific locations: data/model folders come from the stats dictionary,
# the hyperparameter file that gets archived next to the model is a fixed path.
# NOTE(review): dnn_params is read from ./hyperparameters.yaml while the archived
# copy comes from the absolute path below -- confirm these are the same file.
if machine == 'x7':
    data_folder = stats["data_folder_x7"]
    model_path = stats["model_folder_x7"]
    hyperparameter_source = "/media/data_cifs/afengler/git_repos/nn_likelihoods/hyperparameters.yaml"
else:
    data_folder = stats["data_folder"]
    model_path = stats["model_folder"]
    hyperparameter_source = "/users/afengler/git_repos/nn_likelihoods/hyperparameters.yaml"

# Each run gets its own time-stamped sub-folder
model_path += dnn_params["model_type"] + "_{}_".format(method) + datetime.now().strftime('%m_%d_%y_%H_%M_%S') + "/"

print('if it does not exist, make model path')
os.makedirs(model_path, exist_ok = True)

# Copy hyperparameter setup into model path.
# The copy stays best-effort (the run continues without it), but a failure is
# now reported instead of being silently swallowed by a shell call.
try:
    shutil.copy(hyperparameter_source, model_path)
except OSError as copy_error:
    print('could not copy hyperparameters into model path: {}'.format(copy_error))

# set up gpu to use
if machine == 'x7':
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
    # os.environ only accepts strings; YAML may hand back an int for a bare device id
    os.environ["CUDA_VISIBLE_DEVICES"] = str(dnn_params['gpu_x7'])

from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

# Load the training data
print('loading data.... ')

# X, y, X_val, y_val = kde_load_data(folder = data_folder, 
#                                    return_log = True, # Dont take log if you want to train on actual likelihoods
#                                    prelog_cutoff = 1e-7 # cut out data with likelihood lower than 1e-7
#                                   )

# X = np.array(X)
# X_val = np.array(X_val)
# --------------------------------------------------------------------------------

  


if it does not exist, make model path
[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 1043153159348957180
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 14641980330218019517
physical_device_desc: "device: XLA_CPU device"
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 12048649421
locality {
  bus_id: 1
  links {
  }
}
incarnation: 1268403537854229946
physical_device_desc: "device: 0, name: GeForce GTX TITAN X, pci bus id: 0000:03:00.0, compute capability: 5.2"
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 99138637536522749
physical_device_desc: "device: XLA_GPU device"
]
loading data.... 


In [3]:
# MAKE MODEL ---------------------------------------------------------------------
print('Setting up keras model')

# Input dimensionality is hard-coded; the commented alternative derives it from X
input_shape = 6 #X.shape[1]
model = keras.Sequential()

# One Dense layer per entry of dnn_params['hidden_layers'], each paired with the
# activation at the same position in dnn_params['hidden_activations'].
for i, n_units in enumerate(dnn_params['hidden_layers']):
    # Only the first layer is told the input dimensionality
    layer_kwargs = {"input_dim": input_shape} if i == 0 else {}
    model.add(keras.layers.Dense(units = n_units,
                                 activation = dnn_params["hidden_activations"][i],
                                 **layer_kwargs))

# Write model specification to yaml file (context manager closes the handle)
spec = model.to_yaml()
with open(model_path + "model_spec.yaml", "w") as spec_file:
    spec_file.write(spec)


print('STRUCTURE OF GENERATED MODEL: ....')
# summary() prints the table itself and returns None, so it is not wrapped in print()
model.summary()

# Compile with the configured loss; an unsupported value is reported here rather
# than leaving the model uncompiled.
if dnn_params['loss'] == 'huber':
    model.compile(loss = tf.losses.huber_loss,
                  optimizer = "adam",
                  metrics = ["mse"])
elif dnn_params['loss'] == 'mse':
    model.compile(loss = 'mse',
                  optimizer = "adam",
                  metrics = ["mse"])
else:
    raise ValueError("unsupported loss '{}': expected 'huber' or 'mse'".format(dnn_params['loss']))
# ---------------------------------------------------------------------------

W1029 15:20:36.278412 140113072776960 deprecation.py:506] From /home/afengler/miniconda3/envs/pytorch/lib/python3.7/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Setting up keras model


W1029 15:20:39.191444 140113072776960 deprecation.py:323] From /home/afengler/miniconda3/envs/pytorch/lib/python3.7/site-packages/tensorflow/python/ops/losses/losses_impl.py:121: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


STRUCTURE OF GENERATED MODEL: ....
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               700       
_________________________________________________________________
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_2 (Dense)              (None, 120)               12120     
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 121       
Total params: 23,041
Trainable params: 23,041
Non-trainable params: 0
_________________________________________________________________
None


In [None]:
# FIT MODEL -----------------------------------------------------------------
print('Starting to fit model.....')

# NOTE(review): X, y, X_val and y_val are not created by any active code visible
# in this notebook (the kde_load_data call is commented out) -- they have to
# exist in the kernel before this cell is run.

# Checkpoint file lives inside the run-specific model folder
ckpt_filename = "{}model.h5".format(model_path)

# Stop training once val_loss has not improved for 2 epochs
earlystopping = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    min_delta=0,
    patience=2,
    verbose=1,
)

# Multiply the learning rate by 0.1 after 1 epoch without a val_loss
# improvement of at least 1e-4, never going below 1e-7
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.1,
    patience=1,
    min_delta=1e-4,
    min_lr=1e-7,
    verbose=1,
)

# save_best_only is off, so the checkpoint file is rewritten on every save
checkpoint = keras.callbacks.ModelCheckpoint(
    ckpt_filename,
    monitor="val_loss",
    save_best_only=False,
    verbose=1,
)

fit_options = {
    "validation_data": (X_val, y_val),
    "epochs": dnn_params["n_epochs"],
    "batch_size": dnn_params["batch_size"],
    "callbacks": [checkpoint, reduce_lr, earlystopping],
    "verbose": 2,
}
history = model.fit(X, y, **fit_options)
# ---------------------------------------------------------------------------

In [5]:
# def generate_samples(path):
#     while True:
#         files_ = os.listdir(path)
#         files_ = np.random.permutation(files_)
#         file in files_:
#             with open(path + files) as f:
#                 data = pickle.load(f, 'rb')
#                 np.random.shuffle(data.values)
#                 data.reset_index(drop = True, inplace = True)
#                 n = data.shape[0]
#                 n_cols = data_shape[1]
#                 batch_size = 1024
#                 i = 0
#                 while (i * batch_size < (n - batch_size)):
#                     yield (data.iloc[(i * batch_size): ((i + 1) * batch_size )), :(n_cols - 1)], 
#                            data.iloc[(i * batch_size): ((i + 1) * batch_size )), (n_cols - 1)])
                
                
def generate_samples(data = None, batch_size = 1024):
    """Endlessly yield fixed-size (inputs, targets) batches from a DataFrame.

    The frame is walked top to bottom in consecutive row blocks of
    ``batch_size``; after the last full block the walk restarts at row 0.
    Rows left over after the last full block are never yielded, so every
    batch has exactly ``batch_size`` rows.

    Parameters
    ----------
    data : pandas.DataFrame
        Two-dimensional frame; all columns but the last are the inputs, the
        last column is the target.
    batch_size : int, optional
        Number of rows per batch (default 1024).

    Yields
    ------
    tuple of numpy.ndarray
        ``(inputs, targets)`` with shapes ``(batch_size, n_cols - 1)`` and
        ``(batch_size, 1)``.

    Raises
    ------
    ValueError
        On the first ``next()`` call if ``data`` is missing, ``batch_size`` is
        not positive, or ``data`` has fewer rows than ``batch_size``.
    """
    if data is None:
        raise ValueError("generate_samples needs a DataFrame passed as `data`")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    n = data.shape[0]
    n_cols = data.shape[1]

    # Number of complete batches; integer division also keeps the final batch
    # when n is an exact multiple of batch_size.
    n_batches = n // batch_size
    if n_batches == 0:
        # Without this guard the endless loop below would never yield.
        raise ValueError("data has {} rows, fewer than batch_size = {}".format(n, batch_size))

    while True:
        for i in range(n_batches):
            rows = slice(i * batch_size, (i + 1) * batch_size)
            inputs = data.iloc[rows, :(n_cols - 1)].to_numpy()
            targets = data.iloc[rows, (n_cols - 1)].to_numpy()
            # Targets get a trailing axis so they are (batch_size, 1)
            yield (inputs, np.expand_dims(targets, axis = 1))


# def generate_arrays_from_file(path):
#     while True:
#         with open(path) as f:
#             for line in f:
#                 # create numpy arrays of input data
#                 # and labels, from each line in the file
#                 x1, x2, y = process_line(line)
#                 yield ({'input_1': x1, 'input_2': x2}, {'output': y})

# model.fit_generator(generate_arrays_from_file('/my_file.txt'),
#                     steps_per_epoch=10000, epochs=10)

In [None]:

# SAVING --------------------------------------------------------------------
# print('Saving model and relevant data...')
# # Log of training output
# pd.DataFrame(history.history).to_csv(model_path + "training_history.csv")

# # Save Model
# model.save(model_path + "model_final.h5")

# # Extract model architecture as numpy arrays and save in model path
# __, ___, ____, = ktnp.extract_architecture(model, save = True, save_path = model_path)

# # Update model paths in model_path.yaml
# model_paths = yaml.load(open("model_paths.yaml"))
# model_paths[method] = model_path
# yaml.dump(model_paths, open("model_paths.yaml", "w"))
# ----------------------------------------------------------------------------

In [None]:
# Load one pickled simulation file into t; the context manager closes the file
# handle once loading is done.
# NOTE(review): pickle.load can execute arbitrary code from the file -- only use
# on trusted data.
with open('/media/data_cifs/afengler/data/kde/' + \
          'weibull_cdf/train_test_data_ndt/weibull_cdf_ndt_base_simulations_24.pickle', 'rb') as simulation_file:
    t = pickle.load(simulation_file)

In [8]:
# Synthetic 100000 x 10 frame of uniform [0, 1) draws, used to exercise the batch
# generator. A locally seeded RandomState keeps the frame identical across
# re-runs without touching NumPy's global random state.
t = pd.DataFrame(np.random.RandomState(0).uniform(size = (100000, 10)))

In [10]:
# Create the batch generator over the frame t (nothing is read until next() is called)
my_gen = generate_samples(data = t)

In [15]:
# Pull the next batch tuple from the generator
t2 = next(my_gen)

In [17]:
# Shape of the first batch element (all columns of t except the last)
t2[0].shape

(1024, 9)

In [None]:
# generate_samples already yields NumPy arrays, which have no .to_numpy();
# np.asarray accepts arrays and pandas objects alike.
np.asarray(t2[0]).shape

In [None]:
# generate_samples already yields NumPy arrays, which have no .values;
# np.asarray accepts arrays and pandas objects alike.
np.asarray(t2[1]).shape

In [None]:
# generate_samples already yields NumPy arrays (no .to_numpy()) and has already
# added the trailing axis; only expand when the targets are still 1-D.
targets = np.asarray(t2[1])
(targets if targets.ndim > 1 else np.expand_dims(targets, axis = 1)).shape

In [9]:
# Preview the first rows instead of rendering the whole 100000-row frame
t.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.205961,0.752730,0.793832,0.931900,0.022322,0.560578,0.356262,0.222053,0.165362,0.946916
1,0.546661,0.594928,0.678584,0.464412,0.367392,0.475764,0.124166,0.889422,0.201139,0.502694
2,0.139117,0.904341,0.392153,0.040919,0.776070,0.862407,0.366219,0.084257,0.666566,0.380420
3,0.267858,0.016171,0.307106,0.408035,0.553078,0.024696,0.910081,0.284892,0.620891,0.894443
4,0.678581,0.488243,0.380064,0.348321,0.531539,0.389896,0.155341,0.463131,0.686823,0.653478
5,0.181558,0.457969,0.428100,0.361580,0.985968,0.168864,0.827005,0.820099,0.559832,0.144888
6,0.117905,0.179908,0.634849,0.062925,0.306602,0.002233,0.210654,0.211758,0.890701,0.463268
7,0.702963,0.034494,0.864116,0.706103,0.307866,0.928858,0.600351,0.663327,0.865569,0.534681
8,0.983945,0.278483,0.766428,0.019837,0.254173,0.345996,0.210059,0.073721,0.347150,0.569863
9,0.318207,0.235657,0.117392,0.629639,0.016978,0.098106,0.808621,0.047932,0.642748,0.428816
