In [1]:
# Load packages
import tensorflow as tf
import pandas as pd
from tensorflow import keras
import numpy as np
import pandas as pd
import os
import scipy as scp
import scipy.stats as scps
import time
from datetime import datetime


# Load my own functions
import dnnregressor_train_eval_keras as dnnk
from kde_training_utilities import kde_load_data
from kde_training_utilities import kde_make_train_test_split
import make_data_wfpt as mdw

In [2]:
# CUDA environment: set the device ordering and the visible device,
# then print the devices TensorFlow reports.

os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",   # see issue #152
    "CUDA_VISIBLE_DEVICES": "2",
})

from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 11040554043858317424
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 2774437801049157407
physical_device_desc: "device: XLA_CPU device"
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 12048649421
locality {
  bus_id: 2
  numa_node: 1
  links {
  }
}
incarnation: 2718372372120431713
physical_device_desc: "device: 0, name: GeForce GTX TITAN X, pci bus id: 0000:82:00.0, compute capability: 5.2"
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 9390410980285239116
physical_device_desc: "device: XLA_GPU device"
]


In [3]:
# Folder that holds the dataset used in this notebook
data_folder = '/media/data_cifs/afengler/data/kde/full_ddm/train_test_data/'

# Trainer object from dnnregressor_train_eval_keras
# (named cpm for "choice probability model")
cpm = dnnk.dnn_trainer()

In [4]:
# Create the train/test split for the files in `data_folder`
# (p_train = 0.8 is handed to the helper; its status string is the cell output).
kde_make_train_test_split(folder = data_folder, p_train = 0.8)

get files in folder
check if we have a train and test sets already
folder clean so proceeding...
read, concatenate and shuffle data
get training and test indices
writing to file...


'success'

In [5]:
# Read the stored split back and place its four parts into cpm.data
(cpm.data['train_features'],
 cpm.data['train_labels'],
 cpm.data['test_features'],
 cpm.data['test_labels']) = kde_load_data(folder = data_folder)

In [6]:
# Shape of the test feature set, as a sanity check after loading
cpm.data['test_features'].shape

(111566113, 7)

In [8]:
# Shape of the training feature set, as a sanity check after loading
cpm.data['train_features'].shape

(446278279, 7)

In [None]:
# Inspect a single training row by integer position (the position is hard-coded)
cpm.data['train_features'].iloc[171247010, :]

In [None]:
cpm.data['train_features']['log_l'] = cpm.data['train_labels']

In [None]:
cpm.data['train_features'].sort_values(by = 'log_l')

In [None]:
# Display the training feature frame
cpm.data['train_features']

In [None]:
# Inspect the training row at integer position 22428 (hard-coded)
cpm.data['train_features'].iloc[22428, :]

In [None]:
# Label at position 22428, the same position as the feature row inspected above.
# NOTE(review): the `[22428, ]` tuple-style index suggests train_labels is a
# NumPy array rather than a pandas object — confirm.
cpm.data['train_labels'][22428, ]

In [17]:
# Make all parameters we can specify explicit.
# Model parameters: display the dictionary currently held by the trainer
# (layer sizes, activations, regularisation coefficients, optimizer, loss, metrics).
cpm.model_params

{'input_shape': 7,
 'output_shape': 1,
 'output_activation': 'linear',
 'hidden_layers': [20, 40, 60, 80, 100, 120],
 'hidden_activations': ['relu', 'relu', 'relu', 'relu', 'relu', 'relu'],
 'l1_activation': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 'l2_activation': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 'l1_kernel': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 'l2_kernel': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 'optimizer': 'Nadam',
 'loss': 'mse',
 'metrics': ['mse']}

In [18]:
# Parameters governing training: display the dictionary currently held by the
# trainer (callbacks and their settings, epochs, batch size, checkpointing).
cpm.train_params

{'callback_funs': ['ReduceLROnPlateau', 'EarlyStopping', 'ModelCheckpoint'],
 'plateau_patience': 10,
 'min_delta': 0.0001,
 'early_stopping_patience': 15,
 'callback_monitor': 'loss',
 'min_learning_rate': 1e-07,
 'red_coef_learning_rate': 0.1,
 'ckpt_period': 10,
 'ckpt_save_best_only': True,
 'ckpt_save_weights_only': True,
 'max_train_epochs': 200,
 'batch_size': 200000,
 'warm_start': False,
 'checkpoint': 'ckpt',
 'model_cnt': 0}

In [19]:
# Parameters concerning data storage: display the dictionary currently held by
# the trainer (model directory, model name, data signature, timestamp, ...).
cpm.data_params

{'data_type': 'kde',
 'model_directory': '/media/data_cifs/afengler/data/kde/linear_collapse/keras_models',
 'checkpoint': 'ckpt',
 'model_name': 'dnnregressor',
 'data_type_signature': '_ddm_linear_collapse_',
 'timestamp': '06_22_19_23_20_30',
 'training_data_size': 143268157,
 'timestep': '06_22_19_23_16_39'}

In [11]:
# If necessary, specify new set of parameters here.
# Every assignment below overwrites one entry of the trainer's parameter
# dictionaries; entries that are not assigned keep their current value.

# Model params
cpm.model_params['output_activation'] = 'linear'
cpm.model_params['hidden_layers'] = [20, 40, 60, 80, 100, 120]
cpm.model_params['hidden_activations'] = ['relu', 'relu', 'relu', 'relu', 'relu', 'relu']
# Input width is taken from the loaded training features
cpm.model_params['input_shape'] = cpm.data['train_features'].shape[1]
# cpm.model_params['l1_activation'] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
# cpm.model_params['l2_activation'] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
cpm.model_params['l1_kernel'] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
cpm.model_params['l2_kernel'] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]

# Train params
cpm.train_params['batch_size'] = 1000000
cpm.train_params['max_train_epochs'] = 250
cpm.train_params['min_delta'] = 0.00001

# Data params
cpm.data_params['data_type'] = 'kde'
cpm.data_params['data_type_signature'] = '_full_ddm_'
cpm.data_params['training_data_size'] = cpm.data['train_features'].shape[0]
# Timestamp of this run, formatted month_day_year_hour_minute_second
cpm.data_params['timestamp'] = datetime.now().strftime('%m_%d_%y_%H_%M_%S')
# No trailing slash here: the checkpoint path printed in the training log shows
# that a separator is already inserted after model_directory, so a trailing
# slash produces '.../keras_models//dnnregressor...'.
cpm.data_params['model_directory'] = '/media/data_cifs/afengler/data/kde/full_ddm/keras_models'

In [12]:
# Make model
# NOTE(review): presumably builds the Keras network from cpm.model_params and,
# with save_model = True, stores it under the configured model directory —
# confirm against dnnregressor_train_eval_keras.
cpm.keras_model_generate(save_model = True)

In [None]:
# Start training from scratch (warm_start = False) and ask the trainer to
# save the training history.
cpm.run_training(save_history = True, warm_start = False)

Train on 446278279 samples, validate on 111566113 samples
Instructions for updating:
Use tf.cast instead.
Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 00010: val_loss improved from inf to 0.00090, saving model to /media/data_cifs/afengler/data/kde/full_ddm/keras_models//dnnregressor_full_ddm_06_28_19_00_48_00/ckpt_0_10

Consider using a TensorFlow optimizer from `tf.train`.
Instructions for updating:
Use tf.train.CheckpointManager to manage checkpoints rather than manually editing the Checkpoint proto.
Epoch 11/250
Epoch 12/250