In [1]:
import os
import random
import tensorflow as tf
keras = tf.keras
layers = tf.keras.layers
initializers = tf.keras.initializers

from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error as mse
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression

from data_generator import generator
from encoding.layers import IntegerBaseEncoder, FloatBaseEncoder

2023-02-09 13:17:53.198813: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [2]:
def set_seed(seed=42):
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    os.environ['TF_DETERMINISTIC_OPS'] = '1'
    os.environ['TF_CUDNN_DETERMINISTIC'] = '1'
    
    tf.random.set_seed(seed)
    tf.config.threading.set_inter_op_parallelism_threads(1)
    tf.config.threading.set_intra_op_parallelism_threads(1) 

In [3]:
seed = 123
set_seed(seed)
n_samples = 1000
n_features = 1
mean = 1
std = 0.6

for dist_name in ['normal', 'exponential', 'lognormal']:
    gen = generator.DataGenerator(mean=mean, std=std, dist=dist_name)
    x, y = gen.generate(n_features=n_features, n_samples=n_samples)
    x = np.ravel(x)
    order = np.argsort(x).reshape(-1, 1)
    x, y = x[order], y[order]
    exec(f'x_{dist_name}=x\ny_{dist_name}=y')

In [4]:
x_array = ['x_normal', 'x_exponential', 'x_lognormal']
y_array = ['y_normal', 'y_exponential', 'y_lognormal']

In [5]:
standard = lambda x:(x - x.mean())/x.std()
base_array = range(2, 37)
norm_array = [True, False]
encode_sign_array = [True, False]
only_integers_array = [True, False]
seeds = range(5)

In [6]:
tranformations = {
                  'intact': [{'params':{'base':10, 'norm':False, 'encode_sign':None, 'only_integers':None}, 'func':lambda x: x}],
                  'standardization': [{'params':{'base':10, 'norm':True, 'encode_sign':None, 'only_integers':None,}, 'func':standard}],
                  'higher dimensionality': [{'params':{'base':10, 'norm':False, 'encode_sign':None, 'only_integers':None}, 'func':lambda x, n=5: np.power(x, np.arange(1, n))}],
                  'numerical encoding':[]
                 }

In [7]:
for base in base_array:
    for norm in norm_array:
        for encode_sign in encode_sign_array:
            for only_integers in only_integers_array:
                params = {'base':base, 'norm':norm, 'encode_sign':encode_sign, 'only_integers': only_integers}
                transform_layer = FloatBaseEncoder(**params)
                curr_dict = {'params':params, 'func': lambda x: transform_layer(x).squeeze(1)}
                tranformations['numerical encoding'].append(curr_dict)

In [8]:
class _MLPBlock(keras.layers.Layer):
    def __init__(self, width, droprate=0, regularization=keras.regularizers.L1L2(0), **kwargs):
        super(_MLPBlock, self).__init__()
        self.dense = layers.Dense(width, 
                                  kernel_initializer=initializers.RandomNormal(seed=seed),
                                  kernel_regularizer=regularization, **kwargs)
#         self.activation = layers.ReLU()
#         self.drop = layers.Dropout(droprate)
#         self.bn = layers.BatchNormalization()
        
    def call(self, inputs, **kwargs):
        x = self.dense(inputs)
#         x = self.activation(x)
#         x = self.drop(x)
#         x = self.bn(x)
        return x

# implement policy network
class MLP(keras.Model):
    def __init__(self, input_dim, output_dim, hidden_dim=64, depth=1, **kwargs):
        super().__init__()
        self.depth = depth
        self.hidden_0 = _MLPBlock(hidden_dim, input_shape=(input_dim,), **kwargs)

        for i in range(1, depth):
            setattr(self, f'hidden_{i}', _MLPBlock(hidden_dim))
        self.out = _MLPBlock(output_dim)

    
    def call(self, inputs):
        x = inputs
        for i in range(self.depth):
            x = getattr(self, f'hidden_{i}')(x)
        x = self.out(x)
        return x

In [9]:
epochs = 1
activation = activation=keras.activations.relu

results = []

for seed in seeds:
    set_seed(seed)
    for idx, x_name in enumerate(x_array):
        for idy, y_name in enumerate(y_array):
            cur_x, cur_y = eval(x_name), eval(y_name)
            for transformation_name, transormation_loaders in tranformations.items():
                for transormation_loader in transormation_loaders:
                    print(x_name, y_name, transformation_name, transormation_loader['params'])
                    transformed_x = transormation_loader['func'](cur_x)
                    model = MLP(transformed_x.shape[1], 1, 128, depth=2, activation=activation)
                    model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-4), loss='mse')
                    history = model.fit(transformed_x, cur_y, epochs=epochs, verbose=0)
                    y_hat = model.predict(transformed_x)
                    score = mse(y_hat, cur_y)
                    results.append({'x':x_name, 'y':y_name, 'transformation_name':transformation_name, 'params': transormation_loader['params'], 'score':score, 'history':history.history, 'seed':seed})

x_normal y_normal intact {'base': 10, 'norm': False, 'encode_sign': None, 'only_integers': None}


2023-02-09 13:17:58.222842: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2023-02-09 13:17:58.223529: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2023-02-09 13:17:58.295347: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-02-09 13:17:58.295585: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce GTX 1050 Ti computeCapability: 6.1
coreClock: 1.62GHz coreCount: 6 deviceMemorySize: 3.95GiB deviceMemoryBandwidth: 104.43GiB/s
2023-02-09 13:17:58.295600: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2023-02-09 13:17:58.297020: I tensorflow/stream_executor/platform/d

x_normal y_normal standardization {'base': 10, 'norm': True, 'encode_sign': None, 'only_integers': None}
x_normal y_normal higher dimensionality {'base': 10, 'norm': False, 'encode_sign': None, 'only_integers': None}
x_normal y_normal numerical encoding {'base': 2, 'norm': True, 'encode_sign': True, 'only_integers': True}


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 2 dimensions. The detected shape was (1000, 1) + inhomogeneous part.

In [10]:
cur_x

array([[-9.38633005e-01],
       [-9.00233197e-01],
       [-6.79153463e-01],
       [-6.76683379e-01],
       [-6.72867730e-01],
       [-5.78862822e-01],
       [-5.34232762e-01],
       [-4.65845472e-01],
       [-4.63076338e-01],
       [-4.56007546e-01],
       [-4.54901795e-01],
       [-3.63570627e-01],
       [-3.50920992e-01],
       [-3.41850410e-01],
       [-3.41701305e-01],
       [-3.12251558e-01],
       [-3.01644775e-01],
       [-2.91496067e-01],
       [-2.73860210e-01],
       [-2.73379330e-01],
       [-2.62957058e-01],
       [-2.31439043e-01],
       [-2.30130606e-01],
       [-2.25257849e-01],
       [-2.15027458e-01],
       [-2.05151281e-01],
       [-1.86732759e-01],
       [-1.65289726e-01],
       [-1.59061387e-01],
       [-1.55352134e-01],
       [-1.54229438e-01],
       [-1.37105356e-01],
       [-1.33095428e-01],
       [-1.26121194e-01],
       [-1.22238487e-01],
       [-1.18330580e-01],
       [-1.17185426e-01],
       [-1.05219103e-01],
       [-9.8

In [None]:
# fig, ax = plt.subplots(2, len(y_array), figsize=(16,10))
# kwargs = {'bins': 20}

# for idx, name in enumerate(y_array):
#     for idy, standard_name in enumerate(['', 'standardized']):
#         cur = eval(name)
#         if standard_name:
#             cur = standard(cur)
#         ax[idy, idx].hist(cur, alpha=0.7, **kwargs)
#         ax[idy, idx].set_title(name + f' {standard_name}')
    
# ax[0, 0].set(ylabel='Count')
# ax[1, 0].set(ylabel='Count')

# plt.show()

In [None]:
# fig, ax = plt.subplots(2, len(x_array), figsize=(16,10))
# kwargs = {'bins': 20}

# for idx, name in enumerate(x_array):
#     for idy, standard_name in enumerate(['', 'standardized']):
#         cur = eval(name)
#         if standard_name:
#             cur = standard(cur)
#         ax[idy, idx].hist(cur, alpha=0.7, **kwargs)
#         ax[idy, idx].set_title(name + f' {standard_name}')
    
# ax[0, 0].set(ylabel='Count')
# ax[1, 0].set(ylabel='Count')

# plt.show()

In [None]:
# fig, ax = plt.subplots(len(x_array), len(y_array), figsize=(16, 9))
# fig.tight_layout()


# activation = activation=keras.activations.relu


# for idx, x_name in enumerate(x_array):
#     for idy, y_name in enumerate(y_array):
#         cur_x, cur_y = eval(x_name), eval(y_name)
        

#         for name_transformation, func_transformation in tranformations.items():
#             transformed_x = func_transformation(cur_x)
#             try:
#                 model = MLP(transformed_x.shape[1], 1, 1024, depth=2, activation=activation)
#                 model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-4), loss='mse')
#                 model.fit(transformed_x, cur_y, epochs=1000, verbose=0)
#                 y_hat = model.predict(transformed_x)
#                 score = mse(y_hat, cur_y)
#                 print(f"{score} : {name_transformation}")
#                 ax[idx, idy].plot(cur_x, y_hat, '-', alpha=0.7, linewidth=3, label=f'{score:0.3f} : {name_transformation}')
#             except:
#                 print(name_transformation, 'NANS')


#         ax[idx, idy].scatter(cur_x, cur_y, c='black', marker='x', label=f'Ground Truth', s=10)

#         ax[idx, idy].set_title(f'{x_name}/{y_name}')
#         ax[idx, idy].legend()
# plt.suptitle('MLP', y=1.02)
# plt.show()