In [1]:
from math import cos, sin, sqrt, fabs
import numpy as np
import csv
import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model, load_model

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


## Dataset generation

In [2]:
def feature_1(x, e):
    """Return the cosine of ``x`` shifted by the additive noise term ``e``."""
    signal = cos(x)
    return signal + e

def feature_2(x, e):
    """Return the negated ``x`` plus the additive noise term ``e``."""
    return e - x

def feature_3(x, e):
    """Return ``x * sin(x)`` plus the additive noise term ``e``."""
    oscillation = x * sin(x)
    return oscillation + e

def feature_4(x, e):
    """Return the square root of ``|x|`` plus the additive noise term ``e``."""
    magnitude = fabs(x)
    return sqrt(magnitude) + e

def feature_5(x, e):
    """Return ``x`` squared plus the additive noise term ``e``."""
    squared = x ** 2
    return squared + e

def feature_6(x, e):
    """Return ``4 - |x|``.

    The noise argument ``e`` is accepted so the signature matches the other
    feature functions, but it is not used in the result.
    NOTE(review): every sibling feature adds ``e`` - confirm that a noise-free
    feature is intended here.
    """
    return 4 - fabs(x)

def feature_7(x, e):
    """Return ``x - x**2 / 5`` plus the additive noise term ``e``."""
    parabola = x ** 2 / 5
    return x - parabola + e

In [3]:
# Example: np.savetxt("train.csv", generate_data(800), delimiter=",")
def generate_data(N = 10000, seed = None):
    """Generate a synthetic dataset of seven features driven by one latent variable.

    Draws ``N`` latent samples ``x ~ Normal(0, 10)`` and ``N`` noise samples
    ``e ~ Normal(0, 0.3)``, then evaluates ``feature_1`` .. ``feature_7`` on
    every ``(x, e)`` pair.

    Args:
        N: number of rows (samples) to generate.
        seed: optional seed for reproducible output. With the default ``None``
            the global ``np.random`` state is used, exactly as before. Any
            other value seeds a private ``np.random.RandomState`` so the
            global state is left untouched.

    Returns:
        ``numpy.ndarray`` of shape ``(N, 7)``; column ``k`` holds the values
        of ``feature_{k+1}``.
    """
    rng = np.random if seed is None else np.random.RandomState(seed)
    # Draw order (latent values first, then noise) is kept so that results
    # under an externally seeded global state do not change.
    X = rng.normal(0, 10, N)
    e = rng.normal(0, .3, N)

    features = (feature_1, feature_2, feature_3, feature_4,
                feature_5, feature_6, feature_7)
    # Pair the samples once instead of re-zipping for every feature.
    samples = list(zip(X, e))
    rows = [[feature(x, noise) for x, noise in samples] for feature in features]
    # rows is (7, N); transpose to one sample per row.
    return np.array(rows, dtype=np.float64).transpose()

In [4]:
# Build the default-sized dataset and persist it as CSV.
data = generate_data()
np.savetxt(fname="generated_data.csv", X=data, delimiter=", ")

## train/test split

In [5]:
# Hold out the trailing 20% of rows for testing; rows are taken in order.
test_ratio = .2
train_size = round((1 - test_ratio) * data.shape[0])

train_data, test_data = data[:train_size, :], data[train_size:, :]

# The second column (index 1) is the regression target.
train_values, test_values = train_data[:, 1], test_data[:, 1]

# Drop the target column so only the six input features remain.
train_data = np.delete(train_data, obj=1, axis=1)
test_data = np.delete(test_data, obj=1, axis=1)

In [6]:
# Sanity check: total number of rows, then the number assigned to training.
print(len(data), train_size, sep="\n")
# print(train_data, train_values)
# print(test_data, test_values)

10000
8000


## Normalization

In [7]:
# Standardise every input column using statistics of the training split only;
# the same mean/std are then applied to the test split.
mean = train_data.mean(axis=0, dtype=np.float64)
std = train_data.std(axis=0, dtype=np.float64)

train_data = (train_data - mean) / std
test_data = (test_data - mean) / std

## Encoder

In [8]:
# Encoder stack: 6 inputs -> 36 (ReLU) -> 18 (ReLU) -> 3-unit bottleneck
# with no activation.
input_layer = Input(shape=(6,), name='input')
encode1 = Dense(units=36, activation='relu', name='encode1')(input_layer)
encode2 = Dense(units=18, activation='relu', name='encode2')(encode1)
encoder_output = Dense(units=3, name='encode_out')(encode2)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


## Decoder

In [9]:
# Decoder stack mirrors the encoder: 3-unit code -> 18 (ReLU) -> 36 (ReLU)
# -> 6 reconstructed values with no activation.
decode1 = Dense(units=18, activation='relu', name='decode1')(encoder_output)
decode2 = Dense(units=36, activation='relu', name='decode2')(decode1)
decoder_output = Dense(units=6, name='decode_out')(decode2)

## Regression

In [10]:
# Regression head branches off the 3-unit code: 16 (ReLU) -> 8 (ReLU)
# -> a single output with no activation.
reg1 = Dense(units=16, activation='relu', name='regression1')(encoder_output)
reg2 = Dense(units=8, activation='relu', name='regression2')(reg1)
reg_output = Dense(units=1, name='regression_out')(reg2)

## Models

In [11]:
# Joint model: one input, two outputs (reconstruction and regression).
model = Model(inputs=input_layer, outputs=[decoder_output, reg_output], name='main_model')

# Input -> 3-unit code.
encoder_model = Model(inputs=input_layer, outputs=encoder_output, name='encoder')

# Stand-alone decoder: re-apply the main model's decoder layers, looked up by
# name, to a fresh 3-dimensional input.
decoder_input = Input(shape=(3,), name='decoder_input')
decoder = decoder_input
for layer_name in ('decode1', 'decode2', 'decode_out'):
    decoder = model.get_layer(layer_name)(decoder)
decoder_model = Model(inputs=decoder_input, outputs=decoder, name='decoder')

# Input -> regression output (passes through the encoder layers).
regression_model = Model(inputs=input_layer, outputs=reg_output, name='regression')

In [12]:
# model.summary()
# encoder_model.summary()
# decoder_model.summary()
# regression_model.summary()

## Training / testing

In [13]:
# Both output heads use mean squared error, keyed by output-layer name.
# A metrics=['accuracy'] argument was left disabled in the original call.
model.compile(
    optimizer='adam',
    loss={
        'regression_out': 'mse',
        'decode_out': 'mse',
    },
)

In [14]:
# Train both heads together: the decoder target is the input itself, the
# regression target is the held-out column. Output is silenced (verbose=0).
history = model.fit(
    x=train_data,
    y={'regression_out': train_values, 'decode_out': train_data},
    epochs=100,
    batch_size=500,
    verbose=0,
)

In [15]:
# NOTE(review): this repeats, verbatim, the sub-model construction already done
# right after `model` was created. Those earlier sub-models reference the same
# layer objects, so this rebuild looks redundant - confirm before removing.
encoder_model = Model(inputs=input_layer, outputs=encoder_output, name='encoder')

# Stand-alone decoder built from the main model's decoder layers.
decoder_input = Input(shape=(3,), name='decoder_input')
decoder = decoder_input
for layer_name in ('decode1', 'decode2', 'decode_out'):
    decoder = model.get_layer(layer_name)(decoder)
decoder_model = Model(inputs=decoder_input, outputs=decoder, name='decoder')

regression_model = Model(inputs=input_layer, outputs=reg_output, name='regression')

In [16]:
# Persist each sub-model to its own HDF5 file in the working directory.
for trained_model, h5_path in (
    (encoder_model, 'encoder.h5'),
    (decoder_model, 'decoder.h5'),
    (regression_model, 'regression.h5'),
):
    trained_model.save(h5_path)

In [17]:
# Two-column table: true target value next to the model's prediction.
regression_results = np.column_stack(
    (test_values, regression_model.predict(test_data).flatten())
)
np.savetxt(fname="regression.csv", X=regression_results, delimiter=", ")

In [18]:
# Encode the standardised test inputs into the 3-dimensional code, show it,
# and save it as CSV.
encoded = encoder_model.predict(x=test_data)
print(encoded)
np.savetxt(fname="encoded.csv", X=encoded, delimiter=", ")

[[ -0.06188484   1.3168755   -1.0527722 ]
 [ -4.2220325    5.050087     0.78082883]
 [-12.952927    15.748083    -4.482695  ]
 ...
 [ -4.246083     4.0716367   -0.19647743]
 [ -2.7008977    6.477922   -11.456446  ]
 [ -3.1610296   10.52547    -15.920053  ]]


In [19]:
# Reconstruct the six input features from the codes, show them, and save
# them as CSV.
decoded = decoder_model.predict(x=encoded)
print(decoded)
np.savetxt(fname="decoded.csv", X=decoded, delimiter=", ")

[[ 1.5690784   0.03368635 -1.5686332  -0.76444477  1.2747363   0.6623248 ]
 [ 1.0773665   0.23194066  0.6625817   0.2455212  -0.53951365  0.18616351]
 [-0.69042414 -0.40127367  2.2276618   5.0743957  -3.3780608  -3.744626  ]
 ...
 [-1.1070884  -0.74341184  0.37981793  0.01776658 -0.2963129   0.30433616]
 [-0.72353977  0.5407427   1.730736    2.6389186  -2.1480625  -3.3614106 ]
 [ 0.36184913  3.0085611   2.316327    4.336832   -3.2269802  -5.020255  ]]


In [20]:
# Print the standardised test inputs - presumably for eyeballing against the
# decoder output printed in the previous cell.
print(test_data)

[[ 1.59010067  0.03232129 -1.59354107 -0.71582025  1.28543309  0.67570478]
 [ 1.6928933  -0.79865581  1.13516057  0.3389702  -0.68590401  0.10816291]
 [-1.22826856 -0.85111847  2.42642742  5.17095749 -3.43545752 -3.91309012]
 ...
 [-1.44306591 -0.68282046  0.2940061  -0.00895808 -0.32531032  0.33694865]
 [-1.34995568  0.43978164  1.77547038  2.74445335 -2.32426846 -3.34901764]
 [ 0.88705054  3.44624747  2.4489758   4.29944866 -3.06793622 -4.95891145]]
