In [None]:
import tensorflow as tf

tf.constant([[1, 2, 3], [4, 5, 6]]) # a tensor built from a nested list (2 rows, 3 columns)

<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
array([[1, 2, 3],
       [4, 5, 6]], dtype=int32)>

In [None]:
# A plain Python number becomes a scalar tensor.
tf.constant(42)

<tf.Tensor: shape=(), dtype=int32, numpy=42>

In [None]:
# Float literals produce a float tensor; inspect its shape and dtype.
t = tf.constant([[1., 2., 3.], [4., 5., 6.]])
print(t.shape)
print(t.dtype)

(2, 3)
<dtype: 'float32'>


In [None]:
# All rows, columns from index 1 onwards.
print(t[:, 1:])
# Column 1 only, with tf.newaxis adding a trailing axis of size 1.
print(t[..., 1, tf.newaxis])
# Adds 10 to every element.
print(t+10)
# Element-wise square.
print(tf.square(t))
# Matrix product of t with its transpose.
print(t @ tf.transpose(t))

tf.Tensor(
[[2. 3.]
 [5. 6.]], shape=(2, 2), dtype=float32)
tf.Tensor(
[[2.]
 [5.]], shape=(2, 1), dtype=float32)
tf.Tensor(
[[11. 12. 13.]
 [14. 15. 16.]], shape=(2, 3), dtype=float32)
tf.Tensor(
[[ 1.  4.  9.]
 [16. 25. 36.]], shape=(2, 3), dtype=float32)
tf.Tensor(
[[14. 32.]
 [32. 77.]], shape=(2, 2), dtype=float32)


Keras

In [None]:
from tensorflow import keras

# keras.backend provides low-level helpers (square, transpose) that work on tensors.
K = keras.backend
K.square(K.transpose(t)) + 10

<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[11., 26.],
       [14., 35.],
       [19., 46.]], dtype=float32)>

NumPy + TensorFlow

In [None]:
import numpy as np

# The NumPy array is float64 here, and tf.constant() keeps that dtype.
a = np.array([2., 4., 5.])
tf.constant(a)

<tf.Tensor: shape=(3,), dtype=float64, numpy=array([2., 4., 5.])>

In [None]:
print(t.numpy()) # or np.array(t)
print(tf.square(a)) # float64
print(tf.square(t)) # float32

[[1. 2. 3.]
 [4. 5. 6.]]
tf.Tensor([ 4. 16. 25.], shape=(3,), dtype=float64)
tf.Tensor(
[[ 1.  4.  9.]
 [16. 25. 36.]], shape=(2, 3), dtype=float32)


In [None]:
# Deliberately mixes float32 and float64 to show that TensorFlow does not convert types implicitly.
tf.constant(2.0) + tf.constant(40.0, dtype=tf.float64) # raises: float32 is expected

InvalidArgumentError: cannot compute AddV2 as input #1(zero-based) was expected to be a float tensor but is a double tensor [Op:AddV2] name: 

In [None]:
# Cast explicitly so that both operands are float32 before adding.
t2 = tf.constant(40., dtype=tf.float64)
tf.constant(2.0) + tf.cast(t2, dtype=tf.float32)

<tf.Tensor: shape=(), dtype=float32, numpy=42.0>

tf.Tensor — содержит неизменяемые значения

tf.Variable — содержит изменяемые значения

In [None]:
# A tf.Variable holds values that can be modified in place (see the assign calls below).
v = tf.Variable([[1., 2., 3.], [4., 5., 6.]])
v

<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)>

In [None]:
print(v.assign(2 * v)) # changes every value
# Assign a single cell (row 0, column 1).
print(v[0, 1].assign(42))
# Assign a whole column (column 2).
print(v[:, 2].assign([0., 1.]))
# Update several individual cells at once: (0, 0) -> 100 and (1, 1) -> 200.
v.scatter_nd_update(indices=[[0, 0], [1, 1]], updates=[100., 200.])

<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[ 2.,  4.,  6.],
       [ 8., 10., 12.]], dtype=float32)>
<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[ 2., 42.,  6.],
       [ 8., 10., 12.]], dtype=float32)>
<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[ 2., 42.,  0.],
       [ 8., 10.,  1.]], dtype=float32)>


<tf.Variable 'UnreadVariable' shape=(2, 3) dtype=float32, numpy=
array([[100.,  42.,   0.],
       [  8., 200.,   1.]], dtype=float32)>

Когда обучающая выборка зашумлена, MSE слишком сильно штрафует большие ошибки; MAE штрафует их меньше, но обучение сходится дольше

Решение — функция потерь Хьюбера (Huber loss)

In [None]:
def huber_fn(y_true, y_pred):
  """Huber loss with a fixed threshold of 1.

  Element-wise, returns half the squared error where the absolute error is
  below 1, and the absolute error minus 0.5 everywhere else.
  """
  residual = y_true - y_pred
  magnitude = tf.abs(residual)
  return tf.where(
      magnitude < 1,
      tf.square(residual) / 2,  # quadratic branch for small errors
      magnitude - 0.5,          # linear branch for large errors
  )

In [None]:
# NOTE(review): `model`, `X_train` and `y_train` are not defined in the visible cells,
# and `[...]` looks like a placeholder for the remaining fit() arguments - confirm.
model.compile(loss=huber_fn, optimizer="nadam")
model.fit(X_train, y_train, [...])

# Loading a saved model: the custom loss is supplied through custom_objects
model = keras.models.load_model("model.h5", custom_objects={"huber_fn": huber_fn}) # or model.keras

In [None]:
def create_huber(threshold=1.0):
  """Build a Huber loss function for the given threshold.

  Args:
    threshold: absolute error at which the loss switches from the quadratic
      to the linear regime.

  Returns:
    A function huber_fn(y_true, y_pred) that returns the element-wise loss:
    error**2 / 2 where |error| < threshold, and
    threshold * |error| - threshold**2 / 2 elsewhere.
  """
  def huber_fn(y_true, y_pred):
    error = y_true - y_pred
    is_small_error = tf.abs(error) < threshold
    squared_loss = tf.square(error) / 2
    # The linear branch is scaled by the threshold so that both branches meet
    # at |error| == threshold (both equal threshold**2 / 2 there).
    linear_loss = threshold * tf.abs(error) - threshold**2 / 2
    return tf.where(is_small_error, squared_loss, linear_loss)
  return huber_fn

model.compile(loss=create_huber(2.0), optimizer="nadam")

# Loading the model: the loss is rebuilt with the same threshold and registered
# under "huber_fn", the name of the inner function returned by create_huber.
model = keras.models.load_model("model.h5", custom_objects={"huber_fn": create_huber(2.0)})

In [None]:
class HuberLoss(keras.losses.Loss):
  """Huber loss with a configurable threshold, packaged as a Keras loss class."""

  def __init__(self, threshold=1.0, **kwargs):
    # Any extra keyword arguments are forwarded to keras.losses.Loss.
    self.threshold = threshold
    super().__init__(**kwargs)

  def call(self, y_true, y_pred):
    """Return the element-wise Huber loss between y_true and y_pred."""
    delta = self.threshold
    residual = y_true - y_pred
    quadratic = tf.square(residual) / 2
    linear = delta * tf.abs(residual) - delta**2 / 2
    return tf.where(tf.abs(residual) < delta, quadratic, linear)

  def get_config(self):
    """Return the base configuration extended with the threshold."""
    config = dict(super().get_config())
    config["threshold"] = self.threshold
    return config

In [None]:
# Compile with an instance of the loss class (threshold 2).
model.compile(loss=HuberLoss(2.), optimizer="nadam")
# When loading, the class itself is mapped by name via custom_objects.
model = keras.models.load_model("my_model.h5", custom_objects={"HuberLoss": HuberLoss})

In [None]:
# Instantiate the built-in Keras Precision metric.
precision = keras.metrics.Precision()

In [None]:
class ReconstructingRegressor(keras.Model):
  """Regressor with an auxiliary loss for reconstructing its own inputs.

  The model stacks five SELU dense layers of 30 units. On top of the last
  hidden layer it has the main output layer and a reconstruction layer whose
  mean squared error against the inputs is added to the model losses,
  weighted by 0.05.
  """

  def __init__(self, output_dim, **kwargs):
    super().__init__(**kwargs)
    hidden_layers = []
    for _ in range(5):
      hidden_layers.append(
          keras.layers.Dense(30, activation="selu", kernel_initializer="lecun_normal"))
    self.hidden = hidden_layers
    self.out = keras.layers.Dense(output_dim)

  def build(self, batch_input_shape):
    # The reconstruction layer needs one unit per input feature, which is
    # only known once the input shape is available.
    self.reconstruct = keras.layers.Dense(batch_input_shape[-1])
    super().build(batch_input_shape)

  def call(self, inputs):
    activations = inputs
    for hidden_layer in self.hidden:
      activations = hidden_layer(activations)
    reconstruction_error = self.reconstruct(activations) - inputs
    self.add_loss(0.05 * tf.reduce_mean(tf.square(reconstruction_error)))
    return self.out(activations)

# Вычисление градиентов

В первом случае, при вычислении приближённой частной производной по w1,

мы получили (105.000036 − 105) / 10⁻⁶ = 0.000036 · 10⁶ ≈ 36.

In [4]:
def f(w1, w2):
  """Evaluate 3*w1**2 + 2*w1*w2, printing both arguments first."""
  print(w1, w2)
  quadratic_term = 3 * w1**2
  cross_term = 2 * w1 * w2
  return quadratic_term + cross_term

# Approximate each partial derivative with a forward finite difference of step eps:
# (f(w + eps) - f(w)) / eps, perturbing one argument at a time.
w1, w2 = 5, 3
eps = 1e-6
print((f(w1 + eps, w2) - f(w1, w2)) / eps)
print((f(w1, w2 + eps) - f(w1, w2)) / eps)

5.000001 3
5 3
36.000003007075065
5 3.000001
5 3
10.000000003174137


In [5]:
import tensorflow as tf

w1, w2 = tf.Variable(5.), tf.Variable(3.) # mutable
# Record the computation of z, then ask the tape for dz/dw1 and dz/dw2.
with tf.GradientTape() as tape:
  z = f(w1, w2)

gradient = tape.gradient(z, [w1, w2])
gradient

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=5.0> <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=3.0>


[<tf.Tensor: shape=(), dtype=float32, numpy=36.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]

In [6]:
with tf.GradientTape(persistent=True) as tape: # persistent: gradient() can be called several times
  z = f(w1, w2)

dz_dw1 = tape.gradient(z, w1)
dz_dw2 = tape.gradient(z, w2)

# Drop the persistent tape explicitly once both gradients have been computed.
del tape
dz_dw1

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=5.0> <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=3.0>


<tf.Tensor: shape=(), dtype=float32, numpy=36.0>

In [12]:
# Explicitly ask the tape to watch w1 and w2 before computing z.
with tf.GradientTape() as tape:
  tape.watch(w1)
  tape.watch(w2)
  z = f(w1, w2)

tape.gradient(z, [w1, w2])

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=5.0> <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=3.0>


[<tf.Tensor: shape=(), dtype=float32, numpy=36.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]

Остановка обратного распространения (backprop) через второе слагаемое

In [13]:
def f(w1, w2):
  """Evaluate 3*w1**2 + 2*w1*w2 with the cross term wrapped in tf.stop_gradient."""
  quadratic_term = 3 * w1**2
  cross_term = tf.stop_gradient(2 * w1 * w2)
  return quadratic_term + cross_term

# Only the 3 * w1**2 term contributes a gradient now: d/dw1 = 6 * w1, and w2 gets None.
with tf.GradientTape() as tape:
  z = f(w1, w2)

tape.gradient(z, [w1, w2])

[<tf.Tensor: shape=(), dtype=float32, numpy=30.0>, None]

In [21]:
def my_softplus(x):
  """Naive softplus log(exp(x) + 1), kept as-is to show the numerical problem for large x."""
  exponential = tf.exp(x)
  return tf.math.log(exponential + 1)

# A large input is used on purpose: the gradient of the naive softplus comes out as nan here.
x = tf.Variable([100.])

with tf.GradientTape() as tape:
  z = my_softplus(x)

tape.gradient(z, [x])

[<tf.Tensor: shape=(1,), dtype=float32, numpy=array([nan], dtype=float32)>]

Создание функции с пользовательским градиентом (tf.custom_gradient)

In [23]:
@tf.custom_gradient
def my_better_softplus(z):
  """Softplus log(1 + exp(z)) with a hand-written, numerically stable gradient.

  Args:
    z: floating-point tensor (or variable) of inputs.

  Returns:
    A pair (value, grad_fn) as required by tf.custom_gradient: the softplus
    of z, and a function mapping the upstream gradient to the gradient with
    respect to z.
  """
  exp = tf.exp(z)
  def my_softplus_gradients(grad):
    # d/dz log(1 + e^z) = 1 / (1 + 1 / e^z). In this form an overflowing
    # exp (inf) gives grad / 1 instead of the inf / inf = nan of autodiff.
    return grad / (1 + 1 / exp)
  # For large z, exp overflows to inf and log(inf) is inf, while softplus(z)
  # is indistinguishable from z there, so return z itself in that range.
  return tf.where(z > 30., z, tf.math.log(exp + 1)), my_softplus_gradients

# Same large input as before (x = [100.]); the custom gradient evaluates to 1 instead of nan.
with tf.GradientTape() as tape:
  z = my_better_softplus(x)

tape.gradient(z, [x])

[<tf.Tensor: shape=(1,), dtype=float32, numpy=array([1.], dtype=float32)>]

In [24]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras.regularizers import l2
from keras.models import Sequential
from keras.layers import Dense

# One L2 regularizer (factor 0.05) is shared by the kernels of both layers.
l2_reg = l2(0.05)
model = Sequential()
# Hidden layer: 30 ELU units with He-normal initialisation.
model.add(Dense(30, activation="elu", kernel_initializer="he_normal", kernel_regularizer=l2_reg))
# Output layer: a single linear unit.
model.add(Dense(1, kernel_regularizer=l2_reg))

In [25]:
def random_batch(X, y, batch_size=32):
  """Sample a random mini-batch from X and y.

  Args:
    X: indexable array of inputs (supports integer-array indexing).
    y: array of targets aligned with X along the first axis.
    batch_size: number of instances to draw.

  Returns:
    A tuple (X_batch, y_batch) built from the same randomly drawn indices.
    Indices are drawn independently, so an instance may appear more than once.
  """
  # Fixed typo: the function is np.random.randint, not np.random.ranint.
  idx = np.random.randint(len(X), size=batch_size)
  return X[idx], y[idx]

In [27]:
def print_status_bar(iteration, total, loss, metrics=None):
  """Print a one-line progress report that overwrites itself.

  Args:
    iteration: number of instances processed so far.
    total: total number of instances; once iteration reaches it, the line is
      terminated with a newline instead of being overwritten.
    loss: object with a `name` attribute and a `result()` method.
    metrics: optional list of further objects with the same interface.
  """
  # Use a separate local name so the `metrics` parameter is not rebound to a string.
  summary = " - ".join(["{}:{:.4f}".format(m.name, m.result()) for m in [loss] + (metrics or [])])
  end = "" if iteration < total else "\n"
  # " - " separates the progress counter from the first metric; without it
  # the two run together (e.g. "32/1000mean:0.1234").
  print("\r{}/{} - ".format(iteration, total) + summary, end=end)

In [None]:
# Hyper-parameters and objects used by the custom training loop.
# NOTE(review): X_train is not defined in the visible cells - confirm it is loaded earlier.
n_epochs = 5
batch_size = 32
n_steps = len(X_train) // batch_size
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
optimizer = keras.optimizers.Nadam(learning_rate=0.01)
# The loss must be an instance: calling the class itself with (y_true, y_pred)
# would build a new loss object instead of computing a loss value.
loss_fn = keras.losses.MeanSquaredError()
mean_loss = keras.metrics.Mean()
metrics = [keras.metrics.MeanAbsoluteError()]

In [None]:
# Custom training loop: sample a batch, compute the loss (main loss plus the
# model's regularization losses), apply the gradients and report progress.
# NOTE(review): X_train_scaled and y_train are not defined in the visible cells - confirm.
for epoch in range(1, n_epochs + 1):
  print("Epoch {}/{}".format(epoch, n_epochs))
  for step in range(1, n_steps + 1):
    X_batch, y_batch = random_batch(X_train_scaled, y_train, batch_size=batch_size)
    with tf.GradientTape() as tape:
      y_pred = model(X_batch, training=True)
      main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
      loss = tf.add_n([main_loss] + model.losses)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    mean_loss(loss)
    for metric in metrics:
      metric(y_batch, y_pred)
    # Fixed misplaced parenthesis: len() takes only y_train; the loss and the
    # metrics are separate arguments of print_status_bar.
    print_status_bar(step * batch_size, len(y_train), mean_loss, metrics)
  # Finish the epoch's status line (iteration == total ends it with a newline).
  print_status_bar(len(y_train), len(y_train), mean_loss, metrics)
  # Reset the running statistics so the next epoch is reported on its own.
  # On TensorFlow versions older than 2.5 the method is named reset_states().
  for metric in [mean_loss] + metrics:
    metric.reset_state()

In [28]:
def cube(x):
  """Return x raised to the third power."""
  exponent = 3
  return x ** exponent

# Wrap the plain Python function with tf.function; calling the result returns a tensor.
tf_cube = tf.function(cube)
tf_cube(2)

<tf.Tensor: shape=(), dtype=int32, numpy=8>

In [30]:
@tf.function
def tf_cube(x):
  """Return x raised to the third power (decorator form of tf.function)."""
  exponent = 3
  return x ** exponent

# python_function gives access to the original Python function, so the result is a plain int.
tf_cube.python_function(2)

8

❌
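1. TensorFlow преобразует в граф только циклы вида for i in tf.range(x); обычный for i in range(x) выполняется лишь при трассировке
2. Исходный код функции должен быть доступен: если есть только скомпилированные файлы *.pyc, генерация графа может не сработать или работать с ограничениями
3. np.random.rand() внутри tf.function вызывается только при трассировке, поэтому повторные вызовы возвращают одно и то же число; оно меняется лишь при новой трассировке (например, при вызове с аргументом другой формы: f(1) и f([1, 2, 3])). Вместо этого используйте tf.random.uniform([])
4. Применяйте tf.reduce_sum() вместо np.sum(), tf.sort() вместо sorted()
5. Keras автоматически преобразует пользовательские функции в tf.function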