In [1]:
import tensorflow as tf
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for g in gpus:
    tf.config.experimental.set_memory_growth(device=g, enable=True)


In [None]:
# keras接收三种输入 numpy.array,tfdataset,python generator

# Dataset:
from tensorflow import keras

keras.preprocessing.image_dataset_from_directory
keras.preprocessing.text_dataset_from_directory
...

# 已分类存放的图像或文本，将会按照文件夹字母排序来作为label
# 也可以显示的指定类
dataset = keras.preprocessing.image_dataset_from_directory('path',
                                                           batch_size=64,
                                                           image_size=(200, 200),
                                                           class_names=['class_a', 'class_b'])
dataset = keras.preprocessing.text_dataset_from_directory('path',
                                                          batch_size=64,
                                                          class_names=['class_a', 'class_b'])
# 可迭代对象有shape和type
for data, label in dataset:
    print(data.shape, data.dtype)
    print(label.shape, label.dtype)

# 除此之外,还可以从csv文件夹加载结构化数据
import tensorflow as tf
tf.data.experimental.make_csv_dataset
...

In [2]:
import numpy as np
# 理想模型是端到端的，即数据预处理也应包含在模型中

# 文本数据预处理
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
train_data = np.array([["This is the 1st sample."], ["And here's the 2nd sample."]])

# 类似于sklearn的fit，transform
vectorizer = TextVectorization(output_mode='int')  # 整数编码
vectorizer.adapt(train_data)
integer_data = vectorizer(train_data)
print(integer_data)

# 使用n-gram模型分词，并转换为one-hot，ngrmas参数是扫描窗口的MAX大小N，即将会从1-N进行所有可能的分法
vectorizer = TextVectorization(output_mode='binary', ngrams=2)
vectorizer.adapt(train_data)
print(vectorizer.get_vocabulary())
print(len(vectorizer.get_vocabulary()))
integer_data = vectorizer(train_data)
print(integer_data)

tf.Tensor(
[[4 5 2 9 3]
 [7 6 2 8 3]], shape=(2, 5), dtype=int64)
['[UNK]', 'the', 'sample', 'this is', 'this', 'the 2nd', 'the 1st', 'is the', 'is', 'heres the', 'heres', 'and heres', 'and', '2nd sample', '2nd', '1st sample', '1st']
17
tf.Tensor(
[[0. 1. 1. 1. 1. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0. 1. 1.]
 [0. 1. 1. 0. 0. 1. 0. 0. 0. 1. 1. 1. 1. 1. 1. 0. 0.]], shape=(2, 17), dtype=float32)


In [6]:
# 张量数据预处理
import numpy as np
from tensorflow.keras.layers.experimental.preprocessing import Normalization

train_data = np.random.randint(low=-10, high=10, size=(1, 5, 5, 3))
train_data = train_data.astype(np.float64)

normalization = Normalization(axis=-1)
print(normalization(train_data))
normalization.adapt(train_data)

normal_data = normalization(train_data)
print(normal_data)
print(np.var(normal_data))
print(np.mean(normal_data))
print(np.std(normal_data))

tf.Tensor(
[[[[ -9.  -9.  -4.]
   [  6.  -4.   3.]
   [ -2.  -5.   8.]
   [  6. -10.  -7.]
   [-10.   5.   0.]]

  [[  7.  -9.   4.]
   [ -1.  -6.   5.]
   [ -1.   4.  -8.]
   [  9. -10.   6.]
   [  7.  -1.   5.]]

  [[  0.   5.   7.]
   [ -8.  -7.   3.]
   [  6.  -9.  -8.]
   [  6.   0.  -8.]
   [ -5.  -3.   0.]]

  [[  2.   1.   5.]
   [ -8.   5.  -6.]
   [  9.   9.  -6.]
   [ -3.   3.  -4.]
   [  5.   4.  -6.]]

  [[  6.   1.  -7.]
   [ -3.  -2.  -7.]
   [ -7.   3.   6.]
   [ -1. -10.   4.]
   [ -3.  -5.  -2.]]]], shape=(1, 5, 5, 3), dtype=float32)
tf.Tensor(
[[[[-1.5678242  -1.225245   -0.5938274 ]
   [ 0.95549804 -0.35007     0.6582184 ]
   [-0.39027384 -0.525105    1.5525368 ]
   [ 0.95549804 -1.40028    -1.1304185 ]
   [-1.7360457   1.225245    0.12162731]]

  [[ 1.1237195  -1.225245    0.837082  ]
   [-0.22205235 -0.70014     1.0159457 ]
   [-0.22205235  1.05021    -1.3092822 ]
   [ 1.4601625  -1.40028     1.1948093 ]
   [ 1.1237195   0.175035    1.0159457 ]]

  [[-0.05383087  

In [11]:
# 重新缩放和中心剪裁图像
from tensorflow.keras.layers.experimental.preprocessing import CenterCrop
from tensorflow.keras.layers.experimental.preprocessing import Rescaling

train_data = np.random.randint(low=0, high=256, size=(20, 64, 64, 3))
train_data = train_data.astype(np.float64)
print(train_data)

cropper = CenterCrop(height=32, width=32)  # 即在中心不变的情况下，剪裁至指定大小
scaler = Rescaling(scale=1.0 / 255)  #即对图像张量进行标准化

output_data = scaler(cropper(train_data))
print(output_data)
print(output_data.shape)


[[[[153.  98. 221.]
   [152.  64.  76.]
   [ 54. 198. 148.]
   ...
   [110. 158.  61.]
   [ 37.  33.  32.]
   [101.  71. 172.]]

  [[231. 253.  91.]
   [ 80.  50. 181.]
   [199. 168. 138.]
   ...
   [165. 208. 123.]
   [ 17.  36. 157.]
   [ 65.  58. 177.]]

  [[205.  91.  45.]
   [161. 241. 200.]
   [ 68.  65. 138.]
   ...
   [ 35. 237.  78.]
   [ 52.   5. 225.]
   [190.  63.  75.]]

  ...

  [[100. 254. 114.]
   [  9. 201. 205.]
   [148. 170. 202.]
   ...
   [ 83.  72. 245.]
   [147.  53.  73.]
   [206.   4. 250.]]

  [[197.  82. 127.]
   [ 14. 247. 220.]
   [154.   2. 154.]
   ...
   [ 84. 229. 241.]
   [179.   0. 167.]
   [219.  35. 116.]]

  [[ 36.  35. 132.]
   [199.  13.   2.]
   [121. 228.  82.]
   ...
   [161.  12. 225.]
   [244. 223. 102.]
   [143. 128.  72.]]]


 [[[ 70. 151.  92.]
   [182.  61. 195.]
   [ 21.  27.  29.]
   ...
   [113.  96.  37.]
   [159.  32. 157.]
   [235. 246.  77.]]

  [[159.  14. 122.]
   [251. 149.  23.]
   [255. 179.  15.]
   ...
   [222. 221.  60.]
 

In [2]:
import tensorflow.keras as keras
import numpy as np

# 模型可以看作是更大的层
input_ = keras.Input(shape=(10, 20, 20, 3),
                     name='input',
                     dtype='int32')
x = keras.layers.experimental.preprocessing.Normalization(axis=-1)(input_)
x = keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu',
                        padding='same', strides=2)(x)
x = keras.layers.Flatten()(x)
output = keras.layers.Dense(units=10, activation='softmax', use_bias=0, name='out_')(x)
model = keras.models.Model(inputs=input_, outputs=output)

data = np.random.randint(low=0, high=256, size=(10, 20, 20, 3))
data = data.astype(np.float64)

out_data = model(data)
print(out_data)

tf.Tensor(
[[1.8268564e-12 4.2995194e-10 9.5498678e-12 4.4725705e-27 4.7587763e-19
  1.6237056e-05 3.6863578e-26 4.5603317e-27 9.9997985e-01 3.9413389e-06]], shape=(1, 10), dtype=float32)


In [None]:
# 多种训练的方式
history = model.fit(x='input_data', y='output_label')
history = model.fit(x='tf.Dataset, or generator')
history = model.fit_generator('等价于fit的generator，这个端口实际上没必要使用了')

# 还可以自己重写fit方法
def train_step(self, data):
    pass  # 详见文档

In [None]:
# 指标监控可以命名
model.compile(metrics=[keras.metrics.MeanSquaredError(name='MSE')])

# 使用tensorboard监控训练
callbacks = [keras.callbacks.TensorBoard(log_dir='path')]
model.fit(callbacks=callbacks)

%tensorboard --logdir='path'  # 然后就可以查看了


# 模型被编译成静态图一般来说，但难以调试，因为不是运行写出来的代码，于是可以使用动态来调试
model.compile(run_eagerly=True)  # 高级功能，用得少
