In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
x = tf.constant([2, 1, 0.1])
layer = layers.Softmax(axis = -1)
# print(layer)
out = layer(x)
print(out)
out = tf.nn.softmax(x)
print(out)

tf.Tensor([0.6590012  0.24243298 0.09856589], shape=(3,), dtype=float32)
tf.Tensor([0.6590012  0.24243298 0.09856589], shape=(3,), dtype=float32)


In [3]:
from tensorflow.keras import layers, Sequential
network = Sequential([
    layers.Dense(3, activation=None),
    layers.ReLU(),
    layers.Dense(2, activation=None),
    layers.ReLU()
])
x = tf.random.normal([4, 3])
out = network(x)
print(out)

tf.Tensor(
[[0.4878637  0.05905623]
 [0.08357453 0.        ]
 [1.4413633  0.174478  ]
 [1.0559056  0.12781808]], shape=(4, 2), dtype=float32)


In [4]:
layers_num = 2
network = Sequential()
for _ in range(layers_num):
    network.add(layers.Dense(3))
    network.add(layers.ReLU())
network.build(input_shape=(4, 4))
network.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_2 (Dense)              (4, 3)                    15        
_________________________________________________________________
re_lu_2 (ReLU)               (4, 3)                    0         
_________________________________________________________________
dense_3 (Dense)              (4, 3)                    12        
_________________________________________________________________
re_lu_3 (ReLU)               (4, 3)                    0         
Total params: 27
Trainable params: 27
Non-trainable params: 0
_________________________________________________________________


In [5]:
for p in network.trainable_variables:
    print(p.name, p.shape)

dense_2/kernel:0 (4, 3)
dense_2/bias:0 (3,)
dense_3/kernel:0 (3, 3)
dense_3/bias:0 (3,)


In [85]:
from tensorflow.keras import optimizers, losses, datasets, Sequential, metrics

def preprocess(x, y):
    x = tf.cast(x, dtype=tf.float32) / 255.
    x = tf.reshape(x, [28*28])
    y = tf.cast(y, dtype=tf.int32)
    y = tf.one_hot(y, depth=10)
    
    return x, y

batchsz = 128
(x, y), (x_val, y_val) = datasets.mnist.load_data()
print("datesets:", x.shape, y.shape, x.min(), x.max())

db = tf.data.Dataset.from_tensor_slices((x, y))
db = db.map(preprocess).shuffle(60000).batch(batchsz)
ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
ds_val = ds_val.map(preprocess).shuffle(60000).batch(batchsz)

sample = next(iter(db))
print(sample[0].shape, sample[1].shape)

datesets: (60000, 28, 28) (60000,) 0 255
(128, 784) (128, 10)


In [86]:
network = Sequential([
    layers.Dense(256, activation = 'relu'),
    layers.Dense(128, activation = 'relu'),
    layers.Dense(64, activation = 'relu'),
    layers.Dense(32, activation = 'relu'),
    layers.Dense(10)    
])
network.build(input_shape = (4, 28 * 28))
network.summary()

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_39 (Dense)             (4, 256)                  200960    
_________________________________________________________________
dense_40 (Dense)             (4, 128)                  32896     
_________________________________________________________________
dense_41 (Dense)             (4, 64)                   8256      
_________________________________________________________________
dense_42 (Dense)             (4, 32)                   2080      
_________________________________________________________________
dense_43 (Dense)             (4, 10)                   330       
Total params: 244,522
Trainable params: 244,522
Non-trainable params: 0
_________________________________________________________________


In [87]:
network.compile(optimizer=optimizers.Adam(lr=0.01),
               loss=losses.CategoricalCrossentropy(from_logits=True),
               metrics=['accuracy'])

history = network.fit(db, epochs=5, validation_data=ds_val, validation_freq=2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [88]:
network.evaluate(ds_val)



[0.12408343702554703, 0.9688000082969666]

In [89]:
sample = next(iter(ds_val))
x = sample[0]
y = sample[1] # one-hot
pred = network.predict(x) # [b, 10]
# convert back to number 
y = tf.argmax(y, axis=1)
pred = tf.argmax(pred, axis=1)

print(pred)
print(y)

tf.Tensor(
[3 8 2 4 9 5 1 7 6 2 2 7 2 3 5 6 7 6 0 2 1 3 4 8 0 4 4 5 9 7 9 2 4 1 8 7 3
 4 1 5 6 7 3 1 1 2 9 2 9 0 6 5 3 5 3 9 1 2 2 8 4 8 3 4 3 2 6 1 3 5 1 1 8 2
 8 1 6 7 2 6 2 7 3 9 2 9 5 9 4 2 4 2 8 5 3 9 0 4 8 0 5 1 9 4 4 1 4 1 4 0 0
 1 2 5 5 9 9 7 9 3 6 7 3 0 0 8 9 1], shape=(128,), dtype=int64)
tf.Tensor(
[3 8 2 4 9 5 1 7 6 2 2 7 2 3 5 4 7 6 0 2 1 3 4 8 0 4 4 5 9 7 9 2 4 1 8 7 3
 4 1 5 6 7 3 1 1 2 9 2 9 0 6 5 3 5 3 9 1 2 2 8 4 8 3 4 3 2 6 1 3 5 1 1 8 2
 8 1 6 7 2 6 2 7 3 9 2 9 5 9 4 2 4 2 8 5 3 9 0 4 8 0 5 1 9 4 4 1 4 1 4 0 0
 1 2 5 5 9 9 7 9 3 6 7 3 0 0 8 9 1], shape=(128,), dtype=int64)


In [90]:
history.history

{'loss': [0.27798792719841003,
  0.13113759458065033,
  0.1109216958284378,
  0.0910920724272728,
  0.0814114436507225],
 'accuracy': [0.9167166948318481,
  0.9621333479881287,
  0.96875,
  0.9749666452407837,
  0.977733314037323],
 'val_loss': [0.13199099898338318, 0.12044104188680649],
 'val_accuracy': [0.9652000069618225, 0.9702000021934509]}

In [60]:
network.save_weights("weights.ckpt")
print("saved weights.")
del network

saved weights.


In [83]:
network = Sequential([
    layers.Dense(256, activation = 'relu'),
    layers.Dense(128, activation = 'relu'),
    layers.Dense(64, activation = 'relu'),
    layers.Dense(32, activation = 'relu'),
    layers.Dense(10)    
])
network.build(input_shape = (4, 28 * 28))
network.compile(optimizer=optimizers.Adam(lr=0.01),
               loss=losses.CategoricalCrossentropy(from_logits=True),
               metrics=['accuracy'])

network.load_weights("weights.ckpt")
print("loaded weights!")

loaded weights!


In [70]:
network.save("model.h5")
print("saved total model.")

saved total model.


In [80]:
network = keras.models.load_model("model.h5")



In [75]:
tf.saved_model.save(network, "model-savedmodel")
print("saving savedmodel.")
# del network

INFO:tensorflow:Assets written to: model-savedmodel/assets
saving savedmodel.


In [92]:
# print("load savedmodel from file.")
# network = tf.saved_model.load("model-savedmodel")
acc_meter = metrics.CategoricalAccuracy()
for x, y in ds_val:
    pred = network(x)
    acc_meter.update_state(y_true=y, y_pred=pred)
print("Test Accucary:%f" % acc_meter.result())
# network.predict(next(iter(ds_val))[0])

Test Accucary:0.968800


In [93]:
print("load savedmodel from file.")
network_save = tf.saved_model.load("model-savedmodel")

load savedmodel from file.


In [13]:
class MyDense(layers.Layer):
    def __init__(self, inp_dim, outp_dim):
        super(MyDense, self).__init__()
        self.kernel = self.add_weight('w', [inp_dim,outp_dim], trainable = True)
        
net = MyDense(4, 3)
net.variables, net.trainable_variables

([<tf.Variable 'w:0' shape=(4, 3) dtype=float32, numpy=
  array([[-0.5406294 , -0.64397204, -0.3079378 ],
         [ 0.84875655, -0.889512  , -0.25044334],
         [ 0.6833739 , -0.7910568 , -0.8927294 ],
         [-0.6951215 , -0.02482361, -0.10442203]], dtype=float32)>],
 [<tf.Variable 'w:0' shape=(4, 3) dtype=float32, numpy=
  array([[-0.5406294 , -0.64397204, -0.3079378 ],
         [ 0.84875655, -0.889512  , -0.25044334],
         [ 0.6833739 , -0.7910568 , -0.8927294 ],
         [-0.6951215 , -0.02482361, -0.10442203]], dtype=float32)>])

In [14]:
class MyDense(layers.Layer):
    def __init__(self, inp_dim, outp_dim):
        super(MyDense, self).__init__()
        self.kernel = self.add_weight('w', [inp_dim,outp_dim], trainable = False)
        
net = MyDense(4, 3)
net.variables, net.trainable_variables

([<tf.Variable 'w:0' shape=(4, 3) dtype=float32, numpy=
  array([[-0.39678138,  0.64084554,  0.4118085 ],
         [-0.7462772 ,  0.37606716, -0.52723336],
         [ 0.7721833 ,  0.05772781, -0.33982682],
         [ 0.32219386,  0.06628978, -0.07225043]], dtype=float32)>],
 [])

In [16]:
class MyDense(layers.Layer):
    def __init__(self, inp_dim, outp_dim):
        super(MyDense, self).__init__()
        self.kernel = tf.Variable(tf.random.normal([inp_dim,outp_dim]), trainable = False)
    
    def call(self, inputs, training = None):
        out = inputs @ self.kernel
        out = tf.nn.relu(out)
        return out
        
net = MyDense(4, 3)
net.variables, net.trainable_variables

([<tf.Variable 'Variable:0' shape=(4, 3) dtype=float32, numpy=
  array([[ 0.40682504, -0.7767524 ,  0.3755346 ],
         [ 0.58661646, -0.6782297 ,  1.0860798 ],
         [-0.06641417,  0.63832235, -1.3410153 ],
         [ 0.73147297,  0.18757865, -0.33223808]], dtype=float32)>],
 [])

In [11]:
import  os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'

import tensorflow as tf
from tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
from tensorflow import keras


def preprocess(x, y):
    # [0~255] => [-1~1]
    x = 2 * tf.cast(x, dtype=tf.float32) / 255. - 1.
    y = tf.cast(y, dtype=tf.int32)
    return x,y

batchsz = 128
# [50k, 32, 32, 3], [10k, 1]
(x, y), (x_val, y_val) = datasets.cifar10.load_data()
y = tf.squeeze(y)
y_val = tf.squeeze(y_val)
y = tf.one_hot(y, depth=10) # [50k, 10]
y_val = tf.one_hot(y_val, depth=10) # [10k, 10]
print('datasets:', x.shape, y.shape, x_val.shape, y_val.shape, x.min(), x.max())


train_db = tf.data.Dataset.from_tensor_slices((x,y))
train_db = train_db.map(preprocess).shuffle(10000).batch(batchsz)
test_db = tf.data.Dataset.from_tensor_slices((x_val, y_val))
test_db = test_db.map(preprocess).batch(batchsz)


sample = next(iter(train_db))
print('batch:', sample[0].shape, sample[1].shape)


class MyDense(layers.Layer):
    # to replace standard layers.Dense()
    def __init__(self, inp_dim, outp_dim):
        super(MyDense, self).__init__()

        self.kernel = self.add_variable('w', [inp_dim, outp_dim])
        # self.bias = self.add_variable('b', [outp_dim])

    def call(self, inputs, training=None):

        x = inputs @ self.kernel
        return x

class MyNetwork(keras.Model):

    def __init__(self):
        super(MyNetwork, self).__init__()

        self.fc1 = MyDense(32*32*3, 256)
        self.fc2 = MyDense(256, 128)
        self.fc3 = MyDense(128, 64)
        self.fc4 = MyDense(64, 32)
        self.fc5 = MyDense(32, 10)



    def call(self, inputs, training=None):
        """

        :param inputs: [b, 32, 32, 3]
        :param training:
        :return:
        """
        x = tf.reshape(inputs, [-1, 32*32*3])
        # [b, 32*32*3] => [b, 256]
        x = self.fc1(x)
        x = tf.nn.relu(x)
        # [b, 256] => [b, 128]
        x = self.fc2(x)
        x = tf.nn.relu(x)
        # [b, 128] => [b, 64]
        x = self.fc3(x)
        x = tf.nn.relu(x)
        # [b, 64] => [b, 32]
        x = self.fc4(x)
        x = tf.nn.relu(x)
        # [b, 32] => [b, 10]
        x = self.fc5(x)

        return x


network = MyNetwork()
network.compile(optimizer=optimizers.Adam(lr=1e-3),
                loss=tf.losses.CategoricalCrossentropy(from_logits=True),
                metrics=['accuracy'])
network.fit(train_db, epochs=15, validation_data=test_db, validation_freq=1)

network.evaluate(test_db)
network.save_weights('ckpt/weights.ckpt')
del network
print('saved to ckpt/weights.ckpt')


network = MyNetwork()
network.compile(optimizer=optimizers.Adam(lr=1e-3),
                loss=tf.losses.CategoricalCrossentropy(from_logits=True),
                metrics=['accuracy'])
network.load_weights('ckpt/weights.ckpt')
print('loaded weights from file.')
network.evaluate(test_db)

datasets: (50000, 32, 32, 3) (50000, 10) (10000, 32, 32, 3) (10000, 10) 0 255
batch: (128, 32, 32, 3) (128, 10)




Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
saved to ckpt/weights.ckpt
loaded weights from file.


[1.686081051826477, 0.5236999988555908]

In [3]:
resnet = keras.applications.ResNet50(weights='imagenet', include_top = False)
resnet.summary()

Model: "resnet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, None, None,  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, None, None, 3 0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, None, None, 6 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, None, None, 6 256         conv1_conv[0][0]                 
___________________________________________________________________________________________

In [4]:
x = tf.random.normal([4, 224, 224, 3])
out = resnet(x)
print(out.shape)

global_average_layer = layers.GlobalAveragePooling2D()
x = tf.random.normal([4, 7, 7, 2048])
out = global_average_layer(x)
print(out.shape)

(4, 7, 7, 2048)
(4, 2048)


In [5]:
fc = layers.Dense(100)
x = tf.random.normal([4, 2048])
out = fc(x)
print(out.shape)

(4, 100)


In [6]:
mynet = Sequential([resnet,global_average_layer,fc])
mynet.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Functional)        (None, None, None, 2048)  23587712  
_________________________________________________________________
global_average_pooling2d (Gl (None, 2048)              0         
_________________________________________________________________
dense (Dense)                (None, 100)               204900    
Total params: 23,792,612
Trainable params: 23,739,492
Non-trainable params: 53,120
_________________________________________________________________


In [9]:
loss_meter = metrics.Mean()
loss_meter.update_state(float(loss))
print(step, 'loss', loss_meter.result())
loss_meter.reset_states()

In [16]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, optimizers, Sequential, metrics

def preprocess(x, y):

    x = tf.cast(x, dtype=tf.float32) / 255.
    y = tf.cast(y, dtype=tf.int32)

    return x,y


batchsz = 128
(x, y), (x_val, y_val) = datasets.mnist.load_data()
print('datasets:', x.shape, y.shape, x.min(), x.max())


db = tf.data.Dataset.from_tensor_slices((x,y))
db = db.map(preprocess).shuffle(60000).batch(batchsz).repeat(10)

ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
ds_val = ds_val.map(preprocess).batch(batchsz) 




network = Sequential([layers.Dense(256, activation='relu'),
                     layers.Dense(128, activation='relu'),
                     layers.Dense(64, activation='relu'),
                     layers.Dense(32, activation='relu'),
                     layers.Dense(10)])
network.build(input_shape=(None, 28*28))
network.summary()

optimizer = optimizers.Adam(lr=0.01)

acc_meter = metrics.Accuracy()
loss_meter = metrics.Mean()


for step, (x,y) in enumerate(db):

    with tf.GradientTape() as tape:
        # [b, 28, 28] => [b, 784]
        x = tf.reshape(x, (-1, 28*28))
        # [b, 784] => [b, 10]
        out = network(x)
        # [b] => [b, 10]
        y_onehot = tf.one_hot(y, depth=10) 
        # [b]
        loss = tf.reduce_mean(tf.losses.categorical_crossentropy(y_onehot, out, from_logits=True))

        loss_meter.update_state(loss)

 

    grads = tape.gradient(loss, network.trainable_variables)
    optimizer.apply_gradients(zip(grads, network.trainable_variables))


    if step % 100 == 0:

        print(step, 'loss:', loss_meter.result().numpy()) 
        loss_meter.reset_states()


    # evaluate
    if step % 500 == 0:
        total, total_correct = 0., 0
        acc_meter.reset_states()

        for step, (x, y) in enumerate(ds_val): 
            # [b, 28, 28] => [b, 784]
            x = tf.reshape(x, (-1, 28*28))
            # [b, 784] => [b, 10]
            out = network(x) 


            # [b, 10] => [b] 
            pred = tf.argmax(out, axis=1) 
            pred = tf.cast(pred, dtype=tf.int32)
            # bool type 
            correct = tf.equal(pred, y)
            # bool tensor => int tensor => numpy
            total_correct += tf.reduce_sum(tf.cast(correct, dtype=tf.int32)).numpy()
            total += x.shape[0]

            acc_meter.update_state(y, pred)


        print(step, 'Evaluate Acc:', total_correct/total, acc_meter.result().numpy())

datasets: (60000, 28, 28) (60000,) 0 255
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 256)               200960    
_________________________________________________________________
dense_2 (Dense)              (None, 128)               32896     
_________________________________________________________________
dense_3 (Dense)              (None, 64)                8256      
_________________________________________________________________
dense_4 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_5 (Dense)              (None, 10)                330       
Total params: 244,522
Trainable params: 244,522
Non-trainable params: 0
_________________________________________________________________
0 loss: 2.3112218
78 Evaluate Acc: 0.1622 0.1622
100 loss: 0.54319376
200

In [24]:
import tensorflow as tf
import datetime

mnist = tf.keras.datasets.mnist

(x_train, y_train),(x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

def create_model():
  return tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax')
  ])

model = create_model()
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

log_dir="logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

model.fit(x=x_train, 
          y=y_train, 
          epochs=5, 
          validation_data=(x_test, y_test), 
          callbacks=[tensorboard_callback])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7fcb71f06880>

In [26]:
# %load_ext tensorboard
%tensorboard --logdir logs/fit

Reusing TensorBoard on port 6006 (pid 63294), started 0:05:57 ago. (Use '!kill 63294' to kill it.)