# Import stuff

In [1]:
import tensorflow as tf
import numpy as np
from tensorflow import keras

# Basic tensor creation and numpy array accessing

In [None]:
t = tf.constant([[1., 2., 3.],
                 [4., 5., 6.]])
print(t.shape, "\n")
print(t.dtype)

(2, 3) 

<dtype: 'float32'>


In [None]:
t[:, 1:] # Basically you are accessing the .shape
# Takes the all groups of the first dimension and then from 1-length in columns.

f = t[..., 1, tf.newaxis] # Takes first two groups, 1th column and adds a new axis. But t doesn't change shape
                      # It only adds a new axis to the returned array
f, t                      

(<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
 array([[2.],
        [5.]], dtype=float32)>, <tf.Tensor: shape=(2, 3), dtype=float32, numpy=
 array([[1., 2., 3.],
        [4., 5., 6.]], dtype=float32)>)

# There are the basic pemdas things like tf.multiply, tf.add, etc

***@ is the same thing as tf.matmul***

# Can do numpy operations on tensors and vice versa

In [None]:
a = np.array([1, 2, 3, 4])
a = tf.constant(a)
a = tf.square(a)
np.square(a)

array([  1,  16,  81, 256])

# Casting
- Can do operators when the two objects are two different types. 
- Have to cast manually
- ***Default type for a tensor is float32***

In [None]:
a = tf.constant([1, 2, 3, 4, 5])
b = tf.constant([1., 2., 3., 4., 5.])
# a + b you get a casting error
tf.cast(a, dtype=tf.float32) + b

<tf.Tensor: shape=(5,), dtype=float32, numpy=array([ 2.,  4.,  6.,  8., 10.], dtype=float32)>

# Build your own model(Example 1, pg 279)
- Model starts with a hidden layer
- Model then has two ResidualBlocks(which you will define)
  - Each residual block will have two dense layers and an addition operation(adds the inputs of the ResidualBlock to the output of the RB)
    - For the first RB, repeat that RB 3 times(so 1 + 3 because 1 for the first time and then 3 to repeat the same block 3 times)
    - For the second RB, just once.
- Model then has an output layer

## Create a class for the RB

In [None]:
class ResidualBlock(tf.keras.layers.Layer):                             # This is a class where you pass in the layer's attributes that come with tf Layers
  def __init__(self, n_hidden, n_units, **kwargs):                      # This is a constructor where you create the stuff.
  # Creating a RB with h_hidden layers(2 in this model) with n_units    # All constructors have to have self and the **kwargs
    super().__init__(**kwargs)                                          # Always have this line
    
    # Creating the hidden layers(2 hidden)
    self.hidden = [tf.keras.layers.Dense(30, activation="selu",       # Think of hidden has a private field for self(which is the model)
                                         kernel_initializer="lecun_normal")
                                         for _ in range(n_hidden)]
                                         # When doing this for range thing, if you want to just have 2 dense layers you can just do self.hidden1, self.hidden2
                                         # self.hidden3, . . . etc. But if you wat many, just do it this way.
  def call(self, inputs):
    Z = inputs
    for layer in self.hidden:   # Passes the inputs thru the first hidden of the model's hidden layers. Then to the next
      Z = layer(Z)
    return inputs + Z           # Adds the final output of the second hidden to inputs as stated in the context of a RB                              



## Define the model now

In [None]:
class ResidualRegressor(tf.keras.Model): # Passing in the attributes of a tf Model
  def __init__(self, output_dims, **kwargs):
    super().__init__(**kwargs)
    self.hidden1 = tf.keras.Dense(30, activation="elu", kernel_initializer="he_uniform") # Just creating all the layers and blocks(self-explanatory)
    self.block1 = ResidualBlock(2, 30)
    self.block2 = ResidualBlock(2, 30)
    self.out = tf.keras.layers.Dense(output_dims) # Creating the output layer
  
  def call(self, inputs):
    Z = self.hidden1(inputs) # Send inputs first thru the first hidden layer
    for _ in range(4):        # Send it once thru RB1 then repeat that 3 times
      Z = self.block1(Z)
    Z = self.block2(Z)      # Send the output of the reptition into block2
    return self.out(Z)      # Output the prediction

# Gradient Tape stuff
- this tape only works with variables(unless to do tf.watch).
- Can't do tape.gradient twice in a row so do persistent=True

In [25]:
def f(w1, w2):
  return 3 * w1 ** 2 + 2 * w1 * w2
  
# Use gradient tape to compute gradient at certain spots
w1, w2 = tf.Variable(5.), tf.Variable(3.)

with tf.GradientTape(persistent=True) as tape:
    z = f(w1, w2)

gradients = tape.gradient(z, [w1, w2])

[<tf.Tensor: shape=(), dtype=float32, numpy=36.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]

In [31]:
c1, c2 = tf.constant(5.), tf.constant(3.)

with tf.GradientTape(persistent=True) as tape:
  z = f(c1, c2)

gradients = tape.gradient(z, [c1, c2]) # => [None, None]
print(gradients, "\n")

with tf.GradientTape(persistent=True) as tape:
  tape.watch(c1), tape.watch(c2)
  z = f(c1, c2)

gradients = tape.gradient(z, [c1, c2])
gradients

[None, None] 



[<tf.Tensor: shape=(), dtype=float32, numpy=36.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]

# Creating your own training loop

## Init everything

In [11]:
def batchify(x, y, batch_size):
  idx = np.random.randint(len(x), size=batch_size)
  return x[idx], y[idx]

In [2]:
l2_reg = tf.keras.regularizers.l2(0.01)
model = tf.keras.Sequential([
  tf.keras.layers.Dense(30, activation="elu",
                        kernel_initializer="he_normal", kernel_regularizer=l2_reg),
  tf.keras.layers.Dense(1, kernel_regularizer=l2_reg)                             
])

n_epochs=5
batch_size=32 
optimizer=tf.keras.optimizers.SGD(0.01)
n_steps=len(x_train) # Batch size
loss_fn=tf.keras.losses.mean_squared_error # Is a function
mean_loss=tf.keras.metrics.Mean() # Also a function
metrics=[tf.keras.metrics.MAE]

NameError: ignored

## Here is the custom training loop

In [None]:
for epoch in range(1, n_epochs + 1): # # of epochs we have
  for step in range(1, n_steps + 1): # # of batches we have within each epoch
    x_batch, y_batch = batchify(x_scaled_train, y_train) # Get some random batches
    with tf.GradientTape() as tape:
      y_pred = model(x_batch, training=True) # Predict using the model as a function
      main_loss = tf.reduced_mean(loss_fn(y_batch, y_pred)) # Find the error of this prediction
                                  # The defined loss_fn is a function so you can pass stuff into it.
      loss = tf.add_n([main_loss] + model.losses) # Add the main loss to the rest of the other losses(add_n allows sum across multiple tensors)
    gradients = tape.gradient(loss, model.trainable_variables) # Gets the gradient corresponding to a certain trainable_variable
    optimizer.apply_gradients(zip(gradients, model.trainable_variables)) # Applies gradients
    mean_loss(loss)
    for metric in metrics:
      metric(y_batch, y_pred)
    for metric in [mean_loss] + metrics:
      metric.reset_states()

# Custom training for MNIST Fashion

In [92]:
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()
X_train_full = X_train_full.astype(np.float32) / 255.
X_valid, X_train = X_train_full[:5000], X_train_full[5000:]
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]
X_test = X_test.astype(np.float32) / 255.

In [93]:
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

In [103]:
class CustomModel(keras.Model):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.input_layer = keras.layers.Flatten(input_shape=(28,28))
        self.hidden1 = keras.layers.Dense(200, activation='selu', kernel_initializer="lecun_normal")
        self.hidden2 = keras.layers.Dense(100, activation='selu', kernel_initializer="lecun_normal")
        self.hidden3 = keras.layers.Dense(60, activation='selu', kernel_initializer="lecun_normal")
        self.output_layer = keras.layers.Dense(10, activation='softmax', kernel_initializer="glorot_uniform")
        self.dropout_layer = keras.layers.Dropout(rate=0.2)
        self.batchnorm = keras.layers.BatchNormalization()
    
    def call(self, input, training=None):
        input_layer = self.input_layer(input)
        input_layer = self.dropout_layer(input_layer)
        hidden1 = self.hidden1(input_layer)
        hidden2 = self.hidden2(hidden1)
        hidden3 = self.hidden3(hidden2)
        hidden3 = self.dropout_layer(hidden3, training=training)
        output_layer = self.output_layer(hidden3)
        return output_layer

In [104]:
model = CustomModel()
sgd = keras.optimizers.SGD(lr=0.01)
n_epochs = 5
batch_size = 32
n_steps = 1718 # batch_size
loss_fn = keras.losses.sparse_categorical_crossentropy
mean_loss = keras.metrics.Mean()
metrics = [keras.metrics.SparseCategoricalAccuracy()]

  "The `lr` argument is deprecated, use `learning_rate` instead.")


In [105]:
from tqdm.notebook import trange
from collections import OrderedDict
with trange(1, n_epochs + 1, desc="All epochs") as epochs:
    for epoch in epochs:
        with trange(1, n_steps + 1, desc="Epoch {}/{}".format(epoch, n_epochs)) as steps:
            for step in steps:
                X_batch, y_batch = batchify(X_train, y_train, 32)
                with tf.GradientTape(persistent=True) as tape:
                    y_pred = model(X_batch)
                    main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
                    loss = tf.add_n([main_loss] + model.losses)
                for i in range(1, 4):
                  layer = model.get_layer(index=i)
                  gradients = tape.gradient(loss, layer.trainable_variables)
                  optimizer.apply_gradients(zip(gradients, layer.trainable_variables))
                del tape
                for variable in model.variables:
                    if variable.constraint is not None:
                        variable.assign(variable.constraint(variable))                    
                status = OrderedDict()
                mean_loss(loss)
                status["loss"] = mean_loss.result().numpy()
                for metric in metrics:
                    metric(y_batch, y_pred)
                    status[metric.name] = metric.result().numpy()
                steps.set_postfix(status)
            y_pred = model(X_valid)
            status["val_loss"] = np.mean(loss_fn(y_valid, y_pred))
            status["val_accuracy"] = np.mean(keras.metrics.sparse_categorical_accuracy(
                tf.constant(y_valid, dtype=np.float32), y_pred))
            steps.set_postfix(status)
        for metric in [mean_loss] + metrics:
            metric.reset_states()

HBox(children=(FloatProgress(value=0.0, description='All epochs', max=5.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='Epoch 1/5', max=1718.0, style=ProgressStyle(description_w…





KeyboardInterrupt: ignored

In [97]:
model.summary()
model.compile(loss="sparse_categorical_crossentropy", optimizer=sgd, metrics=metrics)
model.evaluate(X_test, y_test)

Model: "custom_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            multiple                  0         
_________________________________________________________________
dense (Dense)                multiple                  157000    
_________________________________________________________________
dense_1 (Dense)              multiple                  20100     
_________________________________________________________________
dense_2 (Dense)              multiple                  6060      
_________________________________________________________________
dense_3 (Dense)              multiple                  610       
_________________________________________________________________
alpha_dropout (AlphaDropout) multiple                  0         
Total params: 183,770
Trainable params: 183,770
Non-trainable params: 0
________________________________________________

[0.3620394170284271, 0.8745999932289124]

In [88]:
class_names = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat", "Sandal", "Shirt", "Sneaker", "Bag", "Ankleboot"]
x_new = X_test[:3]
y_proba = model.predict(x_new)
y_proba

array([[1.17738069e-04, 5.72795034e-06, 3.29853792e-05, 2.69197772e-05,
        4.46277454e-06, 9.91519727e-03, 7.25987047e-06, 8.83393548e-03,
        5.43656995e-07, 9.81055260e-01],
       [3.57591343e-05, 1.29459394e-07, 9.92985547e-01, 2.64103894e-09,
        1.21873431e-03, 2.84479462e-08, 5.75967785e-03, 5.50739898e-10,
        1.31832579e-07, 3.44835072e-09],
       [7.99594335e-10, 1.00000000e+00, 1.62243705e-10, 2.72764389e-09,
        3.18682719e-10, 1.47696860e-15, 6.07211642e-11, 1.18691675e-17,
        9.33207747e-11, 4.99854369e-14]], dtype=float32)

In [90]:
# Can't do this with a custom layers
y_pred = model.predict_classes(x_new)
np.array(class_names)[y_pred]

IndexError: ignored