<a href="https://colab.research.google.com/github/AndrewDavidRatnam/HandsonWorkingML/blob/main/CustomModels_and_CustomTraining_with_tensorFlow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Imports and Checks

In [2]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os
import time
from pathlib import Path
import sys
import pandas as pd

tf.random.set_seed(42)
np.random.seed(42)

if not (tf.config.list_physical_devices('TPU') or tf.config.list_physical_devices('GPU') ):
    print("No GPU was detected. Neural nets can be very slow without a GPU.")
    if "google.colab" in sys.modules:
        print("Go to Runtime > Change runtime and select a GPU hardware "
              "accelerator.")
    if "kaggle_secrets" in sys.modules:
        print("Go to Settings > Accelerator and select GPU.")
if tf.config.list_physical_devices("TPU"):
  print("LEZZZZZZ GOOOO  TPU") #ALL GOOD
else:
  print("Using GPU")


No GPU was detected. Neural nets can be very slow without a GPU.
Go to Runtime > Change runtime and select a GPU hardware accelerator.
Using GPU


In [6]:
train = pd.read_csv("/content/sample_data/california_housing_train.csv")
test = pd.read_csv("/content/sample_data/california_housing_test.csv")


In [7]:
train.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
0,-114.31,34.19,15.0,5612.0,1283.0,1015.0,472.0,1.4936,66900.0
1,-114.47,34.4,19.0,7650.0,1901.0,1129.0,463.0,1.82,80100.0
2,-114.56,33.69,17.0,720.0,174.0,333.0,117.0,1.6509,85700.0
3,-114.57,33.64,14.0,1501.0,337.0,515.0,226.0,3.1917,73400.0
4,-114.57,33.57,20.0,1454.0,326.0,624.0,262.0,1.925,65500.0


In [8]:
from sklearn.model_selection import train_test_split

X_train_full = train.drop("median_house_value", axis=1)
y_train_full = train["median_house_value"]

X_test = test.drop("median_house_value", axis=1)
y_test = test["median_house_value"]

X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, random_state=42)

In [9]:
#convert dataframe/series to numpy
X_train = X_train.values
X_valid = X_valid.values
X_test = X_test.values
y_train = y_train.values
y_valid = y_valid.values
y_test = y_test.values

## Custom Layer Normalization

In [13]:
class LayerNormalization(tf.keras.layers.Layer):
  def __init__(self, eps=1e-3, **kwargs):
    super().__init__(**kwargs)
    self.eps = eps

  def build(self, batch_input_shape):
    self.alpha = self.add_weight(
        name="alpha",
        shape=batch_input_shape[-1:],
        initializer="ones")
    self.beta = self.add_weight(
        name="beta",
        shape=batch_input_shape[-1:],
        initializer="zeros")

  def call(self, X):
    mean, variance = tf.nn.moments(X, axes=-1, keepdims=True)
    return self.alpha * (X - mean) / tf.sqrt(variance + self.eps) + self.beta, mean, variance #changed this

  def get_config(self):
    base_config = super().get_config()
    return {**base_config, "eps":self.eps}


In [14]:
X = X_train.astype(np.float32)

custom_layer_norm = LayerNormalization()
keras_layer_norm = tf.keras.layers.LayerNormalization()

tf.reduce_mean(tf.keras.losses.mean_absolute_error(keras_layer_norm(X), custom_layer_norm(X)))

<tf.Tensor: shape=(), dtype=float32, numpy=4.9642363e-08>

In [15]:
#trust is in short supply these days and also double check
tf.keras.utils.set_random_seed(42)
random_alpha = np.random.rand(X.shape[-1])
random_beta = np.random.rand(X.shape[-1])

custom_layer_norm.set_weights([random_alpha, random_beta])
keras_layer_norm.set_weights([random_alpha, random_beta])

tf.reduce_mean(tf.keras.losses.mean_absolute_error(keras_layer_norm(X), custom_layer_norm(X)))

<tf.Tensor: shape=(), dtype=float32, numpy=2.9002763e-08>

## Custom Training Loop in Fashion MNIST

In [23]:
(X_train_full, y_train_full), (X_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
X_train_full = X_train_full.astype(np.float32)/255.
X_valid, X_train = X_train_full[:5_000], X_train_full[5_000:]
y_valid, y_train = y_train_full[:5_000], y_train_full[5_000:]
X_test = X_test.astype(np.float32)/255.


In [18]:
X_train.shape

(55000, 28, 28)

In [19]:
# from sklearn.preprocessing import StandardScaler
# scaler = StandardScaler()

# X_train = scaler.fit_transform(tf.keras.layers.Flatten(input_shape=[28, 28])(X_train))
# X_valid = scaler.transform(tf.keras.layers.Flatten(input_shape=[28, 28])(X_valid))
# X_test = scaler.transform(tf.keras.layers.Flatten(input_shape=[28, 28])(X_test))

In [4]:
tf.keras.utils.set_random_seed(42)

In [20]:
X_train.shape

(55000, 784)

In [24]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=[28, 28]), #already flattened while scaling try out
    tf.keras.layers.Dense(300, activation="relu"),
    tf.keras.layers.Dense(10, activation="softmax")
])

In [None]:
def random_batch(X, y, batch_size=256):
  idx = np.random.randint(len(X), size=batch_size)
  return X[idx], y[idx]


In [39]:
def print_status_bar(step, total, loss, metrics=None):
  metrics = " - ".join([f"{m.name}: {m.result():.4f}" for m in loss + (metrics or [])]) #loss is a list of losses
  end = "" if step < total else "\n"
  print(f"\r{step}/{total} - " + metrics, end=end)

In [40]:
tf.keras.utils.set_random_seed(42)


In [47]:
len(X_train)// 32

1718

In [41]:
n_epochs = 5
batch_size = 256 # was actually 32 that's why 1718 steps lol
n_steps = len(X_train) // batch_size
optimizer1 = tf.keras.optimizers.Nadam(learning_rate=1e-2)
optimizer2 = tf.keras.optimizers.Nadam(learning_rate=1e-3)
loss_fn = tf.keras.losses.sparse_categorical_crossentropy
mean_loss = tf.keras.metrics.Mean()
valid_loss = tf.keras.metrics.Mean()
metrics = [tf.keras.metrics.SparseCategoricalAccuracy()]

In [46]:
for epoch in range(1, n_epochs + 1):
  print(f"Epoch {epoch}/{n_epochs}")
  for step in range(1, n_steps + 1):
    X_batch, y_batch = random_batch(X_train, y_train, batch_size=256)
    with tf.GradientTape() as tape:
      y_pred = model(X_batch, training=True) # can this be outside grad tape?
      main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
      loss = tf.add_n([main_loss] + model.losses)

    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer1.apply_gradients(zip(gradients, model.trainable_variables))
    # TODO
    #upper layers and lower layers need a differnt learning rates

    for variable in model.variables:
      if variable.constraint is not None:
        variable.assign(variable.constraint(variable))

    mean_loss(loss)
    for metric in metrics:
      metric(y_batch, y_pred)

    print_status_bar(step, n_steps, [mean_loss], metrics)

  for metric in [mean_loss, valid_loss] + metrics:
    metric.reset_states()


Epoch 1/5
1718/1718 - mean: 0.3398 - sparse_categorical_accuracy: 0.8934
Epoch 2/5
1718/1718 - mean: 0.2130 - sparse_categorical_accuracy: 0.9203
Epoch 3/5
1718/1718 - mean: 0.1924 - sparse_categorical_accuracy: 0.9277
Epoch 4/5
1718/1718 - mean: 0.1744 - sparse_categorical_accuracy: 0.9344
Epoch 5/5
1718/1718 - mean: 0.1566 - sparse_categorical_accuracy: 0.9412


In [33]:
X_valid_batch, y_valid_batch = random_batch(X_valid, y_valid)
v_loss = tf.reduce_mean(loss_fn(y_valid_batch, model(X_valid_batch)))

In [37]:
valid_loss(tf.add_n([v_loss]))

<tf.Tensor: shape=(), dtype=float32, numpy=5.9231863>