## References 

- https://www.tensorflow.org/tutorials/quickstart/advanced
- https://www.tensorflow.org/api_docs/python/tf/GradientTape

## Building

In [1]:
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers.legacy import Adam
from tensorflow.keras.metrics import Mean, BinaryAccuracy

In [2]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

n_features = 10
n_samples = 50_000

temp_X, temp_y = make_classification(
    n_samples=n_samples, n_classes=2, n_features=n_features, class_sep=1, random_state=42
)
X_train, X_test, y_train, y_test = train_test_split(
    temp_X.astype("float32"), temp_y.reshape(-1, 1).astype("float32"), stratify=temp_y, test_size=0.2
)


X_train.shape, y_train.shape, X_test.shape, y_test.shape

((40000, 10), (40000, 1), (10000, 10), (10000, 1))

In [3]:
BATCH_SIZE = 64
SHUFFLE_BUFFER_SIZE = 1000
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))\
    .shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(BATCH_SIZE)

In [4]:
class NeuralNetwork(Model):
    def __init__(self, n_inputs: int):
        super().__init__()
        self.input_layer = Dense(units=n_inputs, activation='relu')
        self.relu = Dense(units=n_inputs, activation='relu')
        self.output_layer = Dense(units=1, activation='sigmoid')

    def call(self, x):
        x1 = self.input_layer(x)
        x2 = self.relu(x1)
        x3 = self.relu(x2)
        output = self.output_layer(x3)
        return output
    

model = NeuralNetwork(n_inputs=n_features)
loss_fn = BinaryCrossentropy(from_logits=False)
# optimizer = SGD(learning_rate=0.01)
optimizer = Adam()

train_loss = Mean(name='train_loss')
train_accuracy = BinaryAccuracy(name='train_accuracy')

test_loss = Mean(name='test_loss')
test_accuracy = BinaryAccuracy(name='test_accuracy')


def train_step(model, dataset, optimizer, loss_fn):
    for x, y in dataset:
        with tf.GradientTape() as tape:
            predictions = model(x)
            loss = loss_fn(y, predictions)
            gradient = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(gradient, model.trainable_variables))

            train_loss(loss)
            train_accuracy(y, predictions)

def test_step(model, dataset):
    for x, y in dataset:
        predictions = model(x, training=False)
        loss = loss_fn(y, predictions)


        test_loss(loss)
        test_accuracy(y, predictions)

In [5]:
epochs = 5

for epoch in range(epochs):

    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()

    train_step(model, train_dataset, optimizer, loss_fn)
    test_step(model, test_dataset)

    print(
        f'Epoch {epoch + 1}, '
        f'Loss: {train_loss.result()}, '
        f'Accuracy: {train_accuracy.result() * 100}, '
        f'Test Loss: {test_loss.result()}, '
        f'Test Accuracy: {test_accuracy.result() * 100}'
    )

Epoch 1, Loss: 0.26982495188713074, Accuracy: 89.375, Test Loss: 0.15739412605762482, Test Accuracy: 95.59000396728516
Epoch 2, Loss: 0.14098316431045532, Accuracy: 96.10499572753906, Test Loss: 0.13387630879878998, Test Accuracy: 96.43000030517578
Epoch 3, Loss: 0.1275002360343933, Accuracy: 96.67749786376953, Test Loss: 0.12585057318210602, Test Accuracy: 96.83999633789062
Epoch 4, Loss: 0.12096977233886719, Accuracy: 96.88249969482422, Test Loss: 0.12004249542951584, Test Accuracy: 96.95999908447266
Epoch 5, Loss: 0.11742699146270752, Accuracy: 96.99749755859375, Test Loss: 0.11627758294343948, Test Accuracy: 97.0999984741211


In [6]:
x = tf.constant(3.0)
with tf.GradientTape() as g:
    g.watch(x)
    y = x * x * x
    
    dy_dx = g.gradient(y, x)
    print(dy_dx)

tf.Tensor(27.0, shape=(), dtype=float32)
