In [1]:
import numpy as np
from sys import path
path.append('../tools')
from tools import logsig

In [2]:
def get_expected_output(X):
    return logsig(np.dot(logsig(X * np.array([10, 10]) + np.array([-5, 5])), np.array([1, 1])) + -1)

In [3]:
vec_get_y = np.vectorize(get_expected_output)

In [4]:
x_train = np.linspace(-2, 2, 82)
y_train = vec_get_y(x_train)

In [5]:
x_train = x_train.reshape(len(x_train), 1)
y_train = y_train.reshape(len(y_train), 1)

In [6]:
# Create the model with keras and tensorflow
# Adapted from : https://keras.io/guides/custom_train_step_in_tensorflow/

# Imports. Need to do this environment stuff for some reason
import os

os.environ["KERAS_BACKEND"] = "tensorflow"

import tensorflow as tf
import keras
from keras import layers

In [48]:
# Create the model
class CustomModel(keras.Model):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.search_direction = None
        self.f_search_direction = None
        self.beta = None
        self.old_f_gradients = None

    def train_step(self, data):
        self.optimizer.learning_rate.assign(.1)
        # Unpack the data. Its structure depends on your model and
        # on what you pass to `fit()`.
        x, y = data
        
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)  # Forward pass
            # Compute the loss value
            # (the loss function is configured in `compile()`)
            loss = self.compute_loss(y=y, y_pred=y_pred)

        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        f_gradients = tf.concat([tf.reshape(gradient, [-1]) for gradient in gradients], 0)

        if self.search_direction is None:
            self.search_direction = [- x for x in gradients]
            self.f_search_direction = -f_gradients
            self.old_f_gradients = f_gradients
            print(f"{loss} Search direction: None")
    
        if self.search_direction is not None:
            gradient_delta = f_gradients - self.old_f_gradients
            self.beta = tf.tensordot(gradient_delta, f_gradients, 1) / tf.tensordot(gradient_delta, self.f_search_direction, 1)
            self.f_search_direction = -f_gradients + self.beta * self.f_search_direction
            print(f"{loss} Search direction: Not None")

        # Find interval for the best learning rate
        if self.beta is not None:
            # Interval search
            pass
        
        # Update weights
        self.optimizer.apply([-x for x in self.search_direction], trainable_vars)

        # Update metrics (includes the metric that tracks the loss)
        for metric in self.metrics:
            if metric.name == "loss":
                metric.update_state(loss)
            else:
                metric.update_state(y, y_pred)

        # Return a dict mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}

In [49]:
# Construct and compile an instance of CustomModel
inputs = keras.Input(shape=(1,))
x = keras.layers.Dense(2)(inputs)
outputs = keras.layers.Dense(1)(x)
model = CustomModel(inputs, outputs)
model.compile(optimizer="SGD", loss="mse")

model.fit(x_train, y_train, epochs=3)

Epoch 1/3
Tensor("compile_loss/add:0", shape=(), dtype=float32) Search direction: None
Tensor("compile_loss/add:0", shape=(), dtype=float32) Search direction: Not None
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 797us/step - loss: 0.8108
Epoch 2/3
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0251
Epoch 3/3
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 717us/step - loss: 0.0041


<keras.src.callbacks.history.History at 0x300608190>