In [1]:
import numpy as np
from sys import path
path.append('../tools')
from tools import logsig

In [2]:
def get_expected_output(X):
    return logsig(np.dot(logsig(X * np.array([10, 10]) + np.array([-5, 5])), np.array([1, 1])) + -1)

In [3]:
vec_get_y = np.vectorize(get_expected_output)

In [4]:
x_train = np.linspace(-2, 2, 82)
y_train = vec_get_y(x_train)

In [5]:
x_train = x_train.reshape(len(x_train), 1)
y_train = y_train.reshape(len(y_train), 1)

In [6]:
# Create the model with keras and tensorflow
# Adapted from : https://keras.io/guides/custom_train_step_in_tensorflow/

# Imports. Need to do this environment stuff for some reason
import os

os.environ["KERAS_BACKEND"] = "tensorflow"

import tensorflow as tf
import keras
from keras import layers

In [54]:
# Create the model
class CustomModel(keras.Model):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.search_direction = None
        self.f_search_direction = None
        self.beta = None
        self.old_f_gradients = None
    
    def update_weights(self, lr, weights):
        for i in range(len(self.trainable_variables)):
            self.trainable_variables[i].assign(weights[i] + lr*self.search_direction[i])

    @tf.function
    def train_step(self, data):
        # Unpack the data. Its structure depends on your model and
        # on what you pass to `fit()`.
        x, y = data

        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)  # Forward pass
            # Compute the loss value
            # (the loss function is configured in `compile()`)
            loss = self.compute_loss(y=y, y_pred=y_pred)

        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        f_gradients = tf.concat([tf.reshape(gradient, [-1]) for gradient in gradients], 0)

        # Get the search direction
        if self.search_direction is None:
            self.search_direction = [- x for x in gradients]
            self.f_search_direction = -f_gradients
            self.old_f_gradients = f_gradients
    
        if self.search_direction is not None:
            gradient_delta = f_gradients - self.old_f_gradients
            self.beta = tf.tensordot(gradient_delta, f_gradients, 1) / tf.tensordot(gradient_delta, self.f_search_direction, 1)
            self.f_search_direction = -f_gradients + self.beta * self.f_search_direction
            
            # Unflatten the search direction
            # Step 1: get the amount of variables needed
            # for weight/bias matrix
            shapes = [x.shape for x in gradients]
            sizes = []
            for shape in shapes:
                prod = 1
                for size in shape:
                    prod *= size
                sizes.append(prod)
            
            # Step 2: create the unflattened tensors
            # and replace in search direction
            start_slice = 0
            end_slice = 0
            for i in range(len(shapes)):
                end_slice += sizes[i]
                self.search_direction[i] = tf.reshape(self.f_search_direction[start_slice:end_slice], shapes[i])
                start_slice += sizes[i]
            
        # Find interval for the best learning rate
        if self.beta is not None:
            # Interval search
            # Initial guess for learning rate
            a = 0.01
            # Update the weights and evaluate loss
            self.update_weights(a, trainable_vars)
            e_a = self.compute_loss(y=y, y_pred=self(x, training=True))

            # Get second bound of initial guess
            b = 2*a
            self.update_weights(b, trainable_vars)
            e_b = self.compute_loss(y=y, y_pred=self(x, training=True))

            for i in tf.range(10000):
                # Update the variables
                # Keep track of furthest back
                old_a = a
                old_e_a = e_a
                # Middle bound
                a = b
                e_a = e_b
                # Furthest
                b = 2*b
                self.update_weights(b, trainable_vars)
                e_b = self.compute_loss(y=y, y_pred=self(x, training=True))

                # Check whether the error has increased
                if tf.math.greater(e_b, e_a):
                    a = old_a
                    e_a = old_e_a
                    # We found the interval, break
                    print('Interval found')
                    break
                    
        # Update weights
        # self.optimizer.apply([-x for x in self.search_direction], trainable_vars)

        # Update metrics (includes the metric that tracks the loss)
        for metric in self.metrics:
            if metric.name == "loss":
                metric.update_state(loss)
            else:
                metric.update_state(y, y_pred)

        # Return a dict mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}

In [55]:
# Construct and compile an instance of CustomModel
inputs = keras.Input(shape=(1,))
x = keras.layers.Dense(2)(inputs)
outputs = keras.layers.Dense(1)(x)
model = CustomModel(inputs, outputs)
model.compile(optimizer="SGD", loss="mse", metrics=['accuracy', 'mse'])

model.fit(x_train, y_train, epochs=3)

Epoch 1/3
Interval found
Interval found


InaccessibleTensorError: <tf.Tensor 'concat:0' shape=(7,) dtype=float32> is out of scope and cannot be used here. Use return values, explicit Python locals or TensorFlow collections to access it.
Please see https://www.tensorflow.org/guide/function#all_outputs_of_a_tffunction_must_be_return_values for more information.

<tf.Tensor 'concat:0' shape=(7,) dtype=float32> was defined here:
    File "<frozen runpy>", line 198, in _run_module_as_main
    File "<frozen runpy>", line 88, in _run_code
    File "/Users/reid/dev/PythonCode/tm/env/lib/python3.11/site-packages/ipykernel_launcher.py", line 17, in <module>
    File "/Users/reid/dev/PythonCode/tm/env/lib/python3.11/site-packages/traitlets/config/application.py", line 1043, in launch_instance
    File "/Users/reid/dev/PythonCode/tm/env/lib/python3.11/site-packages/ipykernel/kernelapp.py", line 736, in start
    File "/Users/reid/dev/PythonCode/tm/env/lib/python3.11/site-packages/tornado/platform/asyncio.py", line 195, in start
    File "/opt/homebrew/Cellar/python@3.11/3.11.4_1/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/base_events.py", line 607, in run_forever
    File "/opt/homebrew/Cellar/python@3.11/3.11.4_1/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/base_events.py", line 1922, in _run_once
    File "/opt/homebrew/Cellar/python@3.11/3.11.4_1/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/events.py", line 80, in _run
    File "/Users/reid/dev/PythonCode/tm/env/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 516, in dispatch_queue
    File "/Users/reid/dev/PythonCode/tm/env/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 505, in process_one
    File "/Users/reid/dev/PythonCode/tm/env/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 412, in dispatch_shell
    File "/Users/reid/dev/PythonCode/tm/env/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 740, in execute_request
    File "/Users/reid/dev/PythonCode/tm/env/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 422, in do_execute
    File "/Users/reid/dev/PythonCode/tm/env/lib/python3.11/site-packages/ipykernel/zmqshell.py", line 546, in run_cell
    File "/Users/reid/dev/PythonCode/tm/env/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3009, in run_cell
    File "/Users/reid/dev/PythonCode/tm/env/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3064, in _run_cell
    File "/Users/reid/dev/PythonCode/tm/env/lib/python3.11/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
    File "/Users/reid/dev/PythonCode/tm/env/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3269, in run_cell_async
    File "/Users/reid/dev/PythonCode/tm/env/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3448, in run_ast_nodes
    File "/Users/reid/dev/PythonCode/tm/env/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3508, in run_code
    File "/var/folders/0k/bydhdy6d4rd5bgm5xr_s4tdr0000gn/T/ipykernel_1926/3632477776.py", line 8, in <module>
    File "/Users/reid/dev/PythonCode/tm/env/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler
    File "/Users/reid/dev/PythonCode/tm/env/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 325, in fit
    File "/Users/reid/dev/PythonCode/tm/env/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 118, in one_step_on_iterator
    File "/Users/reid/dev/PythonCode/tm/env/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 106, in one_step_on_data
    File "/var/folders/0k/bydhdy6d4rd5bgm5xr_s4tdr0000gn/T/ipykernel_1926/3431290895.py", line 29, in train_step

The tensor <tf.Tensor 'concat:0' shape=(7,) dtype=float32> cannot be accessed from FuncGraph(name=one_step_on_data, id=12924314944), because it was defined in FuncGraph(name=train_step, id=12901645888), which is out of scope.