Consider a scenario where you're tasked with predicting housing prices using linear regression. You've implemented a gradient-descent-based approach for training your model. Reflect on the impact of hyperparameters such as the learning rate and the maximum number of iterations on the training process and on the quality of predictions. Discuss how you would experiment with these hyperparameters to optimize your model's performance, considering factors like convergence speed and prediction accuracy.

In [5]:
import numpy as np
import tensorflow as tf

2024-03-04 09:48:02.394121: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [8]:
class NeuralNetwork:
    """Two-layer network: tanh hidden layer followed by a sigmoid output.

    Data is laid out column-wise. ``W1`` has shape
    ``(hidden_size, input_size)`` and is left-multiplied onto ``X``, so ``X``
    must have shape ``(input_size, m)``; the output then has shape
    ``(output_size, m)``.
    """

    # Keeps predictions strictly inside (0, 1) so that log() in the loss
    # never sees 0 once the sigmoid saturates.
    _EPSILON = 1e-7

    def __init__(self, input_size, hidden_size, output_size):
        """Create the weights (small random normal) and biases (zeros)."""
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Initialize parameters
        self.W1 = tf.Variable(tf.random.normal([hidden_size, input_size], stddev=0.01))
        self.b1 = tf.Variable(tf.zeros([hidden_size, 1]))
        self.W2 = tf.Variable(tf.random.normal([output_size, hidden_size], stddev=0.01))
        self.b2 = tf.Variable(tf.zeros([output_size, 1]))

        # The optimizer is created on first use and then reused, so Adam's
        # moment estimates survive from one training step to the next.
        self._optimizer = None
        self._optimizer_lr = None

    def forward_propagation(self, X):
        """Return the sigmoid activations of the output layer for ``X``."""
        # Cast to the parameter dtype: float64 NumPy input would otherwise
        # clash with the float32 variables inside tf.matmul.
        X = tf.cast(X, self.W1.dtype)

        # Forward pass
        Z1 = tf.matmul(self.W1, X) + self.b1
        A1 = tf.nn.tanh(Z1)
        Z2 = tf.matmul(self.W2, A1) + self.b2
        A2 = tf.nn.sigmoid(Z2)
        return A2

    def backward_propagation(self, X, Y, learning_rate):
        """Run one Adam update of all parameters on the batch ``(X, Y)``.

        The optimizer is cached on the instance. It is rebuilt, and its
        accumulated state discarded, only when ``learning_rate`` differs from
        the value used on the previous call.
        """
        parameters = [self.W1, self.b1, self.W2, self.b2]

        with tf.GradientTape() as tape:
            predictions = self.forward_propagation(X)
            loss = self.cross_entropy_loss(Y, predictions)

        gradients = tape.gradient(loss, parameters)
        optimizer = self._get_optimizer(learning_rate)
        optimizer.apply_gradients(zip(gradients, parameters))

    def _get_optimizer(self, learning_rate):
        """Return the cached Adam optimizer, building it when required."""
        # getattr keeps this working for instances that bypassed __init__.
        optimizer = getattr(self, "_optimizer", None)
        if optimizer is None or getattr(self, "_optimizer_lr", None) != learning_rate:
            optimizer = tf.optimizers.Adam(learning_rate=learning_rate)
            self._optimizer = optimizer
            self._optimizer_lr = learning_rate
        return optimizer

    def train(self, X, Y, num_epochs, learning_rate):
        """Run ``num_epochs`` full-batch updates, printing the loss every 100 epochs."""
        for epoch in range(num_epochs):
            self.backward_propagation(X, Y, learning_rate)

            if epoch % 100 == 0:
                # Loss is re-evaluated after the update so the printed value
                # reflects the current parameters.
                predictions = self.forward_propagation(X)
                loss = self.cross_entropy_loss(Y, predictions)
                print(f'Epoch {epoch}, Loss: {loss}')

    def cross_entropy_loss(self, Y, A):
        """Return the mean binary cross-entropy between labels ``Y`` and predictions ``A``."""
        A = tf.convert_to_tensor(A)
        Y = tf.cast(Y, A.dtype)
        # Clip so a saturated sigmoid cannot yield log(0) = -inf (and NaN
        # gradients) during training.
        A = tf.clip_by_value(A, self._EPSILON, 1.0 - self._EPSILON)
        loss = -tf.reduce_mean(Y * tf.math.log(A) + (1 - Y) * tf.math.log(1 - A))
        return loss

In [None]:
# Build a small network: 2 input features, 2 hidden units, 1 output unit.
nn = NeuralNetwork(input_size=2, hidden_size=2, output_size=1)