In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers,models
from tensorflow.keras.datasets import fashion_mnist

In [None]:
# Load the Fashion-MNIST dataset (clothing images, not the handwritten-digit MNIST set).
# load_data() returns two (images, labels) pairs: the training split, then the test split.
(train_images,train_labels),(test_images,test_labels)=fashion_mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [None]:
# Why: neural networks usually train better when the inputs share a consistent,
# relatively small scale, so the pixel values are divided by 255.
# (Assumes the loader returned integer pixel values in 0-255.)
#
# The integer-dtype guard keeps this cell idempotent: after the first run the
# arrays are floats, so re-running the cell no longer divides them by 255 again.
if np.issubdtype(train_images.dtype, np.integer):
    train_images = train_images / 255.0
if np.issubdtype(test_images.dtype, np.integer):
    test_images = test_images / 255.0

In [None]:
def create_model():
    """Build the (uncompiled) dense classifier used throughout this notebook.

    Architecture, in order: a 28x28 input, a flatten step, a 128-unit ReLU
    hidden layer, and a 10-unit softmax output layer.

    Returns:
        A new ``tf.keras.Sequential`` model; the caller is expected to compile it.
    """
    keras_layers = tf.keras.layers
    layer_stack = [
        keras_layers.Input(shape=(28, 28)),  # one 28x28 image per sample
        keras_layers.Flatten(),
        keras_layers.Dense(128, activation='relu'),
        keras_layers.Dense(10, activation='softmax'),
    ]
    return tf.keras.Sequential(layer_stack)

In [None]:
def train_model(learning_rate, epochs=5, batch_size=128, validation_split=0.2, seed=None):
    """Train a fresh model with Adam and return its final validation accuracy.

    A new model is built with ``create_model()``, compiled with the Adam
    optimizer at the given learning rate, and fitted on the notebook-level
    ``train_images`` / ``train_labels`` arrays.

    Args:
        learning_rate: Learning rate passed to ``tf.keras.optimizers.Adam``.
        epochs: Number of training epochs (default 5).
        batch_size: Mini-batch size (default 128).
        validation_split: Fraction of the training data that Keras holds out
            for validation (default 0.2, i.e. 20%).
        seed: Optional integer. When provided, ``tf.keras.utils.set_random_seed``
            is called before the model is built so that runs are repeatable and
            different learning rates can be compared on equal footing. The
            default ``None`` leaves the global random state untouched.

    Returns:
        The ``val_accuracy`` recorded for the last epoch.
    """
    if seed is not None:
        # Seeds Python, NumPy and TensorFlow in one call.
        tf.keras.utils.set_random_seed(seed)

    model = create_model()

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    history = model.fit(train_images, train_labels,
                        epochs=epochs, batch_size=batch_size,
                        validation_split=validation_split, verbose=0)

    # history.history['val_accuracy'] holds one value per epoch; keep the last.
    return history.history['val_accuracy'][-1]

In [None]:
def binary_search_learning_rate(low, high, threshold=0.0001, evaluate=None, verbose=False):
    """Search ``[low, high]`` by repeated halving for the learning rate with the best score.

    The learning rate is a hyperparameter that controls how strongly the model
    is updated in response to the estimated error during training. This function
    looks for the value that maximizes validation accuracy.

    Procedure: both endpoints are evaluated once, then the midpoint of the
    current interval is evaluated repeatedly. If the midpoint scores higher
    than the current lower bound, the lower bound moves up to the midpoint;
    otherwise the upper bound moves down. The loop stops once the interval is
    no wider than ``threshold``. The best (rate, score) pair seen at any
    evaluated point, endpoints included, is returned.

    NOTE(review): the halving rule only compares the midpoint against the lower
    bound, so it is a heuristic. Noisy scores or a non-unimodal accuracy curve
    can steer it away from the true optimum.

    Args:
        low: Lower bound of the search interval.
        high: Upper bound of the search interval.
        threshold: Interval width at which the search stops. Must be positive.
        evaluate: Optional callable mapping a learning rate to a score to
            maximize. Defaults to ``train_model`` (looked up at call time).
        verbose: When True, print each midpoint and its score.

    Returns:
        Tuple ``(best_learning_rate, best_accuracy)``.

    Raises:
        ValueError: If ``threshold`` is not positive (the loop could never end).
    """
    if threshold <= 0:
        raise ValueError("threshold must be positive")
    if evaluate is None:
        # Resolved lazily so the default always uses the current train_model.
        evaluate = train_model

    # Each endpoint is evaluated exactly once.
    accuracy_low = evaluate(low)
    accuracy_high = evaluate(high)

    # Seed the running best with the endpoints so an optimum at either bound
    # (or a search whose loop never executes) is still reported correctly.
    if accuracy_high > accuracy_low:
        best_learning_rate, best_accuracy = high, accuracy_high
    else:
        best_learning_rate, best_accuracy = low, accuracy_low

    while (high - low) > threshold:
        mid = (low + high) / 2

        accuracy_mid = evaluate(mid)
        if verbose:
            print(f"Learning rate: {mid:.5f}, Validation accuracy: {accuracy_mid:.4f}")

        # Track the best point seen so far.
        if accuracy_mid > best_accuracy:
            best_accuracy = accuracy_mid
            best_learning_rate = mid

        # Shrink the interval; the midpoint's score is reused for the new lower
        # bound, so no retraining is needed there.
        if accuracy_mid > accuracy_low:
            low = mid
            accuracy_low = accuracy_mid
        else:
            high = mid

    return best_learning_rate, best_accuracy



In [None]:
# Run the learning-rate search over [0.0001, 1.0] and report the best result found.
optimal_lr, optimal_acc = binary_search_learning_rate(low=0.0001, high=1.0)
print(f"Optimal Learning Rate: {optimal_lr}, Validation Accuracy: {optimal_acc}")

Learning rate: 0.50005, Validation accuracy: 0.1028
Learning rate: 0.25007, Validation accuracy: 0.1045
Learning rate: 0.12509, Validation accuracy: 0.5602
Learning rate: 0.06259, Validation accuracy: 0.7776
Learning rate: 0.03135, Validation accuracy: 0.8494
Learning rate: 0.04697, Validation accuracy: 0.8403
Learning rate: 0.03916, Validation accuracy: 0.7859
Learning rate: 0.03525, Validation accuracy: 0.8464
Learning rate: 0.03330, Validation accuracy: 0.8393
Learning rate: 0.03232, Validation accuracy: 0.8410
Learning rate: 0.03184, Validation accuracy: 0.8399
Learning rate: 0.03159, Validation accuracy: 0.8476
Learning rate: 0.03147, Validation accuracy: 0.8314
Learning rate: 0.03141, Validation accuracy: 0.8192
Optimal Learning Rate: 0.031346874999999996, Validation Accuracy: 0.8494166731834412
