In [2]:
import tensorflow as tf
import numpy as np
import random
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

# Load and preprocess a SMALLER subset of MNIST
(train_data, train_labels), (val_data, val_labels) = mnist.load_data()
train_data, train_labels = train_data[:5000], train_labels[:5000]  # Use only 5K samples
val_data, val_labels = val_data[:1000], val_labels[:1000]  # Use only 1K samples

train_data, val_data = train_data / 255.0, val_data / 255.0
train_data = np.expand_dims(train_data, axis=-1)
val_data = np.expand_dims(val_data, axis=-1)

train_labels = keras.utils.to_categorical(train_labels, 10)
val_labels = keras.utils.to_categorical(val_labels, 10)

# Define RL controller (RNN-based policy network)
class NASController(tf.keras.Model):
    def __init__(self, action_size, hidden_size=32):  # Reduce hidden size
        super(NASController, self).__init__()
        self.lstm = layers.LSTM(hidden_size, return_sequences=True, return_state=True)
        self.dense = layers.Dense(action_size, activation="softmax")

    def call(self, inputs, states=None):
        if states is None:
            states = [tf.zeros((1, self.lstm.units)), tf.zeros((1, self.lstm.units))]
        lstm_out, state_h, state_c = self.lstm(inputs, initial_state=states)
        action_probs = self.dense(lstm_out)
        return action_probs, (state_h, state_c)

# Function to randomly sample an architecture
def sample_architecture():
    return {
        'num_layers': random.choice([2, 3, 4]),
        'layers': [random.choice([32, 64, 128]) for _ in range(random.choice([2, 3, 4]))],
        'activation': random.choice(['relu', 'tanh']),
        'optimizer': random.choice(['adam', 'sgd']),
        'batch_size': random.choice([32, 64]),
        'epochs': random.choice([5, 10]),
        'layer_type': random.choice(['dense', 'lstm', 'conv'])
    }

# Build model based on sampled architecture
def build_model(architecture, input_shape=(28, 28, 1)):
    model = keras.Sequential()
    model.add(layers.Input(shape=input_shape))
    
    for units in architecture['layers']:
        model.add(layers.Dense(units, activation=architecture['activation']))
    
    model.add(layers.Flatten())
    model.add(layers.Dense(10, activation='softmax'))
    return model

# Evaluate an architecture
def evaluate_architecture(architecture, train_data, train_labels, val_data, val_labels):
    model = build_model(architecture)
    model.compile(optimizer=architecture['optimizer'], loss='categorical_crossentropy', metrics=['accuracy'])
    model.fit(train_data, train_labels, epochs=architecture['epochs'], verbose=0, batch_size=architecture['batch_size'])
    _, val_acc = model.evaluate(val_data, val_labels, verbose=0)
    return val_acc

In [3]:
# RL-based NAS optimization (REINFORCE)
optimizer = keras.optimizers.Adam(learning_rate=0.01)
controller = NASController(action_size=4)

best_architecture, best_accuracy = None, 0.0

for episode in range(5):  # Reduce NAS iterations from 10 to 5
    architecture = sample_architecture()
    reward = evaluate_architecture(architecture, train_data, train_labels, val_data, val_labels)

    if reward > best_accuracy:
        best_accuracy, best_architecture = reward, architecture

    with tf.GradientTape() as tape:
        action_probs, _ = controller(tf.zeros((1, 1, 10)))
        loss = -tf.reduce_sum(tf.math.log(action_probs + 1e-10)) * reward  # REINFORCE loss

    grads = tape.gradient(loss, controller.trainable_variables)
    optimizer.apply_gradients(zip(grads, controller.trainable_variables))

    print(f"Episode {episode+1}: Accuracy = {reward:.4f}")

print(f"\nBest Architecture: {best_architecture}, Accuracy: {best_accuracy:.4f}")


Episode 1: Accuracy = 0.8830
Episode 2: Accuracy = 0.8910
Episode 3: Accuracy = 0.8800
Episode 4: Accuracy = 0.8720
Episode 5: Accuracy = 0.8800

Best Architecture: {'num_layers': 2, 'layers': [64, 64], 'activation': 'relu', 'optimizer': 'sgd', 'batch_size': 32, 'epochs': 10, 'layer_type': 'dense'}, Accuracy: 0.8910
