Environment Setup

In [9]:
import tensorflow as tf # Build and train ML models
from tensorflow.keras import layers, models # Construct models and neural networks
import numpy as np # Scientific computing; numerical operations and arrays
import random # Random sampling or shuffling
import math # Optimization and transformation calculations
import matplotlib.pyplot as plt # Data Visualization

import warnings
warnings.filterwarnings('ignore')

Load and Preprocess MNIST Dataset

In [10]:
# Load MNIST dataset and split into training and test sets
(train_images, train_labels), (test_images, test_labels), = tf.keras.datasets.mnist.load_data()

# Reshape training images and normalize pixel values
train_images = train_images.reshape((60000, 28, 28, 1)).astype("float32") / 255

# Reshape test images and normalize pixel values
test_images = test_images.reshape((10000, 28, 28, 1)).astype("float32") / 255

# Convert training labels to one-hot encoded format
train_labels_cat = tf.keras.utils.to_categorical(train_labels, 10)

# Convert test labels to one-hot encoded format
test_labels_cat = tf.keras.utils.to_categorical(test_labels, 10)

Build and Train Model

In [11]:
# Simple CNN Model
model = models.Sequential([
    # Convolutional layer with 32 filters, 3x3 kernel, ReLU activation
    layers.Conv2D(32, (3, 3), activation = "relu", input_shape = [28, 28, 1]),
    layers.MaxPooling2D((2, 2)), # Max pooling layer with 2x2 pool size
    layers.Flatten(), # Flatten the output from the previous layer
    layers.Dense(64, activation = "relu"), # Fully connected layer with 64 units and ReLU activation
    layers.Dense(10, activation = "softmax") # Output layer with 10 units (one for each class) and softmax activation
])
# Compile the model with Adam optimizer, categorical crossentropy loss, and accuracy metric
model.compile(optimizer = "adam", loss = "categorical_crossentropy", metrics = ["accuracy"])
# Train the model for 1 epoch with a batch size of 64 and 10% validation split
model.fit(train_images, train_labels_cat, epochs = 1, batch_size = 64, validation_split = 0.1)

[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.8568 - loss: 0.4660 - val_accuracy: 0.9780 - val_loss: 0.0816


<keras.src.callbacks.history.History at 0x1a296ac34d0>

Reinforcement Learning Environment Component Setup

In [12]:
# Rewards matrix and Q-table for RL
R = np.array([[0, 1], [1, 0]])
Q = np.zeros_like(R, dtype = float)
gamma = 0.8 # Discount factor for future rewards

CRLMM-inspired DL/RL Model

PE07 Update: track correct / incorrect predictions

In [13]:
# Logistic sigmoid function for CRLMM-inspired adjustment
def sigmoid(x): return 1 / (1 + np.exp(-x))

# Predict function + CRLMM update
def predict_and_reward(image, true_label):
  pred_probs = model.predict(image[np.newaxis, ...]) [0] # Predict probabilities for each class
  action = np.argmax(pred_probs) # Choose action with highest probability
  reward = 1 if action == true_label else -1 # 1 for correct prediction, -1 for incorrect
  Q[0, action % 2] += gamma * reward # Update Q-table with reward


  # Output the action, true label, reward, and Q-table
  return action, true_label, reward, Q

### PE07 Revisions

Tracking Variables

In [14]:
correct_preds = 0
incorrect_preds = 0

Simulation Loop

In [15]:
# Run simulation
for i in range(100):
  # Randomly select an index from the test set
  idx = random.randint(0, len(test_images) - 1)
  # Predict and update Q-table
  action, true_label, reward, Q = predict_and_reward(test_images[idx], test_labels[idx])
  # Output Results
  print(f"Predicted: {action}, True: {true_label}, Reward: {reward}, Q-table: {Q}")
  # Count correctness
  correct_preds += 1 if reward == 1 else 0
  incorrect_preds += 1 if reward == -1 else 0

total_preds = correct_preds + incorrect_preds

print(f"Correct {correct_preds}, Incorrect: {incorrect_preds}, Accuracy {(correct_preds / total_preds):.2%}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
Predicted: 5, True: 5, Reward: 1, Q-table: [[0.  0.8]
 [0.  0. ]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Predicted: 1, True: 1, Reward: 1, Q-table: [[0.  1.6]
 [0.  0. ]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
Predicted: 2, True: 2, Reward: 1, Q-table: [[0.8 1.6]
 [0.  0. ]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
Predicted: 6, True: 6, Reward: 1, Q-table: [[1.6 1.6]
 [0.  0. ]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Predicted: 7, True: 7, Reward: 1, Q-table: [[1.6 2.4]
 [0.  0. ]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
Predicted: 8, True: 8, Reward: 1, Q-table: [[2.4 2.4]
 [0.  0. ]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Predicted: 6, True: 6, Reward: 1, Q-table: [[3.2 2.4]
 [0.  0. ]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━