<a href="https://colab.research.google.com/github/MathMayhem/4644_final/blob/main/DeepGeneticAlgorithm/Neural_Network_Score_Prediction.ipynb" target="_parent">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

In [None]:
# Set up the API server

# Install the build dependencies (C toolchain, libmicrohttpd, json-c, xz)
# and the `requests` library that the Python cells use to call the server
!sudo apt-get update -qq
!sudo apt-get install -y build-essential libmicrohttpd-dev libjson-c-dev xz-utils
!pip install requests -q

# Download the SVOBODA v0.1 release tarball, unpack it and build it with make
!wget https://github.com/RusDoomer/SVOBODA/archive/refs/tags/v0.1.tar.gz -O svoboda.tar.gz -q
!tar -xzf svoboda.tar.gz
!cd SVOBODA-0.1 && make
# Download the cleaned iWeb corpus sample, decompress it and place it at
# SVOBODA-0.1/data/english/corpora/shai.txt
!wget https://colemak.com/pub/corpus/iweb-corpus-samples-cleaned.txt.xz -q
!unxz iweb-corpus-samples-cleaned.txt.xz
!mv iweb-corpus-samples-cleaned.txt SVOBODA-0.1/data/english/corpora/shai.txt

# Starting the API Server
import subprocess
import os
import time
import requests
import signal

# --- Configuration ---
server_directory = 'SVOBODA-0.1'
executable_name = './svoboda'
api_url = "http://localhost:8888/"
startup_timeout_seconds = 60

# --- Process Handling ---
# Remains None when the executable is missing, i.e. when no server was launched.
server_process = None

if not os.path.exists(os.path.join(server_directory, executable_name)):
    print("Error: Server executable not found. Please run the setup cell first.")
else:
    print("Starting C server in the background...")
    # Launch the server in the background.
    # NOTE(review): stdout/stderr are pipes that are only read if the server
    # dies during startup. A server that writes a lot of output could block
    # once the pipe buffer fills; redirect to a file or DEVNULL if that happens.
    server_process = subprocess.Popen(
        [executable_name],
        cwd=server_directory,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True
    )

    # --- Verification Loop ---
    # Poll the API once per second until it answers, the process dies,
    # or the startup timeout elapses.
    server_ready = False
    start_time = time.time()
    while time.time() - start_time < startup_timeout_seconds:
        # Check if the process has already terminated
        exit_code = server_process.poll()
        if exit_code is not None:
            print(f"Error: Server process terminated unexpectedly with exit code {exit_code}.")
            # Print stderr for debugging
            for line in server_process.stderr.readlines():
                print(f"[SERVER_STDERR] {line.strip()}")
            break

        # Attempt to connect. Any HTTP response at all counts as "ready".
        try:
            response = requests.get(api_url, timeout=1)
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
            # Timeout must be caught too: a read timeout (connection accepted
            # but no answer within 1s) is not a ConnectionError and would
            # otherwise abort the cell while the server keeps running.
            print(".", end="")
            time.sleep(1)
        else:
            print(f"\nSuccess: Server is responsive at {api_url}.")
            server_ready = True
            break

    if not server_ready and server_process.poll() is None:
        print("\nError: Server did not become responsive within the timeout period.")
        print("The process is still running but may be stuck. Terminating.")
        server_process.terminate()
        # Reap the child so it does not linger as a zombie; escalate to kill
        # if it ignores the termination request.
        try:
            server_process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            server_process.kill()
            server_process.wait()
    elif server_ready:
        print(f"Server is running with PID: {server_process.pid}")

In [None]:
# Required Imports for the Neural Network

import pickle
import os
import random
import copy
import numpy as np
import time
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers, optimizers

# The 30 symbols (the 26 lowercase letters plus ";", ",", "." and "/") whose keyboard positions we are optimizing
# Layouts are represented as lists of indices into this string
symbols = "abcdefghijklmnopqrstuvwxyz;,./"

In [None]:
# Mount your Google Drive at /content/drive to use our data files or generate your own
# Later cells read and write pickle files under '/content/drive/My Drive/colab_data/'
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# The Basic Unit of our Neural Network. This defines a custom Fully Connected Layer with a Leaky ReLU activation function and skip connection around the entire layer.
class CustomLayer(layers.Layer):
    """
    A custom Keras layer that combines a Dense layer, Leaky ReLU activation,
    and a conditional skip connection.

    The skip connection adds the original input to the output of the
    Dense -> Leaky ReLU sequence ONLY if the last dimension of the input
    matches the number of units in the Dense layer. Otherwise the layer
    behaves as a plain Dense + Leaky ReLU.
    """
    def __init__(self, units, negative_slope=0.5, l2_reg=0.0, **kwargs):
        """
        Initializes the custom layer.

        Args:
            units (int): The number of output units for the Dense layer.
            negative_slope (float): The negative slope coefficient for the
                Leaky ReLU activation.
            l2_reg (float): The L2 regularization factor applied to the Dense
                layer's kernel. The default of 0.0 adds no penalty.
            **kwargs: Additional keyword arguments to pass to the base Layer class.
        """
        super().__init__(**kwargs)
        self.units = units
        self.negative_slope = negative_slope
        self.l2_reg = l2_reg

        # Initialize the Dense layer and Leaky ReLU activation.
        # The Dense weights are created in build().
        self.dense_layer = layers.Dense(
            self.units,
            kernel_regularizer=regularizers.l2(self.l2_reg),
            name="dense_part",
        )
        self.leaky_relu_activation = layers.LeakyReLU(
            negative_slope=self.negative_slope,
            name="leaky_relu_part",
        )

    def build(self, input_shape):
        """
        Builds the layer's weights and records whether a skip connection
        is possible for the given input shape.

        Args:
            input_shape (tf.TensorShape): The shape of the input tensor.
                                          Typically (batch_size, ..., input_dim).
        """
        # Build the inner Dense layer explicitly so its weights exist before
        # the first call (useful for inspection, or for a model that needs
        # to know all of its weights upfront).
        self.dense_layer.build(input_shape)

        # The skip connection adds the original input to the processed output,
        # which requires the last input dimension to equal the number of units.
        self.can_skip_connect = (input_shape[-1] == self.units)
        if not self.can_skip_connect:
            print(f"Warning: Skip connection not possible for layer '{self.name}'.")
            print(f"Input last dimension ({input_shape[-1]}) does not match Dense units ({self.units}).")
            print("The layer will function as a standard Dense + Leaky ReLU.")

        super().build(input_shape)  # Call the base class's build method

    def call(self, inputs):
        """
        Defines the forward pass logic of the layer.

        Args:
            inputs (tf.Tensor): The input tensor to the layer.

        Returns:
            tf.Tensor: The output tensor after applying Dense, Leaky ReLU,
                       and the conditional skip connection.
        """
        # Dense followed by Leaky ReLU
        activated_output = self.leaky_relu_activation(self.dense_layer(inputs))

        # Conditional skip connection: only the feature (last) dimension
        # matters, and it is checked on the tensor actually passed in.
        if inputs.shape[-1] == self.units:
            return activated_output + inputs

        # Dimension mismatch: no skip connection (build() already warned).
        return activated_output

    def get_config(self):
        """
        Returns the serializable configuration of the layer.
        This is important for saving and loading models that use custom layers.
        """
        config = super().get_config()
        config.update({
            "units": self.units,
            "negative_slope": self.negative_slope,
            "l2_reg": self.l2_reg
        })
        return config

In [None]:
# Calls the API server to evaluate a batch of layouts built from the provided string of symbols
# If metric_weights is None, this function returns the five individual metrics for each of the input layouts
# Otherwise, it returns the weighted sum of these metrics for each layout, using the provided metric weights
def true_evaluation(batch, metric_weights, symbols):
  """
  Evaluates a batch of layouts by POSTing them to the local API server.

  Args:
      batch: An iterable of layouts, each a sequence of indices into `symbols`.
      metric_weights: A dict of weights keyed by "sfb", "sfs", "lsb", "alt"
          and "rolls", or None to request the individual metrics instead.
      symbols: The string of symbols that the layout indices refer to.

  Returns:
      If `metric_weights` is None, a list with one
      [sfb, sfs, lsb, alt, rolls] list per layout. Otherwise a list with one
      score per layout. Returns None (after printing a message) when the
      server cannot be reached or responds with a non-200 status code.
  """
  # Turn each list of indices into the layout string the server expects
  layouts = ["".join(symbols[index] for index in layout) for layout in batch]

  api_url = "http://localhost:8888/"

  # `is None` rather than `== None`, so that array-like weights are not
  # compared element-wise. All-zero weights are sent when only the
  # individual metrics are wanted.
  want_individual_metrics = metric_weights is None
  if want_individual_metrics:
    weights = {"sfb": 0.0, "sfs": 0.0, "lsb": 0.0, "alt": 0.0, "rolls": 0.0}
  else:
    weights = metric_weights

  payload = [{"layout": layout, "weights": weights} for layout in layouts]

  try:
    response = requests.post(api_url, json=payload)
  except requests.exceptions.ConnectionError:
    print("Connection Error: Could not connect to the C server.")
    print("Please ensure the server is running correctly from the previous cell.")
    return None

  if response.status_code != 200:
    print(f"Error: Server responded with status code {response.status_code}")
    print("Response text:", response.text)
    return None

  results = response.json()
  if want_individual_metrics:
    metric_names = ("sfb", "sfs", "lsb", "alt", "rolls")
    return [[r["stat_values"][name] for name in metric_names] for r in results]
  return [r["score"] for r in results]

In [None]:
# Creates our Neural Network and its Optimizer
# For the sake of efficiency, the network assumes you have already created the one-hot encodings of the layouts
# (the network is built for 900 input features)
# The random seed will often be set at the top of each cell to ensure you can reproduce the results found in our paper
# These seed values were themselves chosen randomly with no cherry picking
# set_random_seed seeds Python's random module, NumPy and TensorFlow together;
# seeding NumPy alone does not control the Keras weight initialization
tf.keras.utils.set_random_seed(346)

loss_fn = tf.keras.losses.MSE

# Eight identical 900-unit hidden layers followed by a 5-unit linear output layer
architecture = [CustomLayer(900) for _ in range(8)] + [layers.Dense(5)]

model = models.Sequential(architecture)
optimizer = optimizers.Adam(learning_rate=0.0001)
model.compile(optimizer=optimizer, loss="mse")
model.build(input_shape=(None, 900))
trainable_variables = model.trainable_variables

In [None]:
# Running this will require having your Google Drive attached!
# Requires significant compute!
# This is what we used to create the training data for the model
# These are the means (offsets) and mean absolute deviations (scalars) of each of the five metrics
# These were calculated from a set of 10,000 uniformly random layouts
scalars = np.array([2.37272293, 1.77984022, 1.84450506, 2.34851446, 2.97247169])
offsets = np.array([10.53548093, 10.47626445,  3.99833419, 17.61765303, 35.32753762])

# One random seed per generated data file
seeds = [605, 505, 108, 259, 341, 215, 971, 297, 735, 973]

# One-hot encodes each layout position over the 30 symbols, then flattens the result
L1 = layers.CategoryEncoding(num_tokens = 30, output_mode = "one_hot")
L2 = layers.Flatten()

# The resulting data is saved as pickle files in your google drive
save_path = '/content/drive/My Drive/colab_data/'
os.makedirs(save_path, exist_ok=True)

for t, seed in enumerate(seeds, start=1):
  print(t)
  random.seed(seed)

  # Generates uniformly random layouts and calculates their metric scores which are normalized with the scalars and offsets above
  layouts = []
  scores = []
  a = list(range(30))
  for i in range(2000):
    if i % 100 == 0:
      print(i)
    # `a` is shuffled in place, so every layout kept must be a copy of it
    batch = []
    for j in range(500):
      random.shuffle(a)
      batch.append(list(a))

    metrics = true_evaluation(batch, None, symbols)
    if metrics is None:
      # true_evaluation has already printed the reason for the failure
      raise RuntimeError(f"Layout evaluation failed for batch {i} of data file {t}.")

    layouts.extend(batch)
    scores.extend((np.array(metrics) - offsets) / scalars)

  inputs = L2(L1(np.array(layouts)))

  with open(os.path.join(save_path, 'layouts_large_' + str(t) + '.pkl'), 'wb') as f:
      pickle.dump(layouts, f)
  with open(os.path.join(save_path, 'inputs_large_' + str(t) + '.pkl'), 'wb') as f:
      pickle.dump(inputs, f)
  with open(os.path.join(save_path, 'scores_large_' + str(t) + '.pkl'), 'wb') as f:
      pickle.dump(np.array(scores), f)

In [None]:
# Running this will require having your Google Drive attached!
# If you don't wish to train the neural network, this will load the network weights
# Folder in the mounted Google Drive that holds the pickled network weights
save_path = '/content/drive/My Drive/colab_data/'
weights_file = os.path.join(save_path, 'eight_hidden_layer_network_weights.pkl')

# NOTE: pickle can execute arbitrary code while loading; only load files you trust
with open(weights_file, 'rb') as f:
    saved_weights = pickle.load(f)

# Copy the stored weights into the model
model.set_weights(saved_weights)

In [None]:
# Running this will require having your Google Drive attached!
# Requires significant compute! It is recommended that you use a GPU to speed up execution!
# This is the code for training the network. Run this cell four times with each successive random seed value.
# np.random.seed(152)
# np.random.seed(370)
# np.random.seed(878)
# np.random.seed(265)

# The training data is read from the repository's data folder when it exists.
# Otherwise it is read from the Google Drive folder that the data generation
# cell saves its pickle files to.
data_directories = [
  '/4644_final/DeepGeneticAlgorithm/Final Pickle Data/',
  '/content/drive/My Drive/colab_data/',
]
save_path = next(
  (directory for directory in data_directories if os.path.isdir(directory)),
  data_directories[0],  # Keep the original location (and its error) if neither exists
)

# Trains for 20 epochs on each of the ten data files in turn
for i in range(1, 11):
  with open(os.path.join(save_path, 'inputs_large_' + str(i) + '.pkl'), 'rb') as f:
    inputs = pickle.load(f)
  with open(os.path.join(save_path, 'scores_large_' + str(i) + '.pkl'), 'rb') as f:
    scores = pickle.load(f)

  # 1% of each file is held out as validation data
  model.fit(
    x = inputs,
    y = np.array(scores),
    batch_size = 256,
    epochs = 20,
    shuffle = True,
    validation_split = 0.01,
  )