In [1006]:
import matplotlib.pyplot as plt
import numpy as np
from dataclasses import *
from typing import *
import scipy.integrate as integrate
import unittest 
import timeit
import random
import sys
sys.path.insert(0, '/Users/niyi/Documents/GitHub/Optimal-Control/Tools')
from EnforceTyping import enforce_method_typing, EnforceClassTyping
from MDPFramework import MDPEnvironment, MDPController, LearningAgent

In [1007]:
@dataclass(unsafe_hash=True, order=True)
class Node(EnforceClassTyping):
  id: Union[str, int, float, np.ndarray]
  neighbors: Dict
  def __repr__(self) -> str:
    neighbor_ids= tuple(neighbor.id for neighbor in self.neighbors['Nodes'])
    name= str(self.id)+ ':'+ str(neighbor_ids)
    return name
  
  def add_neighbor(self, neighbor: 'Node', distance: int):
    """
    Adds a neighbor to the node's list of neighbors.
    """
    self.neighbors['Nodes'].append(neighbor) 
    self.neighbors['Distance'].append(distance) 

@dataclass(kw_only=True)
class Graph(EnforceClassTyping):
  n_vertices: int
  n_egdes: int
  network: Tuple[Node]= ()

  def transition_probability(self, node: Node):
    return np.array(node.neighbors['Distance'])/sum(node.neighbors['Distance'])
  
  def __post_init__(self):
    nodes= self.generate_states_space(self.n_vertices)
    self.network= self.randomly_connect_state_space(nodes, self.n_egdes)
  
  def add_node(self, node: Node):
    """
    Adds a new node to the network.
    """
    self.network= self.network+ (node, )

  def generate_states_space(self, n_nodes: int)-> Tuple[Node]:
    nodes= ()
    for i in range(n_nodes):
      nodes= nodes+ (Node(id= i, neighbors={"Nodes": [ ],
                                            "Distance": [ ]}), )
    return nodes

  def randomly_connect_state_space(self, nodes: Tuple[Node], n_connections: int)-> Tuple[Node]:
    for _ in range(n_connections):
      random_state1, random_state2= random.sample(sorted(nodes), 2)
      if not self.are_connected(random_state1, random_state2):
          self.connect_nodes(random_state1, random_state2, np.random.randint(1, 100))
    return nodes

  def connect_nodes(self, node1: Node, node2: Node, distance: int):
    """
    Connects two nodes in the network by adding each other to their respective neighbor lists.
    """
    node1.add_neighbor(node2, distance)
    node2.add_neighbor(node1, distance)

  def are_connected(self, node1: Node, node2: Node):
     are_connected= node1 in node2.neighbors["Nodes"] and node2 in node1.neighbors["Nodes"]
     return are_connected


In [1008]:

@dataclass(kw_only=True)
class MarkovChain(MDPEnvironment):
  
  @dataclass(unsafe_hash=True, order=True)
  class State(Node):
    pass
  
  state_space: Graph
  terminal_signal_weight: float= 100
  initial_state: State= None
  current_state: State= None

  def __post_init__(self):
    self.initial_state= random.choice(self.state_space.network)
    self.current_state= self.initial_state

  def state_transition_model(self, state: State, action: np.ndarray=None)-> State:
    transition_probabilities= self.state_space.transition_probability(state)
    next_state= random.choices(state.neighbors['Nodes'], weights= transition_probabilities)[0]
    return next_state
    
  def reward_model(self, state: State, next_state: State, terminal_signal: bool, action: np.ndarray= None)-> float:
    '''This is a scalar performance metric.'''
    next_state_index= state.neighbors['Nodes'].index(next_state)
    distance_gained = state.neighbors['Distance'][next_state_index]
    reward = (distance_gained + self.terminal_signal_weight * int(terminal_signal))
    return reward

  def is_terminal_condition(self, state: State)-> bool:
    if len(state.neighbors['Nodes']) < 2:
      return True
    else:
       return False

  def transition_step(self, state: State, action: np.ndarray = None) -> Tuple[State, float, bool]:
      """
      Simulates a single time step of the environment.

      Args:
          state (State): The current state of the environment. Defaults to current_state.
          action (np.ndarray): The action to take in the environment. Defaults to [0.0, 0.0].
          time_interval (float): The time interval for the simulation. Defaults to 0.1.

      Returns:
          Tuple[State, float, bool]: A tuple containing the next state, the reward, and a terminal signal.
      """
      next_state = self.state_transition_model(state)
      terminal_signal = self.is_terminal_condition(next_state)
      reward = self.reward_model(state, next_state, terminal_signal)
      return next_state, reward, terminal_signal

  def sample_trajectory(self, n_steps: int, initial_state: State = None) -> Tuple[List[State], float, List[float]]:
      """
      Generates a random state trajectory within the viable learning region.

      Args:
      - runtime (float): The total time for the trajectory in seconds.
      - initial_state (State): The initial state of the trajectory. Defaults to current_state.
      - n_steps (int): The number of steps in the trajectory. Defaults to 200.

      Returns:
      - A tuple containing the state trajectory, action trajectory, and time points.
      """
      if initial_state == None:
         state = self.current_state
      else:
         state = initial_state
      state_trajectory = []
      time_span = range(n_steps)
      return_value= 0.0

      for t in time_span:
          state_trajectory.append(state)
          state, reward, _ = self.transition_step(state)
          return_value += reward
      return state_trajectory, return_value, time_span


**1. Unit tests**

Unit tests are very low level and close to the source of an application. They consist in testing individual methods and functions of the classes, components, or modules used by your software. Unit tests are generally quite cheap to automate and can run very quickly by a continuous integration server.



In [1009]:
test_graph= Graph(n_vertices=10, n_egdes=20)

test_markov_chain= MarkovChain(state_space=test_graph)
node= test_markov_chain.state_space.network[0]
print(test_graph.network[0])
print(test_graph.transition_probability(node))
test_trace, test_return, time_span= test_markov_chain.sample_trajectory(20)
print(test_trace)

0:(2, 1)
[0.35869565 0.64130435]
[8:(4, 6, 7), 7:(5, 1, 3, 8), 3:(9, 6, 7, 2), 7:(5, 1, 3, 8), 8:(4, 6, 7), 6:(2, 5, 3, 8), 8:(4, 6, 7), 6:(2, 5, 3, 8), 3:(9, 6, 7, 2), 7:(5, 1, 3, 8), 5:(9, 6, 7), 7:(5, 1, 3, 8), 5:(9, 6, 7), 7:(5, 1, 3, 8), 3:(9, 6, 7, 2), 9:(5, 3, 4), 5:(9, 6, 7), 9:(5, 3, 4), 4:(9, 8, 2), 2:(0, 6, 4, 3)]


**2. Integration tests**

Integration tests verify that different modules or services used by your application work well together. For example, it can be testing the interaction with the database or making sure that microservices work together as expected. These types of tests are more expensive to run as they require multiple parts of the application to be up and running.



In [1010]:
@dataclass
class RandomPolicy:
  action_dims: tuple
  control_magnitude: float

  def __call__(self, observation: np.ndarray):
    return self.control_magnitude* (2*np.random.ranf(self.action_dims)-1)
    
@dataclass
class RandomController(MDPController):
  control_magnitude: float
  
  @property
  def policy(self):
    return RandomPolicy(action_dims=self.environment.action_dims, control_magnitude=self.control_magnitude)
  
  @enforce_method_typing
  def act(self, observation: np.ndarray)-> np.ndarray:
      action= self.policy(observation)
      return action
  
  def observe(self, state)-> np.ndarray:
    observation= state.vector()
    return observation
  
  @enforce_method_typing
  def sample_trajectory(self, runtime: float, n_steps: int=100):
    trajectory= []
    trajectory_return= 0.0
    time= 0.0
    state= self.environment.initial_state
    time_interval= runtime/n_steps
    time_span = np.linspace(time, runtime, n_steps)
    for _ in time_span:
        observation= self.observe(state)
        trajectory.append(observation)
        action= self.act(observation)
        state, reward, _= self.environment.transition_step(state, action, time_interval)
        trajectory_return += reward
    return trajectory, trajectory_return
  
  def plot_trajectory(self, trajectory):
    trajectory= np.array(trajectory)
    px, py, vx, vy= trajectory.transpose()
    plt.figure(figsize=(8, 8))
    plt.plot(px, py, label='Trajectory')
    plt.scatter(px[0], py[0], c='k', marker='o', label='Start')
    plt.scatter(px[-1], py[-1], c='r', marker='*', label='End')
    xmax= max(abs(px))
    ymax= max(abs(py))
    true_max= max((xmax, ymax))
    plt.xlim(-2*true_max, 2*true_max)
    plt.ylim(-2*true_max, 2*true_max)
    plt.grid(True)
    plt.legend()
    plt.show()


**3. Functional tests**
Functional tests focus on the application requirements of the code. Functional tests are performed to check if this module functions as intended.They only verify the output of an action and do not check the intermediate states of the system when performing that action. 

**4. Performance testing**
Performance tests help to measure the reliability, speed, scalability, and responsiveness of an application. It can determine if an application meets performance requirements, locate bottlenecks, measure stability during peak traffic, and more.