In [1]:
'''This repository contains a detailed implementation of the Reinforcement Learning Enviroment class'''
import matplotlib.pyplot as plt
import numpy as np
from dataclasses import *
from typing import Any, Callable, Dict, List, Tuple, Union, Optional
import scipy.integrate as integrate
T.Tensor.ndim = property(lambda self: len(self.shape))
import sys
sys.path.insert(0, '/Users/niyi/Documents/GitHub/Optimal-Control/Tools')
from EnforceTyping import EnforceClassTyping, EnforceMethodTyping
from ParticlesandFields import *
from MDPFramework import *

NameError: name 'T' is not defined

**Vector Field**

In [None]:
@dataclass(kw_only= True)
class ElectroStaticField2D(ClassicalField):
  """
  A class used to represent a 2D Electrostatic Field

  Attributes
  ----------
  FieldSources: Dict
      a formatted string to print out what the animal says
  Dimensionality: tuple
      a tuple of the dimensionality of the field  

  Methods
  -------
  Dynamics(self, ObservationPosition: np.ndarray, Time: float)-> np.ndarray:
      Represents the value of the field at any given point(s) or time. 
  Potential(self, ObservationPosition: np.ndarray, Time: float)-> float:
      Represents the potential dure to the field at a given position and/or time  
  PotentialDifference(self, InitialPosition: np.ndarray, FinalPosition: np.ndarray, Time: float)-> float:
      Represents the potential difference between two positions at a given time in the vector field   
  Gradient(self, ObservationPosition: np.ndarray, Time: float)-> float:
      Represents the gradient at a given position and/or time in the vector field 
  Curl(self, ObservationPosition: np.ndarray, Time: float)-> float:
      Represents the curl at a given position and/or time in the vector field 
  Divergence(self, ObservationPosition: np.ndarray, Time: float)-> float:
      Represents the divergence at a given position and/or time in the vector field
  """
  FieldSources: Dict
  Dimensionality: tuple = (2, )
  def __call__(self, ObservationPosition: np.ndarray)->np.ndarray:
      return self.Dynamics(ObservationPosition)
  
  @EnforceMethodTyping
  def Dynamics(self, ObservationPosition: np.ndarray)->np.ndarray:
    """
    This function outputs the field strength due to Field Sources experienced at any given point(s) or time. 
    This determines the physics of the field(a 2D Electricstatic Field in this case)

    Args:
        ObservationPosition (np.ndarray): The position.

    Returns:
        np.ndarray: The electric field strength vector at the given position.
    """
    assert len(self.FieldSources["Particle"]) == len(self.FieldSources["Position"]), "The length of particles and fields don't match"
    for FieldSource, _ in zip(self.FieldSources["Particle"], self.FieldSources["Position"]):
      assert isinstance(FieldSource, ClassicalParticle),  "The FieldSource is not a Particle"
    CoulombConstant = 8.9875e9 #N*m^2/C^2
    ElectricFieldVector = np.zeros_like(ObservationPosition)
    for FieldSource, SourcePosition in zip(self.FieldSources["Particle"], self.FieldSources["Position"]):
      PositionMatrices= np.broadcast_to(SourcePosition, reversed(ObservationPosition.shape)).T
      DisplacementVector = ObservationPosition - PositionMatrices
      DisplacementMagnitude = np.linalg.norm(DisplacementVector, axis=0)
      ElectricFieldVector += (DisplacementVector * FieldSource.Charge) / DisplacementMagnitude**3
    ElectricFieldVector= CoulombConstant * ElectricFieldVector
    return np.round(ElectricFieldVector, 3) #N/C or V/m
  
  @EnforceMethodTyping
  def Potential(self, ObservationPosition: np.ndarray)-> float:
    """
    Calculate the Potential(Voltage) at a position in the field.

    Args:
        ObservationPosition (np.ndarray): The position.

    Returns:
        np.ndarray: The Electric Potental at the given position.
    """
    assert len(self.FieldSources["Particle"]) == len(self.FieldSources["Position"]), "The length of particles and fields don't match"
    for FieldSource, _ in zip(self.FieldSources["Particle"], self.FieldSources["Position"]):
      assert isinstance(FieldSource, ClassicalParticle),  "The FieldSource is not a Particle"

    CoulombConstant = 8.9875e9 #N*m^2/C^2
    ElectricPotential = 0.0

    for FieldSource, SourcePosition in zip(self.FieldSources["Particle"], self.FieldSources["Position"]):
      PositionMatrices= np.broadcast_to(SourcePosition, reversed(ObservationPosition.shape)).T
      DisplacementVector = ObservationPosition - PositionMatrices
      DisplacementMagnitude = np.linalg.norm(DisplacementVector, axis=0)
      ElectricPotential += FieldSource.Charge / DisplacementMagnitude

    ElectricPotential= CoulombConstant * ElectricPotential
    return np.round(ElectricPotential, 3) #V
  
  @EnforceMethodTyping
  def PotentialDifference(self, InitialPosition: np.ndarray, FinalPosition: np.ndarray)-> float:
    """
    Calculate the potental difference between the initial position and the final position in the field.

    Args:
        InitialPosition (np.ndarray): The starting position.
        FinalPosition (np.ndarray): The ending position.
        resolution (int, optional): The number of intervals to divide the path into. Defaults to 5000.

    Returns:
        float: The work required to move from the initial position to the final position.
    """
    assert InitialPosition.shape == self.Dimensionality, "InitialPosition has the wrong dimensions"
    assert FinalPosition.shape == self.Dimensionality, "FinalPosition has the wrong dimensions"
    PorentialDifference= self.Potential(InitialPosition)- self.Potential(FinalPosition)
    return PorentialDifference

  def Gradient(self, ObservationPosition: np.ndarray, Delta: float= 0.001)->np.ndarray:
    """
    This function returns the derivative of the field at a given point

    Args:
        InitialPosition (np.ndarray): The starting position.
        FinalPosition (np.ndarray): The ending position.
        resolution (int, optional): The number of intervals to divide the path into. Defaults to 5000.

    Returns:
        float: The work required to move from the initial position to the final position.
    """
    Gradient= np.zeros_like(ObservationPosition)
    for i in range(len(ObservationPosition)):
      di= np.zeros_like(ObservationPosition)
      di[i, ] = di[i, ]+Delta
      plusdi= ObservationPosition+ di
      minusdi= ObservationPosition- di
      Gradient[i]= (self.Dynamics(plusdi)- self.Dynamics(minusdi))[i]/ (2* Delta)
    return Gradient
  
  def Curl(self, ObservationPosition: np.ndarray)-> float:
    """
    This function returns the Curl of the field at a given point

    Args:
        InitialPosition (np.ndarray): The starting position.
        FinalPosition (np.ndarray): The ending position.
        resolution (int, optional): The number of intervals to divide the path into. Defaults to 5000.

    Returns:
        float: The work required to move from the initial position to the final position.
    """
    pass
  
  def Divergence(self, ObservationPosition: np.ndarray)-> float:
    """
    This function returns the Divergence of the field

    Args:
        InitialPosition (np.ndarray): The starting position.
        FinalPosition (np.ndarray): The ending position.
    Returns:
        float: The work required to move from the initial position to the final position.
    """
    pass
  @EnforceMethodTyping
  def PlotField(self, LowBound= -20, HighBound= 20):
    """
    This funtion plots the 2D electric vector field

    Args:
        InitialPosition (np.ndarray): The starting position.
        FinalPosition (np.ndarray): The ending position.
    Returns:
        float: The work required to move from the initial position to the final position.
    """
    ObservationPosition= np.meshgrid(np.linspace(LowBound, HighBound, 25), 
                                    np.linspace(LowBound, HighBound, 25))
    ObservationPosition= np.stack(ObservationPosition)
    xd, yd = self.Dynamics(ObservationPosition)
    xd = xd / np.sqrt(xd**2 + yd**2)
    yd = yd / np.sqrt(xd**2 + yd**2)
    color_aara = np.sqrt(xd**2+ yd**2)
    fig, ax = plt.subplots(1,1)
    cp = ax.quiver(ObservationPosition[0],ObservationPosition[1],xd,yd,color_aara)
    fig.colorbar(cp)
    plt.rcParams['figure.dpi'] = 150
    plt.show()

**Example 1: Particle in Field Environment**

In [None]:
@dataclass(kw_only=True)
class ParticleInField(MDPEnvironment): 
  Field: ClassicalField
  ChargedParticle: ClassicalParticle
  Target: np.ndarray # m
  DistanceWeight: float= 1.0
  EnergyWeight: float= -1.0
  TerminalSignalWeight: float= -1000.0
  CurrentTime: float = 0.0# s
  @dataclass 
  class State(EnforceClassTyping):
    '''This class represents the state of the Agent with its Position, Velocity and the Field Strength if experiences at its Position. 
    These are parameters the agent is able to observe, they uniquely define the state of the agent.'''
    Position: np.ndarray # m
    Velocity: np.ndarray #m/s
    
    def Vector(self):
      return np.concatenate([self.Position, self.Velocity])
  InitialState: State = None
  CurrentState: State = None
  def __post_init__(self):
    if self.InitialState is None:
        self.InitialState= self.RandomState()
    self.CurrentState= self.InitialState

  def StateDynamics(self, State: np.ndarray, Time: float, ControlForce: np.ndarray):
    dxPosition, dyPosition = State[2], State[3]
    Position= np.array([State[0], State[1]])
    dxVelocity, dyVelocity = ((self.ChargedParticle.Charge* self.Field(Position))+ControlForce)/self.ChargedParticle.Mass

    return np.array([dxPosition, dyPosition, dxVelocity, dyVelocity])
  
  @EnforceMethodTyping
  def TransitionModel(self, State: State, Action: np.ndarray= np.array([0.0, 0.0]), TimeInterval:float= 1.0)-> State:
    '''Outputs the state of the system after taking an action(applying a constant force for *TimeInterval* seconds)'''
    Posx, Posy, Velx, Vely= integrate.odeint(self.StateDynamics, State.Vector(), [self.CurrentTime, self.CurrentTime+ TimeInterval], args=(Action,))[-1]
    CurrrentPosition= np.array([Posx, Posy])
    CurrentVelocity= np.array([Velx, Vely])
    return self.State(CurrrentPosition, CurrentVelocity)
  
  def RewardModel(self, State: State, Action: np.ndarray, NextState: State, TerminalSignal: bool)-> float:
      '''This method determines how the agent is rewarded given a state transition. The reward determines the behaviour the agent should learn(i.e getting to the target and using the least amount of energy).'''
      DistanceGainedFromTarget= np.linalg.norm(State.Position-self.Target)- np.linalg.norm(NextState.Position-self.Target) 
      EnergyConsumed= self.ChargedParticle.Charge* self.Field.FieldPotential(State.Position, NextState.Position)
      Cost= self.DistanceWeight* DistanceGainedFromTarget+ self.EnergyWeight* EnergyConsumed+ self.TerminalSignalWeight* TerminalSignal
      return Cost.item()
  
  @EnforceMethodTyping
  def IsTerminalCondition(self, State: State)-> bool:
      '''This method determines if the state is within the viable learning region of the environment: Constraints'''
      WithinXBound= -10. <= State.Position[0] <= 10.
      WithinYBound= -10. <= State.Position[1] <= 10. 
      WithinVelocityBound= np.linalg.norm(State.Velocity) < 10. 
      if WithinXBound and WithinYBound and WithinVelocityBound: 
          return False    
      else:
          return True
  
  def StateTransition(self, State: State= CurrentState, Action: np.ndarray= np.array([0.0, 0.0]), TimeInterval: float= 1.0):
      'Outputs the state of the system after taking an action, the reward ocurring from the transition and the terminal signal'
      NextState= self.TransitionModel(State, Action, TimeInterval=TimeInterval)
      TerminalSignal= self.IsTerminalCondition(NextState) 
      Reward= self.RewardModel(State, Action, NextState, TerminalSignal)
      return NextState, Reward, TerminalSignal
  
  @EnforceMethodTyping
  def RandomState(self)->State:
      '''This method generates a random state within the viable learning region'''
      RandomPosition= np.array([np.random.uniform(-10., 10.), 
                                np.random.uniform(-10., 10.)])
      RandomVelocity= np.zeros_like(RandomPosition)
      return self.State(RandomPosition, RandomVelocity)

  def SampleTrajectory(self, RunDuration: float, Policy: Optional[Callable]= None, TimeStep: int=0.1):
    Time= [0]
    State= self.CurrentState
    StateTrajectory= []
    ActionTrajectory= []
    while Time[-1]<RunDuration: 
      StateTrajectory.append(State)
      if Policy is Callable:
        Action = Policy(State)
      else:
          Action = np.random.randn(2)
      ActionTrajectory.append(Action)
      State= self.TransitionModel(State, Action, TimeInterval= TimeStep) 
      Time.append(Time[-1]+TimeStep) 
    return StateTrajectory, ActionTrajectory, Time

  def PlotTrajectory(self, StateTrajectory, Time): 
      PositionPath= [State.Position for State in StateTrajectory]
      VelocityPath= [State.Velocity for State in StateTrajectory]
      PositionTrajectory= np.stack(PositionPath).transpose(dim0=0, dim1=1)
      VelocityTrajectory= np.stack(VelocityPath).transpose(dim0=0, dim1=1)
      plt.plot(PositionTrajectory[0], PositionTrajectory[1])
      plt.plot(PositionTrajectory[0][0], PositionTrajectory[1][0], 'ko')
      plt.plot(PositionTrajectory[0][-1], PositionTrajectory[1][-1], 'r*')
      plt.xlim(-10,10)
      plt.ylim(-10,10)
      plt.grid(True)
      plt.show()

  def TrajectoryValue(self, StateTrajectory: list[State], ActionTrajectory, Time)-> float:
      Value= 0
      TimeInterval= (Time[-1]-Time[0])/len(Time)
      for State, Action in zip(StateTrajectory, ActionTrajectory):
         Value= Value+ (np.linalg.norm(State.Position-self.Target)+np.linalg.norm(Action))* TimeInterval
      return Value
  
  def Reset(self):
      self.CurrentState= self.InitialState
      self.CurrentTime= 0.0


In [None]:
NegativeCharge= ClassicalParticle(Mass=1.0, Charge= -1e-9)
PositiveCharge= ClassicalParticle(Mass=1.0, Charge= 1e-9)
Sources = {"Particle": [NegativeCharge, PositiveCharge],
           "Position": [np.array([1.0, 1.0]), np.array([-1.0, 1.0])]} 
TestElectricField= ElectroStaticField2D(FieldSources=Sources)

PositiveChargeInElectricField= ParticleInField(Field=TestElectricField, ChargedParticle=PositiveCharge, Target=np.array([0.0, 0.0]))
StateTrajectory, ActionTrajectory, Time= PositiveChargeInElectricField.SampleTrajectory(50)
Return= PositiveChargeInElectricField.TrajectoryValue(StateTrajectory, ActionTrajectory, Time)
PositiveChargeInElectricField.PlotTrajectory(StateTrajectory, Time)
Return

TypeError: ndarray.transpose() takes no keyword arguments

In [None]:
class LearningAgent(MDPController):
  MDPEnvironment: MDPEnvironment
  Policy: Callable

  def Act(self, Observation: np.ndarray)-> np.ndarray:
    ...
      
  def Observe(self)-> np.ndarray:
    ...

  def Learn(self):
    'Improves  the MDPController by updating its models'
    ...

