In [None]:
import torch.autograd
from torch.autograd import Variable
import matplotlib.pyplot as plt
import numpy as np
from dataclasses import *
import torch as T
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from typing import Any, Callable, Dict, List, Tuple, Union, Optional
from functools import wraps
import os
import random
from abc import ABC, abstractmethod
from collections import deque, namedtuple

In [None]:
@dataclass
class EnforceClassTyping:
    """Dataclass mixin that validates field types right after ``__init__``.

    Every dataclass field of the instance, including fields inherited from
    parent dataclasses, is checked with ``isinstance`` against its annotation.

    Raises:
        TypeError: if a field value is not an instance of its annotated type.
    """

    def __post_init__(self):
        # fields() walks the whole dataclass hierarchy, whereas __annotations__
        # only exposes the annotations of the most-derived class.
        for field_info in fields(self):
            name, field_type = field_info.name, field_info.type
            value = getattr(self, name)
            try:
                is_valid = isinstance(value, field_type)
            except TypeError:
                # The annotation cannot be used with isinstance (string
                # annotation, subscripted generic, ...): nothing to enforce.
                continue
            if not is_valid:
                current_type = type(value)
                raise TypeError(f"The field `{name}` was assigned by `{current_type}` instead of `{field_type}`")
def EnforceMethodTyping(func: Callable) -> Callable:
    """Decorator that enforces the argument type annotations of a method.

    Each positional or keyword argument whose parameter carries an annotation
    is checked with ``isinstance`` before the method runs. Parameters without
    an annotation, and annotations ``isinstance`` cannot handle, are skipped.
    A method without any annotation is returned undecorated.

    Args:
        func: the method to wrap; its first parameter is the instance.

    Returns:
        The wrapped method (or ``func`` itself when it has no annotations).

    Raises:
        TypeError: at call time, when an argument does not match its annotation.
    """
    import inspect  # function-scope import keeps the decorator self-contained

    arg_annotations = func.__annotations__
    if not arg_annotations:
        return func

    # Names of the parameters that can be filled positionally, in declaration
    # order and without the leading instance parameter. Pairing by name keeps
    # un-annotated parameters and the 'return' annotation from shifting the
    # checks onto the wrong argument.
    positional_kinds = (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
    positional_names = [
        parameter.name
        for parameter in inspect.signature(func).parameters.values()
        if parameter.kind in positional_kinds
    ][1:]

    def conforms(value: Any, annotation: Any) -> bool:
        try:
            return isinstance(value, annotation)
        except TypeError:
            # Annotation is not usable with isinstance: nothing to enforce.
            return True

    @wraps(func)
    def wrapper(self, *args: Tuple[Any], **kwargs: Dict[str, Any]) -> Any:
        for arg_name, arg in zip(positional_names, args):
            if arg_name not in arg_annotations:
                continue
            annotation = arg_annotations[arg_name]
            if not conforms(arg, annotation):
                raise TypeError(f"Expected {annotation} for argument {arg}, got {type(arg)}.")

        for arg_name, arg_value in kwargs.items():
            if arg_name in arg_annotations:
                annotation = arg_annotations[arg_name]
                if not conforms(arg_value, annotation):
                    raise TypeError(f"Expected {annotation} for keyword argument {arg_name}, got {type(arg_value)}.")

        return func(self, *args, **kwargs)

    return wrapper
def EnforceFunctionTyping(func: Callable) -> Callable:
    """Decorator that enforces the argument type annotations of a plain function.

    Each positional or keyword argument whose parameter carries an annotation
    is checked with ``isinstance`` before the function runs. Parameters without
    an annotation, and annotations ``isinstance`` cannot handle, are skipped.

    Args:
        func: the function to wrap.

    Returns:
        The wrapped function.

    Raises:
        TypeError: at call time, when an argument does not match its annotation.
    """
    import inspect  # function-scope import keeps the decorator self-contained

    # Names of the parameters that can be filled positionally, in declaration
    # order. Pairing by name keeps un-annotated parameters and the 'return'
    # annotation from shifting the checks onto the wrong argument.
    positional_kinds = (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
    positional_names = [
        parameter.name
        for parameter in inspect.signature(func).parameters.values()
        if parameter.kind in positional_kinds
    ]

    def conforms(value, annotation) -> bool:
        try:
            return isinstance(value, annotation)
        except TypeError:
            # Annotation is not usable with isinstance: nothing to enforce.
            return True

    @wraps(func)
    def wrapper(*args, **kwargs):
        annotations = func.__annotations__

        # Check positional arguments
        for arg_name, arg in zip(positional_names, args):
            if arg_name not in annotations:
                continue
            annotation = annotations[arg_name]
            if not conforms(arg, annotation):
                raise TypeError(f"Expected {annotation} for {arg}, got {type(arg)}.")

        # Check keyword arguments
        for arg_name, arg_value in kwargs.items():
            if arg_name in annotations:
                annotation = annotations[arg_name]
                if not conforms(arg_value, annotation):
                    raise TypeError(f"Expected {annotation} for {arg_name}, got {type(arg_value)}.")

        return func(*args, **kwargs)

    return wrapper
 
def Euler(func, X0, t):
    """Explicit (forward) Euler integrator for ``dX/dt = func(X, t)``.

    Args:
        func: callable ``func(X, t)`` returning the time derivative of ``X``.
        X0: initial state, a sequence of length ``n``.
        t: sequence of time points; they do not have to be evenly spaced.

    Returns:
        ``numpy`` array of shape ``(len(t), n)`` whose row ``i`` is the state at ``t[i]``.
    """
    nt = len(t)
    X = np.zeros([nt, len(X0)])
    if nt == 0:
        return X
    X[0] = X0
    for i in range(nt - 1):
        # Use the local step so non-uniform grids are integrated correctly and
        # a single-point grid needs no t[1].
        dt = t[i + 1] - t[i]
        X[i + 1] = X[i] + func(X[i], t[i]) * dt
    return X

def RK4(func, X0, t):
    """Classical fourth-order Runge-Kutta integrator for ``dX/dt = func(X, t)``.

    Args:
        func: callable ``func(X, t)`` returning the time derivative of ``X``.
        X0: initial state, a sequence of length ``n``.
        t: sequence of time points; they do not have to be evenly spaced.

    Returns:
        ``numpy`` array of shape ``(len(t), n)`` whose row ``i`` is the state at ``t[i]``.
    """
    nt = len(t)
    X = np.zeros([nt, len(X0)])
    if nt == 0:
        return X
    X[0] = X0
    for i in range(nt - 1):
        # Use the local step so non-uniform grids are integrated correctly and
        # a single-point grid needs no t[1].
        dt = t[i + 1] - t[i]
        k1 = func(X[i], t[i])
        k2 = func(X[i] + dt / 2. * k1, t[i] + dt / 2.)
        k3 = func(X[i] + dt / 2. * k2, t[i] + dt / 2.)
        k4 = func(X[i] + dt * k3, t[i] + dt)
        X[i + 1] = X[i] + dt / 6. * (k1 + 2. * k2 + 2. * k3 + k4)
    return X


In [None]:
class OUActionNoise(object):
    """Temporally correlated exploration noise (discretised Ornstein-Uhlenbeck process).

    Each call advances the internal state ``x_prev`` by one step of size ``dt``:
    a pull towards ``mu`` scaled by ``theta`` plus Gaussian noise scaled by
    ``sigma * sqrt(dt)``. ``mu`` must expose ``.shape`` (e.g. a numpy array).
    """

    def __init__(self, mu, sigma=0.15, theta=.2, dt=1e-2, x0=None):
        self.mu, self.sigma, self.theta = mu, sigma, theta
        self.dt, self.x0 = dt, x0
        self.reset()

    def __call__(self):
        """Advance the process by one step and return the new sample."""
        pull = self.theta * (self.mu - self.x_prev) * self.dt
        kick = self.sigma * np.sqrt(self.dt) * np.random.normal(size=self.mu.shape)
        self.x_prev = self.x_prev + pull + kick
        return self.x_prev

    def reset(self):
        """Restart the process from ``x0`` (or from zeros shaped like ``mu``)."""
        if self.x0 is not None:
            self.x_prev = self.x0
        else:
            self.x_prev = np.zeros_like(self.mu)

    def __repr__(self):
        template = 'OrnsteinUhlenbeckActionNoise(mu={}, sigma={})'
        return template.format(self.mu, self.sigma)

In [None]:
@dataclass
class Particle(EnforceClassTyping):
    'A particle described by its mass (Mass) and its electric charge (Charge). It stores no position; both fields are type-checked on construction by EnforceClassTyping.'
    Mass: float # mass in kg
    Charge: float # electric charge in C


In [None]:
from torch import Tensor

@dataclass
class Field(ABC):
  """Abstract interface of a field that can be queried at observation positions.

  Subclasses must implement ``FieldStrength`` and ``FieldPotential``.
  """
  @abstractmethod
  def FieldStrength(self, ObservationPosition: T.Tensor)-> T.Tensor:
    """Return the field strength at ``ObservationPosition`` (implemented by subclasses)."""
    ...
  @abstractmethod
  def FieldPotential(self, ObservationPosition: T.Tensor)-> float:
    """Return the field potential (implemented by subclasses)."""
    ...
  def PotentialDifference(self, InitialPosition: T.Tensor, FinalPosition: T.Tensor) -> float:
    """Placeholder: not abstract and not implemented here, so calling it returns ``None``."""
    ...

@dataclass(kw_only= True)
class ElectricField(Field):
  """Electrostatic field produced by a set of point charges in the plane.

  Attributes:
    FieldSources: dict with two equally long sequences: ``"Particle"`` (the
      ``Particle`` sources) and ``"Position"`` (their positions, indexable
      tensors whose first two entries are x and y).
    FieldHighBound: upper x/y limit of the region drawn by ``PlotField``.
    FieldLowBound: lower x/y limit of the region drawn by ``PlotField``.
  """
  FieldSources: Dict
  FieldHighBound: float = 10.0
  FieldLowBound: float = -10.0

  def __call__(self, ObservationPosition: T.Tensor)->T.Tensor:
      """Shorthand for ``FieldStrength(ObservationPosition)``."""
      return self.FieldStrength(ObservationPosition)
  @EnforceMethodTyping
  def FieldStrength(self, ObservationPosition: T.Tensor)->T.Tensor:
    '''Return the electric field vector at the observation point(s).

    ``ObservationPosition[0]`` and ``ObservationPosition[1]`` hold the x and y
    coordinates; they may be scalars or arrays of any common shape (e.g. a
    mesh grid). The result has the same shape as ``ObservationPosition``.
    The value is not finite at a source position (division by zero distance).
    '''
    CoulombConstant = 8.9875e9 #N*m^2/C^2
    Particles = self.FieldSources["Particle"]
    Positions = self.FieldSources["Position"]
    assert len(Particles) == len(Positions), "The length of particles and fields don't match"
    for FieldSource in Particles:
      assert isinstance(FieldSource, Particle),  "The FieldSource is not a Particle"
    ElectricFieldVector = T.zeros_like(ObservationPosition)
    for FieldSource, SourcePosition in zip(Particles, Positions):
      # Broadcast the source coordinates to the shape of the observation grid.
      PositionMatrices= T.stack([T.ones_like(ObservationPosition[0])* SourcePosition[0].item(),
                                T.ones_like(ObservationPosition[1])* SourcePosition[1].item()])
      DisplacementVector = ObservationPosition - PositionMatrices
      DisplacementMagnitude = T.sqrt(DisplacementVector[0]**2 +DisplacementVector[1]**2)
      # Coulomb's law: E = k*q*r_vec/|r|^3. The displacement is not normalised,
      # so the cube (not the square) of the distance is required.
      ElectricFieldVector += (DisplacementVector * FieldSource.Charge) / DisplacementMagnitude**3
    return CoulombConstant * ElectricFieldVector #N/C or V/m
  @EnforceMethodTyping
  def FieldPotential(self, InitialPosition: T.Tensor, FinalPosition: T.Tensor, resolution: int= 5000)-> float:
      '''Return minus the line integral of the field along the straight segment
      from InitialPosition to FinalPosition, i.e. the work per unit charge needed
      to move between the two positions.

      The segment is split into ``resolution`` equal steps and the field is
      sampled at the start of each step. The result is a 0-dimensional tensor.
      '''
      Step = (FinalPosition - InitialPosition) / resolution
      StepIndices = T.arange(resolution, dtype=Step.dtype, device=Step.device)
      # Shape (2, resolution): every sample point, evaluated in a single field call.
      SamplePositions = InitialPosition.unsqueeze(1) + Step.unsqueeze(1) * StepIndices
      SampleField = self.FieldStrength(SamplePositions)
      return -(SampleField[0] * Step[0] + SampleField[1] * Step[1]).sum()
  @EnforceMethodTyping
  def PlotField(self):
      'Plot the direction of the 2D electric field on a 50x50 grid, coloured by field magnitude.'
      GridAxis = T.linspace(self.FieldLowBound, self.FieldHighBound, 50)
      ObservationPosition= T.stack(T.meshgrid(GridAxis, GridAxis, indexing="ij"))
      xd, yd = self.FieldStrength(ObservationPosition)
      # Take the magnitude once, before normalising, so both components are
      # scaled by the same factor and the colour keeps the real field strength.
      FieldMagnitude = T.sqrt(xd**2 + yd**2)
      xd = xd / FieldMagnitude
      yd = yd / FieldMagnitude
      fig, ax = plt.subplots(1,1)
      cp = ax.quiver(ObservationPosition[0],ObservationPosition[1],xd,yd,FieldMagnitude)
      fig.colorbar(cp)
      plt.rcParams['figure.dpi'] = 250
      plt.show()


In [None]:
@dataclass(kw_only=True)
class Environment(ABC):  
  """Abstract interface of an environment an agent can interact with.

  Holds an ``InitialState`` and a ``CurrentState`` and declares the methods a
  concrete environment has to implement.
  """

  class State:
      # Placeholder state type; used only in the annotations of this interface.
      pass
  InitialState: State 
  CurrentState: State 

  @abstractmethod
  def TransitionModel(self, State: State, Action)-> State:
      """Return the state that follows ``State`` when ``Action`` is applied."""
      ...

  @abstractmethod
  def RewardModel(self, State: State, Action, NextState: State, TerminalSignal: bool)-> float:
      '''This is a scalar performance metric.'''
      ...

  @abstractmethod
  def IsTerminalCondition(self, State: State)-> bool:
      """Tell whether ``State`` is terminal."""
      ...

  @abstractmethod
  def StateTransition(self, State: State, Action)-> tuple[float, State, bool]:
      """Apply ``Action`` in ``State``; annotated to return a (float, State, bool) tuple."""
      ...

  @abstractmethod
  def SampleTrajectory(self, RunDuration: float)-> list[State]:
      """Return a list of states sampled over ``RunDuration``."""
      ...
  @abstractmethod
  def TrajectoryValue(self, Trajectory: list[State])-> float:
      """Return a scalar value for ``Trajectory``."""
      ...
  @abstractmethod
  def Reset(self):
      """Reset the environment (implemented by subclasses)."""
      ...

In [None]:
@dataclass(kw_only=True)
class ParticleInField(Environment): 
  """Environment in which a charged particle moves through a field under an applied force.

  Attributes:
    Field: the field the particle moves in; it is called with a position and
      must return the field strength there.
    ChargedParticle: the controlled particle (mass and charge).
    Target: position used by the reward and the terminal test.
    DistanceWeight, EnergyWeight, TerminalSignalWeight: weights of the three
      terms summed by ``RewardModel``.
    CurrentTime: set to 0.0 by ``Reset``; not advanced by the methods of this class.
    InitialState: state restored by ``Reset``; drawn with ``RandomState`` when omitted.
    CurrentState: state the environment is in; starts at ``InitialState`` when omitted.
  """
  Field: Field
  ChargedParticle: Particle
  Target: T.Tensor
  DistanceWeight: float= 1.0
  EnergyWeight: float= -1.0
  TerminalSignalWeight: float= -1000.0
  CurrentTime: float = 0.0# s
  @dataclass 
  class State(EnforceClassTyping):
    '''Observable state of the particle: its Position and its Velocity.'''
    Position: T.Tensor # m
    Velocity: T.Tensor # m/s
    
    def StateDynamics(self):
      # Placeholder, not implemented.
      pass
    def Vector(self):
      'Return Position and Velocity concatenated into a single tensor.'
      return T.cat([self.Position, self.Velocity])
  InitialState: State = None
  CurrentState: State = None
  def __post_init__(self):
    if self.InitialState is None:
        self.InitialState= self.RandomState()
    # Only fall back to the initial state when no current state was supplied.
    if self.CurrentState is None:
        self.CurrentState= self.InitialState

  @EnforceMethodTyping
  def TransitionModel(self, State: State, Action: T.Tensor= T.tensor([0.0, 0.0]), TimeInterval:float= 1.0, Resolution: int=100)-> State:
    '''Return the state reached after applying the constant force Action for TimeInterval seconds.

    The motion is integrated in Resolution equal sub-steps: the velocity is
    updated from the net force (field force plus Action) and the position is
    then advanced with the updated velocity. A non-positive TimeInterval
    returns a copy of the input state.
    '''
    CurrentVelocity= State.Velocity
    CurrentPosition= State.Position
    if TimeInterval <= 0:
        return self.State(CurrentPosition, CurrentVelocity)
    StepSize= TimeInterval/Resolution
    # A counted loop runs exactly Resolution sub-steps; accumulating StepSize
    # in a float can overshoot or undershoot TimeInterval by one step.
    for _ in range(Resolution):
        NetForce= (self.ChargedParticle.Charge* self.Field(CurrentPosition))+Action
        CurrentVelocity= CurrentVelocity + NetForce/self.ChargedParticle.Mass*StepSize
        CurrentPosition= CurrentPosition + CurrentVelocity*StepSize
    return self.State(CurrentPosition, CurrentVelocity)
  
  def RewardModel(self, State: State, Action: T.Tensor, TerminalSignal: bool)-> float:
      '''Return the scalar feedback for a transition as a weighted sum of three terms:
      the distance of State from the Target, the norm of the Action and the terminal signal.

      NOTE(review): with the default weights the value grows with the distance to
      the target but shrinks with the action norm and on termination; confirm the
      intended sign convention (reward vs. cost) against the learning code.
      '''
      DistanceFromTarget= T.norm(State.Position-self.Target)
      EnergyConsumed= T.norm(Action)
      Cost= self.DistanceWeight* DistanceFromTarget+ self.EnergyWeight* EnergyConsumed+ self.TerminalSignalWeight* TerminalSignal
      return Cost.item()
  
  @EnforceMethodTyping
  def IsTerminalCondition(self, State: State)-> bool:
      '''Return True when State is terminal: the position left the [-10, 10] box on
      either axis, the speed reached 10, or the position equals the Target exactly.'''
      WithinXBound= bool(-10. <= State.Position[0] <= 10.)
      WithinYBound= bool(-10. <= State.Position[1] <= 10.)
      WithinVelocityBound= bool(T.norm(State.Velocity) < 10.)
      GoalAchieved= T.equal(State.Position, self.Target)
      return not (WithinXBound and WithinYBound and WithinVelocityBound and not GoalAchieved)
  
  def StateTransition(self, State: State= None, Action: T.Tensor= T.tensor([0.0, 0.0]), TimeInterval: float= 1.0):
      '''Apply Action to State for TimeInterval seconds.

      State defaults to the environment's current state. Returns the tuple
      (NextState, Reward, TerminalSignal); the reward is computed from the
      state the action was applied in.
      '''
      # A default bound in the signature is evaluated once, at class-definition
      # time (when it is None), so the current state is resolved per call.
      if State is None:
          State= self.CurrentState
      NextState= self.TransitionModel(State, Action, TimeInterval=TimeInterval)
      TerminalSignal= self.IsTerminalCondition(NextState) 
      Reward= self.RewardModel(State, Action,TerminalSignal)
      return NextState, Reward, TerminalSignal
  
  @EnforceMethodTyping
  def RandomState(self)->State:
      '''Return a state at rest with a position drawn uniformly from the [-10, 10] box.'''
      RandomPosition= T.tensor([np.random.uniform(-10., 10.), 
                                np.random.uniform(-10., 10.)])
      RandomVelocity= T.zeros_like(RandomPosition)
      return self.State(RandomPosition, RandomVelocity)

  def SampleTrajectory(self, RunDuration: float, Policy: Optional[Callable]= None, TimeStep: float=0.1):
    '''Roll the environment forward from the current state without modifying it.

    Args:
      RunDuration: simulated time to cover.
      Policy: optional callable mapping the state vector (``State.Vector()``)
        to an action tensor. If it exposes a ``device`` attribute the state
        vector is moved there first. Without a policy, actions are drawn from
        a standard normal distribution.
      TimeStep: duration of each transition.

    Returns:
      (StateTrajectory, ActionTrajectory, Time); Time holds one more entry
      than the two trajectories (the end time of the last step).
    '''
    Time= [0]
    State= self.CurrentState
    StateTrajectory= []
    ActionTrajectory= []
    # `Policy is Callable` compared against the typing object and was never
    # true; callable() is the actual test for "a policy was supplied".
    UsePolicy= callable(Policy)
    PolicyDevice= getattr(Policy, 'device', None)
    while Time[-1]<RunDuration: 
        StateTrajectory.append(State)
        if UsePolicy:
            Observation= State.Vector()
            if PolicyDevice is not None:
                Observation= Observation.to(PolicyDevice)
            # Sampling needs no gradients; keep actions (and hence states) graph-free.
            with T.no_grad():
                Action= Policy(Observation)
            Action= T.as_tensor(Action).detach().cpu()
        else:
            Action= T.randn(2)
        ActionTrajectory.append(Action)
        State= self.TransitionModel(State, Action, TimeInterval= TimeStep) 
        Time.append(Time[-1]+TimeStep) 
    return StateTrajectory, ActionTrajectory, Time

  def PlotTrajectory(self, StateTrajectory, Time): 
      '''Plot the path of the positions in StateTrajectory: start as a black dot, end as a red star.
      Time is accepted for symmetry with SampleTrajectory and is not used.'''
      PositionTrajectory= T.stack([State.Position for State in StateTrajectory]).transpose(dim0=0, dim1=1)
      plt.plot(PositionTrajectory[0], PositionTrajectory[1])
      plt.plot(PositionTrajectory[0][0], PositionTrajectory[1][0], 'ko')
      plt.plot(PositionTrajectory[0][-1], PositionTrajectory[1][-1], 'r*')
      plt.xlim(-100,100)
      plt.ylim(-100,100)
      plt.grid(True)
      plt.show()

  def TrajectoryValue(self, StateTrajectory: list[State], ActionTrajectory, Time)-> float:
      '''Return the time-integrated sum of distance-to-target and action norm along a trajectory.'''
      Value= 0
      # len(Time) points delimit len(Time)-1 intervals.
      IntervalCount= len(Time)-1
      TimeInterval= (Time[-1]-Time[0])/IntervalCount if IntervalCount > 0 else 0.0
      for State, Action in zip(StateTrajectory, ActionTrajectory):
         Value= Value+ (T.norm(State.Position-self.Target)+T.norm(Action))* TimeInterval
      return Value
  def Reset(self):
      'Return the environment to its initial state and set the clock to zero.'
      self.CurrentState= self.InitialState
      self.CurrentTime= 0.0

In [None]:
@dataclass
class Agent(ABC):
  """Abstract interface of an agent bound to an environment.

  Attributes:
    AgentEnvironment: the environment the agent interacts with.
    ControlFrequency: float control-rate setting; how it is used is up to the subclass.
  """
  AgentEnvironment: Environment
  ControlFrequency: float
  @abstractmethod
  def Act(self, Observation: T.Tensor)-> T.Tensor:
      """Return an action tensor for ``Observation``."""
      ...
  @abstractmethod
  def Observe(self)-> T.Tensor:
      """Return an observation tensor."""
      ...
  @abstractmethod
  def Learn(self):
      'Improves  the agent by updating its models'
      ...
  @abstractmethod
  def LearningAlgorithm(self):
      """Run the agent's training procedure (implemented by subclasses)."""
      ...


In [None]:
class CriticNetwork(nn.Module):
    def __init__(self, learning_rate, state_dims, fc1_dims, fc2_dims, n_actions, name, chkpt_dir='tmp/ddpg'):
        super(CriticNetwork, self).__init__() 
        self.checkpoint_file = os.path.join(chkpt_dir,name+'_ddpg')

        self.fc1 = T.nn.utils.parametrizations.weight_norm(nn.Linear(state_dims+n_actions, fc1_dims)) 
        self.bn1 = nn.LayerNorm(fc1_dims)
        self.fc2 = T.nn.utils.parametrizations.weight_norm(nn.Linear(fc1_dims, fc2_dims))
        self.bn2 = nn.LayerNorm(fc2_dims)
        self.fc3 = T.nn.utils.parametrizations.weight_norm(nn.Linear(fc2_dims, 1))

        self.optimizer = optim.Adam(self.parameters(), lr=learning_rate)

        self.device = T.device('cuda:0' if T.cuda.is_available() else 'cpu')
        self.to(self.device)

    def forward(self, state, action):
        x = T.cat([state, action], dim=-1)
        x = T.relu(self.bn1(self.fc1(x)))
        x = T.relu(self.bn2(self.fc2(x)))
        x = self.fc3(x)
        return x

    def save_checkpoint(self):
        print('... saving checkpoint ...')
        T.save(self.state_dict(), self.checkpoint_file)

    def load_checkpoint(self):
        print('... loading checkpoint ...')
        self.load_state_dict(T.load(self.checkpoint_file))
 

In [None]:
class ActorNetwork(nn.Module):
    def __init__(self, learning_rate, state_dims, fc1_dims, fc2_dims, n_actions, name, chkpt_dir='tmp/ddpg'):
        super(ActorNetwork, self).__init__()
        self.checkpoint_file = os.path.join(chkpt_dir,name+'_ddpg')

        self.fc1 = T.nn.utils.parametrizations.weight_norm(nn.Linear(state_dims , fc1_dims)) 
        self.bn1 = nn.LayerNorm(fc1_dims)
        self.fc2 = T.nn.utils.parametrizations.weight_norm(nn.Linear(fc1_dims, fc2_dims))
        self.bn2 = nn.LayerNorm(fc2_dims)
        self.fc3 = T.nn.utils.parametrizations.weight_norm(nn.Linear(fc2_dims, n_actions))

        self.optimizer = optim.Adam(self.parameters(), lr=learning_rate)

        self.device = T.device('cuda:0' if T.cuda.is_available() else 'cpu')
        self.to(self.device)

    def forward(self, state):
        x = T.relu(self.bn1(self.fc1(state)))
        x = T.relu(self.bn2(self.fc2(x)))
        x = self.fc3(x)
        return x

    def save_checkpoint(self):
        print('... saving checkpoint ...')
        T.save(self.state_dict(), self.checkpoint_file)

    def load_checkpoint(self):
        print('... loading checkpoint ...')
        self.load_state_dict(T.load(self.checkpoint_file))


In [None]:
@dataclass(kw_only= True)
class DDPGAgent(Agent):
  """Deep Deterministic Policy Gradient agent with target networks and a replay buffer.

  Attributes:
    ObservationDimensions, ActionDimensions: sizes of the observation and action vectors.
    AgentEnvironment: environment the agent is trained in.
    Layer1Size, Layer2Size: hidden-layer widths of the actor and critic networks.
    ActorLearningRate, CriticLearningRate: learning rates of the two optimizers.
    BufferSize: maximum number of transitions kept in the replay buffer.
    BatchSize: number of transitions sampled per learning step.
    EpisodeDuration: maximum number of control steps per episode.
    NumberOfEpisodes: number of training episodes.
    ControlFrequency: passed to the environment as the ``TimeInterval`` of each transition.
    DiscountRate: discount applied to the bootstrapped next-state value.
    SoftUpdateRate: interpolation factor of the target-network update.
    Actor, Critic, TargetActor, TargetCritic, ReplayBuffer, Noise: created in
      ``__post_init__``; values passed to the constructor are replaced.
  """
  ObservationDimensions: int
  ActionDimensions: int
  AgentEnvironment: Environment
  Layer1Size: int= 100
  Layer2Size: int= 50
  ActorLearningRate: float= 0.000025
  CriticLearningRate: float= 0.00025
  BufferSize: int= 128
  BatchSize: int = 64
  EpisodeDuration: int= 20
  NumberOfEpisodes: int= 50
  ControlFrequency: float= 1.0
  DiscountRate: float = 0.99
  SoftUpdateRate: float= 0.001
  Actor: ActorNetwork = NotImplemented
  Critic: CriticNetwork = NotImplemented
  TargetActor: ActorNetwork = NotImplemented
  TargetCritic: CriticNetwork = NotImplemented
  ReplayBuffer: deque = NotImplemented
  Noise: OUActionNoise= NotImplemented
  
  def __post_init__(self):
    self.Actor = ActorNetwork(self.ActorLearningRate, self.ObservationDimensions, self.Layer1Size, self.Layer2Size, n_actions=self.ActionDimensions, name='Actor')
    self.Critic = CriticNetwork(self.CriticLearningRate, self.ObservationDimensions, self.Layer1Size, self.Layer2Size, n_actions=self.ActionDimensions, name='Critic')
    self.TargetActor = ActorNetwork(self.ActorLearningRate, self.ObservationDimensions, self.Layer1Size, self.Layer2Size, n_actions=self.ActionDimensions, name='TargetActor')
    self.TargetCritic = CriticNetwork(self.CriticLearningRate, self.ObservationDimensions, self.Layer1Size, self.Layer2Size, n_actions=self.ActionDimensions, name='TargetCritic')
    # The target networks start as exact copies of the online networks.
    self.TargetActor.load_state_dict(self.Actor.state_dict())
    self.TargetCritic.load_state_dict(self.Critic.state_dict())
    self.ReplayBuffer = deque(maxlen=self.BufferSize)
    self.Noise = OUActionNoise(mu=np.zeros(self.ActionDimensions))

  def Observe(self, State)-> T.Tensor:  
    """Convert an environment state, or a list/tuple of them, into an observation tensor.

    A single state becomes the concatenation of its position and velocity; a
    list or tuple becomes the stack of the observations of its elements.

    Raises:
      TypeError: if ``State`` is neither an environment state nor a list/tuple.
    """
    if isinstance(State, (tuple, list)):
        return T.stack([self.Observe(Element) for Element in State])
    if isinstance(State, self.AgentEnvironment.State):
        return T.cat([State.Position, State.Velocity])
    raise TypeError(f"Cannot observe an object of type {type(State)}.")
  
  def Act(self, Observation: T.Tensor)-> T.Tensor:
    """Return the actor's action for ``Observation`` with exploration noise added.

    The sum of the actor output and the noise sample is scaled by 1e-3 and
    returned as a detached CPU tensor.
    """
    self.Actor.eval()
    # as_tensor avoids re-wrapping (and warning about) an input that already is a tensor.
    Observation = T.as_tensor(Observation, dtype=T.float, device=self.Actor.device)
    with T.no_grad():
        Action = self.Actor.forward(Observation)
    ExplorationNoise = T.as_tensor(self.Noise(), dtype=T.float, device=self.Actor.device)
    NoisyAction = 1e-3 *(Action + ExplorationNoise)
    self.Actor.train()
    return NoisyAction.cpu().detach()

  def _SoftUpdate(self, TargetNetwork, OnlineNetwork):
    "Move every target parameter a fraction SoftUpdateRate towards its online counterpart."
    with T.no_grad():
        for target_param, param in zip(TargetNetwork.parameters(), OnlineNetwork.parameters()):
            target_param.copy_(self.SoftUpdateRate * param + (1 - self.SoftUpdateRate) * target_param)

  def Learn(self):
    """Run one learning step on a random mini-batch of the replay buffer.

    Does nothing until the buffer holds at least ``BatchSize`` transitions.
    Otherwise it updates the critic towards the one-step bootstrapped target,
    updates the actor through the critic, and soft-updates both target networks.
    """
    if len(self.ReplayBuffer) < self.BatchSize:
        return

    Batch = random.sample(self.ReplayBuffer, self.BatchSize)
    States, Actions, NextStates, Rewards, TerminalSignals = zip(*Batch)

    Device = self.Critic.device
    States = T.stack(States).to(Device)
    Actions = T.stack(Actions).to(Device)
    Rewards = T.tensor(Rewards, dtype=T.float).unsqueeze(1).to(Device)
    NextStates =  T.stack(NextStates).to(Device)
    TerminalSignals = T.tensor(TerminalSignals, dtype=T.float).unsqueeze(1).to(Device)
    
    self.TargetActor.eval()
    self.TargetCritic.eval()

    # Critic update: regress Q(s, a) onto r + discount * Q'(s', actor'(s')),
    # dropping the bootstrap term for terminal transitions.
    with T.no_grad():
      TargetActions = self.TargetActor.forward(NextStates)
      NextCriticValue = self.TargetCritic.forward(NextStates, TargetActions)
      TargetQValue = Rewards + self.DiscountRate * NextCriticValue * (1 - TerminalSignals)
    
    self.Critic.train()
    ExpectedQValue = self.Critic.forward(States, Actions)
    CriticLoss = nn.MSELoss()(ExpectedQValue, TargetQValue)
    self.Critic.optimizer.zero_grad()
    CriticLoss.backward()
    self.Critic.optimizer.step()

    # Actor update. The critic output is negated before being treated as a
    # reward, so this step lowers the critic's value of the actor's actions,
    # i.e. the critic is used as a cost-to-go.
    # NOTE(review): confirm this matches the sign convention of the environment's reward.
    self.Actor.train()
    PredictedAction = self.Actor.forward(States)
    PredictedReward = -self.Critic.forward(States, PredictedAction)
    ActorLoss = -T.mean(PredictedReward)
    self.Actor.optimizer.zero_grad()
    ActorLoss.backward()
    self.Actor.optimizer.step()
    
    self._SoftUpdate(self.TargetActor, self.Actor)
    self._SoftUpdate(self.TargetCritic, self.Critic)

  def LearningAlgorithm(self):
    """Train the agent for ``NumberOfEpisodes`` episodes.

    Each episode resets the environment and runs up to ``EpisodeDuration``
    control steps (stopping early at a terminal state): act, store the
    transition, learn. The per-episode returns are plotted, the four networks
    are checkpointed, and the list of returns is returned.
    """
    ReturnHistory = []
    for _ in range(self.NumberOfEpisodes):
        self.AgentEnvironment.Reset()
        Return = 0
        for _ in range(self.EpisodeDuration):
          CurrentState = self.AgentEnvironment.CurrentState
          Observation= self.Observe(CurrentState)
          Action = self.Act(Observation) 
          NextState, Reward, IsDone= self.AgentEnvironment.StateTransition(CurrentState, Action, TimeInterval=self.ControlFrequency) 
          self.ReplayBuffer.append((CurrentState.Vector(), Action, NextState.Vector(), Reward, int(IsDone)))
          self.Learn()
          Return += Reward
          self.AgentEnvironment.CurrentState = NextState
          # An episode ends at a terminal state; stepping on would fill the
          # buffer with transitions from outside the viable region.
          if IsDone:
              break
        ReturnHistory.append(Return)
    plt.plot(ReturnHistory)
    for Network in (self.Actor, self.Critic, self.TargetActor, self.TargetCritic):
        # Make sure the checkpoint directory exists so a finished training run
        # is not lost to a missing folder.
        CheckpointDirectory = os.path.dirname(Network.checkpoint_file)
        if CheckpointDirectory:
            os.makedirs(CheckpointDirectory, exist_ok=True)
        Network.save_checkpoint()
    return ReturnHistory


In [None]:
# Experiment set-up: one negative source charge, a positive test particle, and a DDPG agent.
# NOTE(review): layer1_size and layer2_size are not passed to DDPGAgent below,
# so the agent is built with its own default layer sizes.
ObservationDimensions, layer1_size, layer2_size, n_actions= 4, 10, 5, 2
NegativeCharge= Particle(Mass=1.0, Charge= -1e-6)
PositiveCharge= Particle(Mass=1.0, Charge= 1e-6)
# Field sources: parallel lists of particles and of their positions.
Sources = {"Particle": [NegativeCharge],
          "Position": [T.tensor([10.0, 0.0])]}
TestElectricField= ElectricField(FieldSources=Sources)
# NOTE(review): the target (50, 50) lies outside the [-10, 10] box that
# ParticleInField.IsTerminalCondition treats as non-terminal; confirm this is intended.
PositiveChargeInElectricField= ParticleInField(Field=TestElectricField, ChargedParticle=PositiveCharge, Target=T.tensor([50.0, 50.0]))
TestDDPGAgent= DDPGAgent(ObservationDimensions=ObservationDimensions, ActionDimensions=n_actions, AgentEnvironment=PositiveChargeInElectricField)
# Observation of the current state, wrapped in a list (stacked form); `obs` is not used again in the visible cells.
obs= TestDDPGAgent.Observe([PositiveChargeInElectricField.CurrentState])
# Train the agent; this also plots the returns and writes the network checkpoints.
TestDDPGAgent.LearningAlgorithm()

In [None]:
# Roll the environment forward for a run duration of 100, passing the trained actor as the policy argument.
StateTrajectory, ActionTrajectory, Time= PositiveChargeInElectricField.SampleTrajectory(100, TestDDPGAgent.Actor)
# Draw the position path of the sampled trajectory.
PositiveChargeInElectricField.PlotTrajectory(StateTrajectory, Time)
# Cell output: the first state of the trajectory and the trajectory's value.
StateTrajectory[0], PositiveChargeInElectricField.TrajectoryValue(StateTrajectory, ActionTrajectory, Time)

In [None]:
# Scratch check of torch.norm: the same nine values (-4..4) as a flat vector and as a 3x3 matrix.
a = torch.arange(9, dtype= torch.float) - 4
b = a.reshape((3, 3))
# Cell output: both tensors and their norms.
a, b, torch.norm(a), torch.norm(b)