In [279]:
'''This repository contains a detailed implementation of the Reinforcement Learning Environment class'''
import matplotlib.pyplot as plt
import numpy as np
from dataclasses import *
import torch as T
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from typing import Any, Callable, Dict, List, Tuple, Union, Optional
from functools import wraps
import os
import random
from abc import ABC, abstractmethod
# Expose `ndim` on every tensor as the length of its `shape`.
# NOTE(review): recent PyTorch releases appear to provide `Tensor.ndim` already; confirm whether this override is still needed.
T.Tensor.ndim = property(lambda self: len(self.shape))

In [281]:
@dataclass
class EnforceClassTyping:
    '''Dataclass mixin that validates field values against their annotations.

    The check runs in `__post_init__`, i.e. right after the generated `__init__`
    of a dataclass subclass has assigned its fields.

    Raises:
        TypeError: if a field value is not an instance of its annotated type.
    '''
    def __post_init__(self):
        # `fields` lists the real dataclass fields only, so annotated names that
        # never become instance attributes cannot trigger a lookup failure.
        for field_info in fields(self):
            name, field_type = field_info.name, field_info.type
            value = getattr(self, name)
            # Parameterised generics (e.g. `list[float]`) cannot be passed to
            # `isinstance`; check against their runtime origin (`list`) instead.
            origin = getattr(field_type, '__origin__', None)
            runtime_type = field_type if origin is None else origin
            try:
                matches = isinstance(value, runtime_type)
            except TypeError:
                # The hint is not checkable at runtime (string annotation, Union, ...).
                continue
            if not matches:
                current_type = type(value)
                raise TypeError(f"The field `{name}` was assigned by `{current_type}` instead of `{field_type}`")
def EnforceMethodTyping(func: Callable) -> Callable:
    '''Enforce the type hints of a method's arguments at call time.

    Arguments are matched to parameters by name (via the method signature), so
    unannotated parameters and the `return` annotation never shift the check onto
    the wrong argument. Defaults that are not passed explicitly are not checked.

    Args:
        func: the method to wrap; its first parameter is the instance.

    Returns:
        `func` itself when it has no argument annotations, otherwise a wrapper
        that validates the arguments and then calls `func`.

    Raises:
        TypeError: (from the wrapper) when an argument does not match its hint.
    '''
    import inspect  # function-scope import: only needed by this decorator

    arg_annotations = {name: hint for name, hint in func.__annotations__.items() if name != 'return'}
    if not arg_annotations:
        return func

    signature = inspect.signature(func)
    var_positional = inspect.Parameter.VAR_POSITIONAL
    var_keyword = inspect.Parameter.VAR_KEYWORD

    def conforms(value: Any, annotation: Any) -> bool:
        # Parameterised generics are checked against their runtime origin;
        # hints that `isinstance` cannot handle are treated as satisfied.
        origin = getattr(annotation, '__origin__', None)
        runtime_type = annotation if origin is None else origin
        try:
            return isinstance(value, runtime_type)
        except TypeError:
            return True

    @wraps(func)
    def wrapper(self, *args: Tuple[Any], **kwargs: Dict[str, Any]) -> Any:
        try:
            bound = signature.bind(self, *args, **kwargs)
        except TypeError:
            # Wrong arity or unknown keyword: let the real call raise its own error.
            return func(self, *args, **kwargs)

        for arg_name, arg_value in bound.arguments.items():
            if arg_name not in arg_annotations:
                continue
            annotation = arg_annotations[arg_name]
            kind = signature.parameters[arg_name].kind
            if kind is var_positional:
                # `*args: X` annotates every extra positional value.
                for item in arg_value:
                    if not conforms(item, annotation):
                        raise TypeError(f"Expected {annotation} for argument {item}, got {type(item)}.")
            elif kind is var_keyword:
                # `**kwargs: X` annotates every extra keyword value.
                for key, item in arg_value.items():
                    if not conforms(item, annotation):
                        raise TypeError(f"Expected {annotation} for keyword argument {key}, got {type(item)}.")
            elif not conforms(arg_value, annotation):
                if arg_name in kwargs:
                    raise TypeError(f"Expected {annotation} for keyword argument {arg_name}, got {type(arg_value)}.")
                raise TypeError(f"Expected {annotation} for argument {arg_value}, got {type(arg_value)}.")

        return func(self, *args, **kwargs)

    return wrapper
def EnforceFunctionTyping(func: Callable) -> Callable:
    '''Enforce the type hints of a plain function's arguments at call time.

    Arguments are matched to parameters by name (via the function signature), so
    unannotated parameters and the `return` annotation never shift the check onto
    the wrong argument. Defaults that are not passed explicitly are not checked.

    Args:
        func: the function to wrap.

    Returns:
        A wrapper that validates the arguments and then calls `func`.

    Raises:
        TypeError: (from the wrapper) when an argument does not match its hint.
    '''
    import inspect  # function-scope import: only needed by this decorator

    signature = inspect.signature(func)
    var_positional = inspect.Parameter.VAR_POSITIONAL
    var_keyword = inspect.Parameter.VAR_KEYWORD

    def conforms(value: Any, annotation: Any) -> bool:
        # Parameterised generics are checked against their runtime origin;
        # hints that `isinstance` cannot handle are treated as satisfied.
        origin = getattr(annotation, '__origin__', None)
        runtime_type = annotation if origin is None else origin
        try:
            return isinstance(value, runtime_type)
        except TypeError:
            return True

    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            bound = signature.bind(*args, **kwargs)
        except TypeError:
            # Wrong arity or unknown keyword: let the real call raise its own error.
            return func(*args, **kwargs)

        annotations = func.__annotations__
        for arg_name, arg_value in bound.arguments.items():
            if arg_name == 'return' or arg_name not in annotations:
                continue
            annotation = annotations[arg_name]
            kind = signature.parameters[arg_name].kind
            if kind is var_positional:
                # `*args: X` annotates every extra positional value.
                for item in arg_value:
                    if not conforms(item, annotation):
                        raise TypeError(f"Expected {annotation} for {item}, got {type(item)}.")
            elif kind is var_keyword:
                # `**kwargs: X` annotates every extra keyword value.
                for key, item in arg_value.items():
                    if not conforms(item, annotation):
                        raise TypeError(f"Expected {annotation} for {key}, got {type(item)}.")
            elif not conforms(arg_value, annotation):
                if arg_name in kwargs:
                    raise TypeError(f"Expected {annotation} for {arg_name}, got {type(arg_value)}.")
                raise TypeError(f"Expected {annotation} for {arg_value}, got {type(arg_value)}.")

        return func(*args, **kwargs)

    return wrapper
 

In [282]:
@dataclass
class Particle(EnforceClassTyping):
    '''Point particle described by its mass, charge, position and velocity.

    Used elsewhere in this notebook both as a field source and as the particle
    that moves through a field. Field values are type-checked at construction by
    the inherited `EnforceClassTyping.__post_init__`.'''
    Mass: float # kg
    Charge: float #C
    # The field classes read Position[0] and Position[1]; presumably a 2-element [x, y] tensor.
    Position: T.Tensor # m
    Velocity: T.Tensor # m/s


In [283]:
@dataclass
class Field:
    '''Base description of a bounded field with one low and one high bound per dimension.

    Attributes:
        Dimensions: number of spatial dimensions of the field.
        FieldHighBound: upper bound of the field, one value per dimension.
        FieldLowBound: lower bound of the field, one value per dimension.

    Raises:
        AssertionError: if either bound list does not have `Dimensions` entries.

    NOTE: this class does not derive from `ABC`, so the `@abstractmethod`
    markers below document intent but do not prevent instantiation.
    '''
    Dimensions: int
    FieldHighBound: list[float]
    FieldLowBound: list[float]
    def __post_init__(self):
        # Compare against Dimensions directly: a bitwise `|` on the right-hand side
        # binds tighter than `==` and would change the expected length.
        assert len(self.FieldHighBound) == self.Dimensions, "Length of high bound and dimensions do not match"
        assert len(self.FieldLowBound) == self.Dimensions, "Length of low bound and dimensions do not match"
    @abstractmethod
    def FieldStrength(self, ObservationPosition: T.Tensor)-> T.Tensor:
        '''Return the field strength at the given observation position(s).'''
        pass
    @abstractmethod
    def FieldPotential(self, ObservationPosition: T.Tensor)-> float:
        '''Return the field potential at the given observation position.'''
        pass

class HomogenousField(Field):
    '''Field that reports zero strength and zero potential at every observation position.'''
    def FieldStrength(self, ObservationPosition: T.Tensor)-> T.Tensor:
        '''Return a float64 tensor of zeros with one row per observation and one column per dimension.'''
        ObservationCount = ObservationPosition.shape[0]
        StrengthShape = (ObservationCount, self.Dimensions)
        return T.zeros(StrengthShape, dtype=T.float64)
    def FieldPotential(self, ObservationPosition: T.Tensor)-> float:
        '''Return a potential of 0.0 regardless of the observation position.'''
        NullPotential = 0.0
        return NullPotential

@dataclass
class LJField:
    '''2D field generated by point sources, combining a 1/r^3 and an opposing 1/r^6 displacement term per source.

    Attributes:
        FieldSources: particles generating the field (their `Charge` and `Position` are used).
        FieldHighBound: upper bound of the plotted region, used for both axes.
        FieldLowBound: lower bound of the plotted region, used for both axes.

    NOTE(review): "LJ" presumably stands for Lennard-Jones; confirm the intended force law.
    '''
    FieldSources: list[Particle]
    FieldHighBound: float
    FieldLowBound: float
    def __call__(self, ObservationPosition: T.Tensor)->T.Tensor:
        '''Shorthand for `ElectricFieldStrength(ObservationPosition)`.'''
        return self.ElectricFieldStrength(ObservationPosition)
    @EnforceMethodTyping
    def ElectricFieldStrength(self, ObservationPosition: T.Tensor)->T.Tensor:
        '''Return the field strength produced by all sources at the observation position(s).

        Args:
            ObservationPosition: tensor whose first axis holds the x and y coordinates
                (index 0 and 1); any trailing axes enumerate observation points.

        Returns:
            Tensor shaped like `ObservationPosition` holding the x and y field components.

        Raises:
            TypeError: if an entry of `FieldSources` is not exactly a `Particle`.
        '''
        CoulombConstant = 8.9875e9 #N*m^2/C^2
        for FieldSource in self.FieldSources:
            if type(FieldSource) != Particle:
                raise TypeError("The input is not valid")
        assert type(ObservationPosition) == T.Tensor, "Invalid Reference point data type"
        ElectricFieldVector = T.zeros_like(ObservationPosition)
        for FieldSource in self.FieldSources:
            # Broadcast the source coordinates to the shape of the observation grid.
            PositionMatrices= T.stack([T.ones_like(ObservationPosition[0])* FieldSource.Position[0].item(), 
                                            T.ones_like(ObservationPosition[1])* FieldSource.Position[1].item()])
            DisplacementVector = ObservationPosition - PositionMatrices
            DisplacementMagnitude = T.sqrt(DisplacementVector[0]**2 +DisplacementVector[1]**2)
            ElectricFieldVector += ((FieldSource.Charge) / DisplacementMagnitude**3 * DisplacementVector) - ((FieldSource.Charge) / DisplacementMagnitude**6 * DisplacementVector)
        ElectricFieldVector= CoulombConstant *ElectricFieldVector
        return ElectricFieldVector #N/C or V/m
    @EnforceMethodTyping
    def WorkDoneAgainstField(self, InitialPosition: T.Tensor, FinalPosition: T.Tensor, resolution: int= 5000)-> float:
        '''Approximate the work needed to move along a straight line between two positions.

        The segment is split into `resolution` equal steps; the field is sampled at the
        start of each step and `-(field . step)` is summed over all steps.

        Args:
            InitialPosition: 1-D tensor whose first two entries are the start x and y.
            FinalPosition: 1-D tensor whose first two entries are the end x and y.
            resolution: number of integration steps; no work is accumulated when it is not positive.

        Returns:
            The accumulated work as a Python float.
        '''
        if resolution <= 0:
            return 0.0
        Interval = (FinalPosition[:2] - InitialPosition[:2]) / resolution
        Steps = T.arange(resolution, dtype=Interval.dtype, device=Interval.device)
        # Shape (2, resolution): column i is the start point of integration step i.
        SamplePositions = InitialPosition[:2].unsqueeze(1) + Interval.unsqueeze(1) * Steps
        # One batched field evaluation replaces a Python loop over every step.
        SampleFieldStrength = self.ElectricFieldStrength(SamplePositions)
        WorkDone = -(SampleFieldStrength * Interval.unsqueeze(1)).sum()
        return WorkDone.item()
    @EnforceMethodTyping
    def PlotField(self):
        '''Plot the 2D field as unit-length arrows coloured by the local field magnitude.'''
        GridAxis = T.linspace(self.FieldLowBound, self.FieldHighBound, 40)
        # 'ij' is the indexing torch applies when the argument is omitted; passing it keeps the layout explicit.
        ObservationPosition= T.stack(T.meshgrid(GridAxis, GridAxis, indexing='ij'))
        xd, yd = self.ElectricFieldStrength(ObservationPosition)
        # Take the magnitude before normalising so both components are scaled by the same value.
        FieldMagnitude = T.sqrt(xd**2 + yd**2)
        UnitX = xd / FieldMagnitude
        UnitY = yd / FieldMagnitude
        # Set the resolution before the figure exists so it applies to this plot as well.
        plt.rcParams['figure.dpi'] = 250
        fig, ax = plt.subplots(1,1)
        cp = ax.quiver(ObservationPosition[0],ObservationPosition[1],UnitX,UnitY,FieldMagnitude)
        fig.colorbar(cp)
        plt.show()

@dataclass
class ElectricField:
    '''2D electrostatic field generated by a list of point charges.

    Attributes:
        FieldSources: particles generating the field (their `Charge` and `Position` are used).
        FieldHighBound: upper bound of the region, used for both axes.
        FieldLowBound: lower bound of the region, used for both axes.
    '''
    FieldSources: list[Particle]
    FieldHighBound: float
    FieldLowBound: float
    def __call__(self, ObservationPosition: T.Tensor)->T.Tensor:
        '''Shorthand for `ElectricFieldStrength(ObservationPosition)`.'''
        return self.ElectricFieldStrength(ObservationPosition)
    @EnforceMethodTyping
    def ElectricFieldStrength(self, ObservationPosition: T.Tensor)->T.Tensor:
        '''Return the field strength produced by all sources at the observation position(s).

        Each source contributes `k * Charge / r^3 * displacement`, where `displacement`
        points from the source to the observation position.

        Args:
            ObservationPosition: tensor whose first axis holds the x and y coordinates
                (index 0 and 1); any trailing axes enumerate observation points.

        Returns:
            Tensor shaped like `ObservationPosition` holding the x and y field components.

        Raises:
            TypeError: if an entry of `FieldSources` is not exactly a `Particle`.
        '''
        CoulombConstant = 8.9875e9 #N*m^2/C^2
        for FieldSource in self.FieldSources:
            if type(FieldSource) != Particle:
                raise TypeError("The input is not valid")
        assert type(ObservationPosition) == T.Tensor, "Invalid Reference point data type"
        ElectricFieldVector = T.zeros_like(ObservationPosition)
        for FieldSource in self.FieldSources:
            # Broadcast the source coordinates to the shape of the observation grid.
            PositionMatrices= T.stack([T.ones_like(ObservationPosition[0])* FieldSource.Position[0].item(), 
                                            T.ones_like(ObservationPosition[1])* FieldSource.Position[1].item()])
            DisplacementVector = ObservationPosition - PositionMatrices
            DisplacementMagnitude = T.sqrt(DisplacementVector[0]**2 +DisplacementVector[1]**2)
            ElectricFieldVector += (CoulombConstant * FieldSource.Charge) / DisplacementMagnitude**3 * DisplacementVector
        return ElectricFieldVector #N/C or V/m
    @EnforceMethodTyping
    def WorkDoneAgainstField(self, InitialPosition: T.Tensor, FinalPosition: T.Tensor, resolution: int= 5000)-> float:
        '''Approximate the work needed to move along a straight line between two positions.

        The segment is split into `resolution` equal steps; the field is sampled at the
        start of each step and `-(field . step)` is summed over all steps.

        Args:
            InitialPosition: 1-D tensor whose first two entries are the start x and y.
            FinalPosition: 1-D tensor whose first two entries are the end x and y.
            resolution: number of integration steps; no work is accumulated when it is not positive.

        Returns:
            The accumulated work as a Python float.
        '''
        if resolution <= 0:
            return 0.0
        Interval = (FinalPosition[:2] - InitialPosition[:2]) / resolution
        Steps = T.arange(resolution, dtype=Interval.dtype, device=Interval.device)
        # Shape (2, resolution): column i is the start point of integration step i.
        SamplePositions = InitialPosition[:2].unsqueeze(1) + Interval.unsqueeze(1) * Steps
        # One batched field evaluation replaces a Python loop over every step.
        SampleFieldStrength = self.ElectricFieldStrength(SamplePositions)
        WorkDone = -(SampleFieldStrength * Interval.unsqueeze(1)).sum()
        return WorkDone.item()
    @EnforceMethodTyping
    def PlotField(self):
        '''Plot the 2D field as unit-length arrows coloured by the local field magnitude.'''
        GridAxis = T.linspace(self.FieldLowBound, self.FieldHighBound, 50)
        # 'ij' is the indexing torch applies when the argument is omitted; passing it keeps the layout explicit.
        ObservationPosition= T.stack(T.meshgrid(GridAxis, GridAxis, indexing='ij'))
        xd, yd = self.ElectricFieldStrength(ObservationPosition)
        # Take the magnitude before normalising so both components are scaled by the same value.
        FieldMagnitude = T.sqrt(xd**2 + yd**2)
        UnitX = xd / FieldMagnitude
        UnitY = yd / FieldMagnitude
        # Set the resolution before the figure exists so it applies to this plot as well.
        plt.rcParams['figure.dpi'] = 250
        fig, ax = plt.subplots(1,1)
        cp = ax.quiver(ObservationPosition[0],ObservationPosition[1],UnitX,UnitY,FieldMagnitude)
        fig.colorbar(cp)
        plt.show()



In [284]:
# Should obey Newton's laws in a homogeneous vector field
@dataclass
class Environment:
    '''Interface sketch of a reinforcement-learning environment.

    Holds the elapsed time plus the initial and current state, and names the
    operations a concrete environment is expected to provide. Every method
    here is a stub that returns None.

    NOTE: the class does not derive from `ABC`, so the `@abstractmethod`
    markers are not enforced at instantiation.
    '''
    Time: float # s

    @dataclass
    class State:
        '''Empty placeholder for the state type of a concrete environment.'''

    InitialState: State 
    CurrentState: State 

    def __post_init__(self):
        '''No post-construction work is performed.'''
        return None

    @abstractmethod
    def TransitionModel(self, State: State, Action)-> State:
        '''Stub: map a state and an action to the next state.'''

    @abstractmethod
    def RewardModel(self, State: State, Action, NextState: State, TerminalSignal: bool)-> float:
        '''Stub: score the transition from `State` to `NextState` under `Action`.'''

    @abstractmethod
    def IsTerminalCondition(self, State: State)-> bool:
        '''Stub: tell whether `State` ends an episode.'''

    @abstractmethod
    def Step(self, State: State, Action)-> tuple[float, State, bool]:
        '''Stub: advance one step from `State` under `Action`.'''

    @abstractmethod
    def Run(self, RunDuration: float)-> list[State]:
        '''Stub: simulate the environment for `RunDuration`.'''
    
@dataclass
class ParticleInField(EnforceClassTyping):
    '''Environment in which a charged particle moves through a field towards a target.

    Attributes:
        Field: field the particle moves through; its low/high bounds delimit the viable learning region.
        ChargedParticle: supplies the mass and charge used by the dynamics.
        Target: position the reward is measured against.
        DistanceWeight: weight of the distance term of the cost.
        EnergyWeight: weight of the energy term of the cost.
        TerminalSignalWeight: weight of the terminal-signal term of the cost.
        InitialState: state the environment starts from; drawn at random when omitted.
        CurrentState: state the environment is currently in.

    NOTE: `__post_init__` is overridden without delegating to `EnforceClassTyping`,
    so the fields of this class are not type-checked at construction.
    '''
    Field: ElectricField
    ChargedParticle: Particle
    Target: T.Tensor
    DistanceWeight: float= 0.5
    EnergyWeight: float= 0.5
    TerminalSignalWeight: float= 0.5
    @dataclass 
    class State(EnforceClassTyping):
        '''State of the particle: its position, its momentum and the elapsed time.
        These are the parameters the agent is able to observe.'''
        Position: T.Tensor # m
        Momentum: T.Tensor #kg*m/s
        Time: float # s
        def __add__(self, other):
            '''Return a new state whose position, momentum and time are the element-wise sums.'''
            Position = self.Position + other.Position
            Momentum = self.Momentum + other.Momentum
            Time = self.Time + other.Time
            # Build the result through the class: a state instance is not callable.
            return type(self)(Position, Momentum, Time)
        def Unwrap(self)->T.Tensor:
            '''Flatten the state into one tensor laid out as [Position..., Momentum..., Time].'''
            # `T.cat` only accepts tensors, so the scalar time is wrapped first.
            TimeTensor = T.tensor([self.Time], dtype=self.Position.dtype, device=self.Position.device)
            return T.cat([self.Position,
                          self.Momentum,
                          TimeTensor])
    InitialState: State = None
    CurrentState: State = None
    def __post_init__(self):
        '''Draw a random initial state when none was given and start from the initial state.'''
        if self.InitialState is None:
            self.InitialState= self.RandomState()
        self.CurrentState= self.InitialState

    def _AxisBounds(self, Axis: int):
        '''Return `(low, high)` of the field for one axis, accepting scalar or per-axis bounds.'''
        LowBound, HighBound = self.Field.FieldLowBound, self.Field.FieldHighBound
        if isinstance(LowBound, (list, tuple)):
            LowBound = LowBound[Axis]
        if isinstance(HighBound, (list, tuple)):
            HighBound = HighBound[Axis]
        return LowBound, HighBound

    def StateDynamics(self, State: State, Action: T.Tensor= T.tensor([0.0, 0.0])):
        '''Return the time derivative of `State` under `Action`, packed as a `State`.

        Position changes with momentum/mass, momentum with charge*field + action,
        and time advances at unit rate.'''
        PositionDynamics= State.Momentum/ self.ChargedParticle.Mass
        MomentumDynamics= (self.ChargedParticle.Charge* self.Field(State.Position))+Action
        TimeDynamics= 1.0
        return self.State(PositionDynamics, MomentumDynamics, TimeDynamics)
    
    @EnforceMethodTyping
    def TransitionModel(self, State: State, Action: T.Tensor= T.tensor([0.0, 0.0]), TimeInterval:float= 1.0, Resolution: int=30)-> State:
        '''Integrate the particle's motion over `TimeInterval` and return the resulting state.

        Args:
            State: state to start from.
            Action: force applied in addition to the field force.
            TimeInterval: simulated duration in seconds.
            Resolution: number of equal integration sub-steps; nothing moves when it is not positive.

        Returns:
            The state after the interval, with its time advanced by the time integrated.
        '''
        CurrentMomentum= State.Momentum
        CurrrentPosition= State.Position
        TimeTaken= 0.0
        if Resolution > 0:
            StepDuration= TimeInterval/Resolution
            for _ in range(Resolution):
                Force= (self.ChargedParticle.Charge* self.Field(CurrrentPosition))+Action
                Velocity= CurrentMomentum/ self.ChargedParticle.Mass
                # Both updates use the values from the start of the sub-step.
                CurrentMomentum, CurrrentPosition= CurrentMomentum+ Force*StepDuration, CurrrentPosition+ Velocity*StepDuration
                TimeTaken+= StepDuration
        CurrentTime= State.Time+ TimeTaken
        return self.State(CurrrentPosition, CurrentMomentum, CurrentTime)
    
    @EnforceMethodTyping
    def IsTerminalCondition(self, State: State)-> bool:
        '''Return True when the state's position lies outside the field bounds on either axis.'''
        for Axis in (0, 1):
            LowBound, HighBound = self._AxisBounds(Axis)
            # Leaving the region along a single axis is enough to end the episode.
            if not (LowBound <= State.Position[Axis] <= HighBound):
                return True
        return False
    
    @EnforceMethodTyping
    def RewardModel(self, State: State, Action: T.Tensor, NextState: State, TerminalSignal: bool)-> float:
        '''Return the reward of a transition: the negated weighted cost.

        The cost adds the distance gained from the target, the work done against
        the field along the move, and the terminal signal, each times its weight.
        Moving closer to the target therefore raises the reward.'''
        # Positive when the particle ends up farther from the target than it started.
        DistanceGainedFromTarget= T.norm(NextState.Position-self.Target)- T.norm(State.Position-self.Target) 
        EnergyConsumed= self.Field.WorkDoneAgainstField(State.Position, NextState.Position)
        Cost= self.DistanceWeight* DistanceGainedFromTarget+ self.EnergyWeight* EnergyConsumed+ self.TerminalSignalWeight* TerminalSignal
        return -Cost.item()
    
    def Step(self, State: State, Action: T.Tensor):
        '''Apply `Action` in `State` and return `(NextState, Reward, TerminalSignal)`.'''
        NextState= self.TransitionModel(State, Action)
        TerminalSignal= self.IsTerminalCondition(NextState)
        Reward= self.RewardModel(State, Action, NextState, TerminalSignal)
        return NextState, Reward, TerminalSignal
    
    @EnforceMethodTyping
    def RandomState(self)->State:
        '''Return a state at rest, at time 0.0, at a uniformly random position inside the field bounds.'''
        XLowBound, XHighBound = self._AxisBounds(0)
        YLowBound, YHighBound = self._AxisBounds(1)
        RandomPosition= T.Tensor([np.random.uniform(XLowBound, XHighBound), 
                                  np.random.uniform(YLowBound, YHighBound)])
        RandomMomentum= T.zeros_like(RandomPosition)
        return self.State(RandomPosition, RandomMomentum, 0.0)
    
    def Render(self):
        '''Placeholder; rendering is not implemented.'''
        pass

    def Run(self, RunDuration: float, Resolution: int=30):
        '''Simulate `RunDuration` seconds without any action, starting from `CurrentState`.

        Args:
            RunDuration: total simulated time in seconds.
            Resolution: number of recorded steps the duration is split into.

        Returns:
            List with the position at the start of each step (the position reached
            after the last step is not included). `CurrentState` is left unchanged.
        '''
        Path= []
        if Resolution <= 0:
            return Path
        # Each recorded step covers an equal share of the requested duration.
        StepDuration= float(RunDuration)/Resolution
        State= self.CurrentState
        for _ in range(Resolution):
            Path.append(State.Position)
            State= self.TransitionModel(State, TimeInterval=StepDuration)
        return Path
    
    def PlotRun(self, RunDuration: float):
        '''Plot the path of an action-free run; the start is a black dot, the end a red star.'''
        Path= self.Run(RunDuration)
        Path= T.stack(Path)
        Path= Path.transpose(dim0=0, dim1=1)
        XLowBound, XHighBound = self._AxisBounds(0)
        YLowBound, YHighBound = self._AxisBounds(1)
        plt.plot(Path[0], Path[1])
        plt.plot(Path[0][0], Path[1][0], 'ko')
        plt.plot(Path[0][-1], Path[1][-1], 'r*')
        # Frame the plot with the field bounds instead of fixed limits.
        plt.xlim(XLowBound, XHighBound)
        plt.ylim(YLowBound, YHighBound)
        plt.grid(True)
        plt.show()

    def Reset(self):
        '''Return the environment to its initial state.'''
        self.CurrentState= self.InitialState


In [None]:
@dataclass
class Agent:
    '''Interface sketch of a learning agent tied to an environment.

    Holds the environment plus a critic and an actor callable, and names the
    operations a concrete agent is expected to provide. Every method here is a
    stub that returns None.
    '''
    AgentEnvironment: Environment
    CriticModel: Callable
    ActorModel: Callable

    def __post_init__(self):
        '''No post-construction work is performed.'''
        return None

    @abstractmethod
    def Act(self, State: Environment.State)-> T.Tensor:
        '''Stub: choose an action for `State`.'''

    @abstractmethod
    def Learn(self):
        '''Stub: update the agent.'''

    @abstractmethod
    def LearningAlgorithm(self):
        '''Stub: the algorithm used for learning.'''

In [None]:
# Two particles of equal mass and opposite charge, placed symmetrically on the x-axis.
Electron= Particle(
    Mass=9.11e-8,
    Charge=-1.6e-9,
    Position=T.tensor([1.0, 0.0]),
    Velocity=T.tensor([0.0, 0.0]),
)
Proton= Particle(
    Mass=9.11e-8,
    Charge=1.6e-9,
    Position=T.tensor([-1.0, 0.0]),
    Velocity=T.tensor([0.0, 0.0]),
)
# Only the electron generates the field; the proton is the particle moved through it.
Source= [Electron]
ElectricField1= ElectricField(FieldSources=Source, FieldHighBound=10.0, FieldLowBound=-10.0)
# ElectricField1.PlotField()
# NOTE(review): this rebinds `Environment`, which an earlier cell defines as a class; consider a distinct name.
Environment= ParticleInField(
    Field=ElectricField1,
    ChargedParticle=Proton,
    Target=T.tensor([1, 1]),
    DistanceWeight=0.5,
    EnergyWeight=0.5,
    TerminalSignalWeight=0.5,
)
Environment.PlotRun(20)