In [1]:
################################### CHAPTER 2 ################################

# Imports
from random import randint
from abc import ABC, abstractmethod
from dataclasses import dataclass, replace
from typing import Generic, TypeVar
import statistics

##### Naive method
def six_sided():
    # distributions hard coded
    return randint(1, 6)

print(six_sided())

def roll_dice():
    # can only sample in a limited manner
    return six_sided() + six_sided() 

print(roll_dice())

##### Using a class to define an interface
class Distribution(ABC):
    @abstractmethod
    def sample(self):
        pass

# Use the abstract "Distribution" class to generate a concrete class "Die"
class Die(Distribution):
    # Initialize as a uniform over n=sides
    def __init__(self, sides):
        self.sides = sides
    # Define attribute "sample" that returns realization of Die
    def sample(self):
        return randint(1, self.sides)
    # In a class - can choose what calling the class returns
    def __repr__(self):
        return f"Die(sides={self.sides})" 
    # Easier bugfixes and removes errors in comparisons to other objects
        # Returns self as function definition
    def __eq__(self, other):
        if isinstance(other, Die):
            return self.sides == other.sides
        return False

# Generate class
six_sided = Die(6)

# Sample using class
print(six_sided.sample())

# Generate function to roll 2 dice
def roll_dice():
    return six_sided.sample() + six_sided.sample()
print(roll_dice())

# Changed due to __repr__ in class definition for debugging
print(six_sided)

# Comparison of different instances of the class
print(six_sided==six_sided)
# Changed due to __eq__ in class definition
print(six_sided==Die(6))
print(Die(6)==Die(6)) 
print(Die(6)==None)

###### Defining all these things is tedious
# Use decorator for dataclass to avoid all of this - frozen prevents changing parameters
@dataclass (frozen=True)
class Die(Distribution):
    # Restricts parameter sides: to be an int
    sides: int
    # Defines the sample attribute
    def sample(self):
        return randint(1, self.sides)
    
# Generate class
six_sided = Die(6)

# Sample using class
print(six_sided.sample())

# Generate function to roll 2 dice
def roll_dice():
    return six_sided.sample() + six_sided.sample()
print(roll_dice())

# Changed due to __repr__ in class definition for debugging
print(six_sided)

# Comparison of different instances of the class
print(six_sided==six_sided)
# Changed due to __eq__ in class definition
print(six_sided==Die(6))
print(Die(6)==Die(6)) 
print(Die(6)==None)

# Changed due to frozen - raises error
# six_sided.sides = 10

# Instead we must generate a new copy - easy with replace from dataclass
d20 = replace(six_sided, sides=20)
print(d20)
# Finally - immutable objects from frozen lets us use immutable objects as dict keys and set elements
    # Essentially function as plain data - and not function references


###### Important to add type annotations - but sometimes unclear what the type is

# A type variable named ”A”
A = TypeVar("A")
            
# Distribution is ”generic in A”
class Distribution(ABC, Generic[A]):
    # Sampling must produce a value of type A
    @abstractmethod
    def sample(self) -> A:
        pass

# Can now specify what type of Distribution we generate
@dataclass (frozen=True)
class Die(Distribution[int]):
    # Defines the sample attribute
    def sample(self):
        return random.randint(1, self.sides)
    
def expected_value(d: Distribution[float], n: int = 100) -> float:
    return statistics.mean(d.sample() for _ in range(n))

3
12
3
5
Die(sides=6)
True
True
True
False
2
7
Die(sides=6)
True
True
True
False
Die(sides=20)


In [None]:
################################### CHAPTER 4 ################################

# Imports
import numpy as np
from abc import ABC, abstractmethod
from dataclasses import dataclass, replace
from typing import Generic, TypeVar
import statistics

from rl.distribution import Choose, Distribution, Callable, Constant, Iterable, SampledDistribution
from rl.dynamic_programming import NonTerminal


A = TypeVar('A')    # Generic action space
S = TypeVar('S')    # Generic state space
            
# Define a class for policies
class Policy(ABC, Generic[S, A]):   #Takes a generic state and action space

    # Calling act with a non-terminal state yields a distribution of actions
    @abstractmethod
    def act(self, state: NonTerminal[S]) -> Distribution[A]:
        pass

# Define a class for deterministic policies using the generic policy class
@dataclass(frozen=True)
class DeterministicPolicy(Policy[S, A]):
    action_for: Callable[[S], A]

    # Calling act with a non-terminal state yields a constant action A
    def act(self, state: NonTerminal[S]) -> Constant[A]:
        return Constant(self.action_for(state.state))

# We can also generate policies that assign equal probability to each non-terminal policy
@dataclass(frozen=True)
class UniformPolicy(Policy[S, A]):
    valid_actions: Callable[[S], Iterable[A]]

    # Only difference is this Choose[A] command
    def act(self, state: NonTerminal[S]) -> Choose[A]:
        return Choose(self.valid_actions(state.state))
    

# Define a class defining the inventory state
@dataclass(frozen=True)
class InventoryState:
    on_hand: int
    on_order: int

    def inventory_position(self) -> int:
        return self.on_hand + self.on_order
    
# Define a class to 
class SimpleInventoryDeterministicPolicy(
        DeterministicPolicy[InventoryState, int]
        ):
            
    def __init__(self, reorder_point: int):
        self.reorder_point: int = reorder_point

    def action_for(s: InventoryState) -> int:
        return max(self.reorder_point - s.inventory_position(), 0)
        super().__init__(action_for)

si_dp = SimpleInventoryDeterministicPolicy(reorder_point=8)

class SimpleInventoryStochasticPolicy(Policy[InventoryState, int]):

    def __init__(self, reorder_point_poisson_mean: float):
        self.reorder_point_poisson_mean: float = reorder_point_poisson_mean

    def act(self, state: NonTerminal[InventoryState]) -> SampledDistribution[int]:
        def action_func(state=state) -> int:
            reorder_point_sample: int = np.random.poisson(self.reorder_point_poisson_mean)
            return max(reorder_point_sample - state.state.inventory_position(),0)
        return SampledDistribution(action_func)

si_sp = SimpleInventoryStochasticPolicy(reorder_point_poisson_mean=8)


TypeError: action_for() takes 1 positional argument but 2 were given