In [1]:
import numpy as np
import scipy.stats
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
class JacksCarRentalEnvironment:
    
    def __init__(self):
        self._carparkA = CarPark(3,3)
        self._carparkB = CarPark(4,2)
        print([self._carparkA.getObservation(),self._carparkB.getObservation()])
        
    def reset(self,val=None):
        self._carparkA = CarPark(3,3,val)
        self._carparkB = CarPark(4,2,val)
        print([self._carparkA.getObservation(),self._carparkB.getObservation()])
        
        
    def step(self,val):
        # val > 0 Autos wandern von A nach B
        # val < 0 Autos wandern von B nach A
        if val < 0:
            a = self._carparkB
            b = self._carparkA
            val = -1*val
        else:
            a = self._carparkA
            b = self._carparkB
        # Zu aller erst werden die autos von jack bewegt    
        # An einem Tag kommen zuerst Kunden und leihen Autos
        # Dann werden Autos zurückgegeben
        # Dann kann jack entscheiden welche autos bewegt werden
        # Einfachster Fall val == 0
        carsRented = 0
        reward = 0.0
        done = False
        if val == 0:
            carsRented += a.carsRenting()
            carsRented += b.carsRenting()
            reward = 10.0*carsRented
            # Bevor Autos kommen auf available pruefen oder spaeter
            if 0 in [self._carparkA.getObservation(),self._carparkB.getObservation()]:
                done = True
            a.carsArriving()
            b.carsArriving()
        else:
            availableCars = a.removeCars(val)
            reward = -2.0*val
            b.addCars(availableCars)
            carsRented += a.carsRenting()
            carsRented += b.carsRenting()
            reward = reward + 10.0*carsRented
            if 0 in [self._carparkA.getObservation(),self._carparkB.getObservation()]:
                done = True
            a.carsArriving()
            b.carsArriving()
        observation = [self._carparkA.getObservation(),self._carparkB.getObservation()]
        return (observation,reward,done,'')
    

In [3]:
class CarPark:
    
    def __init__(self, lamRequest, lamReturn, val=None):
        self._lamRequest = lamRequest
        self._lamReturn = lamReturn
        self._maxSize = 20
        self._maxMove = 5
        if val == None:
            self._carsAvailable = np.random.randint(0,21,1)[0]
        else:
            self._carsAvailable = val
        
        
        
    def getObservation(self):
        return self._carsAvailable
    
    def carsArriving(self):
        r = np.random.poisson(self._lamReturn)
        if self._carsAvailable + r > self._maxSize:
            self._carsAvailable = self._maxSize
        else:
            self._carsAvailable += r
    
    def carsRenting(self):
        carsRented = np.random.poisson(self._lamRequest)
        if self._carsAvailable - carsRented < 0:
            carsRented = self._carsAvailable
            self._carsAvailable = 0
        else:
            self._carsAvailable -= carsRented
        return carsRented
           
    def addCars(self,amount):
        if amount > self._maxMove:
            raise Exception('Zu viele Autos')
        elif amount < 0:
            raise Exception('Du wolltest autos hinzufuegen')
        self._carsAvailable += amount
        if self._carsAvailable > self._maxSize:
            self._carsAvailable = self._maxSize
            
    def removeCars(self,amount):
        if amount > self._maxMove:
            raise Exception('Zu viele Autos')
        elif amount < 0:
            raise Exception('Du')
        if self._carsAvailable - amount < 0:
            amount = self._carsAvailable
            self._carsAvailable = 0
        else:
            self._carsAvailable -= amount
        return amount

In [63]:
env = JacksCarRentalEnvironment()

[0, 7]


In [35]:
env.step(5)

([2, 3], 22.0, True, '')

In [36]:
env.step(-3)

([7, 1], 14.0, True, '')

In [37]:
env.step(-3)

([9, 3], 18.0, True, '')

In [64]:
env.reset(0)

[0, 0]


In [65]:
env.step(5)

([1, 4], -10.0, True, '')

In [4]:
import unittest
class TestJacksCarRentalEnvironment(unittest.TestCase):
    # there are a lot of magic numbers in this test
    # these numbers depending on the env parameters!
    
    def setUp(self):
        self.env = JacksCarRentalEnvironment()
        
    def _avg_mean(self, cars, action, t_mean, max_diff=1.):
        env = self.env
        n = 10000
        r_mean = 0
        for i in range(n):
            env.reset(cars)
            _, r, _,_ = env.step(action)
            r_mean += r
        r_mean = r_mean / n
        diff = np.abs(r_mean - t_mean)
        self.assertTrue(diff < max_diff)
    
    def test_rent0(self):
        self._avg_mean(20, 0, 70)
        
    def test_rent1(self):
        self._avg_mean(10, 0, 70)    
        
    def test_nightly_moves(self):
        env.reset(0)
        _, r, _,_ = env.step(5)
        self.assertTrue(r==-10.)
        
    def _avg_nb_cars(self, a_desired, b_desired, 
                     cars, action):
        
        env = self.env
        n = 10000
        a_mean = 0
        b_mean = 0
        for i in range(n):
            env.reset(cars)
            ab, _, _,_ = env.step(action)
            a_mean += ab[0]
            b_mean += ab[1]
        a_diff = np.abs((a_mean / n) - a_desired)
        b_diff = np.abs((b_mean / n) - b_desired)
        self.assertTrue(a_diff < 0.5)
        self.assertTrue(b_diff < 0.5)
    
    def test_nb_cars_10_0(self):
        a_desired = 10
        b_desired = 8 
        cars = 10
        action = 0
        self._avg_nb_cars(a_desired, b_desired, 
                     cars, action)
        
    def test_nb_cars_10_3(self):
        a_desired = 7
        b_desired = 11 
        cars = 10
        action = 3
        self._avg_nb_cars(a_desired, b_desired, 
                     cars, action)
        
    def test_nb_cars_10_m2(self):
        a_desired = 12
        b_desired = 6 
        cars = 10
        action = -2
        self._avg_nb_cars(a_desired, b_desired, 
                     cars, action)

In [5]:
unittest.main(argv=['first-arg-is-ignored'], exit=False)

[15, 18]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[

.


[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]


.

[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[

.E


[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]


.

[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[20, 20]
[

.


[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]
[10, 10]



ERROR: test_nightly_moves (__main__.TestJacksCarRentalEnvironment)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "<ipython-input-4-d247c7c8d771>", line 28, in test_nightly_moves
    env.reset(0)
NameError: name 'env' is not defined

----------------------------------------------------------------------
Ran 6 tests in 5.411s

FAILED (errors=1)


<unittest.main.TestProgram at 0x10eacaac8>

In [None]:
# pro car rental 20 Zustaende --> 2 Car rentals --> 20^2 --> 400 Zustaende
# pro Zustand 10 Aktionen moegliche
# Von allen Zustaenden kann man zu allen Zustaenden gelange --> P , abhaengig von der Policy 