In [2]:
import gym
from src.driver import Driver
from src.agents.random import Random
from src.agents.qlearner import Qlearner
from src.agents.tdlearner import TDlearner

'''
USAGE INSTRUCTIONS

Each function below pairs one agent with one environment. At the bottom of
this file there is one invocation per function; keep all but one of them
commented out. Choose which agent and environment you want to run, and
uncomment that line. For example, to see the Qlearner agent operating the
Taxi environment, uncomment:

    #taxi_qlearner()

It is recommended you leave all other function invocations commented out when
you run this file, as it will be faster and you will only see the output you
are interested in.
'''

def taxi_random():
    """Run the Random agent on the 'Taxi-v3' gym environment.

    Builds a Driver configured with 1000 epochs, the environment and the
    agent, then delegates to ``Driver.run_taxi_random``.
    """
    random_agent = Random()
    settings = dict(
        epochs=1000,
        env=gym.make('Taxi-v3'),
        agent=random_agent,
    )
    Driver(settings).run_taxi_random()

def taxi_qlearner():
    """Run the Qlearner agent on the 'Taxi-v3' gym environment.

    The agent is created with alpha=0.1, gamma=0.6 and epsilon=0.1; the
    Driver is configured with 10000 epochs and then asked to execute
    ``run_taxi_qlearner``.
    """
    hyperparameters = dict(alpha=0.1, gamma=0.6, epsilon=0.1)
    learner = Qlearner(hyperparameters)
    settings = dict(
        epochs=10000,
        env=gym.make('Taxi-v3'),
        agent=learner,
    )
    Driver(settings).run_taxi_qlearner()

def cartpole_random():
    """Run the Random agent on the 'CartPole-v1' gym environment.

    Builds a Driver configured with 1000 epochs, the environment and the
    agent, then delegates to ``Driver.run_cartpole_random``.
    """
    random_agent = Random()
    settings = dict(
        epochs=1000,
        env=gym.make('CartPole-v1'),
        agent=random_agent,
    )
    Driver(settings).run_cartpole_random()

def cartpole_qlearner():
    """Run the Qlearner agent on the 'CartPole-v1' gym environment.

    The agent is created with alpha=0.2, gamma=0.5 and epsilon=0.1; the
    Driver is configured with 50000 epochs and then asked to execute
    ``run_cartpole_qlearner``.
    """
    hyperparameters = dict(alpha=0.2, gamma=0.5, epsilon=0.1)
    learner = Qlearner(hyperparameters)
    settings = dict(
        epochs=50000,
        env=gym.make('CartPole-v1'),
        agent=learner,
    )
    Driver(settings).run_cartpole_qlearner()

def cartpole_tdlearner():
    """Run the TDlearner agent on the 'CartPole-v1' gym environment.

    The agent is created with alpha=0.2, gamma=0.5 and epsilon=0.1; the
    Driver is configured with 50000 epochs and then asked to execute
    ``run_cartpole_tdlearner``.
    """
    hyperparameters = dict(alpha=0.2, gamma=0.5, epsilon=0.1)
    learner = TDlearner(hyperparameters)
    settings = dict(
        epochs=50000,
        env=gym.make('CartPole-v1'),
        agent=learner,
    )
    Driver(settings).run_cartpole_tdlearner()

def frozen_lake_random():
    """Run the Random agent on the 'FrozenLake-v0' gym environment.

    Builds a Driver configured with 1000 epochs, the environment and the
    agent, then delegates to ``Driver.run_frozen_lake_random``.
    """
    random_agent = Random()
    settings = dict(
        epochs=1000,
        env=gym.make('FrozenLake-v0'),
        agent=random_agent,
    )
    Driver(settings).run_frozen_lake_random()

def frozen_lake_qlearner():
    """Run the Qlearner agent on the 'FrozenLake-v0' gym environment.

    The agent is created with alpha=0.1, gamma=0.6 and epsilon=0.3; the
    Driver is configured with 10000 epochs and then asked to execute
    ``run_frozen_lake_qlearner``.
    """
    hyperparameters = dict(alpha=0.1, gamma=0.6, epsilon=0.3)
    learner = Qlearner(hyperparameters)
    settings = dict(
        epochs=10000,
        env=gym.make('FrozenLake-v0'),
        agent=learner,
    )
    Driver(settings).run_frozen_lake_qlearner()

def frozen_lake_tdlearner():
    """Run the TDlearner agent on the 'FrozenLake-v0' gym environment.

    The agent is created with alpha=0.1, gamma=0.6 and epsilon=0.3; the
    Driver is configured with 10000 epochs and then asked to execute
    ``run_frozen_lake_tdlearner``.
    """
    hyperparameters = dict(alpha=0.1, gamma=0.6, epsilon=0.3)
    learner = TDlearner(hyperparameters)
    settings = dict(
        epochs=10000,
        env=gym.make('FrozenLake-v0'),
        agent=learner,
    )
    Driver(settings).run_frozen_lake_tdlearner()

if __name__ == '__main__':
    # Script entry point: uncomment exactly one of the invocations below to
    # run that agent/environment combination.
    #taxi_random()
    #taxi_qlearner()
    #cartpole_random()
    #cartpole_qlearner()
    #cartpole_tdlearner()
    #frozen_lake_random()
    #frozen_lake_qlearner()
    #frozen_lake_tdlearner()

    # A suite made only of comments is a syntax error; `pass` keeps the file
    # importable and runnable while every invocation is commented out.
    pass



progress: 10%
progress: 20%
progress: 30%
progress: 40%
progress: 50%
progress: 60%
progress: 70%
progress: 80%
progress: 90%
progress: 100%
Step: 0 | Cumulative Reward: 0
RENDERING...

[41mS[0mFFF
FHFH
FFFH
HFFG
observation:  0
action:  1
reward:  0
Step: 1 | Cumulative Reward: 0.0
RENDERING...
  (Down)
S[41mF[0mFF
FHFH
FFFH
HFFG
observation:  1
action:  3
reward:  0.0
Step: 2 | Cumulative Reward: 0.0
RENDERING...
  (Up)
[41mS[0mFFF
FHFH
FFFH
HFFG
observation:  0
action:  1
reward:  0.0
Step: 3 | Cumulative Reward: 0.0
RENDERING...
  (Down)
S[41mF[0mFF
FHFH
FFFH
HFFG
observation:  1
action:  3
reward:  0.0
Step: 4 | Cumulative Reward: 0.0
RENDERING...
  (Up)
S[41mF[0mFF
FHFH
FFFH
HFFG
observation:  1
action:  3
reward:  0.0
Step: 5 | Cumulative Reward: 0.0
RENDERING...
  (Up)
SF[41mF[0mF
FHFH
FFFH
HFFG
observation:  2
action:  0
reward:  0.0
Step: 6 | Cumulative Reward: 0.0
RENDERING...
  (Left)
SFFF
FH[41mF[0mH
FFFH
HFFG
observation:  6
action:  1
reward:  0.0
Step: 7 |