# In [1]:
import numpy as np
import pandas as pd
import scipy
from typing import TypeVar,Mapping, Set, Generic, Sequence, Callable

###  RL interface with value function approximation

# In [None]:
class MDPforRL_FA():
    """MDP described by user-supplied simulator callables.

    The object stores three callables and a discount factor, and exposes
    thin methods that forward to those callables. States and actions are
    represented as ``int`` in this part of the code.
    """

    def __init__(self,
                 state_action_simulator: Callable[..., int],
                 state_reward_simulator: Callable[[int, int], tuple],
                 init_state: Callable[[], int],
                 gamma: float) -> None:
        """Store the simulators and the discount factor.

        Args:
            state_action_simulator: Callable used to produce an action.
                This class only ever invokes it with no arguments (see
                ``gen_init_action``).
                NOTE(review): the name suggests a state -> action mapping;
                confirm the intended call signature with the callers.
            state_reward_simulator: Callable invoked as ``f(S, A)`` with the
                current state and action; its result is returned unchanged.
                Presumably a ``(next_state, reward)`` pair -- verify against
                the simulators actually passed in.
            init_state: Zero-argument callable returning an initial state.
            gamma: Discount factor; stored as-is, no validation is done.
        """
        super().__init__()

        self.init_state = init_state
        self.state_action_func = state_action_simulator
        self.state_reward_func = state_reward_simulator
        self.gamma = gamma

    def gen_init_state(self) -> int:
        """Return whatever the ``init_state`` callable produces."""
        return self.init_state()

    def gen_init_action(self) -> int:
        """Return whatever the action simulator produces when called with no arguments."""
        return self.state_action_func()

    def gen_next_state_reward(self, S: int, A: int) -> tuple:
        """Return the state/reward simulator's result for state ``S`` and action ``A``."""
        return self.state_reward_func(S, A)
    
class FA_RL_interface():
    """Adapter that exposes an MDP object's generators to RL algorithms.

    Every method forwards to the ``mdp`` instance supplied at construction.
    """

    def __init__(self, mdp: "MDPforRL_FA"):
        """Keep a reference to ``mdp``; all other methods delegate to it."""
        super().__init__()
        self.mdp = mdp

    # Generate initial step
    def init_state_gen(self) -> int:
        """Return the result of ``mdp.gen_init_state()``."""
        # Use the stored instance, not a module-level name that may not exist.
        return self.mdp.gen_init_state()

    def init_action_gen(self) -> int:
        """Return the result of ``mdp.gen_init_action()``."""
        return self.mdp.gen_init_action()

    # Generate next step
    def next_state_gen(self, cur_state: int, cur_act: int) -> tuple:
        """Return ``mdp.gen_next_state_reward(cur_state, cur_act)`` unchanged."""
        return self.mdp.gen_next_state_reward(cur_state, cur_act)
    

### Monte-Carlo Prediction algorithm with Value Function approximation

### TD Prediction algorithm with Value Function approximation

### TD($\lambda$) Prediction algorithm with Value Function approximation

### SARSA with Value Function approximation

### Q-Learning with Value Function approximation

### Test implementations