### TD($\lambda$) prediction algorithm 

In [1]:
from typing import Iterable, Iterator, TypeVar, List, Sequence, Mapping
from rl.function_approx import Gradient
import rl.markov_process as mp
from rl.markov_decision_process import NonTerminal
import numpy as np
from rl.approximate_dynamic_programming import ValueFunctionApprox
from rl.approximate_dynamic_programming import extended_vf

S = TypeVar('S')

#### Tabular Case

In [2]:
def td_lambda_prediction_tab(
        traces: Iterable[Iterable[mp.TransitionStep[S]]],
        gamma: float,
        lambd: float,
        alpha: float
) -> Iterator[Mapping[S, float]]:
    """Tabular TD(lambda) prediction using accumulating eligibility traces.

    Args:
        traces: an iterable of traces; each trace is an iterable of
            transition steps exposing ``state``, ``reward`` and
            ``next_state`` (the underlying state is read via ``.state``).
        gamma: discount factor.
        lambd: trace-decay parameter (lambda).
        alpha: step size applied to every update.

    Yields:
        First an empty mapping (the initial value function), then one
        snapshot of the value function after every transition step.
        Each yielded mapping is an independent copy, so results collected
        by the caller are not altered by later updates.

    States that have never carried a non-zero eligibility trace are absent
    from the yielded mapping and are treated as having value 0.
    """
    vf: dict = {}  # state -> current value estimate

    yield dict(vf)
    decay: float = gamma * lambd
    for trace in traces:
        # Eligibility traces start from zero at the beginning of each trace.
        el_tr: dict = {}  # state -> eligibility

        for step in trace:
            state = step.state.state
            # TD error computed from the value function *before* this update;
            # unseen (including terminal) states default to a value of 0.
            td_error: float = (
                step.reward
                + gamma * vf.get(step.next_state.state, 0.)
                - vf.get(state, 0.)
            )

            # Every trace decays on every step, not only the visited one.
            for seen in el_tr:
                el_tr[seen] *= decay
            el_tr[state] = el_tr.get(state, 0.) + 1.

            # Update every state with a trace. Iterating over the keys of
            # `vf` instead would never update anything, as `vf` starts empty.
            for seen, eligibility in el_tr.items():
                vf[seen] = vf.get(seen, 0.) + alpha * td_error * eligibility

            yield dict(vf)

#### Function Approximation Case

In [None]:

def td_lambda_prediction_func_approx(
        traces: Iterable[Iterable[mp.TransitionStep[S]]],
        approx_0: ValueFunctionApprox[S],
        γ: float,
        lambd: float
) -> Iterator[ValueFunctionApprox[S]]:
    """TD(lambda) prediction with a function approximation of the values.

    Args:
        traces: an iterable of traces, each an iterable of transition steps
            providing ``state``, ``reward`` and ``next_state``.
        approx_0: the initial value function approximation.
        γ: discount factor.
        lambd: trace-decay parameter (lambda).

    Yields:
        ``approx_0`` first, then the approximation obtained after each
        transition step of each trace.
    """
    def unit_output_derivative(x1, y1):
        # One entry of 1 per x value, whatever the targets are.
        return np.ones(len(x1))

    trace_decay: float = γ * lambd
    current: ValueFunctionApprox[S] = approx_0
    yield current

    for episode in traces:
        # The eligibility trace restarts from a zero gradient on every trace.
        eligibility: Gradient[ValueFunctionApprox[S]] = \
            Gradient(current).zero()
        for transition in episode:
            state: NonTerminal[S] = transition.state
            # One-step bootstrapped target for the current state.
            target: float = transition.reward + γ * extended_vf(
                current,
                transition.next_state
            )
            decayed = eligibility * trace_decay
            step_gradient = current.objective_gradient(
                xy_vals_seq=[(state, target)],
                obj_deriv_out_fun=unit_output_derivative
            )
            eligibility = decayed + step_gradient
            # Scale the trace by (prediction - target) and apply it.
            residual = current(state) - target
            current = current.update_with_gradient(eligibility * residual)
            yield current