# Exercise 1
## Finite Horizon Optimal Control
a. Given the dynamical system
$$
x_{n+1} = 
\begin{cases} 
  -x_n + 1 + u_n & \text{if } -2 \leq -x_n + 1 + u_n \leq 2 \\
  2 & \text{if } -x_n + 1 + u_n > 2 \\
  -2 &  \text{else}
\end{cases}
$$
The cost function is given by
$$
J = \sum_{k=0}^{2} \left(2 |x_k| + |u_k|\right) + x_3^2
$$

In [1]:
from typing  import Tuple, Optional, List
from math import inf

In [2]:
# Admissible states are the integers -2..2; admissible controls are -1..1.
x_space = list(range(-2, 3))
u_space = list(range(-1, 2))

In [3]:
def Update_State(x_n, u_n, w = 1):
    """Return the successor state ``-x_n + w + u_n`` saturated to [-2, 2].

    Args:
        x_n: Current state.
        u_n: Control input. ``None`` is rejected: a message is printed and
            ``None`` is returned.
        w: Additive term of the dynamics (defaults to 1).

    Returns:
        The unsaturated value when it lies in [-2, 2], ``2`` when it exceeds
        2, ``-2`` otherwise; ``None`` when ``u_n`` is ``None``.
    """
    if u_n is None:
        print("Invalid Control! None is not a valid control argument")
        return None

    # Compute the candidate once so that the range test and the saturation
    # test both use the same ``w`` (the upper test previously hard-coded 1).
    candidate = -x_n + w + u_n
    if -2 <= candidate <= 2:
        return candidate
    return 2 if candidate > 2 else -2

def Cost(x, u):
    """Return the running cost 2*|x| + |u| for state ``x`` and control ``u``."""
    state_penalty = abs(x) * 2
    control_penalty = abs(u)
    return state_penalty + control_penalty


In [4]:
class Optimal_State:
    """Record holding a state together with a cost and a control input."""

    def __init__(self, state: int, cost: float, control: Optional[int]):
        # Keep the three constructor arguments as same-named attributes.
        self.state, self.cost, self.control = state, cost, control

class FiniteHorizonDP:
    def __init__(self, num_stages: int, states: List[int]):
        # Initialize container as a list of lists, with each list representing a stage
        self.container = [[None for _ in states] for _ in range(num_stages)]
        self.num_stages = num_stages
        self.states = states

    def recall(self, stage: int, x: int) -> Optional[Optimal_State]:
        # Retrieve the container entry for a specific stage and state
        state_index = self.states.index(x)
        return self.container[stage][state_index]

    def store(self, stage: int, x: int, cost: float, control: Optional[int]):
        # Create and store a container entry for a specific stage and state
        state_index = self.states.index(x)
        self.container[stage][state_index] = Optimal_State(state=x, cost=cost, control=control)

    # Define the cost-to-go function using dynamic programming where w is constant
    def cost_to_go(self, stage: int, x: int) -> Tuple[float, Optional[int]]:
        # Check if the cost for the current stage and state is already memoized
        container_recall = self.recall(stage, x)
        if container_recall is not None:
            return container_recall.cost, container_recall.control

        if stage == self.num_stages - 1:
            # Base case: final stage, cost is x squared, no control input
            j_min = x ** 2
            u_opt = None
            self.store(stage, x, j_min, u_opt)
            return j_min, u_opt

        # Initialize minimum cost to infinity and no optimal control
        j_min = inf
        u_opt = None

        # Iterate through all possible control inputs to find the optimal one
        for u_i in u_space:
            # Compute the next state based on current state and control input
            x1 = Update_State(x_n=x, u_n=u_i)
            # Compute the total cost: current cost + cost to go from the next state
            j_i = Cost(x=x, u=u_i) + self.cost_to_go(stage=stage + 1, x=x1)[0]
            # Update the minimum cost and optimal control if a lower cost is found
            if j_i < j_min:
                j_min = j_i
                u_opt = u_i

        # Memoize the computed minimum cost and optimal control for the current stage and state
        self.store(stage, x, j_min, u_opt)
        return j_min, u_opt
    
    # Define the cost-to-go function using dynamic programming where w has a probability to be 0 or 1
    def cost_to_go_prob(self, stage:int, x:int) -> Tuple[float, Optional[int]]:
        # Check iif the cost of the current stage and state is already memoized
        container_recall = self.recall(stage, x)
        if container_recall is not None:
            return container_recall.cost, container_recall.control
        if stage == self.num_stages - 1:
            # Base case: final stage, cost is x squared, no control input
            j_min = x ** 2
            u_opt = None
            self.store(stage, x, j_min, u_opt)
            return j_min, u_opt

        # Initialize minimum cost to infinity and no optimal control
        j_min = inf
        u_opt = None

        # Iterate through all possible control inputs to find the optimal one
        for u_i in u_space:
            x1_w0 = Update_State(x_n=x, u_n=u_i, w=0)
            x1_w1 = Update_State(x_n=x, u_n=u_i, w=1)
            # Compute the total cost: current cost + cost to go from the next state
            j_i = Cost(x=x, u=u_i) + 0.4*self.cost_to_go_prob(stage=stage + 1, x=x1_w0)[0] + 0.6*self.cost_to_go_prob(stage=stage + 1, x=x1_w1)[0]

            # Update the minimum cost and optimal control if a lower cost is found
            if j_i < j_min:
                j_min = j_i
                u_opt = u_i
        # Memoize the computed minimum cost and optimal control for the current stage and state
        self.store(stage, x, j_min, u_opt)
        return j_min, u_opt

    def display_costs_and_controls(self):
        # Define the header with columns for each stage
        header = ["State"]
        for stage in range(self.num_stages):
            header.append(f"J_{stage}")
            header.append(f"u_{stage}")

        # Print the header with formatted spacing
        print("{:<6} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10}".format(*header))
        print("-" * 80)

        # Iterate through each possible state and compute costs and optimal controls
        for x in self.states:
            row = [x]
            for stage in range(self.num_stages):
                j, u = self.cost_to_go(stage=stage, x=x)
                row.append(j)
                row.append(u)

            # Format and print the row with aligned columns
            print("{:<6} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10}".format(*row))

    
    def display_costs_and_controls_prob(self):
        # Define the header with columns for each stage
        header = ["State"]
        for stage in range(self.num_stages):
            header.append(f"J_{stage}")
            header.append(f"u_{stage}")

        # Print the header with formatted spacing
        print("{:<6} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10}".format(*header))
        print("-" * 80)

        # Iterate through each possible state and compute costs and optimal controls
        for x in self.states:
            row = [x]
            for stage in range(self.num_stages):
                j, u = self.cost_to_go_prob(stage=stage, x=x)
                row.append(f"{j:.3f}")
                row.append(u)

            # Format and print the row with aligned columns
            print("{:<6} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10} {:<10}".format(*row))



# Build a 4-stage solver over x_space and print its w = 1 cost/control table.
Optimal_Policy = FiniteHorizonDP(num_stages=4, states=x_space)
Optimal_Policy.display_costs_and_controls()


State  J_0        u_0        J_1        u_1        J_2        u_2        J_3       
--------------------------------------------------------------------------------
-2     10         0          9          0          8          0          4         
-1     6          -1         5          -1         4          -1         1         
0      3          -1         2          -1         1          -1         0         
1      4          0          3          0          2          0          1         
2      7          1          6          1          5          0          4         


b. What are the sequences of control actions and states, and the optimal cost, for $x_0 = 0$, $x_0 = -2$ and $x_0 = 2$?

In [5]:
# Function to extract the stagewise optimized policy
def Stagewise(x):
    """Print the optimal trajectory of ``Optimal_Policy`` starting from ``x``.

    For every non-terminal stage the state, its cost-to-go and its optimal
    control are printed, then the control is applied through ``Update_State``
    to obtain the next state.  The terminal stage prints only the state and
    its cost, since it has no control.

    Args:
        x: Initial state; must be one of the states of ``Optimal_Policy``.
    """
    # Read the horizon from the policy rather than repeating the literal 4,
    # and label the terminal line with its real stage index.
    terminal = Optimal_Policy.num_stages - 1
    for i in range(terminal):
        # A single lookup yields both the cost and the control.
        J, u = Optimal_Policy.cost_to_go(i, x)
        print(f"Stage {i}: x = {x}, J = {J}, u = {u}")
        x = Update_State(x, u)
    print(f"Stage {terminal}: x = {x}, J = {Optimal_Policy.cost_to_go(terminal, x)[0]}")


# Print the stagewise optimized policy for each initial state of interest,
# with a blank-line separator between consecutive reports.
for position, (title, x0) in enumerate((
    ("Stagewise Policy for Initial State x0 = 0", 0),
    ("Stagewise Policy for Initial State x0 = -2", -2),
    ("Stagewise Policy for Initial State x0 = 2", 2),
)):
    if position > 0:
        print("\n")
    print(title)
    Stagewise(x0)

Stagewise Policy for Initial State x0 = 0
Stage 0: x = 0, J = 3, u = -1
Stage 1: x = 0, J = 2, u = -1
Stage 2: x = 0, J = 1, u = -1
Stage 4: x = 0, J = 0


Stagewise Policy for Initial State x0 = -2
Stage 0: x = -2, J = 10, u = 0
Stage 1: x = 2, J = 6, u = 1
Stage 2: x = 0, J = 1, u = -1
Stage 4: x = 0, J = 0


Stagewise Policy for Initial State x0 = 2
Stage 0: x = 2, J = 7, u = 1
Stage 1: x = 0, J = 2, u = -1
Stage 2: x = 0, J = 1, u = -1
Stage 4: x = 0, J = 0


c. Assume that the constant term 1 in the previous dynamics can sometimes be 0 with probability 0.4. The new dynamics is given by:
$$
x_{n+1} = 
\begin{cases} 
  -x_n + w_n + u_n & \text{if } -2 \leq -x_n + w_n + u_n \leq 2 \\
  2 & \text{if } -x_n + w_n + u_n > 2 \\
  -2 & \text{else}
\end{cases}
$$
where $x_n \in \{-2, -1, 0, 1, 2\}$, $u_n \in \{-1, 0, 1\}$ and $w_n \in \{0, 1\}$ is a random variable with probability
distribution $p(w_n = 0) = 0.4$, $p(w_n = 1) = 0.6$. The cost function to minimize becomes
$$
J = \mathbb{E} \left [\sum_{k=0}^{2} \left(2 |x_k| + |u_k|\right) + x_3^2\right]
$$


In [6]:
# class Optimal_State_Prob:
#     def __init__(self, num_stages: int, states: List[int]):
#         # Initialize container as a list of lists, with each list representing a stage
#         self.container = [[None for _ in states] for _ in range(num_stages)]
#         self.num_stages = num_stages
#         self.states = states

In [7]:
# Build a separate 4-stage solver over x_space and print the table for the
# dynamics where w is 0 or 1 with probabilities 0.4 and 0.6.
Optimal_Policy_Prob = FiniteHorizonDP(num_stages=4, states=x_space)
Optimal_Policy_Prob.display_costs_and_controls_prob()

State  J_0        u_0        J_1        u_1        J_2        u_2        J_3       
--------------------------------------------------------------------------------
-2     10.600     -1         9.200      -1         7.800      -1         4.000     
-1     5.952      -1         4.680      -1         3.600      -1         1.000     
0      2.952      0          1.680      0          0.600      0          0.000     
1      4.880      0          3.800      0          2.400      0          1.000     
2      7.880      1          6.800      1          5.400      1          4.000     
