In [2]:
#1 A)

import numpy as np

# Admissible states are the integers -2..2; admissible controls are -1..1
x_range = list(range(-2, 3))
u_range = list(range(-1, 2))

# Define the state transition function
def state_transition(x_n, u_n):
    """Return the successor state ``-x_n + 1 + u_n`` saturated to [-2, 2].

    Args:
        x_n: current state.
        u_n: control applied at the current step.

    Returns:
        The raw successor when it lies in [-2, 2], 2 when it exceeds 2,
        and -2 in every other case.
    """
    successor = -x_n + 1 + u_n
    if successor > 2:
        return 2
    if -2 <= successor <= 2:
        return successor
    # Everything that is neither above 2 nor inside the band maps to the lower bound.
    return -2

# Define the cost function for a single step
def single_step_cost(x, u):
    """Return the stage cost ``2*|x| + |u|`` for state ``x`` and control ``u``."""
    state_penalty = 2 * abs(x)
    control_penalty = abs(u)
    return state_penalty + control_penalty

# Define the terminal cost function
def terminal_cost(x):
    """Return the terminal cost, the square of the final state ``x``."""
    return pow(x, 2)

# Backward dynamic programming tables, one dict per stage:
#   cost_to_go[n][x]      -> minimal cost accumulated from state x at stage n onwards
#   optimal_control[n][x] -> control attaining that minimum (stages 0..N-1 only)
N = 3  # horizon length
cost_to_go = [dict() for _ in range(N + 1)]
optimal_control = [dict() for _ in range(N)]

# Boundary condition: at stage N only the terminal cost is charged
cost_to_go[N].update((x, terminal_cost(x)) for x in x_range)

# Sweep the stages N-1, ..., 0; each stage reads the table of the stage after it
for n in reversed(range(N)):
    for x in x_range:
        min_cost, best_u = float('inf'), None
        for u in u_range:
            x_next = state_transition(x, u)
            total_cost = single_step_cost(x, u) + cost_to_go[n + 1][x_next]
            # Strict comparison: on a tie the control listed first in u_range is kept
            if not total_cost < min_cost:
                continue
            min_cost, best_u = total_cost, u
        cost_to_go[n][x], optimal_control[n][x] = min_cost, best_u

# Display results: one table per decision stage with the optimal cost and the
# minimizing control for every state.
print("Stage-by-Stage Optimal Costs and Controls:")
for n in range(N):
    print(f"\nStep {n}:")
    print("State\tCost-to-Go(J)\tOptimal Control(u)")
    for x in x_range:
        print(f"{x}\t{cost_to_go[n][x]}\t\t{optimal_control[n][x]}")

# Terminal step costs: stage N carries a cost but no control. The stage index
# comes from N rather than a literal so the table follows the horizon if it changes.
print(f"\nStep {N} (Terminal):")
print("State\tCost-to-Go")
for x in x_range:
    print(f"{x}\t{cost_to_go[N][x]}")


Stage-by-Stage Optimal Costs and Controls:

Step 0:
State	Cost-to-Go(J)	Optimal Control(u)
-2	10		0
-1	6		-1
0	3		-1
1	4		0
2	7		1

Step 1:
State	Cost-to-Go(J)	Optimal Control(u)
-2	9		0
-1	5		-1
0	2		-1
1	3		0
2	6		1

Step 2:
State	Cost-to-Go(J)	Optimal Control(u)
-2	8		0
-1	4		-1
0	1		-1
1	2		0
2	5		0

Step 3 (Terminal):
State	Cost-to-Go
-2	4
-1	1
0	0
1	1
2	4


In [3]:
#1b)

###### ANSWER


# Simulate the system and print the sequence of states and controls for multiple initial states
def simulate_system(initial_states, optimal_control):
    """Apply the stored policy from each initial state and print the resulting run.

    For every entry of ``initial_states`` the policy is followed stage by
    stage. Each step prints the current state, the selected control, the
    successor state and the cost-to-go of the current state. After the last
    stage the final state is printed with its terminal cost-to-go, followed by
    the complete state trajectory.

    Args:
        initial_states: iterable of starting states; every visited state must
            be a key of the per-stage policy and cost tables.
        optimal_control: sequence holding one ``{state: control}`` mapping per
            stage. Its length is taken as the horizon.

    Returns:
        None. All results are printed.

    Note:
        Reads the module-level ``cost_to_go`` table and calls the module-level
        ``state_transition(x, u)``; ``cost_to_go`` needs an entry for stage
        ``len(optimal_control)``.
    """
    # Derive the horizon from the policy instead of hard-coding the stage count
    horizon = len(optimal_control)
    for x0 in initial_states:
        x = x0
        trajectory = [x0]
        print(f"\nInitial state: {x0}")
        for i in range(horizon):
            u = optimal_control[i][x]
            # Compute the successor once and reuse it for both the report and the update
            x_next = state_transition(x, u)
            print(f"For x = {x}, u = {u} so x+ = {x_next} and has the cost as  J = {cost_to_go[i][x]}")
            x = x_next
            trajectory.append(x)
        print(f"For x = {x} with J = {cost_to_go[horizon][x]}")
        print(f"Trajectory: {trajectory}\n")

# Starting states to roll out under the computed policy (edit the list to try others)
initial_states = [2, 0, -2]
simulate_system(initial_states=initial_states, optimal_control=optimal_control)


Initial state: 2
For x = 2, u = 1 so x+ = 0 and has the cost as  J = 7
For x = 0, u = -1 so x+ = 0 and has the cost as  J = 2
For x = 0, u = -1 so x+ = 0 and has the cost as  J = 1
For x = 0 with J = 0
Trajectory: [2, 0, 0, 0]


Initial state: 0
For x = 0, u = -1 so x+ = 0 and has the cost as  J = 3
For x = 0, u = -1 so x+ = 0 and has the cost as  J = 2
For x = 0, u = -1 so x+ = 0 and has the cost as  J = 1
For x = 0 with J = 0
Trajectory: [0, 0, 0, 0]


Initial state: -2
For x = -2, u = 0 so x+ = 2 and has the cost as  J = 10
For x = 2, u = 1 so x+ = 0 and has the cost as  J = 6
For x = 0, u = -1 so x+ = 0 and has the cost as  J = 1
For x = 0 with J = 0
Trajectory: [-2, 2, 0, 0]



In [1]:
#1C)


import numpy as np

# State grid (integers -2..2) and control grid (integers -1..1)
x_range = list(range(-2, 3))
u_range = list(range(-1, 2))

# Define the state transition function with randomness
def state_transition(x_n, u_n, omega_n):
    """Return the successor state ``-x_n + omega_n + u_n`` saturated to [-2, 2].

    Args:
        x_n: current state.
        u_n: control applied at the current step.
        omega_n: disturbance realised at the current step.

    Returns:
        The raw successor when it lies in [-2, 2], 2 when it exceeds 2,
        and -2 in every other case.
    """
    unclamped = -x_n + omega_n + u_n
    inside_band = -2 <= unclamped <= 2
    if inside_band:
        return unclamped
    return 2 if unclamped > 2 else -2

# Define the cost function for a single step
def single_step_cost(x, u):
    """Return the per-step cost: twice the state magnitude plus the control magnitude."""
    magnitude_x, magnitude_u = abs(x), abs(u)
    return 2 * magnitude_x + magnitude_u

# Define the terminal cost function
def terminal_cost(x):
    """Return the cost charged on the final state ``x``: its square."""
    final_penalty = x ** 2
    return final_penalty

# Disturbance distribution: maps each value of omega_n to its probability
prob_omega = {0: 0.4, 1: 0.6}

# Backward dynamic programming tables, one dict per stage:
#   cost_to_go[n][x]      -> minimal expected cost from state x at stage n onwards
#   optimal_control[n][x] -> control attaining that minimum (stages 0..N-1 only)
N = 3  # horizon length
cost_to_go = [dict() for _ in range(N + 1)]
optimal_control = [dict() for _ in range(N)]

# Boundary condition: at stage N only the terminal cost is charged
cost_to_go[N] = {x: terminal_cost(x) for x in x_range}

# Sweep the stages N-1, ..., 0; each stage reads the table of the stage after it
for n in reversed(range(N)):
    for x in x_range:
        min_cost, best_u = float('inf'), None
        for u in u_range:
            stage_cost = single_step_cost(x, u)
            # Accumulate the expectation term by term, in prob_omega's iteration order
            expected_cost = 0
            for omega, prob in prob_omega.items():
                x_next = state_transition(x, u, omega)
                expected_cost += prob * (stage_cost + cost_to_go[n + 1][x_next])
            # Strict comparison: on a tie the control listed first in u_range is kept
            if expected_cost < min_cost:
                min_cost, best_u = expected_cost, u
        cost_to_go[n][x], optimal_control[n][x] = min_cost, best_u

# Display results: one table per decision stage with the optimal expected cost
# and the minimizing control for every state.
print("Stage-by-Stage Optimal Costs and Controls:")
for n in range(N):
    print(f"\nStep {n}:")
    print("State\tCost-to-Go\tOptimal Control")
    for x in x_range:
        print(f"{x}\t{cost_to_go[n][x]}\t\t{optimal_control[n][x]}")

# Terminal step costs: stage N carries a cost but no control. The stage index
# comes from N rather than a literal so the table follows the horizon if it changes.
print(f"\nStep {N} (Terminal):")
print("State\tCost-to-Go")
for x in x_range:
    print(f"{x}\t{cost_to_go[N][x]}")



Stage-by-Stage Optimal Costs and Controls:

Step 0:
State	Cost-to-Go	Optimal Control
-2	10.600000000000001		-1
-1	5.952		-1
0	2.952		0
1	4.880000000000001		0
2	7.88		1

Step 1:
State	Cost-to-Go	Optimal Control
-2	9.200000000000001		-1
-1	4.680000000000001		-1
0	1.6800000000000002		0
1	3.8		0
2	6.8		1

Step 2:
State	Cost-to-Go	Optimal Control
-2	7.8		-1
-1	3.6		-1
0	0.6		0
1	2.4000000000000004		0
2	5.4		1

Step 3 (Terminal):
State	Cost-to-Go
-2	4
-1	1
0	0
1	1
2	4
