In [2]:
import numpy as np

rows,cols=3,4
states=[(i,j) for i in range(rows) for j in range(cols)]
wall=(1,2)
goal=(0,3)
danger=(1,3)
states.remove(wall)
actions=["UP","DOWN","LEFT","RIGHT"]

def reward(state):
    if state==goal:
        return 1.0
    elif state==danger:
        return -1.0
    else:
        return -0.04



# Step 2: Transition Model (80-10-10)
def next_state(state,action):
    """Deterministic transition before adding stochasticity."""
    i,j=state
    if action=="UP":
        i=max(i-1,0)
    elif action=="DOWN":
        i=min(i+1,rows-1)
    elif action=="LEWrFT":
        j=max(j-1,0)
    elif action=="RIGHT":
        j=min(j+1,cols-1)
    if (i,j)==wall:
        return state
    return (i,j)

def transition_probabilities(state,action):
    """Stochastic transition model with 80-10-10 rule."""
    if state in [goal,danger]:
        return {state:1.0}
    probs={}
    intended=next_state(state,action)
    if action=="UP":
        left,right="LEFT","RIGHT"
    elif action=="DOWN":
        left,right="RIGHT","LEFT"
    elif action=="LEFT":
        left,right="DOWN","UP"
    else: # RIGHT
        left,right="UP","DOWN"
    slip_left=next_state(state,left)
    slip_right=next_state(state,right)
    probs[intended]=probs.get(intended,0)+0.8
    probs[slip_left]=probs.get(slip_left,0)+0.1
    probs[slip_right]=probs.get(slip_right,0)+0.1
    return probs

# Step 3: Testing with Discount Factor (Î³)
gamma=0.9

# Example 1: From state (2,0), action="UP"
state=(2,0)
action="UP"
transitions=transition_probabilities(state,action)
print(f"From state {state}, action={action}:")
for next_s,prob in transitions.items():
    print(f" -> {next_s} with P={prob:.2f}, Reward={reward(next_s)}")

# Example 2: From state (0,2), action="RIGHT"
state=(0,2)
action="RIGHT"
transitions=transition_probabilities(state,action)
print(f"\nFrom state {state}, action={action}:")
for next_s,prob in transitions.items():
    print(f" -> {next_s} with P={prob:.2f}, Reward={reward(next_s)}")

examples=[((1,0),"RIGHT"),((2,2),"UP"),((0,0),"LEFT"),((1,1),"DOWN"),((0,3),"LEFT"),((1,3),"UP")]
for state,action in examples:
    transitions=transition_probabilities(state,action)
    print(f"\nFrom state {state}, action={action}:")
    for next_s,prob in transitions.items():
        print(f" -> {next_s} with P={prob:.2f}, Reward={reward(next_s)}")

From state (2, 0), action=UP:
 -> (1, 0) with P=0.80, Reward=-0.04
 -> (2, 0) with P=0.10, Reward=-0.04
 -> (2, 1) with P=0.10, Reward=-0.04

From state (0, 2), action=RIGHT:
 -> (0, 3) with P=0.80, Reward=1.0
 -> (0, 2) with P=0.20, Reward=-0.04

From state (1, 0), action=RIGHT:
 -> (1, 1) with P=0.80, Reward=-0.04
 -> (0, 0) with P=0.10, Reward=-0.04
 -> (2, 0) with P=0.10, Reward=-0.04

From state (2, 2), action=UP:
 -> (2, 2) with P=0.90, Reward=-0.04
 -> (2, 3) with P=0.10, Reward=-0.04

From state (0, 0), action=LEFT:
 -> (0, 0) with P=0.90, Reward=-0.04
 -> (1, 0) with P=0.10, Reward=-0.04

From state (1, 1), action=DOWN:
 -> (2, 1) with P=0.80, Reward=-0.04
 -> (1, 1) with P=0.20, Reward=-0.04

From state (0, 3), action=LEFT:
 -> (0, 3) with P=1.00, Reward=1.0

From state (1, 3), action=UP:
 -> (1, 3) with P=1.00, Reward=-1.0
