In [1]:
import numpy as np

# Matrix Operations
This section performs various operations on a 5x5 matrix.

In [8]:
# Grid-world configuration.
grid_size = 5  # side length: the grid has grid_size rows and grid_size columns

# The goal is the bottom-right cell. Deriving it from grid_size keeps the goal
# inside the grid if the size is ever changed (for grid_size = 5 this is (4, 4)).
goal_state = (grid_size - 1, grid_size - 1)

In [9]:
# Build the 5x5 matrix used by the matrix cells: the integers 1..25 laid out
# row by row. It is defined here because no earlier cell shown in this notebook
# creates it, so a fresh-kernel run would otherwise fail with a NameError.
matrix = np.arange(1, 26).reshape(5, 5)

# Extract columns: matrix[:, k] selects every row of column k as a 1-D array.
column_1, column_2, column_3, column_4, column_5 = (
    matrix[:, k] for k in range(5)
)

# Print the columns
print("\nColumns of the matrix:")
for number, column in enumerate(
    (column_1, column_2, column_3, column_4, column_5), start=1
):
    print(f"Column {number}:", column)


Columns of the matrix:
Column 1: [ 1  6 11 16 21]
Column 2: [ 2  7 12 17 22]
Column 3: [ 3  8 13 18 23]
Column 4: [ 4  9 14 19 24]
Column 5: [ 5 10 15 20 25]


In [10]:
# Scalar addition: the constant 5 is added to every element.
matrix_add = matrix + 5
print("Matrix after adding 5 to each element:", matrix_add, sep="\n")

# Scalar multiplication: every element is doubled (this is not a matrix product).
matrix_mult = matrix * 2
print("Matrix after multiplying each element by 2:", matrix_mult, sep="\n")

# Transpose: rows and columns swap places.
matrix_transpose = matrix.T
print("Transposed Matrix:", matrix_transpose, sep="\n")


Matrix after adding 5 to each element:
[[ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]
 [21 22 23 24 25]
 [26 27 28 29 30]]
Matrix after multiplying each element by 2:
[[ 2  4  6  8 10]
 [12 14 16 18 20]
 [22 24 26 28 30]
 [32 34 36 38 40]
 [42 44 46 48 50]]
Transposed Matrix:
[[ 1  6 11 16 21]
 [ 2  7 12 17 22]
 [ 3  8 13 18 23]
 [ 4  9 14 19 24]
 [ 5 10 15 20 25]]


In [11]:
# Same 25 values arranged as a single row: shape (1, 25).
matrix_reshaped = matrix.reshape((1, 25))
print("Matrix reshaped to 1x25:", matrix_reshaped, sep="\n")

# Same 25 values arranged as a single column: shape (25, 1).
matrix_reshaped_25x1 = matrix.reshape((25, 1))
print("Matrix reshaped to 25x1:", matrix_reshaped_25x1, sep="\n")


Matrix reshaped to 1x25:
[[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
  25]]
Matrix reshaped to 25x1:
[[ 1]
 [ 2]
 [ 3]
 [ 4]
 [ 5]
 [ 6]
 [ 7]
 [ 8]
 [ 9]
 [10]
 [11]
 [12]
 [13]
 [14]
 [15]
 [16]
 [17]
 [18]
 [19]
 [20]
 [21]
 [22]
 [23]
 [24]
 [25]]


In [12]:
# Collapse the matrix into a 1-D array; flatten() returns a new array rather
# than modifying `matrix`.
matrix_flattened = matrix.flatten()
print("Flattened matrix:", matrix_flattened, sep="\n")


Flattened matrix:
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25]


# Markov Decision Process (MDP) Simulation
This section simulates an MDP, where the agent moves through a grid.

In [14]:
# Every cell of the grid is a state, identified by its (row, col) position and
# listed row by row.
states = [
    (row, col)
    for row in range(grid_size)
    for col in range(grid_size)
]

In [15]:

# The four moves the agent can attempt from any cell.
actions = [
    'up',
    'down',
    'left',
    'right',
]

In [16]:
# Reward table keyed by state (not by state-action pair): 10 for the goal
# state, -1 for every other state so that each move carries a penalty.
rewards = {
    cell: 10 if cell == goal_state else -1
    for cell in states
}


In [17]:
# Define the transition dynamics
def next_state(state, action, size=None):
    """Return the cell reached by taking ``action`` from ``state``.

    A move that would leave the grid is clamped, so the agent stays where it
    is along that axis.

    Args:
        state: ``(row, col)`` tuple for the current cell.
        action: One of ``'up'``, ``'down'``, ``'left'`` or ``'right'``.
        size: Side length of the square grid. When omitted, the module-level
            ``grid_size`` is used.

    Returns:
        The ``(row, col)`` tuple of the resulting cell.

    Raises:
        ValueError: If ``action`` is not one of the four known moves. Failing
            here avoids returning ``None``, which would only surface later as
            an unrelated-looking lookup error.
    """
    if size is None:
        size = grid_size  # fall back to the notebook-wide grid size

    i, j = state
    last = size - 1  # largest valid row/column index

    if action == 'up':
        return (max(i - 1, 0), j)  # row 0 is the top edge
    if action == 'down':
        return (min(i + 1, last), j)
    if action == 'left':
        return (i, max(j - 1, 0))  # column 0 is the left edge
    if action == 'right':
        return (i, min(j + 1, last))

    raise ValueError(
        f"Unknown action {action!r}; expected 'up', 'down', 'left' or 'right'"
    )

In [18]:
# Define the transition probabilities (deterministic)
def transition_probability(state, action, next_state_):
    """Return the probability of landing in ``next_state_`` from ``state``.

    The dynamics are deterministic: the probability is 1.0 when
    ``next_state_`` is exactly the cell that ``next_state(state, action)``
    produces, and 0.0 for any other cell.
    """
    reached = next_state(state, action)
    return 1.0 if reached == next_state_ else 0.0

In [19]:
# Define the MDP model
class MDP:
    """Bundle of the pieces that make up the grid-world MDP."""

    def __init__(self, states, actions, rewards, goal_state):
        """Store the states, actions, reward table and goal state as given."""
        self.states, self.actions = states, actions
        self.rewards, self.goal_state = rewards, goal_state

    def step(self, state, action):
        """Take ``action`` in ``state`` and report the outcome.

        Returns:
            A ``(next_state, reward)`` pair, where the reward is the entry of
            ``self.rewards`` for the state that was arrived in.
        """
        arrived = next_state(state, action)
        return arrived, self.rewards[arrived]


In [20]:
# Assemble the grid-world MDP from the pieces defined above.
mdp = MDP(
    states=states,
    actions=actions,
    rewards=rewards,
    goal_state=goal_state,
)


In [21]:
# Define a policy (random for simplicity)
def random_policy(state):
    """Pick one of the module-level ``actions`` at random.

    ``state`` is accepted so the function can be called as ``policy(state)``,
    but the choice does not depend on it. The draw uses NumPy's global random
    state, so results vary between runs unless that state is seeded.
    """
    chosen_action = np.random.choice(actions)
    return chosen_action

In [22]:
# Simulate the agent's journey through the grid
def simulate_mdp(mdp, start_state, policy, max_steps=10):
    """Run one episode and print a trace of every move.

    The episode ends as soon as the agent is in ``mdp.goal_state`` or after
    ``max_steps`` moves, whichever comes first. The goal check is made after
    every move, so reaching the goal on the very last allowed move is reported
    as well.

    Args:
        mdp: Object exposing ``goal_state`` and ``step(state, action)``, where
            ``step`` returns a ``(next_state, reward)`` pair.
        start_state: State in which the episode begins.
        policy: Callable mapping a state to an action.
        max_steps: Maximum number of moves to take.

    Returns:
        None. The trace and the accumulated reward are printed.
    """
    state = start_state
    total_reward = 0
    steps_taken = 0
    print(f"Starting state: {state}")

    reached_goal = state == mdp.goal_state
    while not reached_goal and steps_taken < max_steps:
        action = policy(state)
        next_state_, reward = mdp.step(state, action)
        total_reward += reward
        steps_taken += 1

        print(f"Step {steps_taken}: State: {state}, Action: {action}, Next State: {next_state_}, Reward: {reward}")
        state = next_state_
        # Re-check after the move so arrival on the final step is not missed.
        reached_goal = state == mdp.goal_state

    if reached_goal:
        print(f"Reached goal state {mdp.goal_state} at step {steps_taken}")

    print(f"Total reward: {total_reward}")

In [31]:
# Run one episode with the random policy, starting in the top-left cell (0, 0).
simulate_mdp(mdp=mdp, start_state=(0, 0), policy=random_policy)



Starting state: (0, 0)
Step 1: State: (0, 0), Action: left, Next State: (0, 0), Reward: -1
Step 2: State: (0, 0), Action: up, Next State: (0, 0), Reward: -1
Step 3: State: (0, 0), Action: left, Next State: (0, 0), Reward: -1
Step 4: State: (0, 0), Action: right, Next State: (0, 1), Reward: -1
Step 5: State: (0, 1), Action: up, Next State: (0, 1), Reward: -1
Step 6: State: (0, 1), Action: down, Next State: (1, 1), Reward: -1
Step 7: State: (1, 1), Action: right, Next State: (1, 2), Reward: -1
Step 8: State: (1, 2), Action: right, Next State: (1, 3), Reward: -1
Step 9: State: (1, 3), Action: right, Next State: (1, 4), Reward: -1
Step 10: State: (1, 4), Action: right, Next State: (1, 4), Reward: -1
Total reward: -10


In [26]:
# Second episode from (0, 0); the random policy gives a different trajectory.
simulate_mdp(mdp=mdp, start_state=(0, 0), policy=random_policy)

Starting state: (0, 0)
Step 1: State: (0, 0), Action: up, Next State: (0, 0), Reward: -1
Step 2: State: (0, 0), Action: down, Next State: (1, 0), Reward: -1
Step 3: State: (1, 0), Action: right, Next State: (1, 1), Reward: -1
Step 4: State: (1, 1), Action: right, Next State: (1, 2), Reward: -1
Step 5: State: (1, 2), Action: down, Next State: (2, 2), Reward: -1
Step 6: State: (2, 2), Action: left, Next State: (2, 1), Reward: -1
Step 7: State: (2, 1), Action: left, Next State: (2, 0), Reward: -1
Step 8: State: (2, 0), Action: down, Next State: (3, 0), Reward: -1
Step 9: State: (3, 0), Action: left, Next State: (3, 0), Reward: -1
Step 10: State: (3, 0), Action: left, Next State: (3, 0), Reward: -1
Total reward: -10


In [27]:
# Third episode from (0, 0) with the same random policy.
simulate_mdp(mdp=mdp, start_state=(0, 0), policy=random_policy)

Starting state: (0, 0)
Step 1: State: (0, 0), Action: up, Next State: (0, 0), Reward: -1
Step 2: State: (0, 0), Action: right, Next State: (0, 1), Reward: -1
Step 3: State: (0, 1), Action: right, Next State: (0, 2), Reward: -1
Step 4: State: (0, 2), Action: left, Next State: (0, 1), Reward: -1
Step 5: State: (0, 1), Action: up, Next State: (0, 1), Reward: -1
Step 6: State: (0, 1), Action: right, Next State: (0, 2), Reward: -1
Step 7: State: (0, 2), Action: down, Next State: (1, 2), Reward: -1
Step 8: State: (1, 2), Action: down, Next State: (2, 2), Reward: -1
Step 9: State: (2, 2), Action: right, Next State: (2, 3), Reward: -1
Step 10: State: (2, 3), Action: right, Next State: (2, 4), Reward: -1
Total reward: -10
