<a href="https://colab.research.google.com/github/OneFineStarstuff/State-of-the-Art/blob/main/Reinforcement_Learning_(RL).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np

class QLearningAgent:
    """Tabular Q-learning agent with an epsilon-greedy action policy.

    Action values are stored in ``q_table``, a ``(state_size, action_size)``
    NumPy array initialised to zeros. States and actions are integer indices
    into that table.

    Args:
        state_size: Number of discrete states (rows of the Q-table).
        action_size: Number of discrete actions (columns of the Q-table).
        learning_rate: Step size applied to the temporal-difference error.
        discount_factor: Weight given to the best next-state value.
        exploration_rate: Initial probability of picking a random action.
        exploration_decay: Factor the exploration rate is multiplied by
            after every call to :meth:`update_q_value`.
        min_exploration_rate: Floor below which decay will not push the
            exploration rate. The default of ``0.0`` leaves the decay
            unbounded, so exploration eventually all but stops.
    """

    def __init__(
        self,
        state_size: int,
        action_size: int,
        learning_rate: float = 0.1,
        discount_factor: float = 0.99,
        exploration_rate: float = 1.0,
        exploration_decay: float = 0.995,
        min_exploration_rate: float = 0.0,
    ):
        self.state_size = state_size
        self.action_size = action_size
        self.q_table = np.zeros((state_size, action_size))
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.exploration_decay = exploration_decay
        self.min_exploration_rate = min_exploration_rate

    def choose_action(self, state: int):
        """Return an action index for ``state`` using epsilon-greedy selection.

        With probability ``exploration_rate`` a uniformly random action is
        drawn; otherwise the action with the highest Q-value is returned
        (``np.argmax`` resolves ties in favour of the lowest index).
        """
        if np.random.rand() < self.exploration_rate:
            return np.random.choice(self.action_size)
        return np.argmax(self.q_table[state])

    def update_q_value(
        self,
        state: int,
        action: int,
        reward: float,
        next_state: int,
        done: bool = False,
    ) -> None:
        """Apply one Q-learning update and decay the exploration rate.

        Args:
            state: Index of the state the action was taken in.
            action: Index of the action that was taken.
            reward: Reward received for the transition.
            next_state: Index of the state reached.
            done: Set to ``True`` when ``next_state`` is terminal. The target
                is then the reward alone, because no future return can be
                collected from a terminal state.
        """
        if done:
            td_target = reward
        else:
            # Greedy (off-policy) bootstrap from the best next-state value.
            td_target = reward + self.discount_factor * np.max(self.q_table[next_state])
        td_error = td_target - self.q_table[state, action]
        self.q_table[state, action] += self.learning_rate * td_error

        # Decay only while above the floor, so a rate that starts at or
        # below the floor is never raised by this method.
        if self.exploration_rate > self.min_exploration_rate:
            self.exploration_rate = max(
                self.min_exploration_rate,
                self.exploration_rate * self.exploration_decay,
            )

# Toy environment: 5 states visited in a fixed cycle (0 -> 1 -> ... -> 4 -> 0)
# with 2 available actions. The chosen action does not influence the
# transition; a reward of 1 is paid whenever the agent arrives in state 4.
env_states, env_actions = 5, 2
num_episodes = 1000
steps_per_episode = 100
goal_state = 4

agent = QLearningAgent(state_size=env_states, action_size=env_actions)

for episode in range(num_episodes):
    # Every episode begins in a uniformly random state.
    state = np.random.randint(env_states)
    for step in range(steps_per_episode):
        action = agent.choose_action(state)
        # Deterministic step to the next state, wrapping around at the end.
        next_state = (state + 1) % env_states
        reward = int(next_state == goal_state)
        agent.update_q_value(state, action, reward, next_state)
        state = next_state

print("Q-table after training:", agent.q_table, sep="\n")