<a href="https://colab.research.google.com/github/Daffand/UAS_BigData/blob/main/UAS_BDA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import random

In [None]:
class Environment:
    """Deterministic square grid world with a goal cell and penalty "hole" cells.

    A state is a ``(x, y)`` tuple of integer coordinates, each in
    ``[0, grid_size - 1]``. The 'up'/'down' actions decrease/increase the
    first coordinate and 'left'/'right' decrease/increase the second one.
    A move that would leave the grid keeps the agent in place.

    Args:
        grid_size: Side length of the grid (default 5).
        goal: Goal cell; defaults to the corner ``(grid_size - 1, grid_size - 1)``.
        hole: Iterable of penalty cells; defaults to ``[(1, 1), (3, 3)]``.
            Cells outside the grid are harmless because they are unreachable.
    """

    # Offset added to (x, y) by each action.
    _MOVES = {
        'up': (-1, 0),
        'down': (1, 0),
        'left': (0, -1),
        'right': (0, 1),
    }

    def __init__(self, grid_size=5, goal=None, hole=None):
        self.grid_size = grid_size
        self.goal = (grid_size - 1, grid_size - 1) if goal is None else tuple(goal)
        # Kept as a list so existing ``state in env.hole`` checks keep working.
        self.hole = [(1, 1), (3, 3)] if hole is None else [tuple(cell) for cell in hole]
        self.actions = ['up', 'down', 'left', 'right']
        self.state = (0, 0)

    def reset(self):
        """Move the agent back to the start cell ``(0, 0)`` and return it."""
        self.state = (0, 0)
        return self.state

    def step(self, action):
        """Apply ``action`` and return ``(next_state, reward)``.

        The reward is 10 for landing on the goal, -10 for landing on a hole
        and -1 for any other move (including bumping into a wall).

        Raises:
            ValueError: If ``action`` is not one of 'up', 'down', 'left', 'right'.
        """
        try:
            dx, dy = self._MOVES[action]
        except KeyError:
            # Previously an unknown action surfaced as an UnboundLocalError.
            raise ValueError(
                f"Unknown action {action!r}; expected one of {self.actions}"
            ) from None

        x, y = self.state
        last = self.grid_size - 1
        # Clamp to the grid: an off-grid move leaves the coordinate unchanged.
        next_state = (min(max(x + dx, 0), last), min(max(y + dy, 0), last))

        if next_state == self.goal:
            reward = 10
        elif next_state in self.hole:
            reward = -10
        else:
            reward = -1

        self.state = next_state
        return next_state, reward

In [None]:
class QLearningAgent:
    """Tabular Q-learning agent with epsilon-greedy action selection.

    The Q-table is a NumPy array of zeros indexed as
    ``q_table[state[0], state[1], action_index]``, where ``action_index`` is
    the position of the action in ``environment.actions``.

    Args:
        environment: Object exposing ``grid_size`` and ``actions``.
        learning_rate: Step size applied to each Q-value update.
        discount_factor: Weight given to the best next-state Q-value.
        epsilon: Probability of picking a random action in ``choose_action``.
    """

    def __init__(self, environment, learning_rate=0.1, discount_factor=0.9, epsilon=0.1):
        side = environment.grid_size
        n_actions = len(environment.actions)

        self.environment = environment
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = epsilon
        self.q_table = np.zeros((side, side, n_actions))

    def choose_action(self, state):
        """Return a random action with probability ``epsilon``, else the greedy one."""
        explore = random.uniform(0, 1) < self.epsilon
        available = self.environment.actions
        if explore:
            return random.choice(available)

        # Greedy choice: np.argmax returns the first index among tied maxima.
        best_index = np.argmax(self.q_table[state[0], state[1]])
        return available[best_index]

    def learn(self, state, action, next_state, reward):
        """Update the Q-value of ``(state, action)`` from one observed transition."""
        action_index = self.environment.actions.index(action)
        cell = (state[0], state[1], action_index)

        old_q = self.q_table[cell]
        best_next_q = np.max(self.q_table[next_state[0], next_state[1]])
        td_error = reward + self.discount_factor * best_next_q - old_q
        self.q_table[cell] = old_q + self.learning_rate * td_error

In [None]:
# Create the environment and the agent
env = Environment()
agent = QLearningAgent(env)

# Training: run epsilon-greedy episodes from the start cell, updating the
# Q-table after every step, until the agent lands on the goal or a hole.
num_episodes = 1000

for episode in range(num_episodes):
    state = env.reset()
    done = False

    while not done:
        action = agent.choose_action(state)
        next_state, reward = env.step(action)
        agent.learn(state, action, next_state, reward)
        state = next_state

        done = state == env.goal or state in env.hole

# Testing: follow the learned policy greedily. Exploration is switched off so
# random actions do not distort the evaluation, and epsilon is restored after.
# A greedy walk in this deterministic grid either reaches a terminal cell
# without revisiting a state or cycles forever, so grid_size**2 steps suffice.
max_test_steps = env.grid_size ** 2
training_epsilon = agent.epsilon
agent.epsilon = 0.0
try:
    state = env.reset()
    path = [state]

    for _ in range(max_test_steps):
        if state == env.goal or state in env.hole:
            break
        action = agent.choose_action(state)
        next_state, _reward = env.step(action)
        state = next_state
        path.append(state)
finally:
    agent.epsilon = training_epsilon

if state == env.goal:
    print("Goal reached!")
elif state in env.hole:
    print("Fell into hole.")
else:
    print("Step limit reached before reaching the goal or a hole.")


Goal reached!


In [None]:
# Show the sequence of states visited during the test run, start cell first.
print("Path taken:", path)

Path taken: [(0, 0), (0, 1), (0, 2), (1, 2), (1, 3), (1, 4), (2, 4), (3, 4), (4, 4)]
