In [1]:
import copy


class KitchenEnv:
    """Grid-based multi-agent kitchen environment.

    The grid is a list of rows; each cell is a short string code. Codes that
    this class reacts to:

    * ``"B"``, ``"M"``, ``"TO"``, ``"L"``, ``"PL"`` - source stations that hand
      out Bread, Meat, Tomato, Lettuce and a clean Plate respectively.
    * ``"T"`` - table; an item can be dropped on it and picked up again.
    * ``"C"`` - chopping station (raw Tomato/Lettuce -> Chopped).
    * ``"P"`` - cooking station (raw Meat -> Cooked).
    * ``"X"`` - bin; the held item is discarded.

    Agents act on the cell they are standing on. Movement is only limited by
    the grid bounds; no cell blocks movement and agents may share a cell.
    """

    # Item handed out by each source station. Always copied before use so the
    # mutable "contents" list of a plate is never shared between items.
    _STATION_ITEMS = {
        "B": {"type": "Bread", "state": "Whole"},
        "M": {"type": "Meat", "state": "Raw"},
        "TO": {"type": "Tomato", "state": "Raw"},
        "L": {"type": "Lettuce", "state": "Raw"},
        "PL": {"type": "Plate", "state": "Clean", "contents": []},
    }

    def __init__(self, grid, num_agents=2):
        """Create the environment.

        Args:
            grid: Non-empty list of equally long rows of cell codes.
            num_agents: Number of agents; agent ``i`` starts at ``[1, 1 + i]``.
        """
        self.grid = grid
        self.grid_width = len(grid[0])
        self.grid_height = len(grid)
        self.num_agents = num_agents
        self._init_episode_state()

    def _init_episode_state(self):
        """(Re)create all per-episode state; shared by ``__init__`` and ``reset``."""
        # Agent start positions (example positions), stored as [x, y].
        self.agent_positions = [[1, 1 + i] for i in range(self.num_agents)]
        # NOTE: directions are initialised here but not updated anywhere in
        # this class.
        self.agent_dirs = ["move_down"] * self.num_agents
        self.agent_items = [None for _ in range(self.num_agents)]

        # Objects lying on the map (everything that was put on a table):
        # {(x, y): {"type": "Tomato", "state": "Chopped"}, ...}
        self.objects_on_map = {}

        # Logging infrastructure.
        self.agent_logs = {f"agent_{i}": [] for i in range(self.num_agents)}
        # NOTE: burger_logs is not written to anywhere in this class.
        self.burger_logs = []
        self.snapshot_logs = []

        # Other state.
        self.time_step = 0
        self.terminated = False

    def reset(self):
        """Restart the whole environment (logs included).

        Returns:
            A list with the initial state vector of every agent.
        """
        self._init_episode_state()
        return [self.get_state(i) for i in range(self.num_agents)]

    def get_state(self, agent_id):
        """Return the observation of one agent as a flat list of 0/1 values.

        The vector is the one-hot position (``grid_width * grid_height``
        entries, row-major) followed by a one-hot encoding of the held item.
        """
        # 1. One-hot encode the position.
        pos = self.agent_positions[agent_id]
        onehot_pos = [0] * (self.grid_width * self.grid_height)
        onehot_pos[pos[1] * self.grid_width + pos[0]] = 1

        # 2. Held item.
        # NOTE: "Bread" and merged plates ("plate_*") have no slot in this
        # list, so they are encoded as all zeros. Adding slots would change
        # the state size, so the layout is kept as is.
        item = self.agent_items[agent_id]
        item_types = ["None", "Tomato", "Meat", "Lettuce", "Plate"]
        item_type = item["type"] if item else "None"
        onehot_item = [1 if item_type == t else 0 for t in item_types]

        # 3. (optional) surroundings or direction could be appended later.

        return onehot_pos + onehot_item

    def step(self, actions):
        """Apply one action per agent, in agent order, and advance time.

        Args:
            actions: One action id per agent. 0 = up, 1 = down, 2 = left,
                3 = right, 4 = interact; anything else is a no-op.

        Returns:
            ``(states, rewards, done, info)`` where ``states`` and ``rewards``
            are per-agent lists, ``done`` is the termination flag and ``info``
            is an empty dict.
        """
        rewards = [0.0 for _ in range(self.num_agents)]

        for agent_id, action in enumerate(actions):
            x, y = self.agent_positions[agent_id]

            if action == 0:  # Move Up
                new_pos = (x, y - 1)
            elif action == 1:  # Move Down
                new_pos = (x, y + 1)
            elif action == 2:  # Move Left
                new_pos = (x - 1, y)
            elif action == 3:  # Move Right
                new_pos = (x + 1, y)
            elif action == 4:  # Interact
                rewards[agent_id] += self.handle_interact(agent_id)
                new_pos = (x, y)
            else:
                new_pos = (x, y)  # invalid action

            # Only move when the target stays inside the grid.
            if (0 <= new_pos[0] < self.grid_width) and (0 <= new_pos[1] < self.grid_height):
                self.agent_positions[agent_id] = list(new_pos)

            # Append a log entry; the position is copied so the entry does not
            # alias the live position list.
            self.agent_logs[f"agent_{agent_id}"].append({
                "step": self.time_step,
                "position": list(self.agent_positions[agent_id]),
                "action": action,
                "item": copy.deepcopy(self.agent_items[agent_id]),
            })

        # Record a snapshot of the whole environment.
        self.snapshot_logs.append({
            "step": self.time_step,
            "agents": copy.deepcopy(self.agent_positions),
            "items": copy.deepcopy(self.agent_items),
            "objects": copy.deepcopy(self.objects_on_map),
        })

        self.time_step += 1
        return [self.get_state(i) for i in range(self.num_agents)], rewards, self.is_done(), {}

    def handle_interact(self, agent_id):
        """Resolve an "interact" action on the cell the agent stands on.

        The rules are tried in order and the first one that applies wins.

        Returns:
            The reward for the interaction; ``-0.1`` when nothing applied.
        """
        x, y = self.agent_positions[agent_id]
        cell = self.grid[y][x]
        item = self.agent_items[agent_id]

        pos_key = (x, y)
        ground_obj = self.objects_on_map.get(pos_key)

        # PICKUP from a source station.
        if item is None:
            new_item = self.generate_item_from_station(cell)
            if new_item:
                self.agent_items[agent_id] = new_item
                # A successful pickup currently ends the episode with a high
                # reward.
                self.terminated = True
                return 1.0

        # PICKUP from a table.
        if item is None and ground_obj:
            self.agent_items[agent_id] = ground_obj
            del self.objects_on_map[pos_key]
            return 0.2

        # DROP onto an empty table.
        if item and cell == "T" and pos_key not in self.objects_on_map:
            self.objects_on_map[pos_key] = item
            self.agent_items[agent_id] = None
            return 0.2

        # CHOP
        if item and cell == "C":
            if item["type"] in ["Tomato", "Lettuce"] and item["state"] == "Raw":
                item["state"] = "Chopped"
                return 0.5

        # COOK
        if item and cell == "P":
            if item["type"] == "Meat" and item["state"] == "Raw":
                item["state"] = "Cooked"
                return 0.7

        # DISCARD
        if item and cell == "X":
            self.agent_items[agent_id] = None
            return -0.2

        # MERGE the held item into a plate lying on this cell.
        if item and ground_obj:
            merged = self.try_merge(item, ground_obj)
            if merged:
                self.objects_on_map[pos_key] = merged
                self.agent_items[agent_id] = None
                return 1.0

        return -0.1  # penalty for an invalid interaction

    def generate_item_from_station(self, cell):
        """Return a fresh item for a source-station cell, or ``None`` otherwise."""
        template = self._STATION_ITEMS.get(cell)
        if template is None:
            return None
        return copy.deepcopy(template)

    def try_merge(self, item, plate):
        """Try to put ``item`` onto ``plate``.

        ``plate`` may be a clean plate (type ``"Plate"``) or a plate that
        already holds ingredients (type ``"plate_..."`` as produced by
        ``get_plate_type``). Neither argument is modified.

        Returns:
            A new plate dict with the item added and its type recomputed, or
            ``None`` when the merge is not allowed.
        """
        plate_type = plate["type"]
        # A successful merge renames the plate to "plate_<contents>", so
        # accepting only the exact type "Plate" would block every merge after
        # the first one and make a complete burger impossible.
        if plate_type != "Plate" and not plate_type.startswith("plate_"):
            return None
        if "contents" not in plate:
            return None

        # Do not add the same ingredient twice.
        if item["type"] in plate["contents"]:
            return None

        # Only certain item types can be merged.
        mergeable = ["Tomato", "Lettuce", "Meat", "Bread"]
        if item["type"] not in mergeable:
            return None

        # Vegetables must be chopped and meat must be cooked.
        if item["type"] in ["Tomato", "Lettuce"] and item["state"] != "Chopped":
            return None
        if item["type"] == "Meat" and item["state"] != "Cooked":
            return None

        # Merge succeeded -> update the contents (kept sorted).
        new_plate = copy.deepcopy(plate)
        new_plate["contents"] = sorted(new_plate["contents"] + [item["type"]])

        # Compute the new plate type.
        new_plate["type"] = self.get_plate_type(new_plate["contents"])
        return new_plate

    def get_plate_type(self, contents):
        """Return the type name of a plate holding ``contents``."""
        if not contents:
            return "plate_clean"
        if set(contents) == {"Bread", "Tomato", "Lettuce", "Meat"}:
            return "plate_burger"  # final burger
        return "plate_" + "_".join(c.lower() for c in contents)

    def is_done(self):
        """Return whether the episode has terminated."""
        return self.terminated

    def get_episode_log(self, episode_id):
        """Return the logs collected since the last reset, keyed by ``episode_id``.

        Map positions in the snapshots are converted from ``(x, y)`` tuples to
        ``"x,y"`` strings so that they can be used as JSON object keys.
        """
        return {
            episode_id: {
                "agent_logs": self.agent_logs,
                "burger_logs": self.burger_logs,
                "snapshots": [
                    {
                        "step": snap["step"],
                        "agents": snap["agents"],
                        "items": snap["items"],
                        "objects": {
                            f"{x},{y}": v
                            for (x, y), v in snap["objects"].items()
                        },
                    }
                    for snap in self.snapshot_logs
                ],
            }
        }


In [2]:
import numpy as np
import random
from collections import deque
import tensorflow as tf
from tensorflow.keras import models, layers, optimizers

class DQNAgent:
    """Deep Q-learning agent with an epsilon-greedy policy and replay memory.

    The Q-function is a small fully connected Keras network that maps a state
    vector to one value per action. There is no separate target network: the
    same model produces both the predictions and the bootstrap targets.
    """

    def __init__(self, state_size, action_size, gamma=0.95, epsilon=1.0, epsilon_min=0.1, epsilon_decay=0.995, lr=0.001):
        """Create the agent.

        Args:
            state_size: Length of the state vector fed to the network.
            action_size: Number of discrete actions.
            gamma: Discount factor for future rewards.
            epsilon: Initial exploration probability.
            epsilon_min: Value below which epsilon is no longer decayed.
            epsilon_decay: Factor applied to epsilon after each training step.
            lr: Learning rate of the Adam optimizer.
        """
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)  # replay buffer; oldest entries drop out
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.model = self._build_model(lr)

    def _build_model(self, lr):
        """Build and compile the Q-network (two hidden layers, MSE loss)."""
        model = models.Sequential()
        model.add(layers.Dense(64, input_dim=self.state_size, activation='relu'))
        model.add(layers.Dense(64, activation='relu'))
        model.add(layers.Dense(self.action_size, activation='linear'))
        model.compile(optimizer=optimizers.Adam(learning_rate=lr), loss='mse')
        return model

    def select_action(self, state):
        """Pick an action for ``state`` with an epsilon-greedy policy.

        Returns:
            The action index as a plain ``int``.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(np.array([state]), verbose=0)
        # np.argmax returns a NumPy integer; convert it so both branches
        # return the same type (callers log and serialise the action).
        return int(np.argmax(act_values[0]))

    def store(self, experience):
        """Append one ``(state, action, reward, next_state, done)`` tuple."""
        self.memory.append(experience)

    def train(self, batch_size=32):
        """Run one Q-learning update on a random minibatch from memory.

        Does nothing until the memory holds at least ``batch_size``
        experiences. After a fit, epsilon is decayed once while it is still
        above ``epsilon_min``.
        """
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)

        states = np.array([exp[0] for exp in minibatch])
        actions = np.asarray([exp[1] for exp in minibatch], dtype=int)
        rewards = np.asarray([exp[2] for exp in minibatch], dtype=float)
        next_states = np.array([exp[3] for exp in minibatch])
        dones = np.asarray([exp[4] for exp in minibatch], dtype=bool)

        # Two batched forward passes instead of two predict() calls per
        # sample; predict() has a high fixed cost per call.
        targets = self.model.predict(states, verbose=0)
        best_next = np.amax(self.model.predict(next_states, verbose=0), axis=1)

        # Terminal transitions get the bare reward, the others bootstrap from
        # the best next-state value.
        updated = np.where(dones, rewards, rewards + self.gamma * best_next)
        # Only the taken action's value is replaced; the other outputs keep
        # their prediction and so contribute no error.
        targets[np.arange(len(minibatch)), actions] = updated

        self.model.fit(states, targets, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay


In [4]:
import json

# Kitchen layout; see KitchenEnv for the cell codes it reacts to.
grid = [
    ["T", "B", "T", "T", "T", "T", "C", "P", "M", "T"],
    ["T", ".", ".", ".", ".", ".", ".", ".", ".", "D"],
    ["T", ".", ".", ".", ".", ".", ".", ".", ".", "PL"],
    ["T", ".", ".", ".", ".", ".", ".", ".", ".", "P"],
    ["T", ".", ".", ".", ".", ".", ".", ".", ".", "X"],
    ["T", ".", ".", ".", ".", ".", ".", ".", ".", "T"],
    ["T", "T", "T", "L", "TO", "T", "T", "C", "T", "T"],
]

# Single source of truth for the agent count, so the environment, the agent
# list and the reward accumulator cannot get out of sync.
NUM_AGENTS = 2

env = KitchenEnv(grid, num_agents=NUM_AGENTS)
state_size = len(env.get_state(0))
action_size = 5  # up, down, left, right, interact

agents = [DQNAgent(state_size, action_size) for _ in range(NUM_AGENTS)]
EPISODES = 3
MAX_STEPS = 30

for ep in range(EPISODES):
    states = env.reset()
    done = False
    total_reward = [0.0] * NUM_AGENTS

    for step in range(MAX_STEPS):
        actions = [agent.select_action(state) for agent, state in zip(agents, states)]
        next_states, rewards, done, _ = env.step(actions)

        # Each agent learns independently from its own transition.
        for i, agent in enumerate(agents):
            agent.store((states[i], actions[i], rewards[i], next_states[i], done))
            agent.train()
            total_reward[i] += rewards[i]

        states = next_states

        if done:
            break

    print(f"Episode {ep+1}/{EPISODES} — Rewards: {total_reward}")

# Save the logs for replay. env.reset() clears the logs at the start of every
# episode, so only the last episode is written.
logs = env.get_episode_log(f"episode_{EPISODES}")
with open("episode_logs.json", "w", encoding="utf-8") as f:
    # default=str keeps the dump from failing on values that are not
    # JSON-serialisable by stringifying them.
    json.dump(logs, f, indent=2, default=str)



Episode 1/3 — Rewards: [-0.6, -0.4]
Episode 2/3 — Rewards: [-0.4, -0.6]
Episode 3/3 — Rewards: [0.7, 0.0]
