# **FrozenLakeDQN**

**Description:**  
FrozenLakeDQN is a reinforcement learning project that uses **Deep Q-Networks (DQN)** to train an agent to solve the **FrozenLake-v1** environment. The agent learns to navigate a slippery 4x4 gridworld, avoiding holes and reaching the goal.  

**Technologies Used:**  
- **TensorFlow** – For building and training the neural network.  
- **NumPy** – For handling data and state representations.  
- **Gymnasium** – For environment simulation and interaction.  

**How It Works:**  
1. The agent uses **one-hot encoding** to represent states.  
2. A **neural network** predicts Q-values for actions.  
3. The agent **explores** (random actions) and **exploits** (best actions) to improve over time.  
4. **Experience replay** and a **target network** stabilize training.  
5. The model learns using **reward-based feedback** to maximize long-term success.



In [None]:
import time
from collections import deque

import gymnasium as gym
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from IPython.display import clear_output

# Load the environment: slippery FrozenLake-v1 with rendering disabled.
env = gym.make('FrozenLake-v1', render_mode=None, is_slippery=True)
# Sizes of the discrete observation and action spaces. build_model() and
# one_hot_state() read these as module-level globals.
num_states = env.observation_space.n
num_actions = env.action_space.n


In [None]:
# Build the Q-network
def build_model():
    """Return a compiled dense network that maps a one-hot state to Q-values.

    Layout: ``num_states`` inputs -> two ReLU hidden layers of 64 units ->
    ``num_actions`` linear outputs (one Q-value per action). The network is
    compiled with Adam (learning rate 0.001) and a mean-squared-error loss.
    """
    keras_layers = tf.keras.layers

    # Assemble the stack in the same order the layers are applied.
    stack = [keras_layers.Input(shape=(num_states,))]
    for _ in range(2):
        stack.append(keras_layers.Dense(64, activation='relu'))
    stack.append(keras_layers.Dense(num_actions))  # one Q-value per action

    network = tf.keras.Sequential(stack)
    adam = tf.keras.optimizers.Adam(learning_rate=0.001)
    network.compile(optimizer=adam, loss='mse')
    return network

In [None]:
# Convert a discrete state index into a one-hot row vector
def one_hot_state(state, n_states=None):
    """Encode a discrete state index as a one-hot row vector.

    Args:
        state: Integer index of the active state.
        n_states: Length of the encoding. When omitted, the module-level
            ``num_states`` is used, which matches the previous behaviour.

    Returns:
        A float NumPy array of shape ``(1, n_states)`` holding a single 1 in
        column ``state`` and zeros elsewhere.
    """
    if n_states is None:
        n_states = num_states
    state_one_hot = np.zeros(n_states)
    state_one_hot[state] = 1
    # Add the leading batch axis expected by the network.
    return state_one_hot.reshape(1, -1)

# Choose an action with an epsilon-greedy policy
def select_action(model, state, epsilon):
    """Pick an action epsilon-greedily from the model's Q-value estimates.

    Args:
        model: Callable Keras model returning one row of Q-values per input row.
        state: Batched state with a single row, as produced by ``one_hot_state``.
        epsilon: Probability of taking a random action instead of the greedy one.

    Returns:
        The chosen action. The greedy branch returns a plain ``int``; the
        exploratory branch returns whatever ``env.action_space.sample()`` yields.
    """
    # rand() is drawn from [0, 1), so a strict comparison makes epsilon == 0
    # purely greedy while epsilon == 1 still always explores.
    if np.random.rand() < epsilon:
        return env.action_space.sample()  # explore: random action
    # Call the model directly instead of going through ``predict``: for a
    # single small input inside a loop this avoids predict's per-call overhead.
    q_values = np.asarray(model(state, training=False))
    return int(np.argmax(q_values[0]))


In [None]:
# Update the online network's Q-values from an experience-replay minibatch
def update_q_values(model, target_model, batch, gamma=0.99):
    """Run one DQN training step on a minibatch of stored transitions.

    For every transition the target for the taken action is ``reward`` when
    the transition is flagged done, otherwise
    ``reward + gamma * max(Q_target(next_state))``. The Q-values of all other
    actions are left at the online network's current predictions, so they
    contribute no error to the loss.

    Args:
        model: Online network; it is called for predictions and trained in place.
        target_model: Network used to evaluate the next states.
        batch: Sequence of ``(state, action, reward, next_state, done)`` tuples
            whose states are single-row arrays (as built by ``one_hot_state``).
        gamma: Discount factor.

    Returns:
        The value returned by ``model.train_on_batch``, or ``None`` when
        ``batch`` is empty.
    """
    if len(batch) == 0:
        return None  # nothing to learn from; avoids failing on the unpack below

    states, actions, rewards, next_states, dones = zip(*batch)

    # Each stored state is a single-row array; stack the rows into one batch.
    states = np.array([s[0] for s in states])
    next_states = np.array([s[0] for s in next_states])

    # Direct calls avoid predict()'s per-call overhead on a one-batch input.
    # np.array(...) makes a writable copy of the online predictions.
    q_values = np.array(model(states, training=False), dtype=np.float32)
    q_next = np.asarray(target_model(next_states, training=False))

    actions = np.asarray(actions, dtype=np.int64)
    rewards = np.asarray(rewards, dtype=np.float64)
    done_mask = np.asarray(dones, dtype=bool)

    # np.where (rather than multiplying by a 0/1 mask) keeps finished
    # transitions independent of whatever the target network predicted.
    bootstrapped = rewards + gamma * q_next.max(axis=1).astype(np.float64)
    targets = np.where(done_mask, rewards, bootstrapped)

    # Overwrite only the entry of the action that was actually taken per row.
    q_values[np.arange(len(batch)), actions] = targets

    return model.train_on_batch(states, q_values)


In [None]:
# Training loop
def train_agent(num_episodes=1000, target_update_freq=10, batch_size=32):
    """Train a DQN agent on the module-level ``env`` and return the online network.

    Every environment step is stored in a bounded replay memory. Once the
    memory holds at least ``batch_size`` transitions, a minibatch sampled
    without replacement is used for one update after each step. Exploration
    follows an epsilon-greedy schedule that decays once per episode.

    Args:
        num_episodes: Number of episodes to run.
        target_update_freq: The target network is synchronised with the online
            network whenever ``episode % target_update_freq == 0``.
        batch_size: Number of transitions sampled for each update.

    Returns:
        The trained online Keras model.
    """
    model = build_model()
    target_model = build_model()
    target_model.set_weights(model.get_weights())

    epsilon = 1.0
    epsilon_min = 0.01
    epsilon_decay = 0.995
    max_memory_size = 2000
    # Bounded FIFO buffer: once full, appending evicts the oldest transition
    # in O(1) instead of the O(n) shift caused by list.pop(0).
    memory = deque(maxlen=max_memory_size)

    for episode in range(num_episodes):
        state = env.reset()[0]
        state = one_hot_state(state)
        done = False
        total_reward = 0

        while not done:
            action = select_action(model, state, epsilon)
            next_state, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
            next_state = one_hot_state(next_state)

            if truncated and not terminated:
                # The episode was cut off without terminating: penalise it.
                reward = -1

            # ``done`` (terminated OR truncated) is what gets stored, so the
            # update step treats truncated transitions as terminal too.
            memory.append((state, action, reward, next_state, done))

            state = next_state
            total_reward += reward

            if len(memory) >= batch_size:
                batch_indices = np.random.choice(len(memory), batch_size, replace=False)
                batch = [memory[int(i)] for i in batch_indices]
                update_q_values(model, target_model, batch)

        if episode % target_update_freq == 0:
            target_model.set_weights(model.get_weights())

        # Decay exploration, clamped so it never drops below the floor.
        epsilon = max(epsilon_min, epsilon * epsilon_decay)

        if episode % 50 == 0:
            clear_output(wait=True)
            print(f"Episode {episode}, Reward: {total_reward}, Epsilon: {epsilon:.4f}")

    return model

In [None]:
# Train the agent for 500 episodes (the other train_agent arguments keep their defaults).
trained_model = train_agent(num_episodes=500)

# Save the model
# NOTE(review): hard-coded absolute path, presumably a mounted Google Drive
# folder — confirm it exists before running. The same literal is repeated
# where the model is reloaded further down.
trained_model.save("/content/drive/MyDrive/Hub/Semester Winter 2025/PROJ-DRL/frozen_lake_dqn.h5")
print("Model saved successfully.")

Episode 450, Reward: 0.0, Epsilon: 0.1043




Model saved successfully.


# **Evaluation** (continued)

In [None]:
# Reload the network from the same file it was saved to after training.
model_path = "/content/drive/MyDrive/Hub/Semester Winter 2025/PROJ-DRL/frozen_lake_dqn.h5"
# "mse" is mapped explicitly to a MeanSquaredError instance.
# NOTE(review): presumably needed so the loss name stored with the compiled
# model resolves on load — confirm against the installed Keras version.
loaded_model = tf.keras.models.load_model(model_path, custom_objects={"mse": tf.keras.losses.MeanSquaredError()})



In [None]:
def evaluate_agent(agent, env, num_episodes=100):
    """Roll out the greedy policy and report success rate and episode length.

    Args:
        agent: Callable Keras model returning one row of Q-values per input row.
        env: Environment exposing ``reset()`` and a five-value ``step()``.
        num_episodes: Number of evaluation episodes; must be positive.

    Returns:
        ``(success_rate, avg_steps)`` where ``success_rate`` is the percentage
        of episodes that terminated with a positive reward and ``avg_steps``
        is the mean number of steps over ALL episodes, successful or not.

    Raises:
        ValueError: If ``num_episodes`` is not positive.
    """
    if num_episodes <= 0:
        raise ValueError("num_episodes must be a positive integer")

    success_count = 0
    total_steps = 0

    for _episode in range(num_episodes):
        state = env.reset()[0]
        state = one_hot_state(state)
        done = False
        steps = 0

        while not done:
            # Direct call instead of predict(): avoids predict's per-call
            # overhead for a single-row input inside the step loop.
            q_values = np.asarray(agent(state, training=False))
            action = int(np.argmax(q_values[0]))
            next_state, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
            state = one_hot_state(next_state)
            steps += 1

            # Counted at most once per episode: the loop ends on termination.
            if terminated and reward > 0:
                success_count += 1

        total_steps += steps

    success_rate = (success_count / num_episodes) * 100
    avg_steps = total_steps / num_episodes

    print(f"Success Rate: {success_rate:.2f}%")
    # The average covers every episode, including failed and truncated ones,
    # so it is labelled per episode rather than "to goal".
    print(f"Average Steps per Episode: {avg_steps:.2f}")
    return success_rate, avg_steps

# Run evaluation on the reloaded model with the default number of episodes.
# As the last expression of the cell, the returned tuple is displayed as well.
evaluate_agent(loaded_model, env)


Success Rate: 52.00%
Average Steps to Goal: 30.78


(52.0, 30.78)

In [None]:
# Print the architecture summary of the reloaded network.
loaded_model.summary()


# **License**

This code is licensed under the MIT License.

Author: Karman.

Some of the code was generated with AI.
