<a href="https://colab.research.google.com/github/EziamaUgonna/DWT_BDL-and-DWT-DDQN/blob/master/DWT_DDQN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pywt
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.optimizers import Adam
from collections import deque
import random
import pandas as pd
import matplotlib.pyplot as plt

# Hyperparameters (module-level constants read by the agent and the training loop)
GAMMA = 0.99  # Discount factor applied to the bootstrapped next-state Q-value
LEARNING_RATE = 0.0005  # Step size passed to the Adam optimizer
BATCH_SIZE = 64  # Number of transitions sampled from replay memory per update
MEMORY_SIZE = 100000  # Maximum number of transitions kept in the replay deque
TAU = 0.125  # Soft-update blend: target <- TAU * online + (1 - TAU) * target
EPSILON_DECAY = 0.995  # Multiplicative epsilon decay for the epsilon-greedy policy
EPSILON_MIN = 0.01  # Floor for the epsilon decay
EPSILON_START = 1.0  # Initial epsilon (fully random action selection)
EPISODES = 500  # Number of training episodes
MAX_STEPS = 200  # Maximum steps per episode

# Load data
# Both CSVs are read from hard-coded paths under /content/drive
# (presumably a mounted Google Drive in Colab -- TODO confirm the mount exists).
# The sensor table is cast to float32 as a whole; the ground-truth table keeps
# the dtypes pandas infers.
df1 = pd.read_csv('/content/drive/My Drive/Sensor1_values_bias_0_1_dur_10_dep.csv').astype(np.float32)
df2 = pd.read_csv('/content/drive/My Drive/Ground_truth_bias_0_1_dur_10_sensor1_dep.csv')

# Assuming df2 contains the ground truth (binary classification, 0 or 1)
# NOTE(review): y is not referenced by any of the code visible below -- confirm
# whether it is meant to drive the reward signal.
y = df2['ground_truth'].values  # Replace with the correct column name for ground truth
X = df1.values  # Sensor readings as feature matrix (one row per sample)

# Discrete Wavelet Transform for feature extraction
def perform_dwt(data, wavelet='db4', level=3):
    """
    Decompose a signal with a multilevel Discrete Wavelet Transform (DWT)
    and return every coefficient band joined into one flat feature vector.

    :param data: 1D signal array.
    :param wavelet: Name of the wavelet handed to ``pywt.wavedec`` (default: 'db4').
    :param level: Number of decomposition levels (default: 3).
    :return: All coefficient arrays produced by ``pywt.wavedec``, stacked
             horizontally in the order the library returns them.
    """
    # wavedec yields a list of coefficient arrays, one per band
    coefficient_bands = pywt.wavedec(data, wavelet=wavelet, level=level)
    # Join the bands end to end so the result can be fed to a dense network
    feature_vector = np.hstack(coefficient_bands)
    return feature_vector

# Build the DWT feature matrix: transform every row of X independently,
# then collect the per-row feature vectors into a single array
X_dwt = np.array(list(map(perform_dwt, X)))

# Deep Q-Network (DQN) Model
class DQN(Model):
    """
    Fully connected Q-network.

    Three ReLU hidden layers (256, 128 and 64 units) feed a linear output
    layer that emits one Q-value per action.

    :param input_shape: Shape tuple forwarded to the first Dense layer.
    :param action_space: Number of actions, i.e. width of the output layer.
    """

    def __init__(self, input_shape, action_space):
        super().__init__()
        # Hidden stack
        self.dense1 = layers.Dense(256, activation='relu', input_shape=input_shape)
        self.dense2 = layers.Dense(128, activation='relu')
        self.dense3 = layers.Dense(64, activation='relu')
        # Linear head: raw (unbounded) Q-value estimate for each action
        self.q_values = layers.Dense(action_space, activation='linear')

    def call(self, inputs):
        """Run a forward pass and return the Q-values for ``inputs``."""
        hidden = inputs
        for hidden_layer in (self.dense1, self.dense2, self.dense3):
            hidden = hidden_layer(hidden)
        return self.q_values(hidden)

# Double DQN Agent
class DDQNAgent:
    """
    Double DQN agent with an experience-replay buffer, an epsilon-greedy
    policy and a soft-updated target network.

    :param state_shape: Shape tuple of a single state, e.g. ``(n_features,)``.
    :param action_space: Number of discrete actions.
    """

    def __init__(self, state_shape, action_space):
        self.state_shape = state_shape
        self.action_space = action_space
        self.epsilon = EPSILON_START
        self.memory = deque(maxlen=MEMORY_SIZE)

        # Initialize the DQN models
        self.model = DQN(self.state_shape, self.action_space)
        self.target_model = DQN(self.state_shape, self.action_space)

        # Subclassed Keras models create their variables lazily on the first
        # call. Until then get_weights() returns an empty list, so copying
        # weights would silently do nothing. Build both networks first.
        dummy_state = np.zeros((1,) + tuple(self.state_shape), dtype=np.float32)
        self.model(dummy_state)
        self.target_model(dummy_state)
        self.target_model.set_weights(self.model.get_weights())  # Start target == online

        # Optimizer and loss function
        self.optimizer = Adam(learning_rate=LEARNING_RATE)
        self.loss_fn = tf.keras.losses.Huber()

    def update_target_model(self):
        """Soft-update the target network: target <- TAU*online + (1-TAU)*target."""
        online_weights = self.model.get_weights()
        target_weights = self.target_model.get_weights()
        blended = [TAU * online_w + (1 - TAU) * target_w
                   for online_w, target_w in zip(online_weights, target_weights)]
        self.target_model.set_weights(blended)

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer (oldest entries are evicted)."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """
        Choose an action with the epsilon-greedy policy.

        :param state: A single state (no batch dimension).
        :return: Index of the chosen action as a Python int.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_space)  # Explore
        # Add the batch dimension and match the network's float32 dtype
        batched_state = np.expand_dims(np.asarray(state, dtype=np.float32), axis=0)
        q_values = self.model(batched_state)
        return int(np.argmax(q_values[0].numpy()))  # Exploit

    def replay(self):
        """
        Train the online network on one random mini-batch from replay memory.

        Does nothing until at least BATCH_SIZE transitions are stored. After
        each update, epsilon is decayed by EPSILON_DECAY and clamped at
        EPSILON_MIN.
        """
        if len(self.memory) < BATCH_SIZE:
            return

        # Sample a batch of experiences and unpack it column-wise
        batch = random.sample(self.memory, BATCH_SIZE)
        states, actions, rewards, next_states, dones = zip(*batch)
        states = np.asarray(states, dtype=np.float32)
        actions = np.asarray(actions, dtype=np.int64)
        rewards = np.asarray(rewards, dtype=np.float32)
        next_states = np.asarray(next_states, dtype=np.float32)
        dones = np.asarray(dones, dtype=bool)

        # Double DQN: the online network picks the next action, the target
        # network evaluates it.
        q_next_target = self.target_model(next_states).numpy()
        q_next_online = self.model(next_states).numpy()
        rows = np.arange(BATCH_SIZE)
        best_next_actions = np.argmax(q_next_online, axis=1)
        bootstrap = q_next_target[rows, best_next_actions]
        # Terminal transitions get the bare reward (no bootstrapping)
        td_targets = rewards + GAMMA * bootstrap * (1.0 - dones.astype(np.float32))

        # Eager tensors are immutable, so take a writable NumPy copy of the
        # current predictions and overwrite only the entries of the actions
        # that were actually taken; the other entries contribute zero loss.
        target_q_values = self.model(states).numpy()
        target_q_values[rows, actions] = td_targets

        # Compute loss and update weights
        with tf.GradientTape() as tape:
            q_values_pred = self.model(states)
            loss = self.loss_fn(target_q_values, q_values_pred)

        gradients = tape.gradient(loss, self.model.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))

        # Decay epsilon for the epsilon-greedy policy, never below the floor
        self.epsilon = max(EPSILON_MIN, self.epsilon * EPSILON_DECAY)

# Environment (Dummy)
class DummyEnvironment:
    """
    Placeholder environment that emits random transitions.

    The chosen action has no influence on what ``step`` returns; the class
    only provides the reset/step interface the training loop calls.

    :param state_size: Shape (int or tuple) of the state arrays produced.
    :param action_space: Number of actions; stored but not otherwise used.
    """

    def __init__(self, state_size, action_space):
        self.state_size, self.action_space = state_size, action_space

    def reset(self):
        """Return the initial state: an all-zero array of ``state_size``."""
        return np.zeros(shape=self.state_size)

    def step(self, action):
        """
        Produce one random transition, ignoring ``action``.

        :return: ``(next_state, reward, done)`` where next_state is uniform
                 in [0, 1), reward is standard-normal, and done is True when
                 a uniform draw exceeds 0.95.
        """
        observation = np.random.random(self.state_size)
        payoff = np.random.randn()
        finished = 0.95 < np.random.rand()
        return observation, payoff, finished

# Training Loop
def train_ddqn():
    """
    Train a DDQNAgent against DummyEnvironment for EPISODES episodes.

    The state width is taken from the DWT feature matrix ``X_dwt``. Each
    step stores the transition and runs one replay update; the target
    network is soft-updated once per episode. Per-episode totals are
    printed, and the reward curve is plotted whenever the zero-based
    episode index is a multiple of 50.
    """
    n_features = X_dwt.shape[1]  # Number of DWT features per sensor reading
    n_actions = 4  # Number of possible actions
    agent = DDQNAgent(state_shape=(n_features,), action_space=n_actions)
    env = DummyEnvironment(state_size=n_features, action_space=n_actions)

    episode_rewards = []

    for episode in range(EPISODES):
        total_reward = 0
        state = env.reset()

        # Roll out until the environment signals termination or the step
        # budget for this episode is exhausted
        steps_taken = 0
        done = False
        while not done and steps_taken < MAX_STEPS:
            action = agent.act(state)
            next_state, reward, done = env.step(action)

            # Record the transition, then learn from a sampled mini-batch
            agent.remember(state, action, reward, next_state, done)
            agent.replay()

            total_reward += reward
            state = next_state
            steps_taken += 1

        # Blend the online weights into the target network
        agent.update_target_model()

        episode_rewards.append(total_reward)
        print(f"Episode {episode+1}/{EPISODES}, Total Reward: {total_reward}, Epsilon: {agent.epsilon}")

        # Show the reward curve every 50 episodes (including the first)
        if not episode % 50:
            plt.plot(episode_rewards)
            plt.title('Episode Rewards Over Time')
            plt.show()

# Main execution
# Start training only when this file is executed as a script. The data
# loading and DWT feature extraction above are module-level statements, so
# they run before this guard is reached.
if __name__ == "__main__":
    train_ddqn()
