In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers

# Define the DQN model
class DQNModel(tf.keras.Model):
    """Small fully connected Q-network.

    Two 32-unit ReLU hidden layers feed a linear output layer that
    produces one value per action.
    """

    def __init__(self, num_actions):
        """Create the layers; ``num_actions`` sets the output width."""
        super().__init__()
        self.dense1 = layers.Dense(units=32, activation='relu')
        self.dense2 = layers.Dense(units=32, activation='relu')
        # Linear activation: the outputs are unbounded value estimates.
        self.output_layer = layers.Dense(units=num_actions, activation='linear')

    def call(self, state):
        """Run ``state`` through both hidden layers and the output layer."""
        hidden = self.dense2(self.dense1(state))
        return self.output_layer(hidden)

# Initialize the DQN model
NUM_ACTIONS = 2  # action 0 = Notify, action 1 = Not Notify
ACTION_LABELS = ("Notify", "Not Notify")  # indexed by action id
model = DQNModel(num_actions=NUM_ACTIONS)

# Define DQN parameters
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
huber_loss = tf.keras.losses.Huber()


def _prompt_float(prompt):
    """Prompt until the user enters a finite number and return it as a float.

    Invalid text and non-finite values (nan/inf) are rejected and the prompt
    is repeated, so a single typo does not abort the whole training session.
    EOFError / KeyboardInterrupt from ``input`` are deliberately not caught
    here; the caller uses them as the signal to stop.
    """
    while True:
        raw = input(prompt)
        try:
            value = float(raw)
        except ValueError:
            print(f"Invalid number: {raw!r}. Please try again.")
            continue
        # float() accepts "nan" and "inf"; a non-finite state or reward would
        # propagate NaNs into the loss and from there into the weights.
        if not np.isfinite(value):
            print(f"Value must be finite, got {raw!r}. Please try again.")
            continue
        return value


# Manual simulation loop
num_episodes = 1000

for episode in range(num_episodes):
    try:
        # Manual input for state values. The ranges shown in the prompts are
        # guidance for the operator only; they are not enforced.
        noise = _prompt_float("Enter noise level (0-1): ")
        deadline = _prompt_float("Enter deadline (hours): ")
        task_priority = _prompt_float("Enter task priority (0-1): ")
        time_of_day = _prompt_float("Enter time of the day (0-24): ")
        task_duration = _prompt_float("Enter task duration (hours): ")
        device_usage = _prompt_float("Enter device usage (0-1): ")

        # Build a (1, 6) float32 batch: one row holding the six features.
        state = np.array(
            [noise, deadline, task_priority, time_of_day, task_duration, device_usage],
            dtype=np.float32,
        ).reshape(1, -1)

        # Greedy action: index of the largest Q-value (0: Notify, 1: Not Notify)
        q_values = model(state)
        suggested_action = int(np.argmax(q_values.numpy()))

        print("Suggested Action:")
        print(ACTION_LABELS[suggested_action])

        # Manual input for reward
        reward = _prompt_float("Enter reward for the suggested action: ")
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C / exhausted stdin: stop cleanly instead of dying
        # with a traceback in the middle of an episode.
        print("\nInput ended; stopping after", episode, "completed episode(s).")
        break

    # Train the model based on the provided reward. The target is the reward
    # alone (no next-state term), regressed against the chosen action's Q-value.
    target = tf.constant([reward], dtype=tf.float32)  # shape (1,), matches q_action
    action_mask = tf.one_hot(suggested_action, NUM_ACTIONS)
    with tf.GradientTape() as tape:
        q_values = model(state)
        # Mask out the other action so only the chosen Q-value gets gradient.
        q_action = tf.reduce_sum(action_mask * q_values, axis=1)
        loss = huber_loss(target, q_action)

    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

    print("Received Reward:", reward)

# Optionally, save the model weights
# NOTE(review): newer Keras releases appear to require a ".weights.h5" suffix
# for save_weights — confirm against the installed version before enabling.
# model.save_weights("dqn_model_weights.h5")

Suggested Action:
Not Notify
Received Reward: 12.0
