In [None]:
%pip install tensorflow numpy pandas matplotlib

In [19]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from collections import deque
import random


In [20]:
class SchedulingEnv:
    """Toy one-day scheduling environment made of 24 hourly time blocks.

    The state is the index of the current hour. Action ``1`` marks the hour as
    worked and earns a reward of 1; any other action marks it as not worked
    and earns 0. The episode is done once every hour has been stepped through.
    """

    def __init__(self):
        self.time_blocks = 24  # hours per day
        self.productivity = np.zeros(self.time_blocks)  # 1.0 for each worked hour
        self.current_time = 0  # hour the next step() applies to

    def reset(self):
        """Clear the productivity record, rewind to hour 0 and return that hour."""
        self.current_time = 0
        self.productivity = np.zeros(self.time_blocks)
        return self.current_time

    def step(self, action):
        """Apply ``action`` to the current hour and advance the clock by one.

        Returns:
            A ``(next_hour, reward, done)`` tuple, where ``done`` is True once
            the clock has reached ``time_blocks``.
        """
        # Reward and recorded productivity are the same 0/1 value.
        reward = 1 if action == 1 else 0
        self.productivity[self.current_time] = reward

        self.current_time += 1
        done = self.current_time >= self.time_blocks
        return self.current_time, reward, done

# Module-level environment instance; later cells read and step it through the name `env`.
env = SchedulingEnv()


In [None]:
class DQNAgent:
    """Minimal deep Q-learning agent with an epsilon-greedy policy and replay memory.

    The Q-network takes a state of width ``state_size`` and outputs one
    Q-value per action (``action_size`` values).
    """

    def __init__(self):
        self.state_size = 1  # Current time block
        self.action_size = 2  # Either study/work (1) or rest (0)
        self.memory = deque(maxlen=2000)  # Replay memory
        self.gamma = 0.95  # Discount factor
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self.build_model()

    def build_model(self):
        """Build and compile the Q-network: two 24-unit ReLU layers and a linear head."""
        model = tf.keras.Sequential([
            # Declare the input explicitly instead of the deprecated
            # `input_dim` layer argument.
            tf.keras.Input(shape=(self.state_size,)),
            tf.keras.layers.Dense(24, activation='relu'),
            tf.keras.layers.Dense(24, activation='relu'),
            tf.keras.layers.Dense(self.action_size, activation='linear'),
        ])
        # `learning_rate` is the supported keyword; the legacy `lr` alias is
        # rejected by current Keras releases.
        model.compile(
            loss='mse',
            optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate),
        )
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one ``(state, action, reward, next_state, done)`` transition to memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action: random with probability ``epsilon``, otherwise greedy.

        Args:
            state: Batch containing a single state; only row 0 of the
                prediction is used.

        Returns:
            Index of the chosen action.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        # verbose=0 keeps Keras from printing a progress bar on every call.
        act_values = self.model.predict(state, verbose=0)
        return np.argmax(act_values[0])

    def replay(self, batch_size):
        """Fit the Q-network on sampled transitions, then decay ``epsilon`` once.

        Up to ``batch_size`` transitions are drawn without replacement. When
        fewer are stored, all of them are used; with an empty memory the call
        does nothing.
        """
        # random.sample raises if asked for more items than are available.
        sample_size = min(batch_size, len(self.memory))
        if sample_size <= 0:
            return

        minibatch = random.sample(self.memory, sample_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                # Bootstrap from the best predicted Q-value of the next state.
                next_q = self.model.predict(next_state, verbose=0)[0]
                target = reward + self.gamma * np.amax(next_q)
            # Only the taken action's Q-value is moved toward the target.
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

# Module-level agent instance; later cells act, remember and replay through the name `agent`.
agent = DQNAgent()


In [None]:
# Simulated training run: one episode per day, 14 days (2 weeks) in total.
episodes = 14
batch_size = 32

for e in range(episodes):
    # Each day starts from a freshly reset environment; the network expects a (1, 1) state.
    state = np.array([env.reset()]).reshape(1, 1)

    for time_block in range(env.time_blocks):
        action = agent.act(state)
        observed_time, reward, done = env.step(action)
        next_state = np.array([observed_time]).reshape(1, 1)
        agent.remember(state, action, reward, next_state, done)
        state = next_state

        if not done:
            continue
        print(f"Day {e+1}/{episodes} completed with productivity: {sum(env.productivity)}")
        break

    # Train once per day, and only after memory holds more than one batch.
    enough_samples = len(agent.memory) > batch_size
    if enough_samples:
        agent.replay(batch_size)


In [None]:
# `env.reset()` zeroes `productivity` at the start of every episode, so the
# array plotted here holds the hours worked during the most recent day only.
fig, ax = plt.subplots()
ax.plot(env.productivity, label='Productivity')
ax.set_title('Productive Hours After 2 Weeks')
ax.set_xlabel('Hour of the Day')
ax.set_ylabel('Productivity')
ax.legend()
plt.show()


In [24]:
class RealWorldSchedulingEnv:
    """One-day scheduling environment driven by manual user feedback.

    The state is the index of the current hour (24 hourly time blocks). For
    each hour the agent's action is treated as a prediction of whether the
    user will be productive (1) or not (0), and the user's feedback decides
    both the recorded productivity and the reward.
    """

    def __init__(self):
        self.time_blocks = 24  # 24 hours in a day
        self.productivity = np.zeros(self.time_blocks)  # Track productivity in each time block
        self.current_time = 0  # Start at time block 0

    def reset(self):
        """Clear the productivity record, rewind to hour 0 and return that hour."""
        self.productivity = np.zeros(self.time_blocks)
        self.current_time = 0
        return self.current_time

    def step(self, action, user_input):
        """Record the user's feedback for the current hour and score the agent.

        Args:
            action: The agent's suggestion for this hour (1 = work, 0 = rest).
            user_input: Manual feedback; 1 means the hour was productive,
                anything else is treated as non-productive.

        Returns:
            A ``(next_hour, reward, done)`` tuple. ``reward`` is 1 when the
            action matched the feedback and 0 otherwise; ``done`` is True once
            the clock has reached ``time_blocks``.
        """
        # The recorded productivity comes from the user alone.
        productive = 1 if user_input == 1 else 0
        self.productivity[self.current_time] = productive

        # Reward a correct prediction in either direction. Basing the reward
        # on the feedback alone would make it independent of the action, so
        # the agent would have nothing to learn from.
        reward = 1 if action == productive else 0

        self.current_time += 1
        done = self.current_time >= self.time_blocks

        return self.current_time, reward, done


In [25]:
class RealWorldDQNAgent:
    """Deep Q-learning agent with an epsilon-greedy policy and replay memory.

    The Q-network takes a state of width ``state_size`` and outputs one
    Q-value per action (``action_size`` values).
    """

    def __init__(self):
        self.state_size = 1  # Current time block
        self.action_size = 2  # Study/work (1) or rest (0)
        self.memory = deque(maxlen=2000)  # Replay memory
        self.gamma = 0.95  # Discount factor
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self.build_model()

    def build_model(self):
        """Build and compile the Q-network: two 24-unit ReLU layers and a linear head."""
        model = tf.keras.Sequential([
            # Declare the input explicitly instead of the deprecated
            # `input_dim` layer argument.
            tf.keras.Input(shape=(self.state_size,)),
            tf.keras.layers.Dense(24, activation='relu'),
            tf.keras.layers.Dense(24, activation='relu'),
            tf.keras.layers.Dense(self.action_size, activation='linear'),
        ])
        # `learning_rate` is the supported keyword; the legacy `lr` alias is
        # rejected by current Keras releases.
        model.compile(
            loss='mse',
            optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate),
        )
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one ``(state, action, reward, next_state, done)`` transition to memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action: explore with probability ``epsilon``, otherwise exploit.

        Args:
            state: Batch containing a single state; only row 0 of the
                prediction is used.

        Returns:
            Index of the chosen action.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)  # Explore
        # verbose=0 keeps Keras from printing a progress bar on every call.
        act_values = self.model.predict(state, verbose=0)
        return np.argmax(act_values[0])  # Exploit

    def replay(self, batch_size):
        """Fit the Q-network on sampled transitions, then decay ``epsilon`` once.

        Up to ``batch_size`` transitions are drawn without replacement. When
        fewer are stored, all of them are used; with an empty memory the call
        does nothing.
        """
        # random.sample raises if asked for more items than are available.
        sample_size = min(batch_size, len(self.memory))
        if sample_size <= 0:
            return

        minibatch = random.sample(self.memory, sample_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                # Bootstrap from the best predicted Q-value of the next state.
                next_q = self.model.predict(next_state, verbose=0)[0]
                target = reward + self.gamma * np.amax(next_q)
            # Only the taken action's Q-value is moved toward the target.
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay


In [None]:
episodes = 14  # Training over 14 days (2 weeks)
batch_size = 32

# This loop calls `env.step(action, user_input)`, a signature only
# RealWorldSchedulingEnv provides, so bind `env` and `agent` to the
# real-world classes here instead of relying on objects from earlier cells.
env = RealWorldSchedulingEnv()
agent = RealWorldDQNAgent()


def _ask_productive(time_block):
    """Prompt until the user answers 0 or 1 for ``time_block``; return it as an int."""
    prompt = f"Was hour {time_block} productive? (1 for yes, 0 for no): "
    while True:
        answer = input(prompt).strip()

        # Handle empty input (user pressed enter)
        if answer == "":
            print("Error: Input cannot be empty. Please enter 1 for yes or 0 for no.")
            continue

        try:
            value = int(answer)
        except ValueError:
            value = None

        if value in (0, 1):
            return value
        print("Error: Invalid input. Please enter 1 for yes or 0 for no.")


for day in range(1, episodes + 1):
    try:
        state = np.array([env.reset()]).reshape(1, 1)  # Reset environment and reshape state
        print(f"\nDay {day}/{episodes}")

        for time_block in range(env.time_blocks):
            try:
                action = agent.act(state)  # Suggest a schedule (work or rest)
                print(f"Suggested work for hour {time_block}: {'Work' if action == 1 else 'Rest'}")

                user_input = _ask_productive(time_block)

                # Process the feedback and update the environment
                next_time_block, reward, done = env.step(action, user_input)  # Feedback loop
                next_state = np.array([next_time_block]).reshape(1, 1)  # Reshape next state
                agent.remember(state, action, reward, next_state, done)  # Store experience
                state = next_state

                if done:
                    print(f"Day {day} completed with total productivity: {sum(env.productivity)} hours.")
                    break

            except Exception as ex:
                print(f"Error during processing time block {time_block}: {ex}")
                # A failed block leaves the environment clock where it was, so
                # carrying on would record later answers against the wrong
                # hour. Abandon the rest of this day instead.
                break

    except Exception as ex:
        # Bound to `ex`, not the loop variable, so the day number can still be reported.
        print(f"Error on Day {day}/{episodes}: {ex}")

    # Replay experience if there are enough samples
    if len(agent.memory) > batch_size:
        try:
            agent.replay(batch_size)  # Train the agent from the replay buffer
        except Exception as ex:
            print(f"Error during agent replay: {ex}")

In [None]:
# Plot the per-hour productivity currently recorded on `env`.
fig, ax = plt.subplots()
ax.plot(env.productivity, label='Productivity')
ax.set(
    title='Productive Hours Over Time',
    xlabel='Hour of the Day',
    ylabel='Productivity (1 = productive, 0 = non-productive)',
)
ax.legend()
plt.show()
