In [None]:
%pip install tensorflow pandas numpy matplotlib

In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

In [3]:
class SchedulingEnv:
    """Toy environment for placing work into the free slots of a timetable.

    The state is a float vector with one entry per timetable slot; an entry
    is 1 once that slot has been scheduled and 0 otherwise.

    Args:
        timetable: Sequence indexed by slot. A value of 0 marks the slot as
            free; any other value marks it as occupied.
        productive_hours: Container of slot indices. Scheduling a free slot
            that is in this container earns +1; every other action earns -1.
    """

    def __init__(self, timetable, productive_hours):
        self.timetable = timetable
        self.productive_hours = productive_hours
        self.reset()

    def reset(self):
        """Clear every scheduled slot and return a copy of the fresh state."""
        self.state = np.zeros(len(self.timetable))
        # Hand out a copy so callers never hold a reference to the buffer
        # that `step` mutates in place.
        return self.state.copy()

    def step(self, action):
        """Try to schedule slot `action`.

        Returns:
            A `(state, reward, done, info)` tuple. `state` is a copy of the
            current schedule vector, `reward` is +1 for scheduling a free
            productive slot and -1 otherwise, `done` is True once every free
            slot of the timetable has been scheduled, and `info` is an empty
            dict.
        """
        if self.timetable[action] == 0:
            self.state[action] = 1
            reward = 1 if action in self.productive_hours else -1
        else:
            # Occupied slots cannot be scheduled.
            reward = -1

        # Occupied slots can never be set to 1, so comparing the sum of the
        # state against the full timetable length would never finish an
        # episode when any slot is busy. Only free slots must be filled.
        free_slots = np.asarray(self.timetable) == 0
        done = bool(np.all(self.state[free_slots] == 1))
        return self.state.copy(), reward, done, {}

    def render(self):
        """Print the current schedule vector."""
        print(self.state)


In [4]:
class DQNAgent:
    """Deep Q-learning agent with an epsilon-greedy policy and replay memory.

    Args:
        state_size: Number of features in a state vector (network input width).
        action_size: Number of discrete actions (network output width).

    Attributes set in `__init__`:
        memory: List of `(state, action, reward, next_state, done)` tuples.
        gamma: Discount factor applied to the best next-state Q-value.
        epsilon: Current probability of picking a random action.
        epsilon_decay / epsilon_min: Multiplicative decay applied after each
            `replay` call, and the floor below which it is no longer applied.
        learning_rate: Step size handed to the Adam optimizer.
        model: The Keras network that approximates Q(state, action).
    """

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = []
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the two-hidden-layer Q-network."""
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        # `lr` is a deprecated alias that newer Keras releases reject;
        # `learning_rate` is the supported keyword.
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Return an action index: random with probability `epsilon`, else greedy."""
        if np.random.rand() <= self.epsilon:
            return np.random.choice(self.action_size)
        # verbose=0 keeps Keras from printing a progress bar on every call.
        act_values = self.model.predict(state, verbose=0)
        return np.argmax(act_values[0])

    def replay(self, batch_size):
        """Fit the network on up to `batch_size` remembered transitions.

        Transitions are drawn without replacement. If fewer than `batch_size`
        are stored, all of them are used. With an empty memory the call is a
        no-op and `epsilon` is left untouched.
        """
        sample_size = min(batch_size, len(self.memory))
        if sample_size <= 0:
            return

        # `np.random.choice` only accepts 1-D input, so it cannot sample the
        # list of transition tuples directly; sample positions instead.
        picks = np.random.choice(len(self.memory), size=sample_size, replace=False)
        for idx in picks:
            state, action, reward, next_state, done = self.memory[idx]
            target = reward
            if not done:
                # Bootstrapped target: reward plus discounted best next Q-value.
                best_next = np.amax(self.model.predict(next_state, verbose=0)[0])
                target = reward + self.gamma * best_next
            # Only the Q-value of the action actually taken is moved.
            target_f = self.model.predict(state, verbose=0)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay


In [6]:
def replay(self, batch_size):
    """Fit `self.model` on up to `batch_size` transitions from `self.memory`.

    `self` is any agent-like object exposing `memory` (a sequence of
    `(state, action, reward, next_state, done)` tuples), `model`, `gamma`,
    `epsilon`, `epsilon_min` and `epsilon_decay`.

    Transitions are drawn without replacement. If fewer than `batch_size`
    are stored, all of them are used. With an empty memory the call is a
    no-op and `epsilon` is left untouched.

    NOTE(review): nothing in the visible cells binds this function to a
    class. If it is meant to replace an agent's `replay` method, assign it
    explicitly or call it as `replay(agent, batch_size)`.
    """
    sample_size = min(batch_size, len(self.memory))
    if sample_size <= 0:
        return

    # Converting the memory with `np.array` fails (or yields an unusable
    # object array) because each transition mixes arrays and scalars, so
    # sample positions and index the sequence directly.
    indices = np.random.choice(len(self.memory), size=sample_size, replace=False)
    for idx in indices:
        state, action, reward, next_state, done = self.memory[idx]
        target = reward
        if not done:
            # Bootstrapped target: reward plus discounted best next Q-value.
            best_next = np.amax(self.model.predict(next_state, verbose=0)[0])
            target = reward + self.gamma * best_next
        # Only the Q-value of the action actually taken is moved.
        target_f = self.model.predict(state, verbose=0)
        target_f[0][action] = target
        self.model.fit(state, target_f, epochs=1, verbose=0)

    if self.epsilon > self.epsilon_min:
        self.epsilon *= self.epsilon_decay


In [None]:
# Roll out one episode of at most 24 steps with the agent's current policy,
# printing the environment state after every step.
# NOTE(review): `env`, `agent` and `state_size` are not defined in any cell
# shown above this one; they must already exist in the kernel.
state = np.reshape(env.reset(), [1, state_size])
for time in range(24):
    action = agent.act(state)
    next_state, reward, done, _ = env.step(action)
    # The network expects a (1, state_size) batch, so reshape before reuse.
    state = next_state = np.reshape(next_state, [1, state_size])
    env.render()
    if done:
        break


In [None]:
# Estimate, for every hour of the day, the fraction of episodes in which the
# agent's current policy ends up with that hour scheduled.
# NOTE(review): `env`, `agent` and `state_size` are not defined in any cell
# shown above this one; they must already exist in the kernel. The counting
# below presumes the state vector holds a non-zero entry exactly for the
# slots scheduled in the current episode — confirm against the environment.
time_slots = 24
scheduling_counts = np.zeros(time_slots)

# Run multiple episodes to gather data
episodes = 1000
for episode in range(episodes):
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    for step in range(time_slots):
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, [1, state_size])
        state = next_state
        if done:
            break
    # Count every hour at most once per episode, and only when it was really
    # scheduled. Counting each chosen action instead also tallies rejected
    # and repeated picks, which pushes counts / episodes above 1.
    scheduled_hours = np.flatnonzero(state[0])
    scheduling_counts[scheduled_hours] += 1

# Calculate probabilities
probabilities = scheduling_counts / episodes

# Each hour is counted at most once per episode, so these are true
# frequencies and need no clipping.
assert np.all((probabilities >= 0) & (probabilities <= 1))

# Print probabilities with time ranges
for hour, prob in enumerate(probabilities):
    print(f"Hour {hour:02d}:00 - {hour:02d}:59: {prob:.2f}")

# Plot probabilities
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(range(time_slots), probabilities, color='skyblue')
ax.set_xlabel('Hour of the Day')
ax.set_ylabel('Probability of Being Productive')
ax.set_title('Probability of Being Productive at Each Hour')
ax.set_xticks(range(time_slots))
plt.show()


In [None]:
# List only the hours whose estimated probability is exactly 1.00.
for hour, prob in enumerate(probabilities):
    if prob != 1.00:
        continue
    print(f"Hour {hour:02d}:00 - {hour:02d}:59: {prob:.2f}")
