In [5]:
pip install numpy matplotlib gym torch

Note: you may need to restart the kernel to use updated packages.


In [95]:
import numpy as np
import matplotlib.pyplot as plt
import random
from collections import deque

import torch
import torch.nn as nn
import torch.optim as optim
import random
import numpy as np
from collections import deque
import torch.nn.functional as F


In [106]:
class RessourceEnv:
    """One-step bandwidth-allocation environment.

    ``reset()`` draws one integer bandwidth request per user; ``step()``
    scores a list of per-user allocations against those requests and always
    ends the episode.

    Args:
        capacity: Total bandwidth that can be handed out during one step.
        num_users: Number of users issuing a request each episode.
        max_request: Inclusive upper bound of a single user's random request.
    """

    def __init__(self, capacity=100, num_users=10, max_request=20):
        self.total_bandwidth = capacity  # Total available bandwidth
        self.num_users = num_users       # Number of users
        self.max_request = max_request   # Largest request a user can draw
        self.requests = []               # User requests; filled by reset()

    def reset(self):
        """Draw a new request in ``[0, max_request]`` for every user.

        Returns:
            The list of requests (one int per user), which is also the state.
        """
        self.requests = [
            random.randint(0, self.max_request) for _ in range(self.num_users)
        ]
        return self.requests

    def step(self, allocation):
        """Score ``allocation`` against the current requests.

        Users are served in index order. A user counts as satisfied when the
        allocation covers the request *and* the bandwidth still available
        covers the allocation; the full allocation (not just the requested
        amount) is then deducted from the available bandwidth.

        Args:
            allocation: Iterable with one bandwidth amount per user.

        Returns:
            Tuple ``(reward, done, unused_bandwidth_ratio,
            successful_alloc_percentage)`` where ``reward`` is the number of
            satisfied users and ``done`` is always ``True``.

        Raises:
            IndexError: If ``allocation`` has more entries than there are
                current requests (for example when called before ``reset()``).
        """
        unused_bandwidth = self.total_bandwidth
        successful_allocations = 0  # Doubles as the reward: +1 per satisfied user

        for i, alloc in enumerate(allocation):
            if alloc >= self.requests[i] and unused_bandwidth >= alloc:
                successful_allocations += 1
                unused_bandwidth -= alloc

        # Guard both ratios so a zero-capacity or zero-user environment
        # reports 0.0 instead of raising ZeroDivisionError.
        if self.total_bandwidth:
            unused_bandwidth_ratio = unused_bandwidth / self.total_bandwidth
        else:
            unused_bandwidth_ratio = 0.0
        if self.num_users:
            successful_alloc_percentage = (successful_allocations / self.num_users) * 100
        else:
            successful_alloc_percentage = 0.0

        reward = successful_allocations
        done = True  # One-step environment

        return reward, done, unused_bandwidth_ratio, successful_alloc_percentage

In [108]:
class DQN(nn.Module):
    """Fully connected Q-network: two 128-unit ReLU layers and a linear head.

    Args:
        n_observations: Size of the input (state) vector.
        n_actions: Number of outputs, one Q-value per action.
    """

    def __init__(self, n_observations, n_actions):
        super().__init__()
        hidden_units = 128
        self.layer1 = nn.Linear(n_observations, hidden_units)
        self.layer2 = nn.Linear(hidden_units, hidden_units)
        self.layer3 = nn.Linear(hidden_units, n_actions)

    def forward(self, x):
        """Return the Q-values produced for input ``x``."""
        hidden = x
        # Both hidden layers share the same activation; the head stays linear.
        for dense in (self.layer1, self.layer2):
            hidden = F.relu(dense(hidden))
        return self.layer3(hidden)

In [110]:
class DQNAgent:
    """Epsilon-greedy agent that fits a single Q-network from a replay buffer.

    Args:
        state_dim: Length of the state vector fed to the network.
        action_dim: Number of discrete actions.
        lr: Learning rate of the Adam optimizer.
        gamma: Discount factor applied to the bootstrapped next-state value.
        epsilon: Initial probability of picking a random action.
        epsilon_decay: Factor epsilon is multiplied by after each replay pass.
        buffer_size: Maximum number of transitions kept in memory.
        epsilon_min: Floor below which the decay never pushes epsilon.
    """

    def __init__(self, state_dim, action_dim, lr, gamma, epsilon, epsilon_decay, buffer_size,
                 epsilon_min=0.01):
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.lr = lr
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min
        self.memory = deque(maxlen=buffer_size)  # oldest transitions are evicted first
        self.model = DQN(state_dim, action_dim)
        self.optimizer = optim.Adam(self.model.parameters(), lr=lr)

    def act(self, state):
        """Pick an action for ``state`` with an epsilon-greedy policy.

        Returns:
            The action index as a plain Python ``int`` in both branches.
        """
        if np.random.rand() < self.epsilon:
            # int() keeps the return type identical to the greedy branch.
            return int(np.random.choice(self.action_dim))
        # Pure inference: no autograd graph is needed here.
        with torch.no_grad():
            q_values = self.model(torch.tensor(state, dtype=torch.float32))
        return torch.argmax(q_values).item()

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, batch_size):
        """Sample ``batch_size`` transitions and take one optimizer step per sample.

        Does nothing until the buffer holds at least ``batch_size``
        transitions. After the updates, epsilon is decayed once and clamped
        at ``epsilon_min``; an epsilon already at or below the floor is left
        untouched.
        """
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                # The bootstrap value is a constant of the loss, so it is
                # computed without tracking gradients.
                with torch.no_grad():
                    next_q = self.model(torch.tensor(next_state, dtype=torch.float32))
                target = reward + self.gamma * torch.max(next_q).item()

            # One forward pass serves as both the prediction and the template
            # for the target vector: only the taken action's entry differs,
            # so the other outputs contribute zero error.
            prediction = self.model(torch.tensor(state, dtype=torch.float32))
            target_f = prediction.detach().clone()
            target_f[action] = target

            self.optimizer.zero_grad()
            loss = F.mse_loss(prediction, target_f)
            loss.backward()
            self.optimizer.step()

        if self.epsilon > self.epsilon_min:
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)


In [148]:
# Train a DQN agent on the one-step bandwidth-allocation environment and
# report per-episode statistics plus their averages over the whole run.

# Problem size and action space
capacity = 200
num_users = 40
max_bandwidth = 20  # Maximum allocation per user
state_dim = num_users  # State size = number of users
action_dim = max_bandwidth + 1  # Action space size = 21 (0 to 20 bandwidth units)

# Training schedule
num_episodes = 3000
batch_size = 30
log_every = 1  # Print every N-th episode; raise (e.g. to 100) to avoid flooding the output

# Seed every RNG used by the environment, the agent and the network
# initialisation so that re-running this cell reproduces the same results.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Initialize environment and agent
env = RessourceEnv(capacity=capacity, num_users=num_users)
agent = DQNAgent(state_dim, action_dim, lr=0.0005, gamma=0.99, epsilon=1.0, epsilon_decay=0.995, buffer_size=10000)

# Running sums over all episodes, turned into averages after the loop
total_test_reward = 0
total_successful_allocations = 0
total_unused_bandwidth = 0
total_successful_alloc_percentage = 0

for episode in range(num_episodes):
    state = env.reset()  # Get the initial state
    done = False
    total_reward = 0
    successful_allocations = 0
    unused_bandwidth = 0
    successful_alloc_percentage = 0

    while not done:
        # Choose an action (bandwidth allocation) based on the current state
        action = agent.act(state)

        # The agent outputs one bandwidth level; every user receives that same amount
        allocation = [action] * num_users  # This could be customized per user

        # Take the step in the environment (the reward is the number of satisfied users)
        reward, done, unused_bandwidth_ratio, successful_alloc_percentage = env.step(allocation)
        total_reward += reward
        successful_allocations += reward
        unused_bandwidth += unused_bandwidth_ratio

        # env.step() does not return a successor state, so a freshly drawn
        # request vector is stored as next_state. The agent only reads it
        # when done is False.
        next_state = env.reset()
        agent.remember(state, action, reward, next_state, done)

        # Train the agent (replay the memory)
        agent.replay(batch_size)

        state = next_state  # Update state to the next state

    # Track stats for average
    total_test_reward += total_reward
    total_successful_allocations += successful_allocations
    total_unused_bandwidth += unused_bandwidth
    total_successful_alloc_percentage += successful_alloc_percentage

    # Output the progress
    if (episode + 1) % log_every == 0:
        print(f"Episode {episode + 1}: Reward = {total_reward}, "
              f"Successful Allocations = {successful_allocations}, "
              f"Unused Bandwidth Ratio = {unused_bandwidth_ratio:.2f}, "
              f"Successful Allocation Percentage = {successful_alloc_percentage:.2f}%")

# Calculate averages
average_test_reward = total_test_reward / num_episodes
average_successful_allocations = total_successful_allocations / num_episodes
average_used_bandwidth = 1 - total_unused_bandwidth / num_episodes  # used = 1 - mean unused ratio
average_successful_alloc_percentage = total_successful_alloc_percentage / num_episodes

# Final averages
print(f"\nAverage Test Reward over {num_episodes} episodes: {average_test_reward}")
print(f"Average Successful Allocations: {average_successful_allocations}")
print(f"Average Used Bandwidth Ratio: {average_used_bandwidth:.2f}")
print(f"Average Successful Allocation Percentage: {average_successful_alloc_percentage:.2f}%")


Episode 1: Reward = 6, Successful Allocations = 6, Unused Bandwidth Ratio = 0.94, Successful Allocation Percentage = 15.00%
Episode 2: Reward = 15, Successful Allocations = 15, Unused Bandwidth Ratio = 0.70, Successful Allocation Percentage = 37.50%
Episode 3: Reward = 10, Successful Allocations = 10, Unused Bandwidth Ratio = 0.80, Successful Allocation Percentage = 25.00%
Episode 4: Reward = 13, Successful Allocations = 13, Unused Bandwidth Ratio = 0.03, Successful Allocation Percentage = 32.50%
Episode 5: Reward = 16, Successful Allocations = 16, Unused Bandwidth Ratio = 0.04, Successful Allocation Percentage = 40.00%
Episode 6: Reward = 11, Successful Allocations = 11, Unused Bandwidth Ratio = 0.01, Successful Allocation Percentage = 27.50%
Episode 7: Reward = 20, Successful Allocations = 20, Unused Bandwidth Ratio = 0.00, Successful Allocation Percentage = 50.00%
Episode 8: Reward = 14, Successful Allocations = 14, Unused Bandwidth Ratio = 0.02, Successful Allocation Percentage = 3