In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
input_file_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Client-Server Environment Simulator

In [5]:
import gymnasium as gym

  and should_run_async(code)


In [2]:
class Server:
    """One server slot in the simulated pool.

    Attributes:
        capacity: Number of requests the server can handle per second.
        active: Activity flag; every new server starts out inactive.
    """

    def __init__(self, capacity):
        """Create an inactive server with the given per-second capacity."""
        self.active = False
        self.capacity = capacity

In [3]:
class ClientServerEnv:
    """Plain-Python simulator of a server pool draining a request queue.

    Each step the caller adds, keeps or removes one server, new requests
    arrive according to ``demand_pattern``, and the active servers process as
    many queued requests as their combined capacity allows.

    Note: this class does not subclass ``gym.Env``; it only mimics the legacy
    ``reset() -> obs`` / ``step() -> (obs, reward, done, info)`` call shape.

    Args:
        max_servers: Upper bound on the number of active servers.
        demand_pattern: ``'poisson'`` or ``'sinusoidal'``.
    """

    def __init__(self, max_servers=10, demand_pattern='poisson'):
        # Server settings
        self.max_servers = max_servers
        self.servers = [Server(capacity=10) for _ in range(max_servers)]
        self.active_servers = 1  # Start with 1 server

        # Client demand
        self.demand_pattern = demand_pattern  # 'poisson' or 'sinusoidal'
        self.queue = []
        # Step counter. The 'sinusoidal' pattern reads it, so it must exist
        # before the first call to _generate_demand().
        self.time = 0

        # RL settings
        self.state_dim = 3  # [active_servers, queue_length, demand]
        self.action_space = [-1, 0, +1]  # Actions: remove, keep, add

    def reset(self):
        """Restore the initial configuration and return the first observation."""
        self.active_servers = 1
        self.queue = []
        self.time = 0
        return self._get_state()

    def _get_state(self):
        """Return the normalized state ``[active_servers, queue_length, demand]``.

        The demand component comes from a fresh ``_generate_demand()`` call,
        not from the value ``step()`` used when filling the queue.
        """
        return np.array([
            self.active_servers / self.max_servers,
            len(self.queue) / 100,  # Assume max queue=100
            self._generate_demand() / 20  # Normalize demand
        ])

    def _generate_demand(self):
        """Return the number of incoming requests for the current time step.

        Only ``'poisson'`` and ``'sinusoidal'`` are handled; any other pattern
        falls through and yields ``None``.
        """
        if self.demand_pattern == 'poisson':
            return np.random.poisson(lam=10)  # Avg 10 requests/sec
        elif self.demand_pattern == 'sinusoidal':
            return 10 + 10 * np.sin(self.time * 0.1)  # Time-varying

    def step(self, action):
        """Apply a server-count change and simulate one time step.

        Args:
            action: Change in the number of active servers (-1, 0 or +1).

        Returns:
            ``(next_state, reward, done, info)``; ``done`` is always ``False``
            and ``info`` is an empty dict.
        """
        # Update server count (clamp between 1 and max_servers)
        self.active_servers = np.clip(
            self.active_servers + action, 1, self.max_servers
        )

        # Generate new requests
        demand = self._generate_demand()
        self.queue.extend([1] * int(demand))

        # Process requests
        processed = min(
            len(self.queue),
            self.active_servers * self.servers[0].capacity  # All servers have same capacity
        )
        self.queue = self.queue[processed:]

        # Calculate reward
        reward = self._calculate_reward(processed)

        # Advance the clock so the sinusoidal demand actually varies over time
        self.time += 1

        # Next state
        next_state = self._get_state()
        done = False
        return next_state, reward, done, {}

    def _calculate_reward(self, processed):
        """Score a step: processed requests minus queue and idle-server penalties."""
        # Penalize under-provisioning (queue length)
        queue_penalty = len(self.queue) * 0.1
        # Penalize over-provisioning (idle servers); capacity is shared by all servers
        idle_servers = self.active_servers - (processed / self.servers[0].capacity)
        idle_penalty = idle_servers * 0.5
        # Reward processed requests
        processed_reward = processed * 1.0
        return processed_reward - queue_penalty - idle_penalty

In [6]:
from stable_baselines3 import PPO
# NOTE: this cell raises the ValueError shown in its output. ClientServerEnv is
# a plain Python class, not a gym.Env subclass, so PPO rejects it when the
# model is constructed. ServerAllocationEnv, defined further down, is the
# Gymnasium-based version of the same simulator.
env = ClientServerEnv()
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=10_000)

Using cpu device


ValueError: The environment is of type <class '__main__.ClientServerEnv'>, not a Gymnasium environment. In this case, we expect OpenAI Gym to be installed and the environment to be an OpenAI Gym environment.

In [None]:
import matplotlib.pyplot as plt
# Roll out the trained policy for 1000 steps and plot the per-step reward.
# Requires `env` and a trained `model` from a training cell.
states = []
rewards = []


def _initial_obs(reset_result):
    """Extract the observation from either reset() convention."""
    # Gymnasium's reset() returns (obs, info); a legacy-style reset() returns obs alone.
    return reset_result[0] if isinstance(reset_result, tuple) else reset_result


obs = _initial_obs(env.reset())
for _step in range(1000):
    action, _hidden_state = model.predict(obs)
    step_result = env.step(action)
    if len(step_result) == 5:
        # Gymnasium format: (obs, reward, terminated, truncated, info)
        obs, reward, terminated, truncated, _info = step_result
        done = terminated or truncated
    else:
        # Legacy format: (obs, reward, done, info)
        obs, reward, done, _info = step_result
    states.append(obs)
    rewards.append(reward)
    if done:
        # Start a fresh episode so the rollout always has 1000 steps
        obs = _initial_obs(env.reset())
plt.plot(rewards)
plt.xlabel("Time Step")
plt.ylabel("Reward")
plt.show()

In [7]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np

class ServerAllocationEnv(gym.Env):
    """Gymnasium environment for scaling a pool of identical servers.

    Observation (``Box(3,)``, float32, each component clipped to ``[0, 1]``):
        ``[active_servers / max_servers, queue_length / 100, demand / 20]``.
    Actions (``Discrete(3)``): 0 = remove a server, 1 = keep, 2 = add a server.
    Reward: processed requests, minus 0.1 per request still queued, minus 0.5
    per idle server.
    Episodes never terminate or truncate on their own; ``step()`` always
    returns ``terminated=False`` and ``truncated=False``.

    Args:
        max_servers: Upper bound on the number of active servers.
        demand_pattern: ``'poisson'`` or ``'sinusoidal'``; any other value
            yields a constant demand of 10.
    """

    # Gymnasium reads the "render_modes" key ("render.modes" was the legacy Gym key).
    metadata = {'render_modes': ['human']}

    def __init__(self, max_servers=10, demand_pattern='poisson'):
        super().__init__()

        # Server configuration
        self.max_servers = max_servers
        self.server_capacity = 10  # Requests/sec per server
        self.active_servers = 1

        # Demand configuration
        self.demand_pattern = demand_pattern
        self.queue = []
        self.time = 0

        # Gymnasium spaces
        self.action_space = spaces.Discrete(3)  # 0=remove, 1=keep, 2=add
        self.observation_space = spaces.Box(
            low=np.array([0, 0, 0], dtype=np.float32),
            high=np.array([1, 1, 1], dtype=np.float32),
            shape=(3,), dtype=np.float32
        )

    def reset(self, seed=None, options=None):
        """Reset servers, queue and clock; return ``(observation, info)``."""
        super().reset(seed=seed)
        self.active_servers = 1
        self.queue = []
        self.time = 0
        return self._get_state(), {}

    def _get_state(self):
        """Build the normalized observation, clipped to the declared Box bounds.

        NOTE(review): the demand component is a fresh ``_generate_demand()``
        call, so for 'poisson' it is a different sample from the one
        ``step()`` added to the queue.
        """
        raw_state = np.array([
            self.active_servers / self.max_servers,
            len(self.queue) / 100,  # Normalized queue length
            self._generate_demand() / 20  # Normalized demand
        ], dtype=np.float32)
        # Queue length and Poisson demand have no upper bound, so without
        # clipping the observation could fall outside observation_space.
        return np.clip(
            raw_state, self.observation_space.low, self.observation_space.high
        )

    def _generate_demand(self):
        """Return the number of incoming requests for the current time step."""
        if self.demand_pattern == 'poisson':
            return self.np_random.poisson(lam=10)
        elif self.demand_pattern == 'sinusoidal':
            return 10 + 10 * np.sin(self.time * 0.1)
        return 10

    def step(self, action):
        """Apply the chosen action and simulate one time step.

        Args:
            action: 0, 1 or 2, meaning remove, keep or add one server.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
        """
        # Convert action to server change (-1, 0, +1)
        server_change = action - 1
        self.active_servers = np.clip(
            self.active_servers + server_change, 1, self.max_servers
        )

        # Generate demand and process requests
        demand = self._generate_demand()
        self.queue.extend([1] * int(demand))
        processed = min(len(self.queue), self.active_servers * self.server_capacity)
        self.queue = self.queue[processed:]

        # Calculate reward
        reward = self._calculate_reward(processed)
        self.time += 1

        # Return Gymnasium step format
        return self._get_state(), reward, False, False, {}

    def _calculate_reward(self, processed):
        """Score a step: processed requests minus queue and idle-server penalties."""
        idle_servers = self.active_servers - (processed / self.server_capacity)
        return (
            processed * 1.0                # Reward for processed requests
            - len(self.queue) * 0.1         # Penalize queue length
            - idle_servers * 0.5            # Penalize idle servers
        )

    def render(self, mode='human'):
        """Print the current time, active server count and queue length."""
        print(f"Time: {self.time} | Servers: {self.active_servers} | Queue: {len(self.queue)}")

    def close(self):
        """Nothing to release."""
        pass

# Example usage with Gymnasium: run a random policy for 1000 steps and
# print the environment status on every 100th step.
if __name__ == "__main__":
    env = ServerAllocationEnv()
    obs, info = env.reset()

    # A named index is used instead of `_`: the loop body reads it, and `_`
    # is also IPython's "last output" variable.
    for step_idx in range(1000):
        action = env.action_space.sample()  # Random policy
        obs, reward, terminated, truncated, info = env.step(action)

        if step_idx % 100 == 0:
            env.render()

        if terminated or truncated:
            obs, info = env.reset()

Time: 1 | Servers: 2 | Queue: 0
Time: 101 | Servers: 4 | Queue: 0
Time: 201 | Servers: 6 | Queue: 0
Time: 301 | Servers: 4 | Queue: 0
Time: 401 | Servers: 10 | Queue: 0
Time: 501 | Servers: 10 | Queue: 0
Time: 601 | Servers: 10 | Queue: 0
Time: 701 | Servers: 4 | Queue: 0
Time: 801 | Servers: 5 | Queue: 0
Time: 901 | Servers: 8 | Queue: 0


In [8]:
from stable_baselines3 import PPO
# Train PPO on the Gymnasium-compatible environment.
# A fixed seed makes the training run repeatable across kernel restarts.
SEED = 42
env = ServerAllocationEnv()
model = PPO("MlpPolicy", env, verbose=1, seed=SEED)
model.learn(total_timesteps=10_000)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
-----------------------------
| time/              |      |
|    fps             | 1112 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 824         |
|    iterations           | 2           |
|    time_elapsed         | 4           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.012553623 |
|    clip_fraction        | 0.188       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.09       |
|    explained_variance   | -0.00283    |
|    learning_rate        | 0.0003      |
|    loss                 | 6.45e+03    |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.0191     |
|    value_loss         

<stable_baselines3.ppo.ppo.PPO at 0x7ab6448b1cc0>