In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the Kaggle input directory.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [3]:
!pip install simpy

Collecting simpy
  Downloading simpy-4.1.1-py3-none-any.whl.metadata (6.1 kB)
Downloading simpy-4.1.1-py3-none-any.whl (27 kB)
Installing collected packages: simpy
Successfully installed simpy-4.1.1


In [58]:
import numpy as np
import simpy
import random
import gym
from gym import spaces
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque

In [76]:
class ServerFarm:
    """A group of servers represented by a single shared SimPy resource.

    Attributes:
        env: The simulation environment the farm lives in.
        num_servers: Capacity handed to the underlying resource.
        servers: ``simpy.Resource`` that requests must acquire before running.
    """

    def __init__(self, env, num_servers):
        self.env, self.num_servers = env, num_servers
        self.servers = simpy.Resource(env, capacity=num_servers)

    def process_request(self, request_id, process_time):
        """Simulation process for one request.

        Yields the resource request (resuming once it is granted), then a
        timeout of ``process_time``. The resource request is released when the
        ``with`` block exits.

        Args:
            request_id: Identifier of the request; accepted but not used here.
            process_time: Delay passed to ``env.timeout`` once a server is held.
        """
        with self.servers.request() as server_slot:
            # First wait until the request for a server is granted ...
            yield server_slot
            # ... then hold it for the requested processing time.
            yield self.env.timeout(process_time)

In [77]:
class ClientServerEnv(gym.Env):
    """Gym environment in which an agent scales a simulated server farm.

    Observation: a one-element float32 array holding the current server count.

    Actions:
        0 -- remove one server (ignored when only one server is left)
        1 -- add one server (ignored when ``max_servers`` is reached)
        any other value (2 in the declared action space) -- keep the count

    Args:
        max_servers: Upper bound on the server count; must be at least 1.
        max_requests: Number of requests generated per step.
        avg_process_time: Mean of the exponential per-request processing time.
        arrival_rate: Rate of the exponential inter-arrival time between
            requests (defaults to 0.5).

    Raises:
        ValueError: If ``max_servers`` is smaller than 1.
    """

    def __init__(self, max_servers=10, max_requests=100, avg_process_time=5, arrival_rate=0.5):
        super(ClientServerEnv, self).__init__()
        if max_servers < 1:
            raise ValueError("max_servers must be at least 1")
        self.max_servers = max_servers
        self.max_requests = max_requests
        self.avg_process_time = avg_process_time
        self.arrival_rate = arrival_rate

        self.action_space = spaces.Discrete(3)  # Actions: 0 = decrease, 1 = increase, 2 = maintain
        self.observation_space = spaces.Box(low=0, high=max_servers, shape=(1,), dtype=np.float32)

        self._start_simulation()

    def _start_simulation(self):
        """Create a fresh simulation with the initial server count."""
        # Start from 3 servers, but never above the configured maximum so the
        # invariant 1 <= num_servers <= max_servers holds from the first step.
        self.num_servers = min(3, self.max_servers)
        self.env = simpy.Environment()
        self.server_farm = ServerFarm(self.env, self.num_servers)

    def step(self, action):
        """Apply a scaling action, simulate one batch of requests, and score it.

        Returns:
            ``(state, reward, done, info)`` where ``state`` is the server count
            as a float32 array, ``done`` is always ``False`` and ``info`` is an
            empty dict.
        """
        if action == 0 and self.num_servers > 1:
            self.num_servers -= 1  # Reduce server count
        elif action == 1 and self.num_servers < self.max_servers:
            self.num_servers += 1  # Increase server count

        # Rebuild the farm with the new capacity and run one batch to completion.
        self.server_farm = ServerFarm(self.env, self.num_servers)
        self.env.process(self.generate_requests())
        self.env.run()

        # The reward peaks at 5 when the server count equals max_requests // 10
        # and drops by 1 per server of distance from that target. The server
        # count is always within [1, max_servers] here, so no out-of-range
        # penalty is needed.
        # NOTE(review): nothing measured by the simulation above feeds into the
        # reward -- confirm whether waiting time or utilisation should.
        reward = 5 - abs(self.num_servers - (self.max_requests // 10))

        state = np.array([self.num_servers], dtype=np.float32)
        done = False

        return state, reward, done, {}

    def reset(self):
        """Restart from the initial server count with a fresh simulation.

        Returns:
            The initial observation as a one-element float32 array.
        """
        self._start_simulation()
        return np.array([self.num_servers], dtype=np.float32)

    def generate_requests(self):
        """Simulation process that submits ``max_requests`` requests to the farm.

        Each request gets an exponentially distributed processing time with
        mean ``avg_process_time``; consecutive requests are separated by an
        exponentially distributed gap with rate ``arrival_rate``.
        """
        for i in range(self.max_requests):
            process_time = random.expovariate(1.0 / self.avg_process_time)
            self.env.process(self.server_farm.process_request(i, process_time))
            yield self.env.timeout(random.expovariate(self.arrival_rate))

In [78]:
class DQN(nn.Module):
    """Fully connected Q-network: state -> 24 -> 24 -> one value per action."""

    def __init__(self, state_size, action_size):
        super().__init__()
        hidden_units = 24
        # Layer names are part of the saved state_dict keys; keep them stable.
        self.fc1 = nn.Linear(state_size, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.fc3 = nn.Linear(hidden_units, action_size)

    def forward(self, x):
        """Return the action values for ``x``; ReLU follows the first two layers."""
        first_hidden = self.fc1(x).relu()
        second_hidden = self.fc2(first_hidden).relu()
        return self.fc3(second_hidden)

In [79]:
class DQNAgent:
    """Epsilon-greedy DQN agent with a replay memory and a target network.

    Attributes:
        memory: Bounded deque of ``(state, action, reward, next_state, done)``.
        gamma: Discount factor applied to the bootstrapped next-state value.
        epsilon: Current exploration probability.
        epsilon_min: Lower bound for ``epsilon``, used by both ``act`` and
            the decay in ``replay``.
        epsilon_decay: Multiplicative decay applied once per ``replay`` call.
        model: Online Q-network that is trained.
        target_model: Copy of ``model`` used to compute bootstrap targets.
        update_target_every: Suggested number of episodes between target syncs;
            only stored here, the caller decides when to sync.
    """

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.1  # Ensure continued exploration
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001

        self.model = DQN(state_size, action_size)
        self.target_model = DQN(state_size, action_size)  # Target network
        self.target_model.load_state_dict(self.model.state_dict())

        self.update_target_every = 20
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        self.criterion = nn.MSELoss()

    @staticmethod
    def _as_batch(state):
        """Convert one observation into a float32 tensor with a leading batch axis."""
        return torch.as_tensor(state, dtype=torch.float32).unsqueeze(0)

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action for ``state`` with an epsilon-greedy policy.

        The exploration probability never drops below ``epsilon_min``.

        Returns:
            The chosen action index as a Python int.
        """
        if np.random.rand() < max(self.epsilon, self.epsilon_min):
            return random.randrange(self.action_size)
        with torch.no_grad():
            action_values = self.model(self._as_batch(state))
        return torch.argmax(action_values).item()

    def replay(self, batch_size):
        """Train on ``batch_size`` randomly sampled transitions, one at a time.

        Does nothing while the memory holds fewer than ``batch_size`` entries.
        After training, ``epsilon`` is decayed once and clamped at
        ``epsilon_min``.
        """
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                # Bootstrap from the target network; no gradient is needed here.
                with torch.no_grad():
                    next_values = self.target_model(self._as_batch(next_state))
                target += self.gamma * torch.max(next_values).item()

            predicted = self.model(self._as_batch(state))
            # The regression target equals the current prediction everywhere
            # except the taken action, so only that output receives an error.
            # It is a detached copy, so it stays a constant during backprop.
            expected = predicted.detach().clone()
            expected[0][action] = target

            self.optimizer.zero_grad()
            loss = self.criterion(predicted, expected)
            loss.backward()
            self.optimizer.step()
        if self.epsilon > self.epsilon_min:
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.load_state_dict(self.model.state_dict())

## Training the agent

In [80]:
# Create the environment first, then size the agent from its observation and
# action spaces so the network dimensions always match the environment.
env = ClientServerEnv()
state_size, action_size = env.observation_space.shape[0], env.action_space.n
agent = DQNAgent(state_size, action_size)

In [82]:
episodes = 500
for e in range(episodes):
    # Every episode starts from the environment's initial observation.
    state, total_reward = env.reset(), 0

    # Collect up to 200 transitions into the replay memory.
    for step_idx in range(200):
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        agent.remember(state, action, reward, next_state, done)
        total_reward += reward
        state = next_state
        if done:
            break

    # Train 5 times per episode
    for _ in range(5):
        agent.replay(32)

    # Sync the target network on every `update_target_every`-th episode.
    if e % agent.update_target_every == 0:
        agent.update_target_model()

    print(f"Episode {e+1}/{episodes}, Total Reward: {total_reward}, Epsilon: {agent.epsilon:.2f}")

Episode 1/500, Total Reward: 974, Epsilon: 0.10
Episode 2/500, Total Reward: 958, Epsilon: 0.10
Episode 3/500, Total Reward: 964, Epsilon: 0.10
Episode 4/500, Total Reward: 967, Epsilon: 0.10
Episode 5/500, Total Reward: 945, Epsilon: 0.10
Episode 6/500, Total Reward: 973, Epsilon: 0.10
Episode 7/500, Total Reward: 976, Epsilon: 0.10
Episode 8/500, Total Reward: 962, Epsilon: 0.10
Episode 9/500, Total Reward: 963, Epsilon: 0.10
Episode 10/500, Total Reward: 974, Epsilon: 0.10
Episode 11/500, Total Reward: 971, Epsilon: 0.10
Episode 12/500, Total Reward: 967, Epsilon: 0.10
Episode 13/500, Total Reward: 970, Epsilon: 0.10
Episode 14/500, Total Reward: 965, Epsilon: 0.10
Episode 15/500, Total Reward: 972, Epsilon: 0.10
Episode 16/500, Total Reward: 970, Epsilon: 0.10
Episode 17/500, Total Reward: 941, Epsilon: 0.10
Episode 18/500, Total Reward: 976, Epsilon: 0.10
Episode 19/500, Total Reward: 972, Epsilon: 0.10
Episode 20/500, Total Reward: 963, Epsilon: 0.10
Episode 21/500, Total Reward:

In [83]:
# Persist only the online network's weights (its state_dict), not the agent.
trained_weights = agent.model.state_dict()
torch.save(trained_weights, "dqn_server_optimizer.pth")
print("Model saved successfully.")

Model saved successfully.


In [84]:
# Load the trained model
# weights_only=True limits unpickling to tensors and plain containers, which is
# all a state_dict holds, and avoids the torch.load FutureWarning.
agent.model.load_state_dict(torch.load("dqn_server_optimizer.pth", weights_only=True))
agent.model.eval()
print("Model loaded successfully.")

# Test the model
state = env.reset()
for _ in range(20):  # Run for 20 test steps
    # Evaluate the learned policy greedily: agent.act() would still pick a
    # random action with probability >= 0.1, which blurs the evaluation.
    with torch.no_grad():
        q_values = agent.model(torch.as_tensor(state, dtype=torch.float32).unsqueeze(0))
    action = torch.argmax(q_values).item()
    state, reward, _, _ = env.step(action)
    print(f"Action: {action}, Servers: {state}, Reward: {reward}")


Model loaded successfully.
Action: 1, Servers: [4.], Reward: -1
Action: 1, Servers: [5.], Reward: 0
Action: 1, Servers: [6.], Reward: 1
Action: 1, Servers: [7.], Reward: 2
Action: 1, Servers: [8.], Reward: 3
Action: 1, Servers: [9.], Reward: 4
Action: 1, Servers: [10.], Reward: 5
Action: 2, Servers: [10.], Reward: 5
Action: 2, Servers: [10.], Reward: 5
Action: 2, Servers: [10.], Reward: 5
Action: 2, Servers: [10.], Reward: 5
Action: 2, Servers: [10.], Reward: 5
Action: 2, Servers: [10.], Reward: 5
Action: 2, Servers: [10.], Reward: 5
Action: 2, Servers: [10.], Reward: 5
Action: 2, Servers: [10.], Reward: 5
Action: 2, Servers: [10.], Reward: 5
Action: 2, Servers: [10.], Reward: 5
Action: 2, Servers: [10.], Reward: 5
Action: 2, Servers: [10.], Reward: 5


  agent.model.load_state_dict(torch.load("dqn_server_optimizer.pth"))
