In [1]:
import numpy as np
import random
from collections import deque
import tensorflow as tf
from tensorflow.keras import layers

In [None]:
class SlicingEnv:
    """Minimal episodic environment for per-step slice resource allocation.

    One episode walks once through ``demand_data``. At every step the agent
    observes ``[load, slice_id]`` and picks a discrete allocation level; the
    reward compares the allocated amount with the load of that step.

    Attributes:
        state_size: Length of the observation vector (``[load, slice_id]``).
        action_size: Number of discrete actions; action ``k`` allocates
            ``k * 10`` percent of ``total_resources``.
    """

    # Numeric encoding of the slice-type labels placed in the state vector.
    # Kept at class level so the table is not rebuilt on every observation.
    SLICE_IDS = {"emBB": 0, "URLLC": 1, "mMTC": 2}

    def __init__(self, demand_data, slice_types, total_resources=100):
        """Store the episode data.

        Args:
            demand_data: Indexable sequence of per-step loads, expressed in the
                same unit as ``total_resources`` (the reward compares them
                directly).
            slice_types: Indexable sequence of slice labels, one per step; each
                must be a key of ``SLICE_IDS``.
            total_resources: Amount allocated by the 100% action.

        Raises:
            ValueError: If ``slice_types`` has fewer entries than ``demand_data``.
        """
        # Fail at construction time instead of with an IndexError mid-episode.
        if len(slice_types) < len(demand_data):
            raise ValueError(
                f"slice_types has {len(slice_types)} entries but demand_data "
                f"has {len(demand_data)}; one slice type is needed per step"
            )
        self.demand_data = demand_data
        self.slice_types = slice_types
        self.total_resources = total_resources
        self.current_step = 0
        self.total_steps = len(demand_data)
        self.state_size = 2  # [loadInt, sliceTypeId]
        self.action_size = 11  # Discrete allocation: 0%, 10%, ..., 100%

    def reset(self):
        """Rewind to the first step and return its observation."""
        self.current_step = 0
        return self._get_state()

    def _get_state(self):
        """Return the float32 observation ``[load, slice_id]`` for the current step."""
        load = self.demand_data[self.current_step]
        slice_type = self.slice_types[self.current_step]
        slice_id = self.SLICE_IDS[slice_type]
        return np.array([load, slice_id], dtype=np.float32)

    def calculate_reward(self, action):
        """Score ``action`` against the load of the current step.

        Returns ``1 - over / total_resources`` when the allocation covers the
        load (``over`` being the surplus), otherwise ``-under / total_resources``
        (``under`` being the shortfall).

        Raises:
            ValueError: If ``action`` is outside ``[0, action_size)``.
        """
        # An out-of-range action would allocate less than 0% or more than 100%.
        if not 0 <= action < self.action_size:
            raise ValueError(
                f"action must be in [0, {self.action_size - 1}], got {action!r}"
            )
        load = self.demand_data[self.current_step]
        alloc_percent = action * 10  # 0%, 10%, ..., 100%
        alloc = alloc_percent / 100.0 * self.total_resources

        # Simple reward logic: penalize under/overallocation
        if alloc >= load:
            return 1 - (alloc - load) / self.total_resources
        else:
            return -1 * (load - alloc) / self.total_resources

    def step(self, action):
        """Apply ``action`` and advance one step.

        Returns:
            ``(next_state, reward, done)``. When ``done`` is True, ``next_state``
            is an all-zero placeholder of length ``state_size``.

        Raises:
            IndexError: If called after the episode has finished without an
                intervening ``reset()``.
        """
        # Same exception type the raw out-of-bounds lookup would produce, but
        # with a message that tells the caller what to do.
        if self.current_step >= self.total_steps:
            raise IndexError("episode has finished; call reset() before stepping again")

        reward = self.calculate_reward(action)
        self.current_step += 1
        done = self.current_step >= self.total_steps
        if done:
            next_state = np.zeros(self.state_size, dtype=np.float32)
        else:
            next_state = self._get_state()
        return next_state, reward, done

In [16]:

# DQN Agent
class DQNAgent:
    """Deep Q-learning agent with an epsilon-greedy policy and experience replay.

    A small fully connected network maps a state vector to one Q-value per
    discrete action. Transitions are kept in a bounded replay memory and the
    network is trained on random minibatches drawn from it.
    """

    def __init__(self, state_size, action_size, learning_rate=0.001, gamma=0.95, epsilon=1.0, epsilon_decay=0.995, epsilon_min=0.01, memory_size=10000):
        """Create the agent and its Q-network.

        Args:
            state_size: Length of the state vector fed to the network.
            action_size: Number of discrete actions (network outputs).
            learning_rate: Adam learning rate.
            gamma: Discount factor applied to the bootstrapped next-state value.
            epsilon: Initial probability of taking a random action.
            epsilon_decay: Multiplicative decay applied to ``epsilon`` on every
                ``replay`` call.
            epsilon_min: Floor below which ``epsilon`` is not decayed.
            memory_size: Maximum number of transitions kept for replay.
        """
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.learning_rate = learning_rate

        self.model = self.build_model()
        # A bounded deque drops the oldest transition in O(1) when full;
        # list.pop(0) shifts every remaining element on each eviction.
        self.memory = deque(maxlen=memory_size)

    def build_model(self):
        """Build and compile the Q-network (two hidden ReLU layers, linear output, MSE loss)."""
        model = tf.keras.Sequential([
            # Explicit Input object instead of the legacy input_shape= layer argument.
            tf.keras.Input(shape=(self.state_size,)),
            layers.Dense(64, activation='relu'),
            layers.Dense(64, activation='relu'),
            layers.Dense(self.action_size, activation='linear'),
        ])
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate),
                      loss='mse')
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory (oldest entries are evicted when full)."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action for ``state`` with the epsilon-greedy rule.

        Returns:
            A Python ``int`` in ``[0, action_size)``.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        # predict_on_batch skips the per-call dataset setup that predict() does,
        # which matters when it is invoked once per environment step.
        batch = np.asarray(state, dtype=np.float32)[np.newaxis, :]
        q_values = self.model.predict_on_batch(batch)
        return int(np.argmax(q_values[0]))

    def replay(self, batch_size=32):
        """Train on one random minibatch from memory, then decay ``epsilon``.

        The whole minibatch is evaluated and fitted in a single pass: two
        forward passes and one gradient step, instead of two predictions and
        one fit per sampled transition. All targets are therefore computed
        from the same snapshot of the network weights.

        Args:
            batch_size: Maximum number of transitions to sample; fewer are used
                while the memory holds less than that.
        """
        sample_size = min(len(self.memory), batch_size)
        if sample_size > 0:
            minibatch = random.sample(self.memory, sample_size)
            states = np.stack([t[0] for t in minibatch]).astype(np.float32)
            actions = np.array([t[1] for t in minibatch], dtype=np.int64)
            rewards = np.array([t[2] for t in minibatch], dtype=np.float32)
            next_states = np.stack([t[3] for t in minibatch]).astype(np.float32)
            dones = np.array([t[4] for t in minibatch], dtype=bool)

            next_q = np.asarray(self.model.predict_on_batch(next_states))
            # Copy so the rows can be edited in place regardless of what the
            # backend returns.
            targets = np.array(self.model.predict_on_batch(states), dtype=np.float32)

            # Bellman target: r for terminal transitions, r + gamma * max_a' Q(s', a') otherwise.
            bootstrap = self.gamma * np.max(next_q, axis=1)
            td_targets = rewards + np.where(dones, 0.0, bootstrap)
            # Only the Q-value of the action actually taken is moved; the other
            # outputs keep their current prediction and contribute zero loss.
            targets[np.arange(sample_size), actions] = td_targets

            self.model.train_on_batch(states, targets)

        # Decay exploration, but never past the floor. An epsilon that already
        # starts at or below the floor is left untouched.
        if self.epsilon > self.epsilon_min:
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

In [17]:
# Seed every RNG involved so a fresh "Restart & Run All" reproduces the same
# data, exploration choices and weight initialisation.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)
tf.random.set_seed(SEED)

N_STEPS = 500    # time steps per episode
EPISODES = 100
BATCH_SIZE = 32

demand_data = np.random.uniform(0.1, 1.0, N_STEPS)  # loadInt values
slice_types = np.random.choice(['emBB', 'URLLC', 'mMTC'], size=N_STEPS)  # slice types

# Initialize environment and agent
# The loads above lie in [0.1, 1.0], and the reward compares them directly with
# the allocated amount. With the default capacity of 100 the smallest non-zero
# action already allocates 10, i.e. more than any load, which makes the task
# degenerate. Capacity is therefore expressed in the same unit as the loads.
env = SlicingEnv(demand_data, slice_types, total_resources=1.0)
agent = DQNAgent(state_size=env.state_size, action_size=env.action_size)

# Train the agent
for episode in range(EPISODES):
    print(f"Episode {episode + 1}/{EPISODES}")
    state = env.reset()
    total_reward = 0
    done = False

    while not done:
        action = agent.act(state)
        next_state, reward, done = env.step(action)
        agent.remember(state, action, reward, next_state, done)
        # Start learning only once a full minibatch can be drawn; fitting on a
        # handful of early transitions just overfits them.
        if len(agent.memory) >= BATCH_SIZE:
            agent.replay(batch_size=BATCH_SIZE)
        state = next_state
        total_reward += reward

    print(f"Total Episode Reward: {total_reward:.4f}, Epsilon: {agent.epsilon:.4f}")

Episode 1/100


KeyboardInterrupt: 