In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import random
import numpy as np
from collections import namedtuple, deque

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import causallearn

from causallearn.utils.GraphUtils import GraphUtils
import matplotlib.image as mpimg
import io
import os
import matplotlib.cm as cm
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from matplotlib.colors import Normalize
import matplotlib as mp

In [3]:
# Define the DDQN network
class Net(nn.Module):
    def __init__(self, state_size, action_size, hidden_size=64):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(state_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, action_size)

    def forward(self, state):
        x = self.relu(self.fc1(state))
        return self.fc2(x)

In [4]:
# prelare offline data, experiment: bridge, different pktsize-corresponding the different ddioway?

In [5]:
import pandas as pd

# Path to your CSV file
csv_file = f'..\\bridge\\datasets\\combined_64.csv'

# Read the CSV file
df = pd.read_csv(csv_file, header=[0, 1])

FileNotFoundError: [Errno 2] No such file or directory: '..\\bridge\\datasets\\combined_64.csv'

In [6]:
states_set = pd.MultiIndex.from_pruduce([df.columns,["bridge-pcm_Socket 0-IPC","bridge-pcm_Socket 0-L3MISS","bridge-pcm_Socket 0-L3HIT",
                                                     "pcm-pcie_new_skt-0_PCIRdCur","pcm-pcie_new_skt-0_ItoM",
                                                     "input_rate","output_rate","latency",]], names=['Original Column', 'Subcolumn'])
new_df = pd.DataFrame(states_set)
new_df.info()

AttributeError: type object 'MultiIndex' has no attribute 'from_pruduce'

In [9]:
# Experience replay memory
class ReplayMemory:
    def __init__(self, capacity):
        self.memory = deque([], maxlen=capacity)

    def push(self, *args):
        self.memory.append(namedtuple("Transition", ("state", "action", "next_state", "reward"))(*args))

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)

In [10]:
# Function to select an action
def select_action(state, online_net, epsilon):
    if random.random() > epsilon:
        with torch.no_grad():
            return online_net(state).max(1)[1].data.numpy()
        action = action[0] if ENV_A_SHAPE == 0 else action.reshape(ENV_A_SHAPE) 
        exploit_count +=1
    else:
        return torch.tensor([[random.randrange(action_size)]], dtype=torch.long)
        explore_count +=1
    return action

# Training Process

In [5]:
def train_ddqn(env,online_net,target_net, optimizer, memory, episodes, batch_size, target_update):
    for episode in range(episodes):
        state = env.reset()
        total_reward = 0
        epsilon = max(epsilon_end, epsilon_start * (epsilon_decay ** episode))
        
        # Update the target network
        if episode % target_update == 0:
            target_net.load_state_dict(online_net.state_dict())
            
        for t in count():
            action = action = select_action(state, online_net, epsilon)
            next_state, reward, done, _ = env.step(action)
            memory.push(state,action,next_state,reward)
            
            if len(memory) > batch_size:
                transitions = memory.sample(batch_size)
                batch = namedtuple("Transition", ("state", "action", "next_state", "reward"))(*zip(*transitions))
                # Prepare the batch for training
                non_final_mask = torch.tensor(tuple(map(lambda s: s is not None, batch.next_state)), dtype=torch.bool)
                non_final_next_states = torch.cat([s for s in batch.next_state if s is not None])
                state_batch = torch.cat(batch.state)
                action_batch = torch.cat(batch.action)
                reward_batch = torch.tensor(batch.reward)

                # Compute Q values
                state_action_values = online_net(state_batch).gather(1, action_batch)
                next_state_values = torch.zeros(batch_size)
                next_state_values[non_final_mask] = target_net(non_final_next_states).max(1)[0].detach()
                expected_state_action_values = (next_state_values * gamma) + reward_batch

                # Compute loss
                loss = nn.functional.smooth_l1_loss(state_action_values, expected_state_action_values.unsqueeze(1))
                
                # Optimize the model
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            if done:
                break

# Running Process

In [6]:
def run_ddqn(env, online_net):
    state = env.reset()
    total_reward = 0
    while True:
        action = online_net(state).max(1)[1].view(1,1)
        state, reward, done, _ = env.step(action)
        total_reward +=reward
        if done:
            break
    return total_reward


# Environment Definition

In [7]:
class ServerEnvironment:
    # para
    MAX_STEPS = 1000
    DESIRED_THROUGHPUT = 0.95
    def __init__(self,num_nf_instance,num_cores,num_llc_ways):
        self.num_nf_instances = num_nf_instances
        self.num_cores = num_cores
        self.num_llc_ways=num_llc_ways
        self.state=self._initialize_state()
        
    def _initialize_state(self):
        #llc
        base_allocation = self.num_llc_ways // self.num_nf_instances
        remainder = self.num_llc_ways % self.num_nf_instances
        default_llc_partitions = [base_allocation] * self.num_nf_instances
        for i in range(remainder):
            default_llc_partitions[i] += 1
            
        #dma    
        default_dma_buffer_size = 1024
        
        # traffic 
        
        self.state = {
            'llc_partitions':default_llc_partitions,
            'dma_buffer_size':default_dma_buffer_size
        }
        return self.state
        
    def step(self,action):
        new_state = self_apply_action(action)
        reward = self_calculate_reward(new_state)
        done = self._check_termination_condition(new_state)
        return new_state, reward, done
    
    def _apply_action(self, action):
        # action is a dictionary with keys corresponding to what needs to be changed
        # action = {'llc_change': [0.1, -0.1, 0, 0],'dma_change': 256}
        
        #Adjust LLC partition
        if 'llc_change' in action:
            for i,change in enumerate(action['llc_change']):
                self.state['llc_partitions'][i]+= change
            self.state['llc_partitions'] = [max(0,alloc) for alloc in self.state['llc_partitions']]
            total_alloc = sum(self.state['llc_partitions'])
            if total_alloc > 0:
                self.state['llc_partitions'] =[alloc / total_alloc for alloc in self.state['llc_partitions']]
            else:
                self.state['llc_paritions'] = [1./len(self.state['llc_partitions'])]*len(self.state['llc_partitions'])
        
        #Adjust DMA buffer size
        if 'dma_change' in action:
            new_buffer_size = self.state['dma_buffer_size'] + action['dma_change']
            valid_buffer_sizes = [64, 256, 512, 1024, 2048]
            self.state['dma_buffer_size'] = min(valid_buffer_sizes, key=lambda x: abs(x - new_buffer_size))
   
        return self.state

    def _calculate_reward(self, state):
        R = state['throughput']
        T = state['input_rate']
        R_i_list = state['nf_throughput'] # list of throughput for each NF
        T_i_list = state['nf_inputrate'] # list of input rate for each NF
        
        r1 = R/T if T!=0 else 0
        r2 = max((T_i-R_i)/T_i if T_i !=0 else 0 for R_i,T_i in zip(R_i_list, T_i_list))
        eta1 = 1
        eta2 = 0.1
        r_a = eta1 *r1 - eta2 * r2
        return r_a
    def _check_ternimation_condition(self, state):
        self.current_step +=1
        if self.current_step >= self.MAX_STEP:
            return True
        elif state['throughout']>= DESIRED_THROUGHPUT:
            return True
        else:
            return False
    def reset(self):
        # Reset the environment for a new episode
        self.state = self._initialize_state()
        return self.state     

In [9]:
# initialize environment
num_nf_instances = 2
num_llc_ways = 11
num_cores = 20

env= ServerEnvironment(num_nf_instances, num_cores, num_llc_ways)

initial_state = env.reset()
print("Initial state:", initial_state)

Initial state: {'llc_partitions': [6, 5], 'dma_buffer_size': 1024}


# Main Script

In [14]:
# Hyperparameters
state_size = 13 # Number of state features
action_size = 4 # Number of possible actions
hidden_size = 64
batch_size = 32
gamma = 0.95  # Discount factor
epsilon_start = 1.0
epsilon_end = 0.01
epsilon_decay = 0.995
target_update = 20  # How frequently to update the target network
memory_size = 10000
learning_rate = 0.001
capacity = 100
episodes=1

In [15]:
DMA_buffer_space = [64,128,256,512,1024,2048]


In [16]:
# Create DDQN networks and replay memory

online_net = Net(state_size, action_size, hidden_size)
target_net = Net(state_size, action_size, hidden_size)
target_net.load_state_dict(online_net.state_dict())
target_net.eval()  
optimizer = optim.Adam(online_net.parameters(), lr=learning_rate)
memory = ReplayMemory(memory_size)

In [17]:
# Train the model
train_ddqn(env, online_net, target_net, optimizer, memory, episodes, batch_size)

# Save the trained model
torch.save(online_net.state_dict(), 'policy_net.pth')

# Load the trained model for running
online_net.load_state_dict(torch.load('policy_net.pth'))
online_net.eval()  # Set to evaluation mode

# Run the model
total_reward = run_ddqn(env, online_net)
print(f"Total reward: {total_reward}")


TypeError: train_ddqn() missing 1 required positional argument: 'target_update'

In [None]:
env= 