In [5]:
import pandas as pd
import numpy as np
import gym
from gym import spaces
from stable_baselines3 import PPO

class NodeEnvironment(gym.Env):
    """Environment that picks a secondary host for each primary host in turn.

    An episode visits the rows of ``data`` in order. At every step the
    observation is the feature row of the current primary host and the action
    is the row index of the host chosen as its secondary. Choosing a host
    degrades that host's features (see ``_update_host_state``), so later
    choices in the same episode see the updated values.

    Args:
        data: DataFrame with a ``'Host'`` column; every column after the first
            is used as a numeric feature. NOTE(review): the code assumes
            ``'Host'`` is the first column and that the last column is the
            health metric -- confirm against the CSV schema.
    """

    def __init__(self, data):
        super().__init__()
        self.data = data
        self.hosts = data['Host'].values

        # Build the working state through the same path reset() uses, so the
        # array is a private float32 copy and in-place updates made before the
        # first reset() cannot write through into ``self.data``.
        self._reset_host_states()

        # Action space: index of the host chosen as secondary
        self.action_space = spaces.Discrete(len(self.hosts))

        # Observation space: one host's feature row. Features start out
        # normalized, but _update_host_state() adds 0.3 to the metrics on every
        # selection, so values can grow past 1; the upper bound is left open so
        # the declared space actually contains the observations.
        self.observation_space = spaces.Box(
            low=0, high=np.inf, shape=(self.state.shape[1],), dtype=np.float32
        )

        # Current primary host index
        self.current_primary_idx = 0

    def reset(self, seed=None, options=None):
        """Restore the original host features and start again at the first host.

        Returns:
            Tuple ``(observation, info)`` where ``observation`` is the feature
            row of host 0 and ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        self.current_primary_idx = 0
        self._reset_host_states()
        return self._observation(), {}

    def step(self, action):
        """Assign ``action`` as secondary for the current primary host.

        Args:
            action: Row index of the chosen secondary host.

        Returns:
            Tuple ``(observation, reward, done, truncated, info)``. When the
            last host has been processed ``done`` is True and the observation
            is a zero vector.

        Raises:
            ValueError: If ``action`` is outside ``[0, len(hosts))``.
        """
        # Policies hand over NumPy scalars/0-d arrays; use a plain int index.
        action = int(action)
        if not 0 <= action < len(self.hosts):
            raise ValueError(f"Invalid action: {action}")

        reward = self._calculate_reward(self.current_primary_idx, action)

        # Update secondary host's state dynamically
        self._update_host_state(action)

        # Move to next primary host
        self.current_primary_idx += 1
        done = self.current_primary_idx >= len(self.hosts)
        truncated = False  # This can be updated based on specific use cases

        return self._observation(), reward, done, truncated, {}

    def _observation(self):
        """Return the current primary host's features, or zeros past the end."""
        if self.current_primary_idx >= len(self.hosts):
            return np.zeros(self.state.shape[1], dtype=np.float32)
        # Copy so later in-place state updates cannot alter an observation
        # that has already been handed to the caller.
        return self.state[self.current_primary_idx].copy()

    def _calculate_reward(self, primary_idx, secondary_idx):
        """Return ``health - latency`` for the chosen pairing (0.0 on NaN).

        ``health`` is the last feature of the secondary host. ``latency`` is
        read from the primary host's row at column
        ``secondary_idx % n_features``.
        NOTE(review): this presumes the feature columns are laid out so that
        column ``i`` holds the latency towards host ``i``; the modulo would
        silently hide a mismatch -- verify against the CSV column order.
        """
        primary_features = self.state[primary_idx]
        secondary_features = self.state[secondary_idx]

        # Example reward function: prioritize low latency and high health
        latency = primary_features[secondary_idx % len(primary_features)]
        health = secondary_features[-1]

        if np.isnan(latency) or np.isnan(health):
            return 0.0

        # Always hand back a plain Python float rather than a NumPy scalar.
        return float(health - latency)

    def _update_host_state(self, host_idx):
        """Dynamically update the state of the selected secondary host."""
        # Decrease health significantly
        self.state[host_idx, -1] *= 0.7  # Reduce health by 30%
        # Increase latency or other metrics significantly
        self.state[host_idx, :-1] += 0.3  # Increase latency or other metrics

    def _reset_host_states(self):
        """Reset the dynamic state of hosts from the original data."""
        # astype(copy=True) guarantees a fresh array that matches the float32
        # dtype declared by the observation space.
        self.state = self.data.iloc[:, 1:].values.astype(np.float32, copy=True)

# Read the per-host feature table (one row per host, 'Host' holds the name)
data = pd.read_csv("node_features_with_pairwise_metrics.csv")

# Min-max scale every column except the host name
feature_cols = [col for col in data.columns if col != 'Host']


data[feature_cols] = data[feature_cols].apply(
    lambda series: (series - series.min()) / (series.max() - series.min())
)

# Missing values (including 0/0 from constant columns) become 0
data = data.fillna(0)

# Build the environment the agent is trained on
env_train = NodeEnvironment(data)

# Define a DRL agent (PPO example)
class DRLAgent:
    """Small factory that builds an RL model bound to one environment."""

    def __init__(self, env):
        # Environment every model created by this agent will be attached to
        self.env = env

    def get_model(self, algo, model_kwargs):
        """Create a model for ``algo`` on this agent's environment.

        Args:
            algo: Algorithm name; only ``"ppo"`` is handled.
            model_kwargs: Mapping of extra keyword arguments forwarded to the
                ``PPO`` constructor.

        Returns:
            A ``PPO`` instance using ``"MlpPolicy"`` with ``verbose=1``.

        Raises:
            ValueError: If ``algo`` is anything other than ``"ppo"``.
        """
        # Guard clause: reject anything we do not know how to build.
        if algo != "ppo":
            raise ValueError("Unsupported algorithm")
        return PPO("MlpPolicy", self.env, verbose=1, **model_kwargs)

# PPO hyper-parameters forwarded unchanged to the PPO constructor
PPO_PARAMS = dict(
    n_steps=512,
    ent_coef=0.01,
    learning_rate=2.5e-4,
    batch_size=128,
)

# Bind an agent to the training environment and build the PPO model
agent = DRLAgent(env_train)
model_ppo = agent.get_model(algo="ppo", model_kwargs=PPO_PARAMS)

# Train for a fixed budget of environment steps
model_ppo.learn(total_timesteps=10000)

# Modify PRIMARY_TO_SECONDARY dynamically
def predict_backup_mapping(model, env, primary_hosts, excluded_hosts=('stamper', 'client')):
    """Choose a secondary (backup) host for each primary host.

    For every primary the model is queried with that host's current feature
    row. If the predicted host is not an allowed secondary, the allowed host
    with the highest value in the last state column is used instead. After
    each assignment ``env._update_host_state`` is called for the chosen host,
    so later primaries see the updated state.

    Args:
        model: Object exposing ``predict(observation, deterministic=True)``
            that returns ``(action, extra)``.
        env: Environment exposing ``hosts`` (array of host names), ``state``
            (2-D feature array) and ``_update_host_state(index)``.
        primary_hosts: Iterable of host names that need a secondary.
        excluded_hosts: Host names that may never be used as a secondary.

    Returns:
        Dict mapping each primary host name to its secondary host name.

    Raises:
        ValueError: If a primary is not present in ``env.hosts``, or if no
            allowed secondary host exists for a primary.
    """
    # Build the name -> first-index lookup once instead of re-scanning the
    # host list for every primary (setdefault keeps the first occurrence,
    # matching list.index()).
    host_names = env.hosts.tolist()
    host_index = {}
    for idx, name in enumerate(host_names):
        host_index.setdefault(name, idx)
    excluded = set(excluded_hosts)

    mapping = {}
    for primary in primary_hosts:
        if primary not in host_index:
            raise ValueError(f"Unknown primary host: {primary!r}")
        primary_idx = host_index[primary]

        prediction, _ = model.predict(env.state[primary_idx], deterministic=True)
        # predict() returns a NumPy scalar/array; use a plain int for
        # membership tests and indexing.
        action = int(np.asarray(prediction).item())

        # A host may not back itself up, and excluded hosts are never eligible.
        candidates = [
            idx for idx, name in enumerate(host_names)
            if name not in excluded and name != primary
        ]
        if not candidates:
            # Previously the invalid prediction was silently kept here, which
            # could map a primary to itself or to an excluded host.
            raise ValueError(f"No valid secondary host available for {primary!r}")

        if action not in candidates:
            # Fall back to the candidate with the highest last-column value
            # (first one wins on ties).
            action = max(candidates, key=lambda idx: env.state[idx, -1])

        mapping[primary] = env.hosts[action]
        env._update_host_state(action)  # Update state after assigning backup

    return mapping

# Hosts that need a backup assigned
primary_hosts = ['h1', 'h2', 'h3', 'h5', 'h7']
mapping = predict_backup_mapping(
    model=model_ppo,
    env=env_train,
    primary_hosts=primary_hosts,
)
print("Predicted PRIMARY_TO_SECONDARY mapping:", mapping)

import json

def save_mapping_to_json(mapping, file_path="primary_to_secondary.json"):
    """Write ``mapping`` to ``file_path`` as JSON indented by four spaces.

    Args:
        mapping: JSON-serializable object (here a primary -> secondary dict).
        file_path: Destination path; an existing file is overwritten.
    """
    with open(file_path, mode="w") as handle:
        json.dump(obj=mapping, fp=handle, indent=4)

# Persist the predicted mapping next to the notebook
save_mapping_to_json(mapping, "primary_to_secondary.json")




Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 9        |
|    ep_rew_mean     | -5.07    |
| time/              |          |
|    fps             | 2210     |
|    iterations      | 1        |
|    time_elapsed    | 0        |
|    total_timesteps | 512      |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9           |
|    ep_rew_mean          | -4.94       |
| time/                   |             |
|    fps                  | 2215        |
|    iterations           | 2           |
|    time_elapsed         | 0           |
|    total_timesteps      | 1024        |
| train/                  |             |
|    approx_kl            | 0.011521477 |
|    clip_fraction        | 0.0529      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.19       |
|    explained_variance   | -0.0619     |
|    learning_rate        | 0.