In [None]:
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import BaseCallback
from gym import Env
from gym.spaces import Box


In [None]:
class VMEnv(Env):
    """Gym environment that replays a resource-usage table as sliding windows.

    Observations are the most recent ``window_size`` rows of the CPU, memory
    and network columns (zero-padded at the top while fewer rows are
    available). Actions are 3-vectors in ``[-1, 1]``; the reward is the
    negative mean squared error between the action and the current row
    divided by 1000.

    NOTE(review): an action is bounded to [-1, 1] while the target is
    ``value / 1000``, so rows with values above 1000 can never be matched
    exactly -- confirm the scale of the incoming data.
    """

    # Columns used for observations, rewards and the ``info`` dict.
    _FEATURES = ['CPU_usage_MHZ', 'Memory_usage_KB', 'Network_received_throughput_KB_s']

    def __init__(self, df, window_size):
        """Store a cleaned copy of ``df`` and declare the spaces.

        Args:
            df: table containing the three feature columns.
            window_size: number of rows per observation.
        """
        super().__init__()
        frame = df.reset_index(drop=True)
        # Non-numeric cells become NaN and the affected rows are dropped.
        for name in self._FEATURES:
            frame[name] = pd.to_numeric(frame[name], errors='coerce')
        self.df = frame.dropna(subset=self._FEATURES)
        self.window_size = window_size
        self.current_step = 0
        self.observation_space = Box(low=0, high=np.inf, shape=(window_size, 3), dtype=np.float32)
        self.action_space = Box(low=-1, high=1, shape=(3,), dtype=np.float32)

    def reset(self):
        """Rewind to the first row and return the initial observation."""
        self.current_step = 0
        return self._get_observation()

    def step(self, action):
        """Advance one row; return ``(obs, reward, done, info)``."""
        self.current_step += 1
        obs = self._get_observation()
        reward = self._get_reward(action)

        position = self.current_step
        total_rows = len(self.df)
        # The episode ends ``window_size`` rows before the end of the table.
        done = position + self.window_size >= total_rows

        info_keys = ('cpu_usage', 'memory_usage', 'network_received')
        info = {
            key: float(self.df[column].iloc[position]) if position < total_rows else 0.0
            for key, column in zip(info_keys, self._FEATURES)
        }
        return obs, reward, done, info

    def _get_observation(self):
        """Return the trailing window ending at the current row as float32."""
        stop = min(self.current_step + 1, len(self.df))
        begin = max(0, self.current_step - self.window_size + 1)
        rows = self.df[self._FEATURES].iloc[begin:stop].values
        missing = self.window_size - len(rows)
        if missing > 0:
            # Zero rows go on top so the newest data stays at the bottom.
            rows = np.vstack([np.zeros((missing, 3), dtype=np.float32), rows])
        return rows.astype(np.float32)

    def _get_reward(self, action):
        """Negative MSE between ``action`` and the current row / 1000."""
        if self.current_step >= len(self.df):
            return 0.0
        actual = np.array(
            [self.df[column].iloc[self.current_step] for column in self._FEATURES],
            dtype=np.float32,
        )
        target = actual / np.array([1000.0, 1000.0, 1000.0])
        squared_error = (target - action) ** 2
        return -np.mean(squared_error)


In [None]:
class TrainingLoggerCallback(BaseCallback):
    """Record the total reward and wall-clock duration of every episode.

    Only the first environment of the vectorised env is tracked (index 0 of
    ``rewards`` / ``dones``).

    Attributes are plain lists so they can be plotted or exported directly:
    ``episode_rewards`` (sum of rewards per episode) and ``episode_times``
    (seconds per episode).
    """

    def __init__(self, verbose=0):
        """Create an empty log; ``verbose > 0`` prints a line per episode."""
        super().__init__(verbose)
        self.episode_rewards = []
        self.episode_times = []
        # Python float accumulator: avoids summing many float32 rewards in
        # float32, which loses precision over long episodes.
        self.current_episode_reward = 0.0
        self.current_episode_start_time = time.time()
        self.episode_count = 0

    def _on_training_start(self):
        """Restart the clock so the first episode excludes setup time."""
        # Without this, the first episode's duration would include everything
        # that happened between constructing the callback and calling learn().
        self.current_episode_start_time = time.time()

    def _on_step(self):
        """Accumulate the step reward and close the episode when it ends.

        Returns:
            True, so training always continues.
        """
        self.current_episode_reward += float(self.locals['rewards'][0])
        if self.locals['dones'][0]:
            now = time.time()
            self.episode_count += 1
            self.episode_rewards.append(self.current_episode_reward)
            self.episode_times.append(now - self.current_episode_start_time)
            self.current_episode_reward = 0.0
            self.current_episode_start_time = now
            if self.verbose > 0:
                print(f"Episode {self.episode_count}: Reward = {self.episode_rewards[-1]:.2f}, Time = {self.episode_times[-1]:.2f}s")
        return True

In [None]:
# Load the training table and multiply the three resource columns by 1e6.
train_df = pd.read_csv('train_df.csv')
for resource_column in ('CPU_usage_MHZ', 'Memory_usage_KB', 'Network_received_throughput_KB_s'):
    train_df[resource_column] = train_df[resource_column] * 1e6

# Create environment: a single VMEnv wrapped in a DummyVecEnv.
window_size = 100
vm_env_kwargs = {'df': train_df, 'window_size': window_size}
env = make_vec_env(VMEnv, n_envs=1, vec_env_cls=DummyVecEnv, env_kwargs=vm_env_kwargs)

In [None]:
# Train PPO on the vectorised environment while logging per-episode statistics,
# then persist the trained model to disk.
callback = TrainingLoggerCallback(verbose=1)
ppo_hyperparameters = dict(
    learning_rate=0.0003,
    n_steps=2048,
    batch_size=64,
    clip_range=0.2,
)
model = PPO("MlpPolicy", env, verbose=1, **ppo_hyperparameters)
model.learn(total_timesteps=100000, callback=callback)
model.save("ppo_resource_prediction")

In [None]:
# Per-episode series recorded by the training callback; episodes are numbered from 1.
episode_rewards, episode_times = callback.episode_rewards, callback.episode_times
episodes = [*range(1, len(episode_rewards) + 1)]


In [None]:
# Convergence is measured as the percentage change between consecutive
# rolling-average rewards.
window_size_convergence = 10
# dtype=float keeps the rolling mean well-defined even for an empty reward list.
rolling_rewards = pd.Series(episode_rewards, dtype=float).rolling(window=window_size_convergence).mean().values

# convergence_metric[i] belongs to episodes[i], so it has exactly one entry per
# episode. Entries stay at infinity while either rolling average is still
# undefined (fewer than `window_size_convergence` episodes seen) or when the
# previous average is zero and a percentage change cannot be computed.
convergence_metric = [np.inf] * len(rolling_rewards)
for i in range(window_size_convergence, len(rolling_rewards)):
    prev_avg = rolling_rewards[i - 1]
    curr_avg = rolling_rewards[i]
    if prev_avg != 0:
        convergence_metric[i] = abs((curr_avg - prev_avg) / prev_avg) * 100

# Find convergence episode: the first one whose metric drops below the threshold.
reward_threshold = 5.0  # 5% threshold
convergence_episode = next(
    (episodes[i] for i, perc_change in enumerate(convergence_metric) if perc_change < reward_threshold),
    None,
)

# Print convergence
if convergence_episode is not None:
    print(f"Model converged at episode {convergence_episode} (convergence metric < {reward_threshold}%)")
else:
    print("Model did not converge within the training period")


In [None]:

# Plot Convergence vs Episode
# The metric list can hold fewer entries than `episodes` (the earliest episode
# has no predecessor to compare with). Right-align the x values so both
# sequences have the same length and matplotlib does not raise.
metric_episodes = episodes[max(len(episodes) - len(convergence_metric), 0):]

fig_conv, ax_conv = plt.subplots(figsize=(10, 5))
ax_conv.plot(metric_episodes, convergence_metric, label='Convergence Metric (% Change in Rolling Reward)')
ax_conv.axhline(y=reward_threshold, color='g', linestyle='--', label=f'Convergence Threshold ({reward_threshold}%)')
if convergence_episode is not None:
    ax_conv.axvline(x=convergence_episode, color='r', linestyle=':', label='Convergence Episode')
ax_conv.set_xlabel('Episode')
ax_conv.set_ylabel('Convergence Metric (%)')
ax_conv.set_title('Convergence vs Episode')
ax_conv.legend()
ax_conv.grid()
# Log scale: the metric spans several orders of magnitude; infinite entries are not drawn.
ax_conv.set_yscale('log')
plt.show()

# Plot Reward vs Episode
fig_reward, ax_reward = plt.subplots(figsize=(10, 5))
ax_reward.plot(episodes, episode_rewards, label='Episode Reward')
ax_reward.plot(
    episodes,
    rolling_rewards,
    label=f'Rolling Average Reward (window={window_size_convergence})',
    linestyle='--',
)
if convergence_episode is not None:
    ax_reward.axvline(x=convergence_episode, color='r', linestyle=':', label='Convergence')
ax_reward.set_xlabel('Episode')
ax_reward.set_ylabel('Reward (-MSE)')
ax_reward.set_title('Reward vs Episode')
ax_reward.legend()
ax_reward.grid()
plt.show()

In [None]:
# Running total of the per-episode wall-clock durations.
cumulative_times = np.cumsum(episode_times)

fig_time, ax_time = plt.subplots(figsize=(10, 5))
ax_time.plot(episodes, cumulative_times, label='Cumulative Training Time')
ax_time.set(
    xlabel='Episode',
    ylabel='Training Time (seconds)',
    title='Episode vs Training Time',
)
ax_time.legend()
ax_time.grid()
plt.show()

In [None]:
# pandas requires every column to have the same length. The convergence metric
# can be shorter than the episode list (early episodes have nothing to compare
# with), so left-pad it with infinity to keep each value on its own episode row.
missing_metric_rows = max(len(episodes) - len(convergence_metric), 0)
convergence_column = [np.inf] * missing_metric_rows + list(convergence_metric)

metrics_df = pd.DataFrame({
    'Episode': episodes,
    'Reward': episode_rewards,
    'Training_Time': episode_times,
    'Cumulative_Training_Time': cumulative_times,
    'Convergence_Metric_Percent': convergence_column
})
metrics_df.to_csv('training_metrics.csv', index=False)
print("Training metrics saved to 'training_metrics.csv'")

In [None]:
import csv
import os
import numpy as np
from datetime import datetime
from stable_baselines3 import PPO
import pandas as pd

In [None]:
def predict_and_store_rl(time_str, model_path="ppo_resource_prediction_new.zip", output_file="predictions.csv", window_size=100, test_df_path="test_df.csv"):
    """Predict resource usage around ``time_str`` and append it to a CSV file.

    The ``window_size`` rows of the test table whose time of day is closest
    to ``time_str`` form the observation, which is fed to a saved PPO model.
    The resulting action is scaled by 1000, clamped, rounded and stored.

    Args:
        time_str: query time in ``HH:MM:SS`` format.
        model_path: path of the saved PPO model.
        output_file: CSV file the prediction row is appended to.
        window_size: number of rows in the observation.
        test_df_path: CSV file with a ``time`` column (``HH:MM:SS.ffffff``)
            and the three resource columns.

    Returns:
        dict with the keys ``Time``, ``CPU_usage_MHZ``, ``Memory_usage_KB``,
        ``Network_received_throughput_KB_s`` and ``VMs_needed``.

    Raises:
        ValueError: if ``time_str`` is malformed, the model cannot be loaded,
            or prediction fails.
        AssertionError: if the observation or action has an unexpected shape.
    """
    feature_columns = ['CPU_usage_MHZ', 'Memory_usage_KB', 'Network_received_throughput_KB_s']

    # Validate and parse time string
    try:
        time_obj = datetime.strptime(time_str, "%H:%M:%S")
    except ValueError as exc:
        raise ValueError("Time must be in HH:MM:SS format (e.g., '12:34:56')") from exc

    # Load test data, clean it the same way VMEnv cleans its table (coerce to
    # numeric, drop incomplete rows), then apply the 1e6 scaling.
    test_df = pd.read_csv(test_df_path)
    test_df[feature_columns] = test_df[feature_columns].apply(pd.to_numeric, errors='coerce')
    test_df = test_df.dropna(subset=feature_columns).reset_index(drop=True)
    test_df[feature_columns] = test_df[feature_columns] * 1e6

    # Filter test data by time (approximate match)
    test_df['time'] = pd.to_datetime(test_df['time'], format='%H:%M:%S.%f')
    time_seconds = time_obj.hour * 3600 + time_obj.minute * 60 + time_obj.second
    test_df['time_seconds'] = test_df['time'].dt.hour * 3600 + test_df['time'].dt.minute * 60 + test_df['time'].dt.second
    time_diff = np.abs(test_df['time_seconds'] - time_seconds).to_numpy()
    # Stable sort makes tie-breaking deterministic (many rows can share a second).
    nearest_positions = np.argsort(time_diff, kind='stable')[:window_size]
    # Restore the file's row order: the model was trained on windows in time
    # order, not on rows ranked by distance from the query.
    closest_rows = test_df.iloc[np.sort(nearest_positions)]

    # Create observation (zero-padded on top when fewer rows are available)
    observation = closest_rows[feature_columns].values.astype(np.float32)
    if len(observation) < window_size:
        padding = np.zeros((window_size - len(observation), 3), dtype=np.float32)
        observation = np.vstack([padding, observation])
    assert observation.shape == (window_size, 3), f"Observation shape {observation.shape} does not match expected ({window_size}, 3)"

    # Load the trained PPO model
    try:
        model = PPO.load(model_path)
    except Exception as e:
        raise ValueError(f"Failed to load PPO model from {model_path}: {str(e)}") from e

    # Predict using the PPO model
    try:
        action, _ = model.predict(observation, deterministic=True)
    except Exception as e:
        raise ValueError(f"Prediction failed: {str(e)}") from e

    # Action is a 3D vector [cpu_adjustment, mem_adjustment, net_adjustment]
    assert action.shape == (3,), f"Action shape {action.shape} does not match expected (3,)"

    # Scale actions to resource values
    scaling_factors = np.array([1000.0, 1000.0, 1000.0])  # Match VMEnv reward scaling
    predicted = action * scaling_factors

    # Clamp each prediction to [0, upper bound], convert to a plain Python
    # float (so the dict and CSV hold one consistent type) and round.
    upper_bounds = (5000, 10000, 2000)
    predicted_cpu, predicted_mem, predicted_net = (
        round(max(0.0, min(float(value), upper)), 2)
        for value, upper in zip(predicted, upper_bounds)
    )

    # Calculate VMs needed
    vms_needed = int(np.ceil(predicted_cpu / 500))

    # Prepare data to store
    prediction_data = {
        "Time": time_str,
        "CPU_usage_MHZ": predicted_cpu,
        "Memory_usage_KB": predicted_mem,
        "Network_received_throughput_KB_s": predicted_net,
        "VMs_needed": vms_needed
    }

    # Store predictions in CSV; a missing *or empty* file still needs a header.
    needs_header = not os.path.exists(output_file) or os.path.getsize(output_file) == 0
    with open(output_file, mode='a', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=list(prediction_data.keys()))
        if needs_header:
            writer.writeheader()
        writer.writerow(prediction_data)

    print(f"Stored predictions for {time_str} in {output_file}")
    return prediction_data

In [None]:
# Example: predict for one time of day with the model saved by the training cell.
another_time = "22:18:35"
result = predict_and_store_rl(
    time_str=another_time,
    model_path="ppo_resource_prediction.zip",
)
print("Predictions:", result)