In [1]:
from bigdata_env import BigDataEnv
from heuristic import heuristic_rule

# Environment constructed from sample_logs.csv, with the heuristic rule passed in.
env = BigDataEnv("sample_logs.csv", heuristic_rule)

# One pass through the environment; for now the heuristic picks every action.
state = env.reset()
while state is not None:
    action = heuristic_rule(state)
    next_state, reward, done = env.step(action)
    print(f"State: {state}, Action: {action}, Reward: {reward}")
    # Setting state to None is what ends the loop once step() reports done.
    if done:
        state = None
    else:
        state = next_state

State: [30. 40. 10.], Action: 0, Reward: 1
State: [85. 70. 15.], Action: 1, Reward: 1
State: [45. 30. 90.], Action: 0, Reward: 1
State: [90. 95. 10.], Action: 1, Reward: 1


In [2]:
from bigdata_env import BigDataEnv
from agent import IHRLAgent
from heuristic import heuristic_rule
import pandas as pd

env = BigDataEnv("sample_logs.csv", heuristic_rule)
agent = IHRLAgent(state_size=3, action_size=2)

episodes = 100

for ep in range(episodes):
    # Per-episode counters, reported once the episode is over.
    total_reward, steps = 0, 0
    state = env.reset()

    # The episode ends when step() hands back None as the next state;
    # the `done` flag is unpacked but not consulted here.
    while state is not None:
        action = agent.get_action(state)
        next_state, reward, done = env.step(action)
        agent.update(state, action, reward, next_state)
        steps += 1
        total_reward += reward
        state = next_state

    print(f"Episode {ep+1} — Total Reward: {total_reward}, Steps: {steps}, Epsilon: {agent.epsilon:.4f}")

Episode 1 — Total Reward: 4, Steps: 4, Epsilon: 0.9801
Episode 2 — Total Reward: -2, Steps: 4, Epsilon: 0.9607
Episode 3 — Total Reward: 2, Steps: 4, Epsilon: 0.9416
Episode 4 — Total Reward: 2, Steps: 4, Epsilon: 0.9229
Episode 5 — Total Reward: -4, Steps: 4, Epsilon: 0.9046
Episode 6 — Total Reward: 4, Steps: 4, Epsilon: 0.8867
Episode 7 — Total Reward: 0, Steps: 4, Epsilon: 0.8691
Episode 8 — Total Reward: 4, Steps: 4, Epsilon: 0.8518
Episode 9 — Total Reward: 2, Steps: 4, Epsilon: 0.8349
Episode 10 — Total Reward: 2, Steps: 4, Epsilon: 0.8183
Episode 11 — Total Reward: 0, Steps: 4, Epsilon: 0.8021
Episode 12 — Total Reward: 2, Steps: 4, Epsilon: 0.7862
Episode 13 — Total Reward: 2, Steps: 4, Epsilon: 0.7705
Episode 14 — Total Reward: 0, Steps: 4, Epsilon: 0.7553
Episode 15 — Total Reward: 0, Steps: 4, Epsilon: 0.7403
Episode 16 — Total Reward: 2, Steps: 4, Epsilon: 0.7256
Episode 17 — Total Reward: 2, Steps: 4, Epsilon: 0.7112
Episode 18 — Total Reward: 4, Steps: 4, Epsilon: 0.6970

In [1]:
import psutil
import csv
import time
from datetime import datetime

def collect_logs(filename="system_logs.csv", record_limit=1000, interval=0.5,
                 cpu_sample_window=0.1):
    """Sample system metrics with psutil and write them to a CSV file.

    The file is opened in write mode, so an existing file with the same name
    is replaced. A header row is written first, followed by one row per
    sample: the local timestamp (second resolution), CPU utilisation percent,
    memory utilisation percent and the number of process ids.

    Args:
        filename: Path of the CSV file to create.
        record_limit: Number of samples to take. Zero or a negative value
            produces a file containing only the header.
        interval: Seconds to pause between two consecutive samples.
        cpu_sample_window: Seconds over which psutil measures CPU usage for
            each sample. The call blocks for this long, so consecutive rows
            are roughly ``interval + cpu_sample_window`` seconds apart.

    Returns:
        None. A one-line summary is printed when collection finishes.
    """
    written = 0
    with open(filename, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["timestamp", "cpu_percent", "memory_percent", "active_processes"])

        for index in range(record_limit):
            timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            cpu = psutil.cpu_percent(interval=cpu_sample_window)
            memory = psutil.virtual_memory().percent
            processes = len(psutil.pids())
            writer.writerow([timestamp, cpu, memory, processes])
            written += 1
            # Pause only between samples; sleeping after the final row would
            # just delay the return without producing any data.
            if index < record_limit - 1:
                time.sleep(interval)

    # Report the rows actually written, which is 0 for a non-positive limit.
    print(f"{written} logs written to {filename}")

# Script entry point: gather 1000 samples with a 0.5 second pause between them.
if __name__ == "__main__":
    collect_logs(filename="system_logs.csv", record_limit=1000, interval=0.5)

1000 logs written to system_logs.csv


In [2]:
from bigdata_env import BigDataEnv
from heuristic import heuristic_rule
from dqn_agent import DeepIHRLAgent
import numpy as np

env = BigDataEnv("system_logs.csv", heuristic_rule)
agent = DeepIHRLAgent(state_size=3, action_size=3)

episodes = 300
batch_size = 64

for e in range(episodes):
    total_reward = 0
    state = env.reset()

    # Play one episode, handing every transition to agent.remember().
    # The loop ends when step() returns None as the next state.
    while state is not None:
        action = agent.get_action(state)
        next_state, reward, done = env.step(action)
        agent.remember(state, action, reward, next_state, done)
        total_reward += reward
        state = next_state

    # One replay call per episode; the target model is refreshed whenever
    # the zero-based episode index is a multiple of five.
    agent.replay(batch_size)
    if not e % 5:
        agent.update_target_model()

    print(f"Episode {e+1}: Total Reward = {total_reward}, Epsilon = {agent.epsilon:.4f}")

# Persist the agent and its hyperparameters once all episodes have run.
agent.save()
agent.save_hyperparams()

Episode 1: Total Reward = -266, Epsilon = 0.9800
Episode 2: Total Reward = -316, Epsilon = 0.9604
Episode 3: Total Reward = -354, Epsilon = 0.9412
Episode 4: Total Reward = -324, Epsilon = 0.9224
Episode 5: Total Reward = -280, Epsilon = 0.9039
Episode 6: Total Reward = -314, Epsilon = 0.8858
Episode 7: Total Reward = -276, Epsilon = 0.8681
Episode 8: Total Reward = -306, Epsilon = 0.8508
Episode 9: Total Reward = -222, Epsilon = 0.8337
Episode 10: Total Reward = -188, Epsilon = 0.8171
Episode 11: Total Reward = -170, Epsilon = 0.8007
Episode 12: Total Reward = -88, Epsilon = 0.7847
Episode 13: Total Reward = -82, Epsilon = 0.7690
Episode 14: Total Reward = -190, Epsilon = 0.7536
Episode 15: Total Reward = -126, Epsilon = 0.7386
Episode 16: Total Reward = -82, Epsilon = 0.7238
Episode 17: Total Reward = -110, Epsilon = 0.7093
Episode 18: Total Reward = -26, Epsilon = 0.6951
Episode 19: Total Reward = -6, Epsilon = 0.6812
Episode 20: Total Reward = -28, Epsilon = 0.6676
Episode 21: Tota

In [3]:
from dqn_agent import DeepIHRLAgent
from bigdata_env import BigDataEnv
from heuristic import heuristic_rule

# Build fresh env and agent objects, then load the agent's saved state.
env = BigDataEnv("system_logs.csv", heuristic_rule)
agent = DeepIHRLAgent(state_size=3, action_size=3)

agent.load()
agent.load_hyperparams("params.pkl")

# Resume training: e runs from 300 through 500 inclusive (201 episodes).
# NOTE(review): the label printed below is e itself, whereas the earlier
# training cell prints e+1, so "Episode 300" shows up in both runs. Confirm
# whether that overlap is intended.
for e in range(300, 501):
    total_reward = 0
    state = env.reset()
    while state is not None:
        action = agent.get_action(state)
        next_state, reward, done = env.step(action)
        agent.remember(state, action, reward, next_state, done)
        total_reward += reward
        state = next_state
    # One replay call per episode; target model refreshed when e is a multiple of five.
    agent.replay(batch_size=64)
    if not e % 5:
        agent.update_target_model()
    print(f"Episode {e}: Total Reward = {total_reward}, Epsilon = {agent.epsilon:.4f}")

Episode 300: Total Reward = 962, Epsilon = 0.0023
Episode 301: Total Reward = 932, Epsilon = 0.0022
Episode 302: Total Reward = 890, Epsilon = 0.0022
Episode 303: Total Reward = 898, Epsilon = 0.0022
Episode 304: Total Reward = 934, Epsilon = 0.0021
Episode 305: Total Reward = 756, Epsilon = 0.0021
Episode 306: Total Reward = 744, Epsilon = 0.0020
Episode 307: Total Reward = 838, Epsilon = 0.0020
Episode 308: Total Reward = 890, Epsilon = 0.0019
Episode 309: Total Reward = 980, Epsilon = 0.0019
Episode 310: Total Reward = 898, Epsilon = 0.0019
Episode 311: Total Reward = 890, Epsilon = 0.0018
Episode 312: Total Reward = 960, Epsilon = 0.0018
Episode 313: Total Reward = 934, Epsilon = 0.0018
Episode 314: Total Reward = 936, Epsilon = 0.0017
Episode 315: Total Reward = 960, Epsilon = 0.0017
Episode 316: Total Reward = 924, Epsilon = 0.0017
Episode 317: Total Reward = 952, Epsilon = 0.0016
Episode 318: Total Reward = 952, Epsilon = 0.0016
Episode 319: Total Reward = 916, Epsilon = 0.0016
