<a href="https://colab.research.google.com/github/KTH-SSAS/cyberPyRDDLGym/blob/master/pyRDDLGym_Demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Preliminary Installations

In [1]:
!pip install --upgrade ipykernel
!pip install pyRDDLGym

Collecting ipykernel
  Downloading ipykernel-6.25.2-py3-none-any.whl (154 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.2/154.2 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting comm>=0.1.1 (from ipykernel)
  Downloading comm-0.1.4-py3-none-any.whl (6.6 kB)
Collecting jedi>=0.16 (from ipython>=7.23.1->ipykernel)
  Downloading jedi-0.19.0-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m29.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: jedi, comm, ipykernel
  Attempting uninstall: ipykernel
    Found existing installation: ipykernel 5.5.6
    Uninstalling ipykernel-5.5.6:
      Successfully uninstalled ipykernel-5.5.6
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires ipykernel==5.5.6, but you have ipykernel 6.25.2 

**Remember** to restart the runtime after running the cell above: it upgrades `ipykernel`, and the newly installed package versions are only picked up after a restart.

# Agent

In [2]:
import random
import gym
from pyRDDLGym.Core.Policies.Agents import BaseAgent
from collections import OrderedDict

# Attackers and defenders look the same
class PassiveCyberAgent(BaseAgent):
    """Agent that never acts: it always submits its first action set to 0.

    Parameters
    ----------
    action_space
        Dict-like space exposing ``.spaces`` (a mapping from action name to
        sub-space). Only the first action name is ever used.
    """

    def __init__(self, action_space):
        self.action_space = action_space

    def sample_action(self, state=None):
        """Return ``{first_action_name: 0}``, or ``{}`` if there are no actions.

        ``state`` is accepted for interface compatibility and is not used.
        """
        # A default of None avoids StopIteration when the space has no actions
        # (e.g. a domain without any action of this agent's kind).
        first_action = next(iter(self.action_space.spaces), None)
        action = {} if first_action is None else {first_action: 0}
        print(f'action = {action}')
        return action

class RandomCyberAgent(BaseAgent):
    """Agent that performs exactly one randomly chosen action per step.

    Parameters
    ----------
    action_space
        Space exposing ``sample()`` (returning a mapping from action name to
        value) and ``seed()``.
    seed
        Optional seed. It seeds both the agent's own ``random.Random`` (used
        to pick which action to keep) and, when not None, ``action_space``.
    """

    def __init__(self, action_space, seed=None):
        self.action_space = action_space
        self.rng = random.Random(seed)
        if seed is not None:
            self.action_space.seed(seed)

    def sample_action(self, state=None):
        """Sample the whole space, then keep a single randomly picked entry.

        Returns ``{action_name: sampled_value}``, or ``{}`` when the space
        has no actions. ``state`` is not used.
        """
        s = self.action_space.sample()
        if not s:
            # rng.sample() raises ValueError on an empty population.
            return {}
        selected_action = self.rng.sample(list(s), 1)[0]
        return {selected_action: s[selected_action]}

class KeyboardCyberAgent(BaseAgent):
    """Agent whose action is picked interactively by the user.

    Parameters
    ----------
    action_space
        Dict-like space exposing ``.spaces`` (a mapping keyed by action name).
    seed
        Unused; accepted so this agent can be constructed like the others.
    """

    def __init__(self, action_space, seed=None):
        self.action_space = action_space

    def sample_action(self, state=None):
        """List the available actions, prompt for an index, return the choice.

        Returns ``{action_name: 1}`` for the chosen action. Input that is not
        an integer, or an index outside the listed range, falls back to the
        first action after printing a notice. Returns ``{}`` without
        prompting when there are no actions. ``state`` is not used.
        """
        available_actions = list(self.action_space.spaces.keys())
        if not available_actions:
            # Nothing to choose from; prompting would only lead to an IndexError.
            return {}

        print("Available actions:")
        for i, action in enumerate(available_actions):
            print(f"{i}. {action}")

        answer = input("Enter the index of the action you want to take: ")
        try:
            selected_index = int(answer)
        except ValueError:
            # Non-numeric input is treated like an out-of-range index.
            selected_index = -1

        if not 0 <= selected_index < len(available_actions):
            print("Invalid index. Using a default action.")
            selected_index = 0

        return {available_actions[selected_index]: 1}

# Wrapper to sample both an attack and a defense action
class DoubleAgent(BaseAgent):
    """Combines one attacker agent and one defender agent into a single agent.

    The action space is split by name: entries whose name contains 'attack'
    go to the attacker, entries whose name contains 'defend' go to the
    defender. A policy of 'passive' or 'keyboard' selects the matching agent
    class; any other value selects the random agent (which receives ``seed``).
    """

    def __init__(self, action_space, seed=None, attacker_policy='random', defender_policy='passive'):
        def subspace(marker):
            # Sub-space holding only the actions whose name contains `marker`.
            return gym.spaces.dict.Dict(
                {name: space for name, space in action_space.items() if marker in name}
            )

        def build(policy, space):
            # Early returns replace the if/elif/else chain; random is the fallback.
            if policy == 'passive':
                return PassiveCyberAgent(space)
            if policy == 'keyboard':
                return KeyboardCyberAgent(space)
            return RandomCyberAgent(space, seed=seed)

        # Both sub-spaces are created before either agent is constructed.
        attack_steps = subspace('attack')
        defense_steps = subspace('defend')
        self.attacker = build(attacker_policy, attack_steps)
        self.defender = build(defender_policy, defense_steps)

    def sample_action(self, state=None):
        """Ask the attacker, then the defender, and return the merged action dict.

        Both choices are printed. On a duplicate key the defender's value wins.
        """
        attacker_choice = self.attacker.sample_action(state=state)
        defender_choice = self.defender.sample_action(state=state)
        print(f'attack_action = {attacker_choice}')
        print(f'defense_action = {defender_choice}')
        return attacker_choice | defender_choice

# RDDL

In [4]:
# Directory prefix for the generated domain.rddl / instance.rddl files; it is
# joined by plain string concatenation, so the trailing slash is required.
# NOTE(review): '/content/' looks Colab-specific - confirm before running elsewhere.
base_path = '/content/'

In [5]:
# RDDL domain source. It is written to <base_path>domain.rddl below so that
# the environment can later be created from the file path.
DOMAIN = """
domain simple_compromise {

	types {
		host: object;
    credentials: object;
	};

    pvariables {

        // Associations
        CONNECTED(host, host) : { non-fluent, bool, default = false };
        ACCESSES(credentials, host)  : { non-fluent, bool, default = false };
        STORES(host, credentials)  : { non-fluent, bool, default = false };

        // State fluents
        compromised(host) : { state-fluent, bool, default = false };
        cracked(credentials) : { state-fluent, bool, default = false };

        // Attacker action fluents
        compromise_attack(host) : { action-fluent, bool, default = false };
        crack_attack(credentials) : { action-fluent, bool, default = false };

        // Defender action fluents
        rotate_defend(credentials) : { action-fluent, bool, default = false };

        // Initial TTCs
        ittc_crack_attack(credentials) : { non-fluent, int, default = 0 };

        // Remaining TTCs
        rttc_crack_attack(credentials) : { state-fluent, int, default = 0 };

    };

cpfs {
    compromised'(?ht) =
        if (~compromised(?ht) ^ exists_{?hs : host, ?c : credentials} [CONNECTED(?hs, ?ht) ^ compromised(?hs) ^ ACCESSES(?c, ?ht) ^ cracked(?c) ^ compromise_attack(?ht)])
          then KronDelta(true)
        else if (compromised(?ht) ^ exists_{?c : credentials} [ACCESSES(?c, ?ht) ^ cracked(?c) ^ rotate_defend(?c)])
          then KronDelta(false)
        else compromised(?ht);

    cracked'(?c) =
        if (~cracked(?c) ^ crack_attack(?c) ^ rttc_crack_attack(?c) < 1 ^ exists_{?h : host} [STORES(?h, ?c) ^ compromised(?h)])
          then KronDelta(true)
        else if (cracked(?c) ^ rotate_defend(?c))
          then KronDelta(false)
        else cracked(?c);

    rttc_crack_attack'(?c) =
        if (~cracked(?c) ^ crack_attack(?c) ^ rttc_crack_attack(?c) > 0 ^ exists_{?h : host} [STORES(?h, ?c) ^ compromised(?h)])
          then (rttc_crack_attack(?c) - 1)
        else if (rotate_defend(?c))
          then (ittc_crack_attack(?c))
        else rttc_crack_attack(?c);

};


    reward = (sum_{?h: host} [compromised(?h)]);

}
"""
# The context manager closes the file even if write() raises.
with open(base_path+'domain.rddl','w') as domain_file:
    domain_file.write(DOMAIN)

In [6]:
# RDDL non-fluents and instance source for the simple_compromise domain. It is
# written to <base_path>instance.rddl below so that the environment can later
# be created from the file path.
INSTANCE = """
non-fluents simple_network {
	domain = simple_compromise;

	objects{
		host: {h1, h2, h3};
		credentials: {c1, c2, c3};
//		host: {h1, h2, h3, h4, h5, h6, h7, h8, h9};
//		credentials: {c1, c2, c3, c4, c5, c6, c7, c8, c9};
	};

	non-fluents {
		CONNECTED(h1, h2);
		CONNECTED(h1, h3);
//		CONNECTED(h3, h4);
//		CONNECTED(h4, h5);
//		CONNECTED(h2, h5);
//		CONNECTED(h5, h6);
//		CONNECTED(h6, h7);
//		CONNECTED(h6, h8);
//		CONNECTED(h8, h9);
		ACCESSES(c1, h1);
		ACCESSES(c2, h2);
		ACCESSES(c3, h3);
//		ACCESSES(c4, h4);
//		ACCESSES(c5, h5);
//		ACCESSES(c6, h6);
//		ACCESSES(c7, h7);
//		ACCESSES(c8, h8);
//		ACCESSES(c9, h9);
		STORES(h1, c1);
		STORES(h1, c2);
		STORES(h1, c3);
//		STORES(h1, c4);
//		STORES(h1, c5);
//		STORES(h1, c6);
//		STORES(h1, c7);
//		STORES(h1, c8);
//		STORES(h1, c9);

		ittc_crack_attack(c1) = 1;
		ittc_crack_attack(c2) = 2;
		ittc_crack_attack(c3) = 0;
//		ittc_crack_attack(c4) = 2;
//		ittc_crack_attack(c5) = 1;
//		ittc_crack_attack(c6) = 2;
//		ittc_crack_attack(c7) = 3;
//		ittc_crack_attack(c8) = 2;
//		ittc_crack_attack(c9) = 1;

	};
}

instance simple_network_instance {
	domain = simple_compromise;
	non-fluents = simple_network;

	init-state{
		compromised(h1) = true;

		rttc_crack_attack(c1) = 1;
		rttc_crack_attack(c2) = 2;
		rttc_crack_attack(c3) = 0;
//		rttc_crack_attack(c4) = 2;
//		rttc_crack_attack(c5) = 1;
//		rttc_crack_attack(c6) = 2;
//		rttc_crack_attack(c7) = 3;
//		rttc_crack_attack(c8) = 2;
//		rttc_crack_attack(c9) = 1;
	};

		max-nondef-actions = 2;
		horizon = 100;
		discount = 1.0;
}
"""
# The context manager closes the file even if write() raises.
with open(base_path+'instance.rddl','w') as instance_file:
    instance_file.write(INSTANCE)

# Execution

In [7]:
from pyRDDLGym import RDDLEnv
from pyRDDLGym import ExampleManager
from pyRDDLGym.Visualizer.MovieGenerator import MovieGenerator
from pyRDDLGym.Core.Policies.Agents import RandomAgent

# Re-stated here so this cell can be re-run on its own after a restart.
base_path = '/content/'
# Build the environment from the domain/instance files written by the RDDL cells.
myEnv = RDDLEnv.RDDLEnv(domain=base_path+'domain.rddl', instance=base_path+'instance.rddl')
print(f'myEnv.action_space = {myEnv.action_space}')
# The original bare `print` only referenced the function and printed nothing.
print()
# Attacker is driven from the keyboard; defender never acts.
agent = DoubleAgent(action_space=myEnv.action_space, attacker_policy='keyboard', defender_policy='passive', seed=42)

myEnv.action_space = Dict('compromise_attack___h1': Discrete(2), 'compromise_attack___h2': Discrete(2), 'compromise_attack___h3': Discrete(2), 'crack_attack___c1': Discrete(2), 'crack_attack___c2': Discrete(2), 'crack_attack___c3': Discrete(2), 'rotate_defend___c1': Discrete(2), 'rotate_defend___c2': Discrete(2), 'rotate_defend___c3': Discrete(2))


In [8]:
import time
import numpy


def _true_fluents(fluents):
    """Return the names of the boolean entries of ``fluents`` that are true."""
    return [name for name, value in fluents.items()
            if isinstance(value, (bool, numpy.bool_)) and value]


def _integer_fluents(fluents):
    """Return ``(name, value)`` pairs for the integer entries of ``fluents``."""
    # isinstance accepts any integer width; Python bools are excluded
    # explicitly because bool is a subclass of int.
    return [(name, value) for name, value in fluents.items()
            if isinstance(value, (int, numpy.integer)) and not isinstance(value, bool)]


# Run one episode, printing the state before and after every step.
total_reward = 0
try:
    state = myEnv.reset()
    start_time = time.time()
    # Show the initial state before the first action is requested, so an
    # interactive agent has something to base its first choice on.
    print(f'step         = 0')
    print(f'attack steps = {_true_fluents(state)}')
    print(f'TTCs         = {_integer_fluents(state)}')
    for step in range(myEnv.horizon):
        action = agent.sample_action(state=state)
        next_state, reward, done, info = myEnv.step(action)
        total_reward += reward
        print()
        print(f'step         = {step}')
        print(f'attack steps = {_true_fluents(state)}')
        print(f'TTCs         = {_integer_fluents(state)}')
        print(f'action       = {action}')
        print(f'attack steps = {_true_fluents(next_state)}')
        print(f'TTCs         = {_integer_fluents(next_state)}')
        print(f'reward       = {reward}')
        state = next_state
        if done:
            break
    end_time = time.time()
    print()
    print(f'episode ended with reward {total_reward}. Execution time was {end_time-start_time} s.')
finally:
    # Close the environment even when the run is interrupted (e.g. the user
    # aborts a keyboard prompt) or a step raises.
    myEnv.close()

step         = 0
attack steps = ['compromised___h1']
TTCs         = [('rttc_crack_attack___c1', 1), ('rttc_crack_attack___c2', 2), ('rttc_crack_attack___c3', 0)]
Available actions:
0. compromise_attack___h1
1. compromise_attack___h2
2. compromise_attack___h3
3. crack_attack___c1
4. crack_attack___c2
5. crack_attack___c3
Enter the index of the action you want to take: 5
action = {'rotate_defend___c1': 0}
attack_action = {'crack_attack___c3': 1}
defense_action = {'rotate_defend___c1': 0}

step         = 0
attack steps = ['compromised___h1']
TTCs         = [('rttc_crack_attack___c1', 1), ('rttc_crack_attack___c2', 2), ('rttc_crack_attack___c3', 0)]
action       = {'crack_attack___c3': 1, 'rotate_defend___c1': 0}
attack steps = ['compromised___h1', 'cracked___c3']
TTCs         = [('rttc_crack_attack___c1', 1), ('rttc_crack_attack___c2', 2), ('rttc_crack_attack___c3', 0)]
reward       = 1.0
Available actions:
0. compromise_attack___h1
1. compromise_attack___h2
2. compromise_attack___h3
3. c

KeyboardInterrupt: ignored