In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pyibl
from tqdm import tqdm
from pyibl import similarity
from copy import deepcopy

<h1> Reading System Data </h1>

In [2]:
#load the "System Info" sheet of the workbook into a data frame (one row per system)
info=pd.read_excel("Cyber Attack Modeling Data-Clean.xlsx", sheet_name="System Info")

In [3]:
#preview the first rows of the system table
info.head()

Unnamed: 0,System,Real or Honeypot,OS,Port 1,Port 2,Port 3,Port 4,Service on P1,Service on P2,Service on P3,Service on P4,Vulnerability on Port 1,Vulnerability on Port 2,Vulnerability on Port 3,Vulnerability on Port 4
0,0,Honeypot,Solaris,25/tcp,21/tcp,111/tcp,135/tcp,smtp,ftp,rpcbind,msrpc,directory_harvest,brute_force,DDoS_attack,DoS_attack
1,1,Honeypot,HP-UX 11i,135/tcp,111/tcp,25/tcp,80/tcp,msrpc,rpcbind,smtp,http,DoS_attack,DDoS_attack,directory_harvest,sql_injection
2,2,Honeypot,HP-UX 11i,80/tcp,111/tcp,21/tcp,25/tcp,http,rpcbind,ftp,smtp,sql_injection,DDoS_attack,brute_force,directory_harvest
3,3,Honeypot,Windows Server 2003,111/tcp,21/tcp,135/tcp,80/tcp,rpcbind,ftp,msrpc,http,DDoS_attack,brute_force,DoS_attack,sql_injection
4,4,Honeypot,HP-UX 11i,80/tcp,21/tcp,111/tcp,25/tcp,http,ftp,rpcbind,smtp,sql_injection,brute_force,DDoS_attack,directory_harvest


<h3> System to Situation </h3>

In [4]:
system_situation=[] #will store situations of all the systems at any point of time
#to get situation of system x at any point of time-- system_situation[x]
#(this lookup relies on row x of info describing system x)

for i in range(len(info)):
    #situation format-[sys_no,exploited,os,(p1,p2,p3,p4),(serv1,serv2,serv3,serv4),(vul1,vul2,vul3,vul4)]
    #use .iloc for every positional lookup: plain row[k] with an integer key on a
    #string-labelled Series is deprecated in recent pandas versions
    row=info.iloc[i]
    situation=[
        row.iloc[0],            #sys_number
        False,                  #exploited - every system starts out unexploited
        row.iloc[2],            #os
        tuple(row.iloc[3:7]),   #port tuple (port1..port4)
        tuple(row.iloc[7:11]),  #service tuple (service1..service4)
        tuple(row.iloc[11:15]), #vulnerability tuple (vul1..vul4)
    ]

    #now the situation of a system has been defined-adding it to the system_situation list
    system_situation.append(situation)

In [6]:
#printing situation of system 39
system_situation[39]

[39,
 False,
 'Windows Server 2003',
 ('80/tcp', '135/tcp', '21/tcp', '111/tcp'),
 ('http', 'msrpc', 'ftp', 'rpcbind'),
 ('sql_injection', 'DoS_attack', 'brute_force', 'DDoS_attack')]

In [7]:
#printing situation of system 4
system_situation[4]

[4,
 False,
 'HP-UX 11i',
 ('80/tcp', '21/tcp', '111/tcp', '25/tcp'),
 ('http', 'ftp', 'rpcbind', 'smtp'),
 ('sql_injection', 'brute_force', 'DDoS_attack', 'directory_harvest')]

<h3> System:Real/Honeypot </h3>

In [8]:
#column holding the "Real"/"Honeypot" label of every system
real=info["Real or Honeypot"]

In [9]:
#convert the pandas Series of labels into a NumPy array
real=np.array(real)

In [10]:
def clean(a):
    """Return True when the label ``a`` equals "Real", otherwise False."""
    return bool(a=="Real")

In [11]:
#build the boolean flags straight from info rather than from the current value of real,
#so that re-running this cell is safe: clean() applied to the already-converted booleans
#would return False for every system
real=np.array([clean(a) for a in info["Real or Honeypot"]])

In [12]:
real #True-real,False-honeypot

array([False, False, False, False, False, False, False, False, False,
       False,  True, False, False, False, False,  True, False,  True,
       False,  True, False, False, False,  True, False, False, False,
        True,  True, False, False, False,  True, False, False,  True,
       False,  True, False, False])

In [13]:
#to check if system x is real-- real[x]
real[5]

False

<h1> Reading Non-Subnet Data </h1>

In [14]:
#load the "Non_Subnet-Data" sheet: one row per attack, giving the user and the system attacked
non_subnet=pd.read_excel("Cyber Attack Modeling Data-Clean.xlsx", sheet_name="Non_Subnet-Data")

In [15]:
#preview the first rows of the attack log
non_subnet.head()

Unnamed: 0,UserID,System Attacked
0,26,0
1,26,1
2,26,13
3,26,13
4,26,39


In [16]:
#distinct user ids present in the attack log
users=pd.unique(non_subnet['UserID'])
users

array([ 26,  32,  34,  36,  37,  39,  40,  41,  43,  44,  45,  47,  52,
        53,  56,  57,  59,  60,  62,  64,  68,  69,  70,  71,  72,  75,
        76,  77,  78,  80,  81,  82,  83,  84,  85,  86,  87,  88,  90,
        92,  93,  94,  95,  98,  99, 106, 107, 109, 111, 113, 114, 116,
       118, 124, 125, 129, 130, 133, 134], dtype=int64)

In [17]:
user_data=non_subnet[non_subnet["UserID"]==26] 
user_data #data frame for a particular user

Unnamed: 0,UserID,System Attacked
0,26,0
1,26,1
2,26,13
3,26,13
4,26,39
5,26,27
6,26,26
7,26,7
8,26,16
9,26,18


<h1> Defining Reward Function </h1>

In [18]:
def reward_simulation(choice): #choice-situation is a list with a predefined format
    """Simulate the reward for attacking the system described by ``choice``.

    ``choice[0]`` is the system number, looked up in the global ``real`` array,
    and ``choice[1]`` is the exploited flag of that system.

    Returns:
        -5 for a honeypot,
        -10 for a real system that has already been exploited,
        5 or 2.5 (decided by a uniform random draw against 0.5) for a real
        system that has not been exploited yet.
    """
    if not real[choice[0]]: #honeypot
        return -5
    if choice[1]==False: #real system, not exploited yet
        #np.random.rand() draws a number between 0 and 1
        return 5 if np.random.rand()<0.5 else 2.5
    #real system already exploited - punish attacking it again
    return -10

<h1> Avg Human Score for Non-Subnet Data </h1>

In [19]:
#number of independent simulation runs, read interactively from the user
#NOTE(review): input() waits for a typed value, so this cell blocks unattended execution
num=int(input("Enter the number of times you want to run the simulation: "))

Enter the number of times you want to run the simulation: 50


In [20]:
simulation_score=np.zeros(num) #stores the average score of users in a simulation across all (num) simulations

#The sequence of systems attacked by a user is the same in every simulation, so it is
#extracted once here instead of re-filtering the data frame inside each simulation.
#Column 1 of non_subnet is the attacked system; .iloc is used because integer keys
#on a label-indexed row (row[1]) are deprecated in recent pandas versions.
attack_sequences=[
    non_subnet[non_subnet["UserID"]==user].iloc[:,1].tolist()
    for user in users
]

#NOTE(review): reward_simulation draws from NumPy's global random generator and no seed
#is set in the visible cells, so the scores differ from run to run.
for i in tqdm(range(num)):
    score=0 #total score of all users in a simulation

    for attacks in attack_sequences: #for each user

        copy_system_situation=deepcopy(system_situation) #copy of original-changes in it won't modify original

        for system_attacked in attacks:
            reward=reward_simulation(copy_system_situation[system_attacked])
            if reward==5: #real system exploited (a reward of 2.5 neither scores nor marks the system)
                score+=1
                copy_system_situation[system_attacked][1]=True #updating the exploited condition

    score=score/len(users) #taking the average score of all the users in the simulation
    simulation_score[i]=score

100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [00:04<00:00, 10.01it/s]


In [21]:
#average user score of each simulation run
simulation_score

array([0.93220339, 0.88135593, 0.72881356, 0.91525424, 0.88135593,
       0.88135593, 1.01694915, 0.72881356, 0.89830508, 0.93220339,
       0.79661017, 0.83050847, 1.01694915, 1.08474576, 0.94915254,
       0.96610169, 0.89830508, 0.96610169, 0.91525424, 0.86440678,
       0.72881356, 0.81355932, 0.84745763, 0.6440678 , 0.79661017,
       0.76271186, 0.93220339, 0.86440678, 0.84745763, 0.69491525,
       0.79661017, 0.77966102, 1.01694915, 0.89830508, 1.01694915,
       0.93220339, 0.91525424, 0.98305085, 0.79661017, 0.86440678,
       0.93220339, 0.86440678, 0.94915254, 0.94915254, 1.03389831,
       0.86440678, 0.91525424, 0.89830508, 0.81355932, 0.91525424])

In [22]:
#mean of the per-simulation average scores
np.mean(simulation_score)

0.8830508474576271