In [2]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import pandas.plotting as pplt

import baos_knx_parser as knx

import csv
import binascii
import datetime
import math

import itertools

In [68]:
DEFAULT_SRC = knx.KnxAddress('3.4.2')

In [69]:
def gen_restart(dest, src=DEFAULT_SRC, prio=knx.TelegramPriority.URGENT, hop_count=6):
    return knx.KnxStandardTelegram(
        src=src, dest=dest,
        telegram_type=knx.TelegramType.DATA, repeat=False, ack=False,
        priority=knx.TelegramPriority.URGENT, hop_count=hop_count,
        payload=knx.construct_payload(knx.TPCI.UNNUMBERED_DATA_PACKET, 0, knx.APCI.A_RESTART)
    )

def gen_device_descriptor_read(dest, src=DEFAULT_SRC, prio=knx.TelegramPriority.URGENT, hop_count=6):
    return knx.KnxStandardTelegram(
        src=src, dest=dest,
        telegram_type=knx.TelegramType.DATA, repeat=False, ack=False,
        priority=knx.TelegramPriority.URGENT, hop_count=hop_count,
        payload=knx.construct_payload(knx.TPCI.UNNUMBERED_DATA_PACKET, 0, knx.APCI.A_DEVICE_DESCRIPTOR_READ)
    )

In [70]:
def addr_range(start: knx.KnxAddress, end: knx.KnxAddress, step=1):
    if not start.group == end.group:
        raise ValueError("Address Types must be equal")
        
    group = start.group
    for addr in range(int(start), int(end), step):
        yield knx.parser.parse_knx_addr(knx.struct.STD_U16.pack(addr), group)

In [71]:
def gen_time(start: datetime.datetime, end: datetime.datetime=None, duration: datetime.timedelta=None, max_ps: float=5000.0, amount: int=math.inf):
    if not start:
        raise ValueError("No start datetime provided!")
        
    if end is None and duration is None and max_ps is None and amount is None:
        raise ValueError("A second parameter next to start is required")
        
    if duration and end:
        raise ValueError("Only one of end or duration can be set")
    
    if max_ps is None and amount is None:
        raise ValueError("Either max_ps or amount is required")
    
    if max_ps is None or max_ps <= 0:
        raise ValueError("max_ps must be >0")
    
    if duration:
        end =  start + duration
    
    if amount < math.inf and end:
        max_ps = min(amount / (end - start).total_seconds(), max_ps or math.inf)
    
    current_time = start
    current_amount = 0
    while (not end or current_time < end) and current_amount < amount:
        yield current_time.replace(microsecond=0)
        
        # increase current time by 1/max_ps seconds
        current_amount += 1
        current_time += datetime.timedelta(seconds=1)/max_ps
        
        
def gen_burst(start: datetime.datetime, bursts: int, burst_duration: datetime.timedelta=None, end: datetime.datetime=None, wait: datetime.timedelta=None, *args, **kwargs):
    if not start:
        raise ValueError("No start datetime provided!")
        
    if not bursts:
        raise ValueError("No number of bursts provided!")
        
    if end is not None and burst_duration is not None and wait is None:
        wait = ((end - start) - (burst_duration * bursts)) / bursts
    if end is not None and burst_duration is None and wait is not None:
        burst_duration = ((end - start) - (wait * bursts)) / bursts
    
    if wait is None:
        raise ValueError("Wait cannot be calculated, please provide it as parameter")
    if burst_duration is None:
        raise ValueError("burst_duration cannot be calculated, please provide it as parameter")
        
    current_time = start
    for n in range(bursts):
        yield from gen_time(
            current_time,
            duration=burst_duration,
            *args,
            **kwargs
        )
        
        current_time += wait

In [72]:
def df_from_generator(knx_gen):
    return pd.DataFrame.from_records(
        map(lambda t: [t.timestamp, binascii.hexlify(t.to_binary())], knx_gen),
        index='timestamp', columns=('timestamp', 'telegram')
    )

In [73]:
# list(itertools.islice(gen_time(datetime.datetime(2018,1,1), end=datetime.datetime(2018,1,2), amount=5), 10))

In [74]:
# list(itertools.islice(gen_burst(datetime.datetime(2017, 2, 12, 8, 0), bursts=3, burst_duration=datetime.timedelta(minutes=15), wait=datetime.timedelta(minutes=5), max_ps=5000), 20))

In [75]:
def simulate_network_probing(timegen, start_addr: knx.KnxAddress, end_addr: knx.KnxAddress, src: knx.KnxAddress=DEFAULT_SRC, *args, **kwargs):
    # get in looser, we're going probing!
    
    for addr in addr_range(start_addr, end_addr):
        time = next(timegen)
        if not time:
            break
        
        # do not probe yourself
        if int(addr) == int(src):
            continue
            
        telegram = gen_device_descriptor_read(addr, src=src, *args, **kwargs)
        telegram.timestamp = time
        yield telegram
        
        
def simulate_dos(timegen, start_addr: knx.KnxAddress, end_addr: knx.KnxAddress, src: knx.KnxAddress=DEFAULT_SRC, *args, **kwargs):
    addr_gen = addr_range(start_addr, end_addr)
    
    for time in timegen:
        try:
            addr = next(addr_gen)
        except StopIteration:
            addr_gen = addr_range(start_addr, end_addr)
            addr = next(addr_gen)
        
        # do not probe yourself
        if int(addr) == int(src):
            continue
            
        telegram = gen_restart(addr, src=src, *args, **kwargs)
        telegram.timestamp = time
        yield telegram

In [76]:
# r = addr_range(knx.KnxAddress('0.0.0'), knx.KnxAddress('15.15.255'))

probing_df = df_from_generator(simulate_network_probing(
    gen_time(datetime.datetime(2018,1,1), end=datetime.datetime(2018,1,2), amount=255),
    knx.KnxAddress('3.4.0'),
    knx.KnxAddress('3.4.255')
))

In [77]:
probing_df.head() # ['2018-01-01 15:00':'2018-01-01 16:00']
#df.dtypes

Unnamed: 0_level_0,telegram
timestamp,Unnamed: 1_level_1
2018-01-01 00:00:00,b'2900ba6034023400000300'
2018-01-01 00:05:38,b'2900ba6034023401000300'
2018-01-01 00:16:56,b'2900ba6034023403000300'
2018-01-01 00:22:35,b'2900ba6034023404000300'
2018-01-01 00:28:14,b'2900ba6034023405000300'


In [4]:
OVERALL_DATE_RANGE = (datetime.datetime(2017, 1 , 21, 0, 0, 0), datetime.datetime(2017, 2, 21, 0, 0, 0))
PERIODS = {
    'training': (datetime.date(2017, 1, 21), datetime.date(2017, 1, 21) + datetime.timedelta(weeks=2)),
    'validation': (datetime.date(2017, 2, 4), datetime.date(2017, 2, 4) + datetime.timedelta(weeks=1)),
    'test': (datetime.datetime(2017, 2, 11), datetime.date(2017, 2, 11) + datetime.timedelta(weeks=1)),
}
PERIODS

{'test': (datetime.datetime(2017, 2, 11, 0, 0), datetime.date(2017, 2, 18)),
 'training': (datetime.date(2017, 1, 21), datetime.date(2017, 2, 4)),
 'validation': (datetime.date(2017, 2, 4), datetime.date(2017, 2, 11))}

In [79]:
overall_df = pd.read_csv('knx_dataset.csv', sep=';', names=('timestamp', 'telegram'), index_col=0)
overall_df.index = pd.to_datetime(overall_df.index)
overall_df.head()

Unnamed: 0_level_0,telegram
timestamp,Unnamed: 1_level_1
2017-01-21 00:00:18,b'2900bce0361612be0200800000'
2017-01-21 00:00:37,b'2900bce0361012730200800000'
2017-01-21 00:00:52,b'2900bce0361612b60200800cb0'
2017-01-21 00:01:19,b'2900bce0361012670200800c5a'
2017-01-21 00:01:31,b'2900bce03610126b02008007d1'


In [80]:
training_df = overall_df[str(PERIODS['training'][0]):str(PERIODS['training'][1])]
validation_df = overall_df[str(PERIODS['validation'][0]):str(PERIODS['validation'][1])]
#test_df = overall_df[str(PERIODS['test'][0]):str(PERIODS['test'][1])]

In [81]:
training_df.to_csv('dataset_training.csv', sep=';', header=False)
validation_df.to_csv('dataset_validation.csv', sep=';', header=False)

## Time Map

**Overall Date Range:** 2017-01-21 - 2017-02-21  
**Train Period:** 2 weeks  
**Validation Period:** 1 week  
**Test Period:** 1 week  

## Attack Types

* DoS via A_Restart
* Network Scan/Sweep
* New Device
    * copy activity from another device and change the address
    
## Timeline of Attacks

* 2017-02-12 02:00:00 to 2017-02-12 07:00:00
    * unusual behaviour
    * traffic copied from 2017-02-06 09:00:00 to 2017-02-06 14:00:00
    * original traffic in this time was replaced
* 2017-02-13 08:00:00 to 2017-02-13 08:29:59
    * DoS on line 3.4
    * 3 x 15min bursts, with 5min pause in between
    * max 500 telegrams per second
* 2017-02-13 20:00:00 to 2017-02-13 20:02:11
    * network scan/sweep
    * 65534 addrs scanned, from 0.0.0 to 15.15.255
    * max 500 telegrams per second
* 2017-02-14
    * 2 new devices for a whole day
        * 3.6.26 (copied from 3.6.7 at 2017-02-07)
        * 3.5.18 (copied from 2.6.42 at 2017-02-08)

In [82]:
"""Generate DoS Traffic"""
dos_df = df_from_generator(simulate_dos(
    gen_burst(datetime.datetime(2017, 2, 13, 8, 0), bursts=4, burst_duration=datetime.timedelta(minutes=15), wait=datetime.timedelta(minutes=5), max_ps=500),
    #gen_time(datetime.datetime(2017, 2, 12, 8, 0), duration=datetime.timedelta(minutes=20), max_ps=5000),
    knx.KnxAddress('3.4.0'),
    knx.KnxAddress('3.4.255'),
    prio=knx.TelegramPriority.SYSTEM,
    hop_count=7,
))
len(dos_df)

1792941

In [83]:
dos_df.tail()

Unnamed: 0_level_0,telegram
timestamp,Unnamed: 1_level_1
2017-02-13 08:29:59,b'2900ba70340234cd000380'
2017-02-13 08:29:59,b'2900ba70340234ce000380'
2017-02-13 08:29:59,b'2900ba70340234cf000380'
2017-02-13 08:29:59,b'2900ba70340234d0000380'
2017-02-13 08:29:59,b'2900ba70340234d1000380'


In [84]:
"""Generate Probing"""
probing_df = df_from_generator(simulate_network_probing(
    gen_time(datetime.datetime(2017, 2, 13, 20, 0), max_ps=500),
    knx.KnxAddress('0.0.0'),
    knx.KnxAddress('15.15.255'),
    prio=knx.TelegramPriority.NORMAL,
    hop_count=4,
))
len(probing_df)

65534

In [85]:
probing_df.tail()

Unnamed: 0_level_0,telegram
timestamp,Unnamed: 1_level_1
2017-02-13 20:02:11,b'2900ba403402fffa000300'
2017-02-13 20:02:11,b'2900ba403402fffb000300'
2017-02-13 20:02:11,b'2900ba403402fffc000300'
2017-02-13 20:02:11,b'2900ba403402fffd000300'
2017-02-13 20:02:11,b'2900ba403402fffe000300'


In [86]:
"""Generate new device traffic"""
def copy_device(input_df: pd.DataFrame, template_addr: knx.KnxAddress, template_date: datetime.date, new_addr: knx.KnxAddress, new_date: datetime.date):
    for index, row in input_df[str(template_date)].iterrows():
        telegram = knx.parse_knx_telegram(binascii.unhexlify(row['telegram'][2:-1]))
        
        if int(telegram.src) == int(template_addr):
            telegram.timestamp = datetime.datetime.combine(new_date, index.time())
            telegram.src = new_addr
            # print(row['telegram'][2:-1], telegram.timestamp)
            yield telegram
        

new_device_df = df_from_generator(copy_device(
    overall_df,
    knx.KnxAddress('3.6.7'),
    datetime.date(2017, 2, 7),  # a Tuesday
    knx.KnxAddress('3.6.26'),  # addr is not in data set (validated by select distinct query)
    datetime.date(2017, 2, 14)
))
len(new_device_df)

22

In [87]:
new_device_df

Unnamed: 0_level_0,telegram
timestamp,Unnamed: 1_level_1
2017-02-14 07:21:31,b'2900bce0361a0940000081'
2017-02-14 07:32:22,b'2900bce0361a0940000080'
2017-02-14 08:31:46,b'2900bce0361a0940000081'
2017-02-14 08:44:03,b'2900bce0361a0940000080'
2017-02-14 08:54:06,b'2900bce0361a0940000081'
2017-02-14 09:04:22,b'2900bce0361a0940000080'
2017-02-14 10:41:29,b'2900bce0361a0940000081'
2017-02-14 10:53:31,b'2900bce0361a0940000080'
2017-02-14 11:01:09,b'2900bce0361a0940000081'
2017-02-14 11:11:36,b'2900bce0361a0940000080'


In [88]:
new_device2_df = df_from_generator(copy_device(
    overall_df,
    knx.KnxAddress('3.6.42'),
    datetime.date(2017, 2, 8),  # a Tuesday
    knx.KnxAddress('3.5.18'),  # addr is not in data set (validated by select distinct query)
    datetime.date(2017, 2, 14)
))
len(new_device2_df)

187

In [89]:
"""
c = pd.concat([new_device_df, new_device2_df], join='outer')
len(c)
"""
pass

In [90]:
"""unusual traffic for that time of the day"""
def move_traffic(input_df: pd.DataFrame, src_time: datetime.datetime, dest_time: datetime.datetime, duration: datetime.timedelta):
    for index, row in input_df[str(src_time):str(src_time+duration)].iterrows():
        yield (dest_time + (index - src_time), row['telegram'])
              
unusual_df = pd.DataFrame.from_records(
    move_traffic(
        overall_df,
        datetime.datetime(2017, 2, 6, 9, 0, 0),
        datetime.datetime(2017, 2, 12, 2, 0, 0),
        datetime.timedelta(hours=5)
    ),
    index='timestamp', columns=('timestamp', 'telegram')
)
len(unusual_df)

4520

In [91]:
"""
concatinate everything
"""
test_df = overall_df[str(PERIODS['test'][0]):str(PERIODS['test'][1])]

# first remove the pices of time, that is supposed to be filled with unusual traffic
test_df.drop('2017-02-06')

test_df = pd.concat(
    [
        test_df,
        dos_df,
        probing_df,
        new_device_df,
        new_device2_df,
        unusual_df,
    ],
    join='outer'
).sort_index()
test_df.to_csv('dataset_test.csv', sep=';', header=False)
len(test_df)

2041278