In [40]:
import random
import bisect
import math
import csv
import os
import time
import ipaddress
import sympy

from pympler import asizeof

import hashlib
import numpy as np
import random


In [41]:
class UniversalHash:
    def __init__(self, n, m, seed = 114514):
        # random.seed(seed) For getting deterministic output for analyzing
        self.p = sympy.nextprime(n)
        self.a = random.randint(1, self.p-1)
        self.b = random.randint(0, self.p-1)
        self.m = m
    def hash(self, x):
        return (pow(self.a, x, self.p) + self.b) % self.p % self.m

In [42]:

class CountSketch:
    def __init__(self, w, d, seed=None):
        self.w = w
        self.d = d
        self.seed = seed if seed is not None else random.randint(0, 2**32 - 1)
        self.sketch = np.zeros((d, w), dtype=np.int64)

    def _hash(self, x, i):
        m = hashlib.md5()
        m.update(x.encode('utf-8'))
        m.update(str(self.seed + i).encode('utf-8'))
        return int(m.hexdigest(), 16)

    def update(self, item, delta=1):
        for i in range(self.d):
            j = self._hash(item, i) % self.w
            c = 1 if self._hash(item, i + self.d) % 2 == 0 else -1
            self.sketch[i, j] += delta * c

    def estimate(self, item):
        estimates = np.zeros(self.d)
        for i in range(self.d):
            j = self._hash(item, i) % self.w
            c = 1 if self._hash(item, i + self.d) % 2 == 0 else -1
            estimates[i] = self.sketch[i, j] * c
        return np.median(estimates)

def find_l2_heavy_hitters(stream, w, d, threshold):
    count_sketch = CountSketch(w, d)
    for item in stream:
        count_sketch.update(item)

    heavy_hitters = set()
    for item in set(stream):
        estimate = count_sketch.estimate(item)
        if estimate >= threshold:
            heavy_hitters.add(item)

    return heavy_hitters

In [43]:
epsilon = 0.05
r_num = 0

#h_size = 5
h_size = math.ceil(1/(epsilon*epsilon))

B = [0 for _ in range(h_size)]
B_list = [list(B) for _ in range(32)]
h_list = [[UniversalHash(2**32, h_size), UniversalHash(2**32, 1)] for _ in range(32)]


In [44]:
cwd = os.getcwd()
file_path = os.path.join(cwd, 'CoAPDDoS.csv')

start_time = time.time()

with open(file_path, 'r') as file:
    
    counter = 0
    reader = csv.reader(file)
    
    for row in reader:
        ip_address_str = row[r_num]                    # Change column here
        if counter == 0:
            print(ip_address_str)
        if counter > 0:
            
            integer_ip_address = int(ipaddress.IPv4Address(ip_address_str))
            
            for i, itemi in enumerate(B_list):
                itemi[h_list[i][0].hash(integer_ip_address)] += (h_list[i][1].hash(integer_ip_address)-0.5)*2
            
        #if counter == 10:
        #    break
        counter += 1

end_time = time.time()
elapsed_time = end_time - start_time
print("Elapsed time: ", elapsed_time, " seconds")

IP.src
Elapsed time:  132.72727465629578  seconds


In [52]:
integer_query = int(ipaddress.IPv4Address('10.200.7.199'))

B_extract = [0 for _ in range(32)]

for i, itemi in enumerate(B_list):
    B_extract[i] = itemi[h_list[i][0].hash(integer_query)]
    
l2count = abs(np.median(B_extract))
print(l2count)

0.0


In [53]:
for i, itemi in enumerate(B_list):
    estimate = 0
    for j, itemj in enumerate(itemi):
        estimate+= itemj**2
    

print(l2count**2/estimate)
print(l2count/counter)

0.0
0.0


In [None]:
print(estimate)

In [36]:
integer_query = int(ipaddress.IPv4Address('192.168.1.12'))

In [37]:
# Brute Force
with open(file_path, 'r') as file:
    
    counter = 0
    l2ct = 0
    reader = csv.reader(file)
    for row in reader:
        ip_address_str = row[r_num]                    # Change column here
        if counter == 0:
            print(ip_address_str)
        if counter > 0:
            integer_ip_address = int(ipaddress.IPv4Address(ip_address_str))
            if integer_query == integer_ip_address:
                l2ct += 1
            
        #if counter == 10:
        #    break
        counter += 1

IP.src


In [38]:
print("l2:",l2ct)

print("Freq", l2ct**2/F2)

l2: 138029
Freq 0.1560535894682968


In [17]:
def compute_second_frequency_moment_from_file(file_path):
    frequency_count = {}

    with open(file_path, 'r') as file:
        counter = 0
        reader = csv.reader(file)

        for row in reader:
            ip_address_str = row[r_num]                    # Change column here
            if counter == 0:
                print(ip_address_str)
            if counter > 0:

                integer_ip_address = int(ipaddress.IPv4Address(ip_address_str))

                if integer_ip_address in frequency_count:
                    frequency_count[integer_ip_address] += 1
                else:
                    frequency_count[integer_ip_address] = 1
                    
            counter += 1

    F2 = sum(count**2 for count in frequency_count.values())
    return F2


F2 = compute_second_frequency_moment_from_file(file_path)

print("Second frequency moment (F2):", F2)


IP.src
Second frequency moment (F2): 122086296803


In [13]:
print(F2/2)

130356275583329.5
