In [1]:
import math
import numpy as np
import pandas as pd

In [2]:
from pyprojroot import here
from sklearn.metrics import roc_auc_score
from tqdm import tqdm, trange

In [3]:
# Implementations of MIDAS and its two variants, MIDAS-R and MIDAS-F

In [4]:
# Abbreviations used below: src = source node, dst = destination node, ts = timestamp

In [5]:
class Midas:
    """Streaming edge scorer labelled 'MIDAS'.

    Two flat integer tables of ``row * col`` buckets are kept: ``current``
    counts edge occurrences since the latest timestamp change, ``total`` counts
    them since construction. Hash row ``r`` owns buckets
    ``[r * col, (r + 1) * col)``.
    """

    def __init__(self, row: int, col: int):
        self.nameAlg = 'MIDAS'
        self.ts: int = 1  # largest timestamp seen so far
        self.row, self.col = row, col
        # 2 * row hash coefficients drawn from [1, 2^16): the first `row` are
        # multipliers, the last `row` are additive offsets.
        self.param = np.random.randint(1, 1 << 16, 2 * row).astype(int)
        numBucket = row * col
        self.current = np.zeros(numBucket, int)  # per-timestamp counts
        self.total = np.zeros(numBucket, int)  # accumulated counts

    @staticmethod  # static method: can be called without an instance
    def ChiSquaredTest(a: float, s: float, t: float) -> float:
        """Score a current count ``a`` against an accumulated count ``s`` at time ``t``.

        Returns 0 when ``s`` is 0 or ``t`` is 1 (the denominator would vanish),
        otherwise ``((a - s / t) * t) ** 2 / (s * (t - 1))``.
        """
        elapsed = t - 1
        if s == 0 or elapsed == 0:
            return 0
        deviation = (a - s / t) * t
        return deviation ** 2 / (s * elapsed)

    def Call(self, src: int, dst: int, ts: int) -> float:
        """Record the edge ``(src, dst)`` at timestamp ``ts`` and return its score.

        When ``ts`` is newer than the stored timestamp the ``current`` table is
        zeroed first. The score uses the smallest ``current`` and ``total``
        counts over the edge's buckets, and the ``ts`` argument as the time.
        """
        if ts > self.ts:
            self.current[:] = 0
            self.ts = ts
        edgeKey = src + 347 * dst
        lowCurrent = lowTotal = np.inf
        for r in range(self.row):
            bucket = r * self.col + (edgeKey * self.param[r] + self.param[r + self.row]) % self.col
            self.current[bucket] += 1
            self.total[bucket] += 1
            lowCurrent = min(lowCurrent, self.current[bucket])
            lowTotal = min(lowTotal, self.total[bucket])
        return self.ChiSquaredTest(lowCurrent, lowTotal, ts)

In [6]:
class CMSGroup:
    """Pair of flat float count buffers (``current`` and ``total``) of equal length."""

    def __init__(self, length: int):
        # Two independent zero-filled arrays with `length` buckets each.
        self.current, self.total = (np.zeros(length, dtype=float) for _ in range(2))
class MidasR:
    """Streaming edge scorer labelled 'MIDAS-R'.

    Three CMSGroup tables are maintained, one keyed by the edge, one by the
    source and one by the destination. Whenever the timestamp advances, every
    ``current`` buffer is multiplied by ``factor`` instead of being cleared.
    """

    def __init__(self, row: int, col: int, factor: float = 0.5):
        self.nameAlg = 'MIDAS-R'
        self.ts = 1  # largest timestamp seen so far
        self.row, self.col = row, col
        self.factor = factor  # multiplier applied to `current` on each timestamp change
        # 2 * row hash coefficients from [1, 2^16): multipliers first, offsets second.
        self.param = np.random.randint(1, 1 << 16, 2 * row).astype(int)
        # One table each for edge uv, node u and node v.
        numBucket = row * col
        self.edge, self.source, self.destination = (CMSGroup(numBucket) for _ in range(3))

    @staticmethod
    def ChiSquaredTest(a: float, s: float, t: float) -> float:
        """Return ``((a - s / t) * t) ** 2 / (s * (t - 1))``, or 0 if ``s == 0`` or ``t == 1``."""
        elapsed = t - 1
        if s == 0 or elapsed == 0:
            return 0
        deviation = (a - s / t) * t
        return deviation ** 2 / (s * elapsed)

    def Update(self, a: int, b: int, cms: CMSGroup) -> float:
        """Increment the buckets of key ``(a, b)`` in ``cms`` and score the key.

        Both ``cms.current`` and ``cms.total`` are incremented in every hash
        row; the score is computed from the row-wise minima at ``self.ts``.
        """
        key = a + 347 * b
        lowCurrent = lowTotal = np.inf
        for r in range(self.row):
            bucket = r * self.col + (key * self.param[r] + self.param[r + self.row]) % self.col
            cms.current[bucket] += 1
            cms.total[bucket] += 1
            lowCurrent = min(lowCurrent, cms.current[bucket])
            lowTotal = min(lowTotal, cms.total[bucket])
        return self.ChiSquaredTest(lowCurrent, lowTotal, self.ts)

    def Call(self, src: int, dst: int, ts: int) -> float:
        """Record edge ``(src, dst)`` at ``ts``; return the largest of the three table scores."""
        if ts > self.ts:
            # New timestamp: scale down the per-timestamp counts of all three tables.
            for cms in (self.edge, self.source, self.destination):
                cms.current *= self.factor
            self.ts = ts
        scoreEdge = self.Update(src, dst, self.edge)
        scoreSource = self.Update(src, 0, self.source)
        scoreDestination = self.Update(dst, 0, self.destination)
        return max(scoreEdge, scoreSource, scoreDestination)

In [7]:
# NOTE: this redefines the CMSGroup declared in the earlier cell. MidasR looks the
# name up when it is constructed, so instances created after this cell runs also
# carry the extra `index` and `score` buffers.
class CMSGroup:
    """Four flat buffers of equal length: int ``index`` plus float ``current``, ``total``, ``score``."""

    def __init__(self, length: int):
        self.index = np.zeros(length, dtype=int)
        # Three independent zero-filled float arrays.
        self.current, self.total, self.score = (np.zeros(length, dtype=float) for _ in range(3))
class MidasF:
    """Streaming edge scorer labelled 'MIDAS-F'.

    Like the relational variant it keeps three CMSGroup tables (edge, source,
    destination) and scales ``current`` by ``factor`` on every timestamp
    change. In addition, each bucket remembers the last score computed for it:
    at a timestamp change a bucket's ``current`` count is merged into ``total``
    only if that score is below ``threshold``; otherwise ``total`` grows by
    ``total * tsReciprocal`` instead.
    """

    def __init__(self, row: int, col: int, threshold: float, factor: float = 0.5):
        self.nameAlg = 'MIDAS-F'
        self.ts = 1  # largest timestamp seen so far
        self.row = row
        self.col = col
        self.threshold = threshold  # buckets scoring at or above this are not merged as-is
        self.factor = factor  # multiplier applied to `current` on each timestamp change
        # 2 * row hash coefficients from [1, 2^16): multipliers first, offsets second.
        self.param = np.random.randint(1, 1 << 16, 2 * row).astype(int)
        self.edge = CMSGroup(row * col)
        self.source = CMSGroup(row * col)
        self.destination = CMSGroup(row * col)
        # 1 / (ts - 1) for the stored ts; stays 0 until the first timestamp change.
        self.tsReciprocal = 0

    @staticmethod
    def ChiSquaredTest(a: float, s: float, t: float) -> float:
        """Return ``(a + s - a * t) ** 2 / (s * (t - 1))``.

        Returns 0 when ``s == 0`` or ``t == 1``; the latter guard prevents a
        division by zero and matches the other MIDAS classes in this file.
        """
        if s == 0 or t - 1 == 0:
            return 0
        return pow(a + s - a * t, 2) / (s * (t - 1))

    def Update(self, a: int, b: int, cms: 'CMSGroup') -> float:
        """Increment ``cms.current`` for key ``(a, b)`` and store the resulting score.

        ``cms.total`` is not modified here; it is only updated by the merge in
        ``Call``. The hashed bucket of each row is saved in ``cms.index[:row]``
        and the score is written to exactly those buckets of ``cms.score``.
        """
        key = a + 347 * b
        minCurrent = minTotal = np.inf
        for r in range(self.row):
            bucket = r * self.col + (key * self.param[r] + self.param[r + self.row]) % self.col
            cms.index[r] = bucket
            cms.current[bucket] += 1
            minCurrent = min(minCurrent, cms.current[bucket])
            minTotal = min(minTotal, cms.total[bucket])
        score = self.ChiSquaredTest(minCurrent, minTotal, self.ts)
        # Only the first `row` entries of cms.index hold buckets for this key; the
        # remainder of the array is unused zero padding and must not be touched,
        # otherwise bucket 0 would receive every score.
        cms.score[cms.index[:self.row]] = score
        return score

    def Call(self, src: int, dst: int, ts: int) -> float:
        """Record edge ``(src, dst)`` at ``ts``; return the largest of the three table scores."""
        if self.ts < ts:
            for cms in (self.edge, self.source, self.destination):
                # Conditional merge, element-wise over all buckets: add `current`
                # where the stored score is below the threshold, otherwise add
                # total * tsReciprocal.
                cms.total += np.where(
                    cms.score < self.threshold,
                    cms.current,
                    cms.total * self.tsReciprocal,
                )
                cms.current *= self.factor
            # self.ts starts at 1 and only increases, so ts >= 2 here.
            self.tsReciprocal = 1 / (ts - 1)
            self.ts = ts
        return max(
            self.Update(src, dst, self.edge),
            self.Update(src, 0, self.source),
            self.Update(dst, 0, self.destination),
        )

In [9]:
if __name__ == '__main__':
    # Evaluate one detector on the ISCX dataset and report its ROC-AUC.
    prefix = here()  # project root as detected by pyprojroot
    pathData = prefix / 'ISCX/Data.csv'
    pathLabel = prefix / 'ISCX/Label.csv'
    pathScore = prefix / 'ISCX/Score.txt'

    # To evaluate DARPA instead, use:
    #     pathData = prefix / 'DARPA/Data.csv'
    #     pathLabel = prefix / 'DARPA/Label.csv'
    #     pathScore = prefix / 'DARPA/Score.txt'

    # Each line of the data file is a comma-separated list of integers; each
    # line of the label file is a single integer.
    data = [
        [int(item) for item in line.split(b',')]
        for line in tqdm(pathData.read_bytes().splitlines(), 'Load Dataset', unit_scale=True)
    ]
    label = [int(line) for line in pathLabel.read_bytes().splitlines()]

    # Pick exactly one detector.
    midas = Midas(2, 1024)
    #     midas = MidasR(2, 1024)
    #     midas = MidasF(2, 1024, 1e3)

    # Rows are unpacked positionally into Call(src, dst, ts), one score per label.
    score = [0.0] * len(label)
    for i in trange(len(label), desc=midas.nameAlg, unit_scale=True):
        score[i] = midas.Call(*data[i])
    print(f"ROC-AUC = {roc_auc_score(label, score):.4f}")

    # Optional export of the raw scores (disabled); uncomment to enable:
    #     print(f"# Raw scores will be exported to")
    #     print(f"# {pathScore}")
    #     pathScore.parent.mkdir(exist_ok=True)
    #     with pathScore.open('w', newline='\n') as file:
    #         for line in tqdm(score, 'Export Scores', unit_scale=True):
    #             file.write(f'{line}\n')

Load Dataset: 100%|██████████| 1.10M/1.10M [00:03<00:00, 313kit/s] 
MIDAS: 100%|██████████| 1.10M/1.10M [00:34<00:00, 31.5kit/s]


ROC-AUC = 0.4091


In [10]:
if __name__ == '__main__':
    # Evaluate one detector on the DARPA dataset and report its ROC-AUC.
    prefix = here()  # Detect your project root

    # To evaluate ISCX instead, use:
    #     pathData = prefix / 'ISCX/Data.csv'
    #     pathLabel = prefix / 'ISCX/Label.csv'
    #     pathScore = prefix / 'ISCX/Score.txt'
    pathData = prefix / 'DARPA/Data.csv'
    pathLabel = prefix / 'DARPA/Label.csv'
    pathScore = prefix / 'DARPA/Score.txt'

    # Set to True to write the raw scores to pathScore, one score per line.
    # The announcement below is printed only when the export actually happens.
    exportScore = False

    # Each line of the data file is a comma-separated list of integers; each
    # line of the label file is a single integer.
    data = [[int(item) for item in line.split(b',')] for line in tqdm(pathData.read_bytes().splitlines(), 'Load Dataset', unit_scale=True)]
    label = list(map(int, pathLabel.read_bytes().splitlines()))

    # Pick exactly one detector.
    midas = Midas(2, 1024)
    #     midas = MidasR(2, 1024)
    #     midas = MidasF(2, 1024, 1e3)

    # Rows are unpacked positionally into Call(src, dst, ts), one score per label.
    score = [0.0] * len(label)
    for i in trange(len(label), desc=midas.nameAlg, unit_scale=True):
        score[i] = midas.Call(*data[i])
    print(f"ROC-AUC = {roc_auc_score(label, score):.4f}")

    if exportScore:
        print(f"# Raw scores will be exported to")
        print(f"# {pathScore}")  # report the file that is really written
        pathScore.parent.mkdir(exist_ok=True)
        with pathScore.open('w', newline='\n') as file:
            for line in tqdm(score, 'Export Scores', unit_scale=True):
                file.write(f'{line}\n')

Load Dataset: 100%|██████████| 4.55M/4.55M [00:14<00:00, 325kit/s] 
MIDAS: 100%|██████████| 4.55M/4.55M [02:20<00:00, 32.4kit/s]


ROC-AUC = 0.8945
# Raw scores will be exported to
# /Users/luoyao/Desktop/AnomalyDetection/Code/desktop/AnomalyDetection/Code/Score.txt
