In [99]:
import pickle
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
import random
import json
import os
import re
import seaborn as sns
import math
import sys
import copy

In [100]:
def read_single_window_fsd_data(algo='cm', row=3, width=4096, level=1, seed=1, count=1, flowkey='srcIP', 
              epochs=['10'], dataset='caida0517-125w_10_.pcap', window_size=200):
    res = []
    for epoch in epochs:
        
        path = f"../SketchPatternQuery/{algo}/{dataset}/"\
                f"{flowkey}/row_{row}_width_{width}_level_{level}_epoch_{epoch}_count_{count}_seed_{seed}/"
        
        for dir in sorted(os.listdir(path)):
            p = os.path.join(path, dir)
            if os.path.isdir(p): 
                window_dir = "window_" + str(window_size)
                dynamic_full_path = os.path.join(path, dir, window_dir, "single_window_randk_summation")
                
                for file in sorted(os.listdir(dynamic_full_path)):  
                    fsd_file = os.path.join(dynamic_full_path, file)
                    fsd = {}
                    with open(fsd_file, 'r') as f:
                        for line in f:
                            if int(line.strip().split()[0]) == 0:
                                continue
                            fsd[int(line.strip().split()[0])] = int(line.strip().split()[1])
                            
                    res.append(fsd)
            
    return res

# read_fsd_data()

In [101]:
def read_single_window_gt_fsd_data(algo='cm', row=3, width=4096, level=1, seed=1, count=1, flowkey='srcIP', 
              epochs=['10'], dataset='caida0517-125w_10_.pcap', window_size=200):
    res = []
    for epoch in epochs:
        
        path = f"../SketchPatternQuery/{algo}/{dataset}/"\
                f"{flowkey}/row_{row}_width_{width}_level_{level}_epoch_{epoch}_count_{count}_seed_{seed}/"
        
        for dir in sorted(os.listdir(path)):
            p = os.path.join(path, dir)
            if os.path.isdir(p): 
                window_dir = "window_" + str(window_size)
                dynamic_full_path = os.path.join(path, dir, window_dir, "single_window_randk_gt_summation")
                
                for file in sorted(os.listdir(dynamic_full_path)):  
                    fsd_file = os.path.join(dynamic_full_path, file)
                    fsd = {}
                    with open(fsd_file, 'r') as f:
                        for line in f:
                            if int(line.strip().split()[0]) == 0:
                                continue
                            fsd[int(line.strip().split()[0])] = int(line.strip().split()[1])
                            
                    res.append(fsd)
            
    return res

# read_fsd_data()

In [124]:
## parameters

caida0517 = ["caida0517-500w", "caida0517-250w", "caida0517-150w", "caida0517-070w", "caida0517-030w"]
zipf2a = ["zipf2a-150w", "zipf2a-070w", "zipf2a-030w"]
zipf10 = ["zipf10-070w", "zipf10-030w"]


lens = [
        # ["5", "5"],
        ["6", "4"],
        # ["7", "3"],
        # ["8", "2"],
        ]

pcap_file = []
    
# # single dataset
pcap_file.append("caida0517-500w_10_.pcap")
pcap_file.append("caida0517-250w_10_.pcap")
pcap_file.append("caida0517-150w_10_.pcap")
pcap_file.append("caida0517-070w_10_.pcap")
pcap_file.append("caida0517-030w_10_.pcap")
pcap_file.append("zipf2a-150w_10_.pcap") 
pcap_file.append("zipf2a-070w_10_.pcap") 
pcap_file.append("zipf2a-030w_10_.pcap") 
pcap_file.append("zipf10-070w_10_.pcap") 
pcap_file.append("zipf10-030w_10_.pcap") 

### caida + zipf2a [10:25]
for a in caida0517:
    for b in zipf2a:
        for l in lens:
            pcap_file.append(f'{a}_{l[0]}_{b}_{l[1]}.pcap')
            
### zipf2a + caida [25:40]
for a in zipf2a:
    for b in caida0517:
        for l in lens:
            pcap_file.append(f'{a}_{l[0]}_{b}_{l[1]}.pcap')
            
### caida + zipf10 [40:50]
for a in caida0517:
    for b in zipf10:
        for l in lens:
            pcap_file.append(f'{a}_{l[0]}_{b}_{l[1]}.pcap')
            
### zipf10 + caida [50:60]
for a in zipf10:
    for b in caida0517:
        for l in lens:
            pcap_file.append(f'{a}_{l[0]}_{b}_{l[1]}.pcap')
            
### zipf2a + zipf10 [60:66]
for a in zipf2a:
    for b in zipf10:
        for l in lens:
            pcap_file.append(f'{a}_{l[0]}_{b}_{l[1]}.pcap')
            
### zipf10 + zipf2a [66:72]
for a in zipf10:
    for b in zipf2a:
        for l in lens:
            pcap_file.append(f'{a}_{l[0]}_{b}_{l[1]}.pcap')
            
### caida + caida [72:92]
for a in caida0517:
    for b in caida0517:
        if a == b:
            continue
        for l in lens:
            pcap_file.append(f'{a}_{l[0]}_{b}_{l[1]}.pcap')
            
### zipf2a + zipf2a [92:98]
for a in zipf2a:
    for b in zipf2a:
        if a == b:
            continue
        for l in lens:
            pcap_file.append(f'{a}_{l[0]}_{b}_{l[1]}.pcap')
            
### zipf10 + zipf10 [98:100]
for a in zipf10:
    for b in zipf10:
        if a == b:
            continue
        for l in lens:
            pcap_file.append(f'{a}_{l[0]}_{b}_{l[1]}.pcap')
            
        
            
print(pcap_file)    
print(f'Total Pcap File Number: {len(pcap_file)}')
# widths = [2048, 4096, 8192, 16384, 32768, 65536, 131072]
widths = [4096]
# widths = [1024]



['caida0517-500w_10_.pcap', 'caida0517-250w_10_.pcap', 'caida0517-150w_10_.pcap', 'caida0517-070w_10_.pcap', 'caida0517-030w_10_.pcap', 'zipf2a-150w_10_.pcap', 'zipf2a-070w_10_.pcap', 'zipf2a-030w_10_.pcap', 'zipf10-070w_10_.pcap', 'zipf10-030w_10_.pcap', 'caida0517-500w_6_zipf2a-150w_4.pcap', 'caida0517-500w_6_zipf2a-070w_4.pcap', 'caida0517-500w_6_zipf2a-030w_4.pcap', 'caida0517-250w_6_zipf2a-150w_4.pcap', 'caida0517-250w_6_zipf2a-070w_4.pcap', 'caida0517-250w_6_zipf2a-030w_4.pcap', 'caida0517-150w_6_zipf2a-150w_4.pcap', 'caida0517-150w_6_zipf2a-070w_4.pcap', 'caida0517-150w_6_zipf2a-030w_4.pcap', 'caida0517-070w_6_zipf2a-150w_4.pcap', 'caida0517-070w_6_zipf2a-070w_4.pcap', 'caida0517-070w_6_zipf2a-030w_4.pcap', 'caida0517-030w_6_zipf2a-150w_4.pcap', 'caida0517-030w_6_zipf2a-070w_4.pcap', 'caida0517-030w_6_zipf2a-030w_4.pcap', 'zipf2a-150w_6_caida0517-500w_4.pcap', 'zipf2a-150w_6_caida0517-250w_4.pcap', 'zipf2a-150w_6_caida0517-150w_4.pcap', 'zipf2a-150w_6_caida0517-070w_4.pcap', 'zi

### Predict FSD Changes

In [103]:
def changes_occured(sec_var, n, ws=20):
    
    time_sec_var = -1.0
    
    val = 0
    max_time = -1.0
    
    # for each sliding window           
    for i in range(len(sec_var) - ws + 1):            
        sec_var_mean = np.mean(sec_var[0+i:ws+i])
        sec_var_std_dev = np.std(sec_var[0+i:ws+i])
        
        # check by sec var outliers
        if abs(sec_var[ws+i-1] - sec_var_mean) > (n * sec_var_std_dev):
            if time_sec_var == -1.0:
                time_sec_var = (4+(ws+i-1)-1)/5
                val = sec_var[ws+i-1]
                break
                
    if val == 0:
        val = max(sec_var)
        max_time = (4 + sec_var.index(val) - 1)/5
        
    sec_var_hit = 0
    
    if time_sec_var != -1.0:
        sec_var_hit = 1
    
    return sec_var_hit, time_sec_var, val, max_time
    
    

In [104]:
def calculate_mrd(fsd1, fsd2):
    MRD_nom = 0
    MRD_denom = 0
    for i in range(1, max(fsd1.keys())+1):
        if i in fsd1.keys():
            true = fsd1[i]
        else:
            true = 0
            
        if i in fsd2.keys():
            est = fsd2[i]
        else:
            est = 0
            
        MRD_nom += abs(true - est)
        MRD_denom += float(true + est)/2
    MRD = MRD_nom/MRD_denom
    
    return MRD

In [105]:
def normalize_fsd(fsd):
    fn = sum(list(fsd.values()))
    
    return {key: (val/fn) for key,val in fsd.items()}
            
    # calcualte CDF
    cdf = {}
    culmulative_prob = 0.0
    for fs, freq in sorted(fsd.items()):
        culmulative_prob += (freq/fn)
        cdf[fs] = culmulative_prob
        
    # get bins
    bin = {}
    idx = 0
    for fs, prob in sorted(cdf.items()):
        while idx < round(prob * 100):
            bin[idx] = fs
            idx += 1
            
    qfsd = {}
    idx = 0
    for fs, freq in sorted(fsd.items()):
        while fs > bin[idx]:
            if idx < len(bin) - 1:
                idx += 1
            else:
                break
            
        if bin[idx] in qfsd.keys():
            qfsd[bin[idx]] += freq/fn
        else:
            qfsd[bin[idx]] = freq/fn
             
    return qfsd

In [106]:
def plot_mrd_variation(mrd_var, name, window_size=200, typ='Var'):
    
    res = []
    
    res.append(mrd_var)
    
    print(name)
    print(mrd_var)

    plt.figure(figsize=(12, 4))
        
    for i in range(len(res)):
        plt.plot(res[i], label=f'MRD {typ}')

    # Add labels and legend
    plt.xlabel('Time (sec)')
    if typ == "MRD":
        plt.ylabel('MRD')
    else:
        plt.ylabel('Value')
    plt.title('%s MRD Variation' % (name))
    # plt.axhline(10000, c="black")
    ticks = [i for i in range(int(10*1000/window_size) + 1)]
    adjusted_ticks = [tick * (window_size / 1000) for tick in ticks[0::int(1000 / window_size)]]
    plt.xticks(ticks[0::int(1000/window_size)], adjusted_ticks)
    plt.legend(loc='upper left', ncol=math.ceil(len(res)/4))

    # Show the plot
    plt.show()
    

In [132]:
def predict(algo='cm', row=3, width=[4096], level=1, seed=1, count=1, flowkey='srcIP', 
              epochs=['10'], datasets=['caida0517-125w_10_.pcap'], window_size=200, dev=3, ws=20):
    
    # top 100
    res_fsd = {}
    res_mrd = {}
    res_mrd_var = {}
    res_mrd_sec_var = {}
    # res_sec_var = {}
    
    label = []
    answer = []
    
    for d in datasets:
        for w in width:
            name = f'{d[:-5]}_{w}'
            res_mrd[name] = [None, None] # 0, 1
            res_mrd_var[name] = [None, None, None] # 0, 1, 2
            res_mrd_sec_var[name] = [None, None, None, None] # 0, 1, 2, 3
            
            # fsd_total = read_fsd_data(algo, row, w, level, seed, count, flowkey, epochs, d, window_size)
            # fsd_total = read_gt_fsd_data(algo, row, w, level, seed, count, flowkey, epochs, d, window_size)
            fsd_total = read_single_window_fsd_data(algo, row, w, level, seed, count, flowkey, epochs, d, window_size)
            # fsd_total = read_single_window_gt_fsd_data(algo, row, w, level, seed, count, flowkey, epochs, d, window_size)

            label.append(name)
            answer.append(int(d.split('_')[1]))
            # answer.append(10)
            
            res_fsd[name] = fsd_total[:-1] # ignore last window (less than 200ms)
            
            for i in range(1, len(res_fsd[name])):
                # res_mrd[name].append(calculate_mrd(res_fsd[name][i-1], res_fsd[name][i]))
                res_mrd[name].append(calculate_mrd(normalize_fsd(res_fsd[name][i-1]), normalize_fsd(res_fsd[name][i])))
                # print(normalize_fsd(res_fsd[name][i-1]))
                # print(normalize_fsd(res_fsd[name][i]))
                
            # print(len(res_fsd[name]), len(res_mrd[name]), res_mrd[name])
                
            # plot_mrd_variation(res_mrd[name], d[:-5], window_size, 'MRD')
                
            for i in range(3, len(res_mrd[name])):
                res_mrd_var[name].append(abs(res_mrd[name][i] - res_mrd[name][i-1]))
                
            # plot_mrd_variation(res_mrd_var[name], d[:-5], window_size, 'Var')
                
            for i in range(4, len(res_mrd_var[name])):
                res_mrd_sec_var[name].append(abs(res_mrd_var[name][i] - res_mrd_var[name][i-1]))
                
            # plot_mrd_variation(res_mrd_sec_var[name], d[:-5], window_size, 'Sec Var')
            # print(len(res_mrd_var[name]), len(res_mrd_sec_var[name]))

    # # print(answer)
    ignore = int(1000/window_size)
    choose = 5
    # dev = 3.4 # 99.90%
    # dev = 3 # 99.90%
    # dev = 3.719 # 99.99%
    iqr = 1.5
    
    pred = []
    
    norm_hit = 0
    iqr_hit = 0
    
    hit = []
    
    for i in range(len(label)):
        
        sec_var_hit, time_sec_var, val, max_time = changes_occured(res_mrd_sec_var[label[i]][4:], dev, ws)
        
        pred.append(time_sec_var)
           
        print(i)
        print(label[i])
        print('\tFSD MRD ')
        # print('\t\t', var_max3, var_avg3, var_diff3)
        # print('\t\t', max(res_sec_var3[label[i]][(1+ignore):end]), sum(res_sec_var3[label[i]][(1+ignore):end]) / len(res_sec_var3[label[i]][(1+ignore):end]), max(res_sec_var3[label[i]][(1+ignore):end]) - (sum(res_sec_var3[label[i]][(1+ignore):end]) / len(res_sec_var3[label[i]][(1+ignore):end])))
        print('\t\t', res_mrd_sec_var[label[i]])
        print(f'\t\tsec var: {time_sec_var}, ans: {answer[i]}, change: {sec_var_hit}')
        print(f'\t\t{val}, max time: {max_time}')
        print()
        
        if answer[i] == 10:
            if sec_var_hit == 0:
                hit.append(1)
            else:
                hit.append(0)
        else:
            if sec_var_hit == 1:
                hit.append(1)
            else:
                hit.append(0)
            
        

    succ = 0

    for i in range(len(label)):
        if hit[i] == 1:
            if answer[i] == 10:
                succ += 1
            else:
                if abs(pred[i] - answer[i]) <= 1.0:
                    succ += 1
    
    print("single comprehensive predict")
    print(f' total flow  predict: {succ}/{len(pred)} {succ*100/len(pred)}%')
    print()
    
    
    
    
        
# predict_final_dynamic_topk(width=widths, datasets=pcap_file)

In [134]:
predict(width=widths, datasets=pcap_file[:10], window_size=200, dev=3.4, ws=25)
predict(width=widths, datasets=pcap_file[10:25], window_size=200, dev=3.4, ws=25)
predict(width=widths, datasets=pcap_file[25:40], window_size=200, dev=3.4, ws=25)
predict(width=widths, datasets=pcap_file[40:50], window_size=200, dev=3.4, ws=25)
predict(width=widths, datasets=pcap_file[50:60], window_size=200, dev=3.4, ws=25)
predict(width=widths, datasets=pcap_file[60:66], window_size=200, dev=3.4, ws=25)
predict(width=widths, datasets=pcap_file[66:72], window_size=200, dev=3.4, ws=25)

predict(width=widths, datasets=pcap_file[72:92], window_size=200, dev=3.4, ws=25)
predict(width=widths, datasets=pcap_file[92:98], window_size=200, dev=3.4, ws=25)
predict(width=widths, datasets=pcap_file[98:100], window_size=200, dev=3.4, ws=25)

0
caida0517-500w_10__4096
	FSD MRD 
		 [None, None, None, None, 0.17159959165440086, 0.12127051969227504, 0.2067160967681995, 0.001232304916337204, 0.0021498312133712005, 0.02309941751974391, 0.01566620184981657, 0.021854031057060852, 0.02823000446504234, 0.002624550758679095, 0.01719815949727116, 0.006424209007559856, 0.028953288191346307, 0.0004482978679631322, 0.0038167444274569973, 0.006860743697001098, 0.014144713177812476, 0.007921890406783538, 0.0041742092509556294, 0.003990684227344124, 0.004040482618810348, 0.0009056381463610902, 0.0056764597467702516, 0.006872447301387419, 0.013993431086604724, 0.012412014925085396, 0.004688974555542749, 0.016068765003658003, 0.013979557277258442, 0.003628939315133875, 0.004990822415286547, 0.015387155918442988, 0.011353868391574151, 0.009916353471708633, 0.0010318817041026496, 0.02809391116011009, 0.042747066720873084, 0.015544979726888841, 0.005932168457009626, 0.01662503591818512, 0.009954104891786178, 0.003967800590211262, 0.0189892541947

### Predict Current FSD

In [108]:
def prepare_profiler_dirty_fsd_data(algo='cm', row=3, width=4096, level=1, seed=1, count=1, flowkey='srcIP', 
              epochs=['10'], dataset='caida0517-125w_10_.pcap', window_size=200):
    
    profiler_fsd = {}
    profiler_folder_path = "/home/ming/SketchMercator/pattern_detection/traffic_generator/pcap_file_new/"
    for file_name in sorted(os.listdir(profiler_folder_path)):
        profiler_fsd[file_name[:-9]] = []
    
    # res = {}
    for epoch in epochs:
        for pf in profiler_fsd.keys():
            path = f"../SketchPatternQueryOfflineNew/{algo}/{pf}_10_.pcap/"\
                    f"{flowkey}/row_{row}_width_{width}_level_{level}_epoch_{epoch}_count_{count}_seed_{seed}/"
            
            for dir in sorted(os.listdir(path)):
                p = os.path.join(path, dir)
                if os.path.isdir(p): 
                    window_dir = "window_" + str(window_size)
                    dynamic_full_path = os.path.join(path, dir, window_dir, "single_window_randk_summation")
                    
                    for file in sorted(os.listdir(dynamic_full_path)):  
                        fsd_file = os.path.join(dynamic_full_path, file)
                        fsd = {}
                        with open(fsd_file, 'r') as f:
                            for line in f:
                                if int(line.strip().split()[0]) == 0:
                                    continue
                                fsd[int(line.strip().split()[0])] = int(line.strip().split()[1])
                                
                        profiler_fsd[pf].append(fsd)
                
    return profiler_fsd

# read_fsd_data()

In [109]:
def get_avg_fsd(fsd_list):
    avg_fsd = {}
    
    all_keys = set().union(*[d.keys() for d in fsd_list])
    for key in all_keys:
        avg_fsd[key] = 0
        
    for fsd in fsd_list:
        for key, val in fsd.items():
            avg_fsd[key] += val
            
    avg_fsd = {key: int(val/len(fsd_list)) for key, val in avg_fsd.items()}
    
    return avg_fsd


In [121]:
def find_dist_by_dirty_fsd(profiler_fsd, unknown_fsd):
    
    all_mrd = {}
    min_mrd = sys.float_info.max
    predict_dist = ""
    for name, dist in profiler_fsd.items():
        # mrd = calculate_mrd(dist, unknown_fsd)
        mrd = calculate_mrd(normalize_fsd(dist), normalize_fsd(unknown_fsd))
        all_mrd[name] = mrd
        if mrd < min_mrd:
            min_mrd = mrd
            predict_dist = name
    
    mrds = []
    for name, mrd in all_mrd.items():
        mrds.append(mrd)
        
    # print(predict_dist, all_mrd)
    
    # print(len(mrds))
    # return predict_dist, profiler_fsd[predict_dist]
    return predict_dist, mrds
    

In [117]:
def predict_dist_by_dirty_fsd(algo='cm', row=3, width=[4096], level=1, seed=1, count=1, flowkey='srcIP', 
              epochs=['10'], datasets=['caida0517-125w_10_.pcap'], window_size=200, dev=3, ws=20, start=0, predict_time = 3):
    
    # top 10000
    res_fsd = {}
    
    label = []
    answer = []
    
    for d in datasets:
        for w in width:
            name = f'{d[:-5]}_{w}'
            
            # fsd_total = read_fsd_data(algo, row, w, level, seed, count, flowkey, epochs, d, window_size)
            fsd_total = read_single_window_fsd_data(algo, row, w, level, seed, count, flowkey, epochs, d, window_size)
            # fsd_total = read_gt_fsd_data(algo, row, w, level, seed, count, flowkey, epochs, d, window_size)

            label.append(name)
            answer.append(int(d.split('_')[1]))
            # answer.append(10)
            
            res_fsd[name] = fsd_total
            
    
    # find dist
    success = 0
    profiler_fsd = prepare_profiler_dirty_fsd_data()
    # prepare profiler avg dirty fsd
    profiler_avg_fsd = {}
    for name, fsd_list in profiler_fsd.items():
        profiler_avg_fsd[name] = get_avg_fsd(fsd_list)
        
    for i in range(len(label)):
        pcap_file_name = label[i][:-5] + ".pcap"
        # print(len(sampled_flowkey))
        print("now finding ", pcap_file_name, "'s distribution")
        vote = {}
        all_mrds = []
        
        ### avg
        # selected_fsd = res_fsd[label[i]][start:min(start+int(predict_time * 1000 / window_size), len(res_fsd[label[i]]))]
        # avg_selected_fsd = get_avg_fsd(selected_fsd)
        # dist_name, mrds = find_dist_by_dirty_fsd(profiler_avg_fsd, avg_selected_fsd)
        # vote[dist_name] = 1
        
        ### vote
        for j in range(min(int(predict_time * 1000 / window_size), len(res_fsd[label[i]])-start)):
            # print(start + j)
            dist_name, mrds = find_dist_by_dirty_fsd(profiler_avg_fsd, res_fsd[label[i]][start+j])
            all_mrds.append(mrds)
            # break
            # print(dist_name)
            if dist_name in vote:
                vote[dist_name] += 1
            else:
                vote[dist_name] = 1
        
        if start == 0 or start == 5:
            if pcap_file_name.split("_")[0] == max(vote, key=vote.get).split("_")[0]:
                success += 1
            else:
                if pcap_file_name.split("-")[0] == max(vote, key=vote.get).split("-")[0]:
                    success += 1
        else:
            if pcap_file_name.split("_")[2] == max(vote, key=vote.get).split("_")[0]:
                success += 1
            else:
                if pcap_file_name.split("-")[1].split("_")[-1] == max(vote, key=vote.get).split("-")[0]:
                    success += 1
        
        print("==> ", max(vote, key=vote.get), vote)
        # plot_mrd(pcap_file_name[:-5], all_mrds, window_size, predict_time, pcap_file_name.split("_")[0])
        print("---\n")
        
    
    print(f"predict : {success}/{len(label)} , ({success*100/len(label)} %)")
 

In [125]:
predict_dist_by_dirty_fsd(width=widths, datasets=pcap_file[:10], window_size=200, dev=3, ws=25, start=0, predict_time = 2)

predict_dist_by_dirty_fsd(width=widths, datasets=pcap_file[72:92], window_size=200, dev=3, ws=25,start=30, predict_time = 2)
predict_dist_by_dirty_fsd(width=widths, datasets=pcap_file[92:98], window_size=200, dev=3, ws=25,start=30, predict_time = 2)
predict_dist_by_dirty_fsd(width=widths, datasets=pcap_file[98:100], window_size=200, dev=3, ws=25,start=30, predict_time = 2)

predict_dist_by_dirty_fsd(width=widths, datasets=pcap_file[10:25], window_size=200, dev=3, ws=25,start=30, predict_time = 2)
predict_dist_by_dirty_fsd(width=widths, datasets=pcap_file[25:40], window_size=200, dev=3, ws=25,start=30, predict_time = 2)
predict_dist_by_dirty_fsd(width=widths, datasets=pcap_file[40:50], window_size=200, dev=3, ws=25,start=30, predict_time = 2)
predict_dist_by_dirty_fsd(width=widths, datasets=pcap_file[50:60], window_size=200, dev=3, ws=25,start=30, predict_time = 2)
predict_dist_by_dirty_fsd(width=widths, datasets=pcap_file[60:66], window_size=200, dev=3, ws=25,start=30, predict_time = 2)
predict_dist_by_dirty_fsd(width=widths, datasets=pcap_file[66:72], window_size=200, dev=3, ws=25,start=30, predict_time = 2)

now finding  caida0517-500w_10_.pcap 's distribution
==>  caida0517-500w {'caida0517-500w': 10}
---

now finding  caida0517-250w_10_.pcap 's distribution
==>  caida0517-250w {'caida0517-250w': 10}
---

now finding  caida0517-150w_10_.pcap 's distribution
==>  caida0517-150w {'caida0517-150w': 9, 'caida0517-250w': 1}
---

now finding  caida0517-070w_10_.pcap 's distribution
==>  caida0517-070w {'caida0517-030w': 1, 'caida0517-070w': 9}
---

now finding  caida0517-030w_10_.pcap 's distribution
==>  caida0517-030w {'caida0517-030w': 9, 'caida0517-070w': 1}
---

now finding  zipf2a-150w_10_.pcap 's distribution
==>  zipf2a-150w {'caida0517-070w': 1, 'caida0517-150w': 1, 'zipf2a-150w': 8}
---

now finding  zipf2a-070w_10_.pcap 's distribution
==>  zipf2a-070w {'zipf2a-070w': 8, 'caida0517-030w': 1, 'zipf2a-150w': 1}
---

now finding  zipf2a-030w_10_.pcap 's distribution
==>  zipf2a-070w {'zipf2a-070w': 5, 'zipf2a-030w': 5}
---

now finding  zipf10-070w_10_.pcap 's distribution
==>  zipf10-0