In [4]:
import pickle
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
import random
import json
import os
import re
import seaborn as sns
import math

In [5]:
def read_sum_data(algo='cm', row=3, width=4096, level=1, seed=1, count=1, flowkey='srcIP', 
              epochs=['10'], dataset='zipf2a_3_caida20180517_7.pcap', window_size=200):
    """Load the final and dynamic top-k summation series from disk.

    For every epoch in ``epochs`` the experiment directory
    ``../lstm/SketchPatternQuery/<algo>/<dataset>/<flowkey>/row_..._seed_<seed>/``
    is listed; each sub-directory (visited in sorted name order) contributes
    two lists to the result, in this order:

    1. the integers on the first line of
       ``summation_<window_size>/final_topk_summation.txt``
    2. the integers on the first line of
       ``summation_<window_size>/dynamic_topk_summation.txt``

    Entries of the experiment directory that are not directories are skipped.

    Returns:
        list[list[int]]: the series in the order described above.

    Raises:
        OSError: if the experiment directory or one of the files is missing.
        ValueError: if a token on the first line is not an integer.
    """
    def first_line_ints(file_path):
        # Only the first line of the file is parsed; tokens are whitespace separated.
        with open(file_path, 'r') as handle:
            return [int(token) for token in handle.readline().strip().split()]

    summation_dir = "summation_" + str(window_size)
    file_names = ("final_topk_summation.txt", "dynamic_topk_summation.txt")

    series = []
    for epoch in epochs:
        root = (f"../lstm/SketchPatternQuery/{algo}/{dataset}/"
                f"{flowkey}/row_{row}_width_{width}_level_{level}_epoch_{epoch}_count_{count}_seed_{seed}/")

        for entry in sorted(os.listdir(root)):
            if not os.path.isdir(os.path.join(root, entry)):
                continue
            # Final series first, dynamic series second, for every sub-directory.
            for file_name in file_names:
                series.append(first_line_ints(os.path.join(root, entry, summation_dir, file_name)))

    return series

# read_sum_data()

In [6]:
def read_total_size_data(algo='cm', row=3, width=4096, level=1, seed=1, count=1, flowkey='srcIP', 
              epochs=['10'], dataset='caida20180517_10_caida20180816_0.pcap', window_size=200):
    """Load the total-flow-size series from disk.

    For every epoch in ``epochs`` the experiment directory
    ``../lstm/SketchPatternQuery/<algo>/<dataset>/<flowkey>/row_..._seed_<seed>/``
    is listed; for each sub-directory (visited in sorted name order) every
    non-blank line of ``window_<window_size>/total_flow_size.txt`` is parsed
    as one integer and appended to a single flat result list.

    Blank or whitespace-only lines (for example an extra trailing newline)
    are skipped instead of raising ``ValueError``.

    Returns:
        list[int]: all values, concatenated across epochs and sub-directories.

    Raises:
        OSError: if the experiment directory or a data file is missing.
        ValueError: if a non-blank line is not an integer.
    """
    window_dir = "window_" + str(window_size)

    res = []
    for epoch in epochs:
        path = f"../lstm/SketchPatternQuery/{algo}/{dataset}/"\
                f"{flowkey}/row_{row}_width_{width}_level_{level}_epoch_{epoch}_count_{count}_seed_{seed}/"

        # `entry` rather than `dir`, so the builtin is not shadowed.
        for entry in sorted(os.listdir(path)):
            run_dir = os.path.join(path, entry)
            if not os.path.isdir(run_dir):
                continue

            full_path = os.path.join(run_dir, window_dir, 'total_flow_size.txt')
            with open(full_path, 'r') as f:
                for line in f:
                    value = line.strip()
                    if value:  # tolerate blank lines in the data file
                        res.append(int(value))

    return res

# read_total_size_data()

## Prepare Dataset

In [7]:
## parameters

# Trace-name fragments from which the pcap file names are assembled.
name1 = ["zipf2a", "zipf4", "zipf2b"]
name2 = ["caida20180816", "caida20180517"]

# Length fragments for the first and second trace; every pair sums to 10.
# presumably these are durations in seconds -- TODO confirm
lens = [[str(first), str(10 - first)] for first in range(3, 9)]

# caida + caida mixes
pcap_file = [f'{name2[0]}_{first}_{name2[1]}_{second}.pcap' for first, second in lens]

# single dataset
pcap_file += [
    "caida20180517_10_caida20180816_0.pcap",
    "caida20180816_10_caida20180816_0.pcap",
    "zipf2a_10_caida20180816_0.pcap",
    "zipf2b_10_caida20180816_0.pcap",
    "zipf4_10_caida20180816_0.pcap",
]

# Optional extra mixes, currently disabled:
#
# # caida + zipf
# for n1 in name1:
#     for l in lens:
#         name = f'{name2[0]}_{l[0]}_{n1}_{l[1]}.pcap'
#         pcap_file.append(name)
#
# # zipf + caida
# for n1 in name1:
#     for n2 in name2:
#         for l in lens:
#             name = f'{n1}_{l[0]}_{n2}_{l[1]}.pcap'
#             pcap_file.append(name)

print(pcap_file)
print(f'Total Pcap File Number: {len(pcap_file)}')

# Sketch widths to evaluate.
# widths = [2048, 4096, 8192, 16384, 32768, 65536, 131072]
widths = [4096]



['caida20180816_3_caida20180517_7.pcap', 'caida20180816_4_caida20180517_6.pcap', 'caida20180816_5_caida20180517_5.pcap', 'caida20180816_6_caida20180517_4.pcap', 'caida20180816_7_caida20180517_3.pcap', 'caida20180816_8_caida20180517_2.pcap', 'caida20180517_10_caida20180816_0.pcap', 'caida20180816_10_caida20180816_0.pcap', 'zipf2a_10_caida20180816_0.pcap', 'zipf2b_10_caida20180816_0.pcap', 'zipf4_10_caida20180816_0.pcap']
Total Pcap File Number: 11


### Plot Both Final & Dynamic TopK

In [8]:
def predict_final_dynamic_topk(algo='cm', row=3, width=[4096], level=1, seed=1, count=1, flowkey='srcIP', 
              epochs=['10'], datasets=['zipf2a_3_caida20180517_7.pcap'], window_size=200):
    """Predict a change time from the final and dynamic top-k summation series.

    For every (dataset, width) pair the first two series returned by
    ``read_sum_data`` are differenced once (signed) and twice (absolute).
    The prediction for a series is the index of the largest second
    difference inside the inspected window, converted with
    ``(index - 1) * window_size / 1000``.
    (presumably ``window_size`` is in milliseconds, so predictions are in
    seconds -- TODO confirm)

    The expected answer is the integer in the second ``_``-separated field of
    the dataset name. A prediction counts as a hit when it differs from the
    answer by less than 1.0. Per-pair statistics and the two hit rates are
    printed; nothing is returned.
    """
    def first_differences(values, length):
        # The leading None keeps index i aligned with values[i] - values[i-1].
        return [None] + [values[i] - values[i-1] for i in range(1, length)]

    def second_differences(deltas):
        # Two leading Nones: the first absolute second difference exists at index 2.
        return [None, None] + [abs(deltas[i] - deltas[i-1]) for i in range(2, len(deltas))]

    labels = []
    answers = []
    fin_var, dyn_var = {}, {}
    fin_sec, dyn_sec = {}, {}

    for pcap in datasets:
        for w in width:
            key = f'{pcap[:-5]}_{w}'
            sums = read_sum_data(algo, row, w, level, seed, count, flowkey, epochs, pcap, window_size)

            labels.append(key)
            answers.append(int(pcap.split('_')[1]))

            final_sum, dynamic_sum = sums[0], sums[1]
            # Both series are differenced over the length of the final series.
            fin_var[key] = first_differences(final_sum, len(final_sum))
            dyn_var[key] = first_differences(dynamic_sum, len(final_sum))
            fin_sec[key] = second_differences(fin_var[key])
            dyn_sec[key] = second_differences(dyn_var[key])

    # Number of windows skipped at the start and trimmed from the end of the inspected range.
    ignore = int(1000/window_size)
    start = 1 + ignore
    limit = int(10*1000/window_size) + 1 - ignore

    def window_stats(values):
        top = max(values)
        mean = sum(values) / len(values)
        return top, mean, top - mean

    def predicted_time(sec_series):
        # First index holding the strictly largest second difference in [start, limit).
        best_idx, best_val = start, -1
        for j in range(start, min(limit, len(sec_series))):
            if sec_series[j] > best_val:
                best_val, best_idx = sec_series[j], j
        return (best_idx-1)*window_size/1000

    fin_pred = []
    dyn_pred = []
    for i, key in enumerate(labels):
        window = slice(start, min(limit, len(fin_var[key])))

        # final
        fin_stats = window_stats(fin_var[key][window])
        fin_pred.append(predicted_time(fin_sec[key]))

        # dynamic
        dyn_stats = window_stats(dyn_var[key][window])
        dyn_pred.append(predicted_time(dyn_sec[key]))

        print(i)
        print(key)
        print('\tFinal TopK')
        print('\t\t', *fin_stats)
        print('\t\t', *window_stats(fin_sec[key][window]))
        print('\t\t', fin_pred[i], answers[i])
        print('\tDynamic TopK')
        print('\t\t', *dyn_stats)
        print('\t\t', *window_stats(dyn_sec[key][window]))
        print('\t\t', dyn_pred[i], answers[i])
        print()

    succ = sum(1 for guess, truth in zip(fin_pred, answers) if abs(guess - truth) < 1.0)
    succ2 = sum(1 for guess, truth in zip(dyn_pred, answers) if abs(guess - truth) < 1.0)

    print(f'final   topk predict: {succ}/{len(fin_pred)} {succ*100/len(fin_pred)}%')
    print(f'dynamic topk predict: {succ2}/{len(dyn_pred)} {succ2*100/len(dyn_pred)}%')
        
# predict_final_dynamic_topk(width=widths, datasets=pcap_file)

In [9]:
def plot_both_topk(algo='cm', row=3, width=4096, level=1, seed=1, count=1, flowkey='srcIP', 
              epochs=['10'], dataset='zipf2a_3_caida20180517_7.pcap', window_size=500):
    """Plot the final and dynamic top-k sums together with their second differences.

    The series come from ``read_sum_data``; the first two are treated as the
    final and the dynamic top-k sums. Their absolute second differences are
    appended to the same list and every entry is drawn as one line. Debug
    information (labels, series, max/avg/diff of the first differences) is
    printed before plotting. Nothing is returned.
    """
    labels = ["Final TopK Sum", "Dynamic TopK Sum", "Final TopK Sec Var", "Dynamic TopK Sec Var"]

    # sum
    res = read_sum_data(algo, row, width, level, seed, count, flowkey, epochs, dataset, window_size)
    final_sum, dynamic_sum = res[0], res[1]
    n_points = len(final_sum)

    # variation (index 0 has no predecessor, hence the None placeholder)
    fin_res_var = [None] + [final_sum[i] - final_sum[i-1] for i in range(1, n_points)]
    dyn_res_var = [None] + [dynamic_sum[i] - dynamic_sum[i-1] for i in range(1, n_points)]

    # second variation (absolute), defined from index 2 onwards
    fin_sec_res_var = [None, None] + [abs(fin_res_var[i] - fin_res_var[i-1]) for i in range(2, n_points)]
    dyn_sec_res_var = [None, None] + [abs(dyn_res_var[i] - dyn_res_var[i-1]) for i in range(2, n_points)]

    res.append(fin_sec_res_var)
    res.append(dyn_sec_res_var)

    print(labels, len(labels))
    print(res, len(res))
    print(fin_sec_res_var, len(res))
    print(dyn_sec_res_var, len(res))
    for deltas in (fin_res_var, dyn_res_var):
        tail = deltas[1:]
        peak = max(tail)
        mean = sum(tail)/len(tail)
        print(f'max: {peak}, avg: {mean}, diff: {peak - mean}')

    plt.figure(figsize=(12, 4))

    for idx, series in enumerate(res):
        plt.plot(series, label=labels[idx])

    # Add labels and legend
    plt.xlabel('Time (sec)')
    plt.ylabel('Value')
    plt.title('%s Fixed Window (Window Size = %d)' % (dataset, window_size))

    # One tick per 1000 window_size units, labelled with the converted position.
    step = int(1000/window_size)
    ticks = list(range(int(10*1000/window_size) + 1))
    shown_ticks = ticks[0::step]
    adjusted_ticks = [tick * (window_size / 1000) for tick in shown_ticks]
    plt.xticks(shown_ticks, adjusted_ticks)
    plt.legend(loc='upper left', ncol=math.ceil(len(res)/4))

    # Show the plot
    plt.show()
    

# Scratch settings for manual plotting runs.
# NOTE(review): in the visible cells `datasets` and `window_sizes` are only
# referenced by the commented-out calls below; the entry carries a trailing
# "/" that the names in `pcap_file` do not have -- confirm before reuse.
# datasets = ['10_0.pcap/', '5_5.pcap/', '4_6.pcap/', '3_7.pcap/']
datasets = ["zipf2a_3_caida20180517_7.pcap/"]
# window_sizes = [100, 200, 500]
window_sizes = [200]
# NOTE(review): `plot_single_both` is not defined in the visible cells;
# `plot_both_topk` looks like the intended function -- confirm.
# plot_single_both(dataset=pcap_file[4], window_size=200)
# plot_single_both(dataset=datasets[0], window_size=500)

### Total Flow Size Predict

In [10]:
def predict_with_total_size(algo='cm', row=3, width=[4096], level=1, seed=1, count=1, flowkey='srcIP', 
              epochs=['10'], datasets=['zipf2a_3_caida20180517_7.pcap'], window_size=200):
    """Predict a change time from the total-flow-size series.

    For every (dataset, width) pair the series returned by
    ``read_total_size_data`` is differenced once (signed) and twice
    (absolute). The prediction is the index of the largest second difference
    inside the inspected window, converted with
    ``(index - 1) * window_size / 1000``.
    (presumably ``window_size`` is in milliseconds -- TODO confirm)

    The expected answer is the integer in the second ``_``-separated field of
    the dataset name; a prediction within 1.0 of it counts as a hit.
    Per-pair statistics and the hit rate are printed; nothing is returned.
    """
    labels, answers = [], []
    size_var, size_sec = {}, {}

    for pcap in datasets:
        for w in width:
            key = f'{pcap[:-5]}_{w}'
            totals = read_total_size_data(algo, row, w, level, seed, count, flowkey, epochs, pcap, window_size)

            labels.append(key)
            answers.append(int(pcap.split('_')[1]))

            # Index i holds totals[i] - totals[i-1]; index 0 has no predecessor.
            deltas = [None]
            deltas.extend(totals[i] - totals[i-1] for i in range(1, len(totals)))
            size_var[key] = deltas
            # Absolute change between consecutive first differences, from index 2.
            size_sec[key] = [None, None] + [abs(deltas[i] - deltas[i-1]) for i in range(2, len(deltas))]

    # Number of windows skipped at the start and trimmed from the end of the inspected range.
    ignore = int(1000/window_size)
    start = 1 + ignore
    limit = int(10*1000/window_size) + 1 - ignore

    pred = []
    for i, key in enumerate(labels):
        deltas, jumps = size_var[key], size_sec[key]
        print(deltas)

        inspected = deltas[start:min(limit, len(deltas))]
        var_max = max(inspected)
        var_avg = sum(inspected) / len(inspected)
        var_diff = var_max - var_avg

        # First index holding the strictly largest second difference in [start, limit).
        best_idx, best_val = start, -1
        for j in range(start, min(limit, len(jumps))):
            if jumps[j] > best_val:
                best_val, best_idx = jumps[j], j

        pred.append((best_idx-1)*window_size/1000)

        print(i)
        print(key)
        print('\tTotal Size')
        print('\t\t', var_max, var_avg, var_diff)
        # NOTE(review): these statistics cover jumps[2:], not the inspected
        # window used for the prediction -- confirm that this is intended.
        all_jumps = jumps[2:]
        jump_max = max(all_jumps)
        jump_avg = sum(all_jumps) / len(all_jumps)
        print('\t\t', jump_max, jump_avg, jump_max - jump_avg)
        print('\t\t', pred[i], answers[i])
        print()

    succ = sum(1 for guess, truth in zip(pred, answers) if abs(guess - truth) < 1.0)

    print(f'total flow size predict: {succ}/{len(pred)} {succ*100/len(pred)}%')
        
# predict_with_total_size(width=widths, datasets=pcap_file)

In [11]:
def plot_single_total_size(algo='cm', row=3, width=4096, level=1, seed=1, count=1, flowkey='srcIP', 
              epochs=['10'], dataset='zipf2a_3_caida20180517_7.pcap', window_size=500):
    """Plot the total flow size per window together with its second difference.

    The series is loaded with ``read_total_size_data``. Its signed first
    difference and absolute second difference are computed; the raw series
    and the second difference are drawn as two lines. Debug information is
    printed before plotting. Nothing is returned.

    Changes from the earlier version:
    * the legend labels name the total-flow-size series instead of the
      copy-pasted "Final TopK" labels;
    * the tick step is at least 1, so ``window_size`` values above 1000 no
      longer fail with "slice step cannot be zero".
    """
    # The plotted data is the total flow size, not a top-k sum.
    labels = ["Total Flow Size", "Total Flow Size Sec Var"]

    # sum
    result = read_total_size_data(algo, row, width, level, seed, count, flowkey, epochs, dataset, window_size)

    # variation (index 0 has no predecessor, hence the None placeholder)
    fin_res_var = [None] + [result[i] - result[i-1] for i in range(1, len(result))]

    # second variation (absolute), defined from index 2 onwards
    fin_sec_res_var = [None, None] + [abs(fin_res_var[i] - fin_res_var[i-1]) for i in range(2, len(result))]

    res = [result, fin_sec_res_var]

    print(labels, len(labels))
    print(res, len(res))
    print(result)
    print(fin_res_var)
    print(fin_sec_res_var, len(res))
    tail = fin_res_var[1:]
    peak = max(tail)
    mean = sum(tail)/len(tail)
    print(f'max: {peak}, avg: {mean}, diff: {peak - mean}')

    plt.figure(figsize=(12, 4))

    for idx, series in enumerate(res):
        plt.plot(series, label=labels[idx])

    # Add labels and legend
    plt.xlabel('Time (sec)')
    plt.ylabel('Value')
    plt.title('%s Flow Size Variation (Window Size = %d)' % (dataset[:-5], window_size))

    # One tick per 1000 window_size units; never let the slice step drop to 0.
    step = max(1, int(1000 / window_size))
    ticks = list(range(int(10*1000/window_size) + 1))
    shown_ticks = ticks[0::step]
    adjusted_ticks = [tick * (window_size / 1000) for tick in shown_ticks]
    plt.xticks(shown_ticks, adjusted_ticks)
    plt.legend(loc='upper left', ncol=math.ceil(len(res)/4))

    # Show the plot
    plt.show()
    

# Scratch settings for manual plotting runs (same values as assigned in an earlier cell).
# NOTE(review): in the visible cells `datasets` and `window_sizes` are only
# referenced by the commented-out calls below; the entry carries a trailing
# "/" that the names in `pcap_file` do not have -- confirm before reuse.
# datasets = ['10_0.pcap/', '5_5.pcap/', '4_6.pcap/', '3_7.pcap/']
datasets = ["zipf2a_3_caida20180517_7.pcap/"]
# window_sizes = [100, 200, 500]
window_sizes = [200]
# NOTE(review): `plot_single_both` is not defined in the visible cells;
# `plot_single_total_size` looks like the intended function here -- confirm.
# plot_single_both(dataset=pcap_file[4], window_size=200)
# plot_single_both(dataset=datasets[0], window_size=500)

In [12]:
# changing_time_predict_final_topk(width=widths, datasets=pcap_file)
# plot_single_final_topk(dataset=pcap_file[0], type_="accumulate.txt", window_size=200)

# predict_final_dynamic_topk(width=widths, datasets=pcap_file)
# plot_both_topk(dataset=pcap_file[0], window_size=200)

# predict_with_total_size(width=widths, datasets=pcap_file)
# plot_single_total_size(dataset=pcap_file[1], window_size=200)

### Predict Final TopK & Dynamic TopK & Total Flow Size

In [13]:
def predict(algo='cm', row=3, width=[4096], level=1, seed=1, count=1, flowkey='srcIP', 
              epochs=['10'], datasets=['zipf2a_3_caida20180517_7.pcap'], window_size=200):
    """Predict a change time from three series: final top-k, dynamic top-k, total flow size.

    For every (dataset, width) pair the first two series from
    ``read_sum_data`` and the series from ``read_total_size_data`` are
    differenced once (signed) and twice (absolute); all three are differenced
    over the length of the final top-k series. The prediction for a series is
    the index of its largest second difference inside the inspected window,
    converted with ``(index - 1) * window_size / 1000``.
    (presumably ``window_size`` is in milliseconds -- TODO confirm)

    The expected answer is the integer in the second ``_``-separated field of
    the dataset name; a prediction within 1.0 of it counts as a hit.
    Per-pair statistics and the three hit rates are printed; nothing is
    returned.
    """
    # One slot per series, always in the order: final top-k, dynamic top-k, total flow size.
    titles = ('\tFinal TopK', '\tDynamic TopK', '\tTotal Flow ')
    first_diff = ({}, {}, {})
    second_diff = ({}, {}, {})
    preds = ([], [], [])

    labels = []
    answers = []

    for pcap in datasets:
        for w in width:
            key = f'{pcap[:-5]}_{w}'

            topk = read_sum_data(algo, row, w, level, seed, count, flowkey, epochs, pcap, window_size)
            totals = read_total_size_data(algo, row, w, level, seed, count, flowkey, epochs, pcap, window_size)

            labels.append(key)
            answers.append(int(pcap.split('_')[1]))

            sources = (topk[0], topk[1], totals)
            # Every series is differenced over the length of the final top-k series.
            steps = range(1, len(topk[0]))
            for var_map, values in zip(first_diff, sources):
                var_map[key] = [None] + [values[i] - values[i-1] for i in steps]

            pairs = range(2, len(first_diff[0][key]))
            for var_map, sec_map in zip(first_diff, second_diff):
                deltas = var_map[key]
                sec_map[key] = [None, None] + [abs(deltas[i] - deltas[i-1]) for i in pairs]

    # Number of windows skipped at the start and trimmed from the end of the inspected range.
    ignore = int(1000/window_size)
    start = ignore + 1
    limit = int(10*1000/window_size) + 1 - ignore

    for i, key in enumerate(labels):
        # The window end is derived from the final top-k series and reused for all three.
        window = slice(start, min(limit, len(first_diff[0][key])))

        var_stats = []
        for var_map, sec_map, pred_list in zip(first_diff, second_diff, preds):
            inspected = var_map[key][window]
            top = max(inspected)
            mean = sum(inspected) / len(inspected)
            var_stats.append((top, mean, top - mean))

            # First index holding the strictly largest second difference in [start, limit).
            jumps = sec_map[key]
            best_idx, best_val = start, -1
            for j in range(start, min(limit, len(jumps))):
                if jumps[j] > best_val:
                    best_val, best_idx = jumps[j], j

            pred_list.append((best_idx-1)*window_size/1000)

        print(i)
        print(key)
        for title, stats, sec_map, pred_list in zip(titles, var_stats, second_diff, preds):
            print(title)
            print('\t\t', *stats)
            inspected_jumps = sec_map[key][window]
            jump_max = max(inspected_jumps)
            jump_avg = sum(inspected_jumps) / len(inspected_jumps)
            print('\t\t', jump_max, jump_avg, jump_max - jump_avg)
            print('\t\t', pred_list[i], answers[i])
        print()

    hits = [sum(1 for guess, truth in zip(pred_list, answers) if abs(guess - truth) < 1.0)
            for pred_list in preds]
    pred, pred2, pred3 = preds
    succ, succ2, succ3 = hits

    print(f'final   topk predict: {succ}/{len(pred)} {succ*100/len(pred)}%')
    print(f'dynamic topk predict: {succ2}/{len(pred2)} {succ2*100/len(pred2)}%')
    print(f' total flow  predict: {succ3}/{len(pred3)} {succ3*100/len(pred3)}%')
        
# predict_final_dynamic_topk(width=widths, datasets=pcap_file)

In [15]:
# Run the three predictors (final top-k, dynamic top-k, total flow size) over
# every file in `pcap_file` for each configured sketch width.
predict(width=widths, datasets=pcap_file)

0
caida20180816_3_caida20180517_7_4096
	Final TopK
		 8928 5316.075 3611.925
		 5274 853.475 4420.525
		 3.0 3
	Dynamic TopK
		 8731 4722.225 4008.7749999999996
		 3064 1102.45 1961.55
		 3.8 3
	Total Flow 
		 121535 108175.925 13359.074999999997
		 36141 4725.3 31415.7
		 3.4 3

1
caida20180816_4_caida20180517_6_4096
	Final TopK
		 8928 4700.0 4228.0
		 5155 793.675 4361.325
		 4.0 4
	Dynamic TopK
		 7862 4141.45 3720.55
		 3831 1108.675 2722.325
		 4.0 4
	Total Flow 
		 121782 110026.95 11755.050000000003
		 36144 4571.45 31572.55
		 4.4 4

2
caida20180816_5_caida20180517_5_4096
	Final TopK
		 10258 4117.85 6140.15
		 6809 1081.55 5727.45
		 5.0 5
	Dynamic TopK
		 7665 3585.65 4079.35
		 3067 1082.4 1984.6
		 5.8 5
	Total Flow 
		 121782 111260.25 10521.75
		 36152 4460.375 31691.625
		 5.4 5

3
caida20180816_6_caida20180517_4_4096
	Final TopK
		 6718 3665.825 3052.175
		 3096 1129.725 1966.275
		 6.8 6
	Dynamic TopK
		 6567 3426.175 3140.825
		 3550 1072.35 2477.65
		 6.0 6
	Total F