In [32]:
import pandas as pd
import os, re
import csv
import numpy as np

In [33]:
import matplotlib.pyplot as plt
%matplotlib inline

# Use a serif font at size 11 for every figure drawn in this notebook.
plt.rc('font', family='serif')
plt.rcParams.update({'font.size': 11})

# Figures are rendered at 200 dpi; files written via savefig use 300 dpi.
plt.rcParams['figure.dpi'] = 200
plt.rcParams['savefig.dpi'] = 300

In [34]:
# Metadata for specific SFC configurations.
# exp_ls identifies the experiment run: it is passed as the `exp_num` path
# component to the parsing helpers and is embedded in the exported CSV name.
exp_ls = "2000s"


In [35]:
def print_headers(exp_type, exp_num, file, base_dir=None):
    """Print and return the header row of an experiment CSV file.

    Parameters
    ----------
    exp_type : str
        Experiment-type sub-directory below the data root.
    exp_num : str
        Experiment-run sub-directory below ``exp_type``.
    file : str
        Name of the CSV file inside that directory.
    base_dir : str, optional
        Data root directory. Defaults to ``../ndpi-stats-random`` relative to
        the current working directory.

    Returns
    -------
    list of str
        The fields of the first CSV row, or an empty list when the file
        contains no rows.

    Raises
    ------
    OSError
        If the file cannot be opened.
    """
    if base_dir is None:
        # Join the components separately instead of embedding a backslash, so
        # the same relative path resolves on Windows and on POSIX systems.
        base_dir = os.path.join("..", "ndpi-stats-random")
    file_path = os.path.join(base_dir, exp_type, exp_num, file)

    with open(file_path, newline='') as csvfile:
        # The default avoids a bare StopIteration on an empty file.
        headers = next(csv.reader(csvfile), [])
    print("header：", headers)
    return headers


#print_headers("fixed_rate", exp_ls, "bridge-pcm-new.csv")


In [36]:
def parse_seq(exp_type, exp_num, file, index=5, base_dir=None):
    """Parse sequential data, specialized for the TX/RX/Latency experiments.

    Two layouts are handled, selected by the file name:

    * names starting with ``latency.csv``: one number per line, no header.
      Each value is divided by 1000. When ``exp_type`` is
      ``"ndpi-stats-random"`` the first four lines are skipped.
    * any other name: comma-separated rows with one header line; the field at
      position ``index`` of every data row is returned.

    Fields that are not numeric, and rows too short to contain ``index``,
    yield ``numpy.nan`` so the result keeps one entry per data line.

    Parameters
    ----------
    exp_type : str
        Experiment-type sub-directory below the data root.
    exp_num : str
        Experiment-run sub-directory below ``exp_type``.
    file : str
        Name of the file to parse.
    index : int, optional
        Zero-based column to extract from comma-separated files (default 5).
    base_dir : str, optional
        Data root directory. Defaults to ``../ndpi-stats-random`` relative to
        the current working directory.

    Returns
    -------
    list of float
        Parsed values in file order; empty for an empty file.
    """
    if base_dir is None:
        # Join the components separately so the path works on any OS.
        base_dir = os.path.join("..", "ndpi-stats-random")
    # Both layouts are read from the same data root. NOTE(review): the
    # latency branch previously opened "ndpi-stats-random/..." without the
    # leading "..", unlike every other reader in this notebook.
    file_path = os.path.join(base_dir, exp_type, exp_num, file)
    values = []

    if file.startswith("latency.csv"):
        with open(file_path) as f:
            lines = f.readlines()
        if exp_type == "ndpi-stats-random":
            lines = lines[4:]
        for line in lines:
            try:
                values.append(float(line.strip()) / 1000)
            except ValueError:
                values.append(np.nan)
    else:
        with open(file_path) as f:
            # Skip the header line; the default tolerates an empty file.
            next(f, None)
            for line in f:
                try:
                    values.append(float(line.strip().split(",")[index]))
                except (ValueError, IndexError):
                    # Non-numeric field, or a short/blank row lacking `index`.
                    values.append(np.nan)

    return values

In [37]:
def parse_reg(exp_type, exp_num, file, stat_name, base_dir=None):
    """Extract one named column from an experiment CSV file.

    The first row is treated as the header. Each data-row value in the
    column called ``stat_name`` is converted to ``float`` when possible and
    otherwise kept as the raw string (for example dates or times). Rows that
    are too short to contain the column are skipped.

    Parameters
    ----------
    exp_type : str
        Experiment-type sub-directory below the data root.
    exp_num : str
        Experiment-run sub-directory below ``exp_type``.
    file : str
        Name of the CSV file.
    stat_name : str
        Exact header text of the column to extract.
    base_dir : str, optional
        Data root directory. Defaults to ``../ndpi-stats-random`` relative to
        the current working directory.

    Returns
    -------
    list
        Column values (floats and/or strings) in file order. Empty when the
        column is missing (a message is printed) or the file has no rows.
    """
    if base_dir is None:
        # Join the components separately so the path works on any OS.
        base_dir = os.path.join("..", "ndpi-stats-random")
    file_path = os.path.join(base_dir, exp_type, exp_num, file)
    stat = []

    with open(file_path, newline='') as csvfile:
        reader = csv.reader(csvfile)
        # An empty file gives an empty header, reported as a missing column
        # below instead of raising StopIteration.
        headers = next(reader, [])
        try:
            index = headers.index(stat_name)
        except ValueError:
            print(f"Column {stat_name} not found")
            return stat

        for row in reader:
            if len(row) <= index:
                # Short row: the requested column is absent.
                continue
            raw_value = row[index]
            try:
                stat.append(float(raw_value))
            except ValueError:
                # Keep non-numeric cells verbatim.
                stat.append(raw_value)

    return stat

In [38]:
# Read the default column (index 5) of tx_stats.csv; the bare expression
# below displays how many samples were parsed.
tx_stats = parse_seq("ndpi-stats-random", exp_ls,  "tx_stats.csv")
len(tx_stats)

2013

In [39]:
# Read the default column (index 5) of rx_stats.csv; the bare expression
# below displays how many samples were parsed.
rx_stats = parse_seq("ndpi-stats-random", exp_ls,  "rx_stats.csv")
len(rx_stats)

2013

In [40]:
# Sanity check: pull a single column from the PCM CSV and print its length.
# NOTE(review): the variable is named *_L3MISS but the column requested is
# "Socket 0-L3HIT" - confirm which one is intended before reusing this value.
bridge_L3MISS = parse_reg("ndpi-stats-random", exp_ls,  "ndpi_stats-pcm_new.csv", "Socket 0-L3HIT")
print(len(bridge_L3MISS))

2010


In [41]:
# Column names of ndpi_stats-pcm_new.csv; make_df iterates over this list.
features = print_headers("ndpi-stats-random", exp_ls,  "ndpi_stats-pcm_new.csv")

header： ['System-Date', 'System-Time', 'Socket 0-EXEC', 'Socket 0-IPC', 'Socket 0-FREQ', 'Socket 0-AFREQ', 'Socket 0-L3MISS', 'Socket 0-L2MISS', 'Socket 0-L3HIT', 'Socket 0-L2HIT', 'Socket 0-L3MPI', 'Socket 0-L2MPI', 'Socket 0-L3OCC', 'Socket 0-READ', 'Socket 0-WRITE', 'Socket 0-LOCAL', 'Socket 0-TEMP', 'Socket 0-INST', 'Socket 0-ACYC', 'Socket 0-TIME(ticks)', 'Socket 0-PhysIPC', 'Socket 0-PhysIPC%', 'Socket 0-INSTnom', 'Socket 0-INSTnom%', 'Socket 1-EXEC', 'Socket 1-IPC', 'Socket 1-FREQ', 'Socket 1-AFREQ', 'Socket 1-L3MISS', 'Socket 1-L2MISS', 'Socket 1-L3HIT', 'Socket 1-L2HIT', 'Socket 1-L3MPI', 'Socket 1-L2MPI', 'Socket 1-L3OCC', 'Socket 1-READ', 'Socket 1-WRITE', 'Socket 1-LOCAL', 'Socket 1-TEMP', 'Socket 1-INST', 'Socket 1-ACYC', 'Socket 1-TIME(ticks)', 'Socket 1-PhysIPC', 'Socket 1-PhysIPC%', 'Socket 1-INSTnom', 'Socket 1-INSTnom%', 'SKT0 Core C-State-C0res%', 'SKT0 Core C-State-C1res%', 'SKT0 Core C-State-C3res%', 'SKT0 Core C-State-C6res%', 'SKT0 Core C-State-C7res%', 'SKT0 Pac

In [42]:
# Column names of pcm-pcie_new.csv; make_df iterates over this list.
features_pcie = print_headers("ndpi-stats-random", exp_ls,   "pcm-pcie_new.csv")

header： ['skt-0_PCIRdCur-total', 'skt-0_RFO-total', 'skt-0_CRd-total', 'skt-0_DRd-total', 'skt-0_ItoM-total', 'skt-0_PRd-total', 'skt-0_WiL-total', 'skt-0_PCIRdCur-miss', 'skt-0_RFO-miss', 'skt-0_CRd-miss', 'skt-0_DRd-miss', 'skt-0_ItoM-miss', 'skt-0_PRd-miss', 'skt-0_WiL-miss', 'skt-0_PCIRdCur-hit', 'skt-0_RFO-hit', 'skt-0_CRd-hit', 'skt-0_DRd-hit', 'skt-0_ItoM-hit', 'skt-0_PRd-hit', 'skt-0_WiL-hit', 'skt-1_PCIRdCur-total', 'skt-1_RFO-total', 'skt-1_CRd-total', 'skt-1_DRd-total', 'skt-1_ItoM-total', 'skt-1_PRd-total', 'skt-1_WiL-total', 'skt-1_PCIRdCur-miss', 'skt-1_RFO-miss', 'skt-1_CRd-miss', 'skt-1_DRd-miss', 'skt-1_ItoM-miss', 'skt-1_PRd-miss', 'skt-1_WiL-miss', 'skt-1_PCIRdCur-hit', 'skt-1_RFO-hit', 'skt-1_CRd-hit', 'skt-1_DRd-hit', 'skt-1_ItoM-hit', 'skt-1_PRd-hit', 'skt-1_WiL-hit']


In [43]:
# Column names of pcm-memory_new.csv; make_df iterates over this list.
features_memory = print_headers("ndpi-stats-random", exp_ls, "pcm-memory_new.csv")

header： ['SKT0-Ch0Read', 'SKT0-Ch0Write', 'SKT0-Ch1Read', 'SKT0-Ch1Write', 'SKT0-Mem Read (MB/s)', 'SKT0-Mem Write (MB/s)', 'SKT0-P. Write (T/s)', 'SKT0-Memory (MB/s)', 'SKT1-Ch0Read', 'SKT1-Ch0Write', 'SKT1-Ch1Read', 'SKT1-Ch1Write', 'SKT1-Mem Read (MB/s)', 'SKT1-Mem Write (MB/s)', 'SKT1-P. Write (T/s)', 'SKT1-Memory (MB/s)', 'System-Read', 'System-Write', 'System-Memory']


In [44]:
# Column names of ddio_ways_new.csv; make_df iterates over this list.
features_ddio= print_headers("ndpi-stats-random", exp_ls, "ddio_ways_new.csv")

header： ['cos_way', 'binary', 'ddio_ways']


In [45]:
# Column names of latency_new.csv; make_df iterates over this list.
features_latency= print_headers("ndpi-stats-random", exp_ls, "latency_new.csv")

header： ['latency']


In [46]:
# Columns that make_df reads from tx_stats.csv. The list is written out by
# hand here rather than taken from the file header via print_headers.
features_pktsize=  [ 'PacketRate', 'Mbit', 'MbitWithFraming', 'TotalPackets', 'TotalBytes']
#print_headers("ndpi-stats-random", exp_ls, "tx_stats.csv")

In [47]:
# CSV file names, one list per feature group. In make_df the part of each name
# before the first "." becomes the prefix of the DataFrame column names.
Infos=["ndpi_stats-pcm_new.csv"]
Infos_pcie = ["pcm-pcie_new.csv"]
Infos_memory =["pcm-memory_new.csv"]
Infos_ddioways = ["ddio_ways_new.csv"]
Infos_latency = ["latency_new.csv"]
Infos_pktsize = ["tx_stats.csv"]
def make_df():
    """Assemble every parsed series of the current experiment into one DataFrame.

    Reads the module-level ``exp_ls``, the ``features*`` column-name lists and
    the ``Infos*`` file-name lists. The frame has:

    * ``input_rate`` / ``output_rate``: the default ``parse_seq`` column of
      ``tx_stats.csv`` / ``rx_stats.csv``;
    * one column per (file, feature) pair, named
      ``<file name up to the first '.'>_<feature without commas>``.

    Every column is truncated to a common length: the minimum of the TX and
    RX series and one reference column each from the PCM, PCIe and DDIO
    files. A column that is still shorter is right-padded by ``np.pad`` in
    constant mode (zeros by default) so all columns are equally long.

    Returns
    -------
    pandas.DataFrame
        One row per sample, columns as described above.
    """
    # (feature names, files) pairs, in the order the columns must appear.
    sources = [
        (features, Infos),
        (features_pcie, Infos_pcie),
        (features_memory, Infos_memory),
        (features_ddio, Infos_ddioways),
        (features_latency, Infos_latency),
        (features_pktsize, Infos_pktsize),
    ]

    tx_stats = parse_seq("ndpi-stats-random", exp_ls, "tx_stats.csv")
    rx_stats = parse_seq("ndpi-stats-random", exp_ls, "rx_stats.csv")

    seq_length = min(
        len(tx_stats),
        len(rx_stats),
        len(parse_reg("ndpi-stats-random", exp_ls, "ndpi_stats-pcm_new.csv", 'System-Date')),
        len(parse_reg("ndpi-stats-random", exp_ls, "pcm-pcie_new.csv", 'skt-0_PCIRdCur-total')),
        len(parse_reg("ndpi-stats-random", exp_ls, "ddio_ways_new.csv", 'ddio_ways')),
    )

    dic = {
        "input_rate": tx_stats[:seq_length],
        "output_rate": rx_stats[:seq_length],
    }

    for feature_names, files in sources:
        for f in feature_names:
            for v in files:
                # Read from the file named by the loop variable, so the
                # column prefix always matches the file the data came from.
                column = parse_reg("ndpi-stats-random", exp_ls, v, f)[:seq_length]
                if len(column) < seq_length:
                    column = np.pad(column, (0, seq_length - len(column)), mode='constant')
                dic[v.split(".")[0] + "_" + f.replace(",", "")] = column

    return pd.DataFrame(dic)

#bridge_L3MISS = parse_reg("fixed_rate", exp_ls,  "bridge-pcm.csv", "Socket 0-L3HIT")
#print(len(bridge_L3MISS))

In [48]:
# Build the combined per-sample table for experiment `exp_ls`.
df = make_df()


In [49]:
# Plain-text dump of the frame for a quick visual check.
print(df)

       input_rate  output_rate ndpi_stats-pcm_new_System-Date  \
0     4028.678192  3085.749267                     2024-02-23   
1     2227.764527  2421.899994                     2024-02-23   
2     5044.639954  5672.548015                     2024-02-23   
3     9330.484868  8792.036006                     2024-02-23   
4     9753.134207  9751.988007                     2024-02-23   
...           ...          ...                            ...   
1981  9997.011577  9977.467982                     2024-02-23   
1982  9996.433078  9980.944845                     2024-02-23   
1983  9991.918653  9971.382209                     2024-02-23   
1984  9995.661939  9980.144482                     2024-02-23   
1985  9996.230662  9980.263182                     2024-02-23   

     ndpi_stats-pcm_new_System-Time  ndpi_stats-pcm_new_Socket 0-EXEC  \
0                      00:35:06.288                              0.36   
1                      00:35:07.289                              0.32   


In [50]:
def classify_packetrate(rate):
    """Map a packet rate to a nominal packet size via fixed thresholds.

    ============== ======
    rate           result
    ============== ======
    > 10.5         64
    [7, 10.5]      128
    [3, 7)         256
    [1.8, 3)       512
    [1, 1.8)       1024
    anything else  1514
    ============== ======

    "Anything else" covers rates below 1 and also NaN, because every
    comparison against NaN is false.
    # NOTE(review): the unit of `rate` and the origin of the thresholds are
    # not visible in this notebook - confirm before changing them.
    """
    if rate > 10.5:
        return 64
    # Remaining bands, highest lower bound first; the first match wins.
    for lower_bound, size in ((7, 128), (3, 256), (1.8, 512), (1, 1024)):
        if rate >= lower_bound:
            return size
    return 1514

# Apply the function to create a new column.
# NOTE(review): this adds 'packetsize' to df in place, so the column only
# exists after this cell has been run.
df['packetsize'] = df['tx_stats_PacketRate'].apply(classify_packetrate)

In [51]:
# Write the combined table next to the raw data. The path components are
# joined separately (no embedded backslash) so the export also works on
# non-Windows systems. Despite its name, abs_path is relative to the cwd.
abs_path = os.path.join("..", "ndpi-stats-random", "ndpi-stats-random_" + exp_ls + ".csv")
df.to_csv(abs_path, sep=',')


In [52]:
# List the final column names, including the derived 'packetsize' column.
df.columns

Index(['input_rate', 'output_rate', 'ndpi_stats-pcm_new_System-Date',
       'ndpi_stats-pcm_new_System-Time', 'ndpi_stats-pcm_new_Socket 0-EXEC',
       'ndpi_stats-pcm_new_Socket 0-IPC', 'ndpi_stats-pcm_new_Socket 0-FREQ',
       'ndpi_stats-pcm_new_Socket 0-AFREQ',
       'ndpi_stats-pcm_new_Socket 0-L3MISS',
       'ndpi_stats-pcm_new_Socket 0-L2MISS',
       ...
       'ddio_ways_new_cos_way', 'ddio_ways_new_binary',
       'ddio_ways_new_ddio_ways', 'latency_new_latency', 'tx_stats_PacketRate',
       'tx_stats_Mbit', 'tx_stats_MbitWithFraming', 'tx_stats_TotalPackets',
       'tx_stats_TotalBytes', 'packetsize'],
      dtype='object', length=172)