In [None]:
import lzma
from typing import Dict, Tuple
import numpy as np
import pandas as pd
import gzip

def perf_data_preprocess(file_path: str) -> Dict[str, pd.DataFrame]:
    """Parse a compressed ``perf script`` text dump into one table per event.

    Fields of the dump are expected to be separated by two or more whitespace
    characters, giving six fields for a counter sample: process name,
    ``PID [CPU]``, timestamp, count, event name and symbol.  Rows that split
    into fewer fields (no event-name field) are treated as the network
    tracepoints and are re-split on single whitespace to recover their
    ``len=<n>`` token.

    Parameters
    ----------
    file_path : str
        Path of the dump.  Names ending in ``.gz`` are read with gzip; any
        other name is read as xz/lzma.

    Returns
    -------
    Dict[str, pd.DataFrame]
        Keys ``'Cycles'``, ``'Instructions'``, ``'Net_dev_xmit'`` and
        ``'Netif_rx'``.  Every frame has the columns ``Cpu`` (uint8),
        ``Time`` (float64, shifted so the earliest sample over all tables is
        0) and ``Load`` (uint32: the sample count for the counters, the
        ``len=`` value for the network tables).

    Raises
    ------
    ValueError
        If grouping by event name does not give exactly three groups (two
        counter events plus the rows without an event-name field).
    """
    opener = gzip.open if str(file_path).endswith('.gz') else lzma.open
    with opener(file_path, 'r') as f:
        # A regex separator is only supported by the python engine; asking for
        # it explicitly (and using a raw string for the pattern) avoids the
        # ParserWarning / invalid-escape warnings of the implicit fallback.
        raw = pd.read_table(
            f, sep=r'\s\s+', header=None, engine='python'
        ).drop(columns=[0, 5])

    # Field 1 looks like "<pid> [<cpu>]": keep the number inside the brackets.
    raw[1] = raw[1].apply(lambda s: s.split(' ')[1][1:-1]).astype(np.uint8)
    raw = raw.rename(columns={1: 'Cpu', 2: 'Time'})

    # Groups come back sorted by event name, with the missing-name group last.
    groups = [g.drop(columns=4) for _, g in raw.groupby(4, dropna=False)]
    if len(groups) != 3:
        raise ValueError(
            f'expected 2 counter events plus tracepoint rows, got {len(groups)} groups'
        )
    cycles, instructions, net = groups

    # Tracepoint rows come in two layouts: either the whole payload ended up in
    # the Time field (field 3 missing) or the payload sits in field 3.
    payload_in_time = net[3].isna()
    rx_rows = net[payload_in_time]
    tx_rows = net[~payload_in_time]

    netif_rx = pd.concat(
        [rx_rows['Cpu'],
         rx_rows['Time'].str.split(expand=True).drop(columns=[1, 2, 3, 5])],
        axis=1,
    ).rename(columns={0: 'Time', 4: 'Load'})
    net_dev_xmit = pd.concat(
        [tx_rows['Cpu'], tx_rows['Time'],
         tx_rows[3].str.split(expand=True).drop(columns=[0, 1, 2])],
        axis=1,
    ).rename(columns={3: 'Load'})
    cycles = cycles.rename(columns={3: 'Load'})
    instructions = instructions.rename(columns={3: 'Load'})

    # Network loads are "len=<n>" tokens: strip the 4-character prefix.
    for table in (net_dev_xmit, netif_rx):
        table['Load'] = table['Load'].str[4:].astype(np.uint32)
    for table in (cycles, instructions):
        table['Load'] = table['Load'].astype(np.uint32)

    tables = [cycles, instructions, net_dev_xmit, netif_rx]
    for table in tables:
        # Timestamps carry a trailing ':' in the dump.
        table['Time'] = table['Time'].str[:-1].astype(np.float64)
    min_time = min(table['Time'].min() for table in tables)
    for table in tables:
        table['Time'] = table['Time'] - min_time

    table_name = ['Cycles', 'Instructions', 'Net_dev_xmit', 'Netif_rx']
    return dict(zip(table_name, tables))

# Parse the xz-compressed `perf script` dump into one DataFrame per event type.
data = perf_data_preprocess('./aes_test.perf.data.txt.xz')

# Largest timestamp found in any of the event tables.
max_time = max(frame['Time'].max() for frame in data.values())

记录隐私计算库ABY AES加密任务服务器端的perf data，调用方法为:
```bash
sudo perf record -e cycles,instructions,net:net_dev_xmit,net:netif_rx \
    -- ./aes_test -r 0 -n 1000 &&
sudo perf script | gzip > aes_test.perf.data.txt.gz
```
由于时间难以对齐，客户端是直接启动的:
```bash
./aes_test -r 1 -n 1000
```

获得人类可读的perf.data，处理并画出随时间变化的直方图:
|进程名|PID[CPU]|时间|计数|事件|触发函数名|
|-|-|-|-|-|-|
(data内容略)

In [None]:
from matplotlib import pyplot as plt

# Number of bin *edges* handed to np.linspace, so each histogram below has
# BIN_SIZE - 1 equally wide time bins.
BIN_SIZE = 50

# Every series shares the same edges, which keeps the four panels aligned.
bin_edges = np.linspace(0, max_time, BIN_SIZE)


def _load_histogram(table):
    """Return ``(per-bin sum of Load, bin edges)`` for one event table.

    The weights are cast to float64 first: np.histogram accumulates in the
    dtype of its weights, so integer weights would wrap around on large sums
    and make the later cross-correlation overflow.
    """
    return np.histogram(table.Time, bin_edges,
                        weights=table.Load.astype(np.float64))


num_c, bin_c = _load_histogram(data['Cycles'])
num_i, bin_i = _load_histogram(data['Instructions'])
num_rx, bin_rx = _load_histogram(data['Netif_rx'])
num_tx, bin_tx = _load_histogram(data['Net_dev_xmit'])

# Float inputs keep the products inside np.correlate from wrapping around.
corr = np.correlate(num_i, num_c, 'full')

fig, axs = plt.subplots(4, sharex=True)
series = (
    (num_c, 'cycles'),
    (num_i, 'instructions'),
    (num_rx, 'rx(bytes)'),
    (num_tx, 'tx(bytes)'),
)
for ax, (counts, label) in zip(axs, series):
    ax.stairs(counts, bin_edges, label=label)
    # Magnitudes differ by orders of magnitude between panels; hide the y axis.
    ax.get_yaxis().set_visible(False)
    ax.legend()

axs[3].set_xlabel('time(s)')
axs[0].set_title('aes_test(n=1000)')
# plt.plot(corr)

In [None]:
# Bin-to-bin change in received bytes (previous bin minus next bin).  The
# operands are cast to float64 first: if num_rx has an unsigned integer dtype,
# every negative difference would otherwise wrap around to a huge value.
plt.plot(num_rx[:-1].astype(np.float64) - num_rx[1:].astype(np.float64))

In [None]:
# NOTE(review): 2103144 is an unexplained constant; the expression divides it
# by max_time/1000 and then by 1024**3 -- presumably a throughput estimate.
# TODO: name the constant and confirm the intended units.
2103144/(max_time/1000)/1024/1024/1024

In [None]:
# Keep only the time bins that recorded any received bytes, then plot them
# back to back (the x axis is the position among non-empty bins, not time).
t = num_rx[np.flatnonzero(num_rx > 0)]
plt.plot(t)

In [None]:
# Bin indices ordered by ascending cycle total (stable, so ties keep bin order).
tt = np.argsort(num_c, kind='stable')
# Two rows: the indices of the 30 busiest bins and their cycle totals.
np.vstack((tt[-30:], np.take(num_c, tt[-30:])))

发现与存在的问题：
1. 隐私计算任务在计算期间存在多次的网络传输，且网络传输总量很大（177MB，这才是1000次128bit数据的加密任务），传输分布比较均匀，网络传输在隐私计算任务中很重要
2. 计算过程中存在明显的停顿，例如程序启动后一段时间程序执行速度慢，第一次网络传输后，速度就变快了。再如0.3s与0.95s左右的短暂停顿。这说明隐私计算任务的执行速度可能受网络传输影响
3. 接收与发送数据的统计信息过于接近了，是不是监听事件名出错了
4. 使用本地回环网络，不能确定由于较差网络连接造成的速度减慢
5. 尝试采用互相关等简单数学方法探索周期数与传输数据之间的规律，但没有效果。
6. 关于第5点：整数溢出（计数以 uint32 存储）导致互相关无效果；改用修改后的函数后，发现任务的传输与计算同时开始
7. 根据现在的情况，隐私子任务模型可能需要变更：可能修改成网络与计算子任务可并行、但会发生阻塞的形式，也就是DAG任务

In [None]:
# Scratch calculations; the constants below are not derived anywhere in the
# visible part of this notebook.
# 2488371.8888550415*22979.0/1e9/max_time
# 24015.0*1019769.3227149699/1e9/max_time
# NOTE(review): prints max_time / 10000 * 1000 under the label "time(ms)".
# Presumably a per-operation time in milliseconds, but the recorded run uses
# -n 1000 -- TODO confirm the divisor.
print(f'time(ms): {max_time/10000*1000}')

In [None]:
# Display the largest timestamp over all event tables.
max_time

In [None]:
# Summary statistics of the 'Instructions' table.
data['Instructions'].describe()

In [None]:
# Summary statistics of the 'Net_dev_xmit' table.
data['Net_dev_xmit'].describe()

In [None]:
from numpy import gcd
from numpy.typing import NDArray
from scipy.fft import fft, ifft
def _corr(s1: NDArray, s2: NDArray):
    # PHAT corr
    l1, l2 = len(s1), len(s2)
    return np.fft.fftshift(np.abs(ifft(
        np.exp(1j*np.angle(fft(s1, l1+l2-1))*np.conj(
            np.exp(1j*np.angle(fft(s2, l1+l2-1))))))))