In [None]:
%matplotlib inline
import matplotlib
import numpy as np
from mpmath import mp
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import host_subplot
import mpl_toolkits.axisartist as AA
import math
import warnings
from ipywidgets import interact, interactive, fixed, interact_manual
# Global Matplotlib styling shared by every figure in this notebook.
plt.rcParams['figure.dpi'] = 180
plt.rcParams['figure.figsize'] = [12.0, 8.0]
# 'text.latex.unicode' was deprecated in Matplotlib 3.0 and later removed.
# Assigning an unknown rcParam raises KeyError, so only set it where it exists.
if 'text.latex.unicode' in plt.rcParams:
    plt.rcParams['text.latex.unicode'] = True
plt.rcParams['text.usetex'] = True
plt.rcParams['mathtext.fontset'] = 'stix'
plt.rcParams['font.family'] = 'STIXGeneral'

In [None]:
# Exponents 1..34 and the matching powers of two. `sizes` supplies the x
# positions of the plots and `log_sizes` the tick labels.
log_sizes = np.array(list(range(1, 35)))
sizes = np.array([1 << exponent for exponent in range(1, 35)])

In [None]:
# Register file size is speculative
# Defaults are for AWS EC2 c5.2xlarge, an "Intel(R) Xeon(R) Platinum 8124M CPU @ 3.00GHz"
def init_plot(reg_file=128 * 8, l1=32*1024, l2=1024**2, l3=24 * 1024**2, ram=16 * 1024**3, disk=64 * 1024**3,
              element_bytes=32, label_y=100):
    """Prepare the current axes for a log-log "time versus problem size" plot.

    Labels both axes, places one x tick on every entry of the global `sizes`
    (labelled with the matching entry of the global `log_sizes`) and draws a
    dashed grey vertical marker with a text label for each memory level.

    Parameters
    ----------
    reg_file, l1, l2, l3, ram, disk
        Capacity of each memory level. Each value is divided by
        `element_bytes` to obtain the x position of its marker.
    element_bytes
        Divisor that converts a capacity into a position on the size axis.
        Defaults to 32, the value that used to be hard-coded
        (presumably the number of bytes per element -- confirm).
    label_y
        Y coordinate, in data units, at which the marker labels are drawn.
        Defaults to 100, the value that used to be hard-coded.
    """
    plt.xlabel(r'Size $\left[\log_2 n\right]$')
    plt.xscale('log')
    plt.xticks(sizes, [str(n) for n in log_sizes])
    # Keep only the explicit ticks above; drop the minor ticks of the log scale.
    plt.gca().xaxis.set_minor_locator(plt.NullLocator())

    def mem_line(size, label):
        """Draw one labelled vertical marker at `size / element_bytes`."""
        position = size / element_bytes
        plt.axvline(position, color='grey', linestyle='--')
        plt.text(position, label_y, label)

    for capacity, label in (
        (reg_file, "REG"),
        (l1, "L1"),
        (l2, "L2"),
        (l3, "L3"),
        (ram, "RAM"),
        (disk, "SWAP"),
    ):
        mem_line(capacity, label)

    plt.yscale('log')
    plt.ylabel(r'Time $\left[\mathtt{sec}\right]$')

In [None]:
def series(data, color='tab:blue'):
    """Plot `data` against the first `len(data)` entries of the global `sizes`.

    Each point is drawn with a '.' marker and the line uses `color`.
    """
    count = len(data)
    plt.plot(sizes[:count], data, marker='.', color=color)

## Benchmark c5.2xlarge

Run on an AWS EC2 `c5.2xlarge` instance. The root drive was enlarged to 256GB and a 64GB swapfile was added. The benchmark was run with `--allocation heap`.

In [None]:
# Two whitespace-separated columns per row: a row index (plotted by `series`
# against `sizes`, i.e. the log2 size) and the measured time. Only the time
# column is kept.
fft = np.fromstring(
    """
1       0.000005542
2       0.000004446
3       0.000004638
4       0.0000079
5       0.000009583
6       0.000020141
7       0.00004044
8       0.000072858
9       0.000145678
10      0.000409865
11      0.0001865
12      0.000293731
13      0.000634836
14      0.001284636
15      0.00258338
16      0.005211545
17      0.010685081
18      0.021668024
19      0.044215935
20      0.096148812
21      0.204491085
22      0.496917142
23      0.924625169
24      1.9200900170000001
25      4.173365369
26      8.299160712
27      18.231577741
28      35.409767256
29      440.499620014
30      4744.720864158
""",
    sep=' ',
).reshape(-1, 2)[:, 1]

In [None]:
# Two whitespace-separated columns per row: a row index (the log2 size used by
# the plots) and the measured time. Only the time column is kept.
fft_rec = np.fromstring(
    """
1       0.000009126
2       0.000007478
3       0.000007798
4       0.000008998
5       0.000012303
6       0.00002079
7       0.000042862
8       0.000087762
9       0.000189393
10      0.000413419
11      0.000908061
12      0.002005121
13      0.004342772
14      0.009419408
15      0.020323254
16      0.043684882
17      0.093593267
18      0.199308118
19      0.424677042
20      0.899421716
21      1.897257578
22      3.986821923
23      8.378155862
24      17.569708915
25      36.84961684
26      77.344462714
27      162.030825806
28      338.051808485
29      5298.041942403
""",
    sep=' ',
).reshape(-1, 2)[:, 1]

In [None]:
# Two whitespace-separated columns per row: a row index (the log2 size used by
# the plots) and the measured time. Only the time column is kept.
fft_zexe = np.fromstring(
    """
1       0.000001776
2       0.000000975
3       0.000002353
4       0.000046696
5       0.000028108
6       0.000041613
7       0.000057104
8       0.00010488
9       0.000201595
10      0.00041062
11      0.000805603
12      0.00182214
13      0.003414361
14      0.007236913
15      0.016095236
16      0.02642837
17      0.05192269
18      0.106633829
19      0.210918476
20      0.431393236
21      0.853075165
22      1.6934878389999999
23      3.320207399
24      6.991504787
25      16.408369934
26      34.369542877
27      71.426919205
28      924.756653007
""",
    sep=' ',
).reshape(-1, 2)[:, 1]

In [None]:
# Two whitespace-separated columns per row: a row index (the log2 size used by
# the plots) and the measured time. Only the time column is kept.
fft_zexe4 = np.fromstring(
    """
1       0.000002307
2       0.000000987
3       0.000031233
4       0.000018194
5       0.000020868
6       0.000034673
7       0.00004588
8       0.000087116
9       0.000123568
10      0.000254123
11      0.000483919
12      0.000995008
13      0.002030165
14      0.004256531
15      0.008891671
16      0.018062251
17      0.038498366
18      0.08012556
19      0.167480744
20      0.351965991
21      0.730701499
22      1.520455885
23      3.136694241
24      6.481154143
25      13.478910881000001
26      27.592663638
27      69.620524261
""",
    sep=' ',
).reshape(-1, 2)[:, 1]

In [None]:
# Two whitespace-separated columns per row: a row index (the log2 size used by
# the plots) and the measured time. Only the time column is kept.
fft_zexe2 = np.fromstring(
    """
1       0.000001511
2       0.000045553
3       0.000016016
4       0.000019259
5       0.00001857
6       0.000031509
7       0.000045211
8       0.000103274
9       0.000175587
10      0.000370783
11      0.000768036
12      0.001642119
13      0.003336863
14      0.007002769
15      0.014819617
16      0.030811886
17      0.065517493
18      0.137940412
19      0.293378682
20      0.620789557
21      1.288642621
22      2.590663931
23      5.496503293
24      11.074616963
25      22.988761963
26      47.066935046
27      113.235295401
""",
    sep=' ',
).reshape(-1, 2)[:, 1]

In [None]:
# Two whitespace-separated columns per row: a row index (the log2 size used by
# the plots) and the measured time. Only the time column is kept.
fft_zexe1 = np.fromstring(
    """
1       0.000002628
2       0.000001987
3       0.000003241
4       0.00000618
5       0.000012376
6       0.000028478
7       0.000059008
8       0.000122222
9       0.000272142
10      0.000571271
11      0.001224632
12      0.002650468
13      0.00565461
14      0.012066601
15      0.025778269
16      0.054527995
17      0.115925208
18      0.246241083
19      0.524339766
20      1.101234196
21      2.298312957
22      4.8182941889999995
23      10.005232897
24      20.747169828
25      43.123351244
26      89.637718552
27      272.380300462
""",
    sep=' ',
).reshape(-1, 2)[:, 1]

In [None]:
# Two whitespace-separated columns per row: a row index (the log2 size used by
# the plots) and the measured time. Only the time column is kept.
fftr = np.fromstring(
    """
1       0.000008734
2       0.000007463
3       0.000007432
4       0.000008757
5       0.000011968
6       0.000020103
7       0.000042482
8       0.000087487
9       0.00019032
10      0.000861135
11      0.000506981
12      0.000748031
13      0.001069595
14      0.001629936
15      0.003228023
16      0.006696491
17      0.013680891
18      0.027710505
19      0.057679885
20      0.121986071
21      0.253739433
22      0.564594692
23      1.142924603
24      2.367203099
25      5.057864464
26      10.074481558
27      22.237774659
28      43.521765592
29      431.088060496
30      5069.400916583
""",
    sep=' ',
).reshape(-1, 2)[:, 1]

In [None]:
# Echo the parsed `fft_zexe` timings as the cell output for a quick visual check.
fft_zexe

In [None]:
# Draw the enabled series on axes set up with the default memory-level markers.
# Series that are switched off stay listed below so they can be re-enabled.
init_plot()
# series(fft, color='lightgrey')
series(fftr, color='blue')
# series(fft_rec, color='orange')
series(fft_zexe, color='red')
# series(fft_zexe4, color='orangered')
# series(fft_zexe2, color='darkorange')
# series(fft_zexe1, color='gold')

In [None]:
# Element-wise ratio of the two timing series over the rows they share.
# Slicing by len(fft_zexe) replaces the hard-coded `[:-2]`, which only worked
# while `fftr` had exactly two more rows than `fft_zexe`.
fft_zexe / fftr[:len(fft_zexe)]

In [None]:
# Thread counts of the four measured runs.
x = np.array([1, 2, 4, 8])
# The values match rows 27 and 26 of fft_zexe1, fft_zexe2, fft_zexe4 and
# fft_zexe (in that order). The original cell assigned `y` twice in a row, so
# the first set was a dead store. Both sets are kept under explicit names and
# the one actually plotted is selected below.
y_row27 = np.array([272.380300462, 113.235295401, 69.620524261, 71.426919205])
y_row26 = np.array([89.637718552, 47.066935046, 27.592663638, 34.369542877])
y = y_row26
# Time each run would take if the first measurement scaled perfectly with x.
ideal = y[0] / x
print(ideal)
plt.ylabel('Parallelization efficiency $\\left[\\frac{t_1}{n \\cdot t_n}\\right]$')
plt.xlabel('Threads $\\left[n\\right]$')
plt.ylim((0, 1.3))

plt.plot(x, ideal / y, marker='.', color='red')

In [None]:
# Second measurement set: one timing for each value of `xo` from 1 to 8.
xo = np.array(range(1, 9))
yo = np.array([
    200.17979819, 102.906475979, 71.403303, 54.178760671,
    50.905617932, 48.08577293, 45.611340696, 43.444763733,
])
# Time each run would take if the first measurement scaled perfectly with xo.
idealo = yo[0] / xo
print(idealo)
plt.xlabel(r'Threads $\left[n\right]$')
plt.ylabel(r'Parallelization efficiency $\left[\frac{t_1}{n \cdot t_n}\right]$')
plt.ylim(0, 1.1)

# Red reuses `x`, `ideal` and `y` from the previous cell; blue is the new set.
plt.plot(x, ideal / y, color='red', marker='.')
plt.plot(xo, idealo / yo, color='blue', marker='.')

print(idealo / yo)

## Benchmark c5.2xlarge (64MiB RAM)

Run on an AWS EC2 `c5.2xlarge` instance. The root drive was enlarged to 256GiB and a 64GiB swapfile was added.

```
L1 Instruction-Cache: (32 KiB, 8-way associativity, direct-mapped)
L1 Data-Cache: (32 KiB, 8-way associativity, direct-mapped)
L2 Unified-Cache: (1024 KiB, 16-way associativity, direct-mapped)
L3 Unified-Cache: (24 MiB, 11-way associativity, hash-based-mapping)
```

Memory is restricted to 64MiB RAM using cgroups:

```
sudo cgcreate -t $USER:$USER -a $USER:$USER -g memory:limited
echo 67108864 > /sys/fs/cgroup/memory/limited/memory.limit_in_bytes
cgexec -g memory:limited ./fft
```

In [None]:
# Two whitespace-separated columns per row: a row index (the log2 size used by
# the plots) and the measured time. Only the time column is kept.
fft_heap = np.fromstring(
    """
1	0.00034372
2	0.000039486
3	0.000035912
4	0.000037605
5	0.000041504
6	0.000042154
7	0.000055761
8	0.000055802
9	0.000086509
10	0.000104544
11	0.000249147
12	0.000277132
13	0.000917944
14	0.001096818
15	0.00341159
16	0.004738028
17	0.015229078
18	0.020695995
19	0.070773651
20	0.091959886
21	30.928260041
22	40.091185241
23	163.181767742
24	184.1793349
25	832.334289622
""",
    sep=' ',
).reshape(-1, 2)[:, 1]

In [None]:
# Two whitespace-separated columns per row: a row index (the log2 size used by
# the plots) and the measured time. Only the time column is kept.
fft_mmap = np.fromstring(
    """
1	0.000327282
2	0.000035651
3	0.000028616
4	0.000040656
5	0.000043662
6	0.000036332
7	0.000054138
8	0.000056564
9	0.000078225
10	0.000108604
11	0.000244958
12	0.000287139
13	0.000859705
14	0.001116579
15	0.003552164
16	0.004715079
17	0.015413409
18	0.021136953
19	0.073038566
20	0.092504724
21	76.226779587
22	68.887253346
23	726.08383654
24	527.209818911
""",
    sep=' ',
).reshape(-1, 2)[:, 1]

In [None]:
# Two whitespace-separated columns per row: a row index (the log2 size used by
# the plots) and the measured time. Only the time column is kept.
transpose_heap = np.fromstring(
    """
1	0.000000407
2	0.000000201
3	0.000000508
4	0.000000106
5	0.000000378
6	0.000000184
7	0.000004731
8	0.000000604
9	0.000007464
10	0.000001546
11	0.000031108
12	0.000005326
13	0.00012615
14	0.000028176
15	0.000708114
16	0.000138124
17	0.003178672
18	0.000945967
19	0.015287759
20	0.005595849
21	10.570897667
22	11.281882518
23	48.99322635
24	44.853152764
25	685.421398633
""",
    sep=' ',
).reshape(-1, 2)[:, 1]

In [None]:
# Two whitespace-separated columns per row: a row index (the log2 size used by
# the plots) and the measured time. Only the time column is kept.
transpose_mmap = np.fromstring(
    """
1	0.000001035
2	0.000000202
3	0.000000511
4	0.000000152
5	0.000000656
6	0.000000288
7	0.000007064
8	0.000000804
9	0.000011185
10	0.000002899
11	0.000031892
12	0.00000538
13	0.000134255
14	0.000028246
15	0.000710412
16	0.000142058
17	0.003230439
18	0.001012398
19	0.015829714
20	0.005500206
21	10.533343567
22	11.358831156
23	48.803206975
24	44.780711319
25	186.202560067
26	145.289483772
""",
    sep=' ',
).reshape(-1, 2)[:, 1]

In [None]:
# Same axes as the other benchmark plots, with the RAM marker moved to 64 MiB.
init_plot(ram=64 * 1024**2)
series(fft_heap, color='tab:red')
series(fft_mmap, color='tab:blue')
series(transpose_heap, color='tab:orange')
series(transpose_mmap, color='tab:cyan')

## Benchmark single thread

c5.2xlarge

In [None]:
# Two whitespace-separated columns per row: a row index (the log2 size used by
# the plots) and the measured time. Only the time column is kept.
# NOTE: this rebinds `fft`, which an earlier cell also assigns, so the value of
# `fft` depends on which of the two cells ran last.
fft = np.fromstring(
    """
1	0.000010837
2	0.00003163
3	0.000021725
4	0.000027168
5	0.000025975
6	0.000034325
7	0.000043394
8	0.000069395
9	0.00013186
10	0.000252507
11	0.000502172
12	0.001078727
13	0.00234277
14	0.00504515
15	0.010528185
16	0.022451856
17	0.046697365
18	0.099328349
19	0.207932491
20	0.444358145
21	0.93529772
22	1.964368904
23	4.132257624
24	8.600371875
25	18.178107202
26	37.357505465
""",
    sep=' ',
).reshape(-1, 2)[:, 1]

In [None]:
# Two whitespace-separated columns per row: a row index (the log2 size used by
# the plots) and the measured time. Only the time column is kept.
fft_iterative = np.fromstring(
    """
1	0.000005481
2	0.000004502
3	0.00000692
4	0.000008215
5	0.000011034
6	0.000020269
7	0.000032115
8	0.000056194
9	0.000098589
10	0.000203465
11	0.000447601
12	0.000986478
13	0.002059617
14	0.004453699
15	0.009803471
16	0.022309177
17	0.047588306
18	0.106476312
19	0.26432274
20	0.60108076
21	1.2733614389999999
22	2.688171277
23	5.689161517
24	12.132268398
25	25.850465222
26	55.871878074
""",
    sep=' ',
).reshape(-1, 2)[:, 1]

In [None]:
# Two whitespace-separated columns per row: a row index (the log2 size used by
# the plots) and the measured time. Only the time column is kept.
fft_depth_first = np.fromstring(
    """
1	0.000005586
2	0.000005259
3	0.000009514
4	0.000010033
5	0.000013038
6	0.000027076
7	0.000037728
8	0.000062199
9	0.000136259
10	0.000282032
11	0.000628744
12	0.001382235
13	0.003047723
14	0.006714292
15	0.014992353
16	0.033965664
17	0.073531157
18	0.170479579
19	0.409346494
20	1.049937156
21	2.444037146
22	5.505871476
23	12.131428218
24	26.768131456
25	61.079266758
26	133.57830445
""",
    sep=' ',
).reshape(-1, 2)[:, 1]

In [None]:
# Two whitespace-separated columns per row: a row index (the log2 size used by
# the plots) and the measured time. Only the time column is kept.
fft_recursive = np.fromstring(
    """
1	0.000005541
2	0.000004514
3	0.000004572
4	0.000005506
5	0.000008072
6	0.000014245
7	0.000031183
8	0.00006486
9	0.000143123
10	0.000324709
11	0.000689698
12	0.00150638
13	0.003293801
14	0.007106039
15	0.01528987
16	0.032758035
17	0.069945614
18	0.148304918
19	0.315367176
20	0.670398505
21	1.417594193
22	2.980605325
23	6.24611864
24	13.096406455
25	27.498664066
26	57.386700655
""",
    sep=' ',
).reshape(-1, 2)[:, 1]

In [None]:
# Compare the single-thread series on the default memory-hierarchy axes.
init_plot()
series(fft, 'black')
series(fft_iterative, 'tab:blue')
series(fft_depth_first, 'tab:orange')
series(fft_recursive, 'tab:pink')
# `fft2_heap` is not assigned by any visible cell of this notebook, so the
# unguarded call raised NameError on a fresh kernel. Plot it only if it exists.
# TODO: add the missing data cell or drop this series.
if 'fft2_heap' in globals():
    series(fft2_heap, 'tab:cyan')

## Memory access pattern

In [None]:
def fft_df(values, size, offset, stride, loop):
    """Append the sequence of indices visited by a recursive traversal to `values`.

    A block of `size` 1 contributes just `offset`. A larger block first
    recurses on half the size with doubled stride: once, with `loop` doubled
    too, while `stride == loop` and `loop < 128`; otherwise twice, at `offset`
    and at `offset + stride`. It then appends, for every `i` in
    `range(size // 2)` and `j` in `range(loop)`, the pair
    `offset + 2 * i * stride + j` and that index plus `stride`.

    `values` is extended in place and also returned.
    """
    if size == 1:
        values.append(offset)
        return values

    half = size // 2
    doubled = 2 * stride
    if stride == loop and loop < 128:
        fft_df(values, half, offset, doubled, 2 * loop)
    else:
        fft_df(values, half, offset, doubled, loop)
        fft_df(values, half, offset + stride, doubled, loop)

    for pair in range(half):
        base = offset + pair * doubled
        for lane in range(loop):
            values.extend((base + lane, base + lane + stride))
    return values

In [None]:
# Record the index sequence for a block of 2**14 entries, starting at offset 0
# with unit stride and a loop width of 1.
a = fft_df([], 2**14, 0, 1, 1)

In [None]:
# Scatter the recorded indices: x is the position in the sequence, y the index.
plt.plot(a, linestyle='', marker='.')

In [None]:
# NOTE(review): presumably 32 KiB of L1 / 64-byte cache lines / 8 ways, i.e. the
# number of L1 sets -- confirm the intended meaning.
2**15 / 64 / 8

## Threads

In [None]:
# Two whitespace-separated columns per row; only the second (the timing) is
# kept. Every row carries 24 in the first column. The plot below pairs row k
# with the k-th entry of `threads`.
fft_threads = np.fromstring(
    """
24	8.602388099
24	4.433958019
24	3.055124852
24	2.35013958
24	2.18961738
24	2.051058217
24	1.933472554
24	1.826348783
24	1.8278633229999999
24	1.837789269
""",
    sep=' ',
).reshape(-1, 2)[:, 1]

In [None]:
# Candidate thread counts for the x axis; only the first len(fft_threads)
# of them have a measurement.
threads = np.array(range(1,20))
measured = threads[:len(fft_threads)]
plt.xticks(threads, [str(n) for n in threads])
# Axis labels were missing; the y value plotted below is n * t_n / t_1.
plt.xlabel('Threads $\\left[n\\right]$')
plt.ylabel('Relative cost $\\left[\\frac{n \\cdot t_n}{t_1}\\right]$')
plt.axvline(4, color='grey', linestyle='--')
plt.text(4, 1, 'Cores')
plt.axvline(8, color='grey', linestyle='--')
plt.text(8, 1, 'Hyper threads')
plt.plot(measured, fft_threads * measured / fft_threads[0], marker = '.')

In [None]:
# Echo the thread counts that have a matching entry in `fft_threads`.
threads[:len(fft_threads)]

In [None]:
# NOTE(review): bare scratch value with no stated meaning -- document it or drop the cell.
2.99


$69.7 billion

In [None]:
# NOTE(review): scratch calculation with no stated meaning -- document it or drop the cell.
104 / 2.3

In [None]:
# Placeholder: the literal below holds no measurements, only blank lines.
# NOTE(review): presumably meant to hold a single-thread run to compare with
# `fft8` -- confirm, then fill in the data or remove the assignment.
fft1 = np.fromstring("""

""", sep=' ').reshape((-1, 2))[:,1]
# Two whitespace-separated columns per row: a row index (the log2 size used by
# the plots) and the measured time. Only the time column is kept.
fft8 = np.fromstring(
    """
1       0.000008426
2       0.000008893
3       0.000010214
4       0.000012597
5       0.000019022
6       0.000026637
7       0.000050391
8       0.000094944
9       0.000201205
10      0.000137291
11      0.00019364
12      0.000338354
13      0.000708646
14      0.001616905
15      0.003150218
16      0.006628072
17      0.01326452
18      0.027186793
19      0.056188265
20      0.120694244
21      0.255611657
22      0.557198205
23      1.135928568
24      2.363512228
25      5.028374935
26      10.039596217
27      22.10682207
28      43.368996506
""",
    sep=' ',
).reshape(-1, 2)[:, 1]