## Hardware Details
[GCP](https://cloud.google.com/) VM: [n1-highmem-16](https://cloud.google.com/compute/docs/machine-types#n1_machine_types) (16 vCPUs, 104 GB memory)

In [1]:
# Show the CPU description reported by `lscpu`, one output line per field.
cat(paste(system("lscpu", intern = TRUE), collapse = "\n"))

Architecture:          x86_64
CPU op-mode(s):        32-bit, 64-bit
Byte Order:            Little Endian
CPU(s):                16
On-line CPU(s) list:   0-15
Thread(s) per core:    2
Core(s) per socket:    8
Socket(s):             1
NUMA node(s):          1
Vendor ID:             GenuineIntel
CPU family:            6
Model:                 63
Model name:            Intel(R) Xeon(R) CPU @ 2.30GHz
Stepping:              0
CPU MHz:               2300.000
BogoMIPS:              4600.00
Hypervisor vendor:     KVM
Virtualization type:   full
L1d cache:             32K
L1i cache:             32K
L2 cache:              256K
L3 cache:              46080K
NUMA node0 CPU(s):     0-15
Flags:                 fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc eagerfpu pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hyperviso

In [2]:
# Show the first line of /proc/meminfo (the MemTotal entry in the output below).
cat(paste(system("cat /proc/meminfo | head -n1", intern = TRUE), collapse = "\n"))

MemTotal:       107091244 kB


## Basic functions

In [3]:
library(data.table)
library(stringi)
library(microbenchmark)

In [4]:
# Build a synthetic benchmark table with `rowCount` random rows.
#
# Columns of the returned data.table:
#   bucket - factor over the 676 two-letter labels ("aa", "ba", ..., "zz")
#   qty    - integers sampled with replacement from 1..100
#   risk   - integers sampled with replacement from 1..10
#   weight - doubles drawn from the uniform distribution on [0, 2]
createTable <- function(rowCount) {
    # Trigger a garbage collection before the new columns are allocated.
    gc()

    bucketCount <- length(letters)^2
    # Every two-letter combination with the first letter varying fastest,
    # i.e. the row order expand.grid(letters, letters) produces.
    bucketLabels <- paste0(
        rep(letters, times = length(letters)),
        rep(letters, each = length(letters))
    )

    # The columns are drawn in this fixed order (bucket, qty, risk, weight),
    # so a given RNG seed always yields the same table.
    data.table(
        bucket = factor(
            sample.int(bucketCount, rowCount, replace = TRUE),
            levels = seq_len(bucketCount),
            labels = bucketLabels
        ),
        qty = sample.int(100L, rowCount, replace = TRUE),
        risk = sample.int(10L, rowCount, replace = TRUE),
        weight = runif(rowCount, min = 0, max = 2)
    )
}

In [5]:
# Run the benchmark aggregation on data.table `t`, grouped by `bucket`.
#
# `t` must provide the columns `bucket`, `qty`, `risk` and `weight`. The
# result has one row per bucket with the row count (NR), the sum and mean of
# `qty` and `risk`, and the means of `qty` and `risk` weighted by `weight`.
executeQuery <- function(t) {
    t[
        ,
        list(
            NR = .N,
            TOTAL_QTY = sum(qty),
            AVG_QTY = mean(qty),
            TOTAL_RISK = sum(risk),
            AVG_RISK = mean(risk),
            WEIGHTED_QTY = weighted.mean(qty, weight),
            WEIGHTED_RISK = weighted.mean(risk, weight)
        ),
        by = "bucket"
    ]
}

In [6]:
# Limit data.table to a single thread so that every benchmark below measures
# single-threaded execution. NOTE(review): restore_after_fork = FALSE is
# presumably meant to keep data.table from re-enabling multi-threading after a
# fork - confirm against the setDTthreads documentation.
setDTthreads(threads = 1, restore_after_fork = FALSE)

## 10k

In [7]:
t <- createTable(10 * 1000)

In [8]:
summary(microbenchmark(executeQuery(t), times = 100))

expr,min,lq,mean,median,uq,max,neval
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
executeQuery(t),10.19783,10.36306,11.26613,10.47613,10.98221,19.3714,100


## 100k

In [9]:
t <- createTable(100 * 1000)

In [10]:
summary(microbenchmark(executeQuery(t), times = 100))

expr,min,lq,mean,median,uq,max,neval
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
executeQuery(t),18.19039,18.5278,20.19558,18.71863,19.05117,33.26562,100


## 1M

In [11]:
t <- createTable(1000 * 1000)

In [12]:
summary(microbenchmark(executeQuery(t), times = 100))

expr,min,lq,mean,median,uq,max,neval
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
executeQuery(t),103.5477,109.2683,112.9028,111.1418,113.3463,165.32,100


## 10M

In [13]:
t <- createTable(10 * 1000 * 1000)

In [14]:
summary(microbenchmark(executeQuery(t), times = 100))

expr,min,lq,mean,median,uq,max,neval
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
executeQuery(t),949.9135,969.7432,1004.169,984.649,1010.516,1357.781,100


## 100M
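Each run is much slower at this size, so the benchmark is repeated only 10 times instead of 100. Note that, compared with the 10M results, the timings below appear to be reported in seconds rather than milliseconds.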

In [15]:
t <- createTable(100 * 1000 * 1000)

In [16]:
summary(microbenchmark(executeQuery(t), times = 10))

expr,min,lq,mean,median,uq,max,neval
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
executeQuery(t),9.936694,9.945228,10.24156,10.09912,10.23078,11.78579,10


## 1B
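As for 100M rows, the benchmark is repeated only 10 times instead of 100; the timings below appear to be reported in seconds rather than milliseconds.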

In [17]:
t <- createTable(1000 * 1000 * 1000)

In [18]:
summary(microbenchmark(executeQuery(t), times = 10))

expr,min,lq,mean,median,uq,max,neval
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
executeQuery(t),99.28312,99.64165,101.5662,100.4893,101.6806,110.4732,10
