## Hardware Details
[GCP](https://cloud.google.com/) VM: [n1-highmem-16](https://cloud.google.com/compute/docs/machine-types#n1_machine_types) (16 vCPUs, 104 GB memory)

In [1]:
# Run `lscpu` and print its captured output, one element per line.
cat(system("lscpu", intern=TRUE), sep='\n')

Architecture:          x86_64
CPU op-mode(s):        32-bit, 64-bit
Byte Order:            Little Endian
CPU(s):                16
On-line CPU(s) list:   0-15
Thread(s) per core:    2
Core(s) per socket:    8
Socket(s):             1
NUMA node(s):          1
Vendor ID:             GenuineIntel
CPU family:            6
Model:                 63
Model name:            Intel(R) Xeon(R) CPU @ 2.30GHz
Stepping:              0
CPU MHz:               2300.000
BogoMIPS:              4600.00
Hypervisor vendor:     KVM
Virtualization type:   full
L1d cache:             32K
L1i cache:             32K
L2 cache:              256K
L3 cache:              46080K
NUMA node0 CPU(s):     0-15
Flags:                 fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc eagerfpu pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hyperviso

In [2]:
# Print only the first line of /proc/meminfo (the MemTotal entry shown below).
cat(system("cat /proc/meminfo | head -n1", intern=TRUE), sep='\n')

MemTotal:       107091244 kB


## Basic functions

In [3]:
library(data.table)
library(stringi)
library(microbenchmark)
library(bit64)

Loading required package: bit
Attaching package bit
package:bit (c) 2008-2012 Jens Oehlschlaegel (GPL-2)
creators: bit bitwhich
coercion: as.logical as.integer as.bit as.bitwhich which
operator: ! & | xor != ==
querying: print length any all min max range sum summary
bit access: length<- [ [<- [[ [[<-
for more help type ?bit

Attaching package: ‘bit’

The following object is masked from ‘package:data.table’:

    setattr

The following object is masked from ‘package:base’:

    xor

Attaching package bit64
package:bit64 (c) 2011-2012 Jens Oehlschlaegel
creators: integer64 seq :
coercion: as.integer64 as.vector as.logical as.integer as.double as.character as.bin
logical operator: ! & | xor != == < <= >= >
arithmetic operator: + - * / %/% %% ^
math: sign abs sqrt log log2 log10
math: floor ceiling trunc round
querying: is.integer64 is.vector [is.atomic} [length] format print str
values: is.na is.nan is.finite is.infinite
aggregation: any all min max range sum prod
cumulation: diff cummin

In [4]:
# Build a synthetic benchmark table with `rowCount` rows.
#
# Columns of the returned data.table:
#   bucket - factor over the 26^2 two-letter codes, sampled with replacement
#   qty    - integer64, sampled with replacement from 1..100
#   risk   - integer64, sampled with replacement from 1..10
#   weight - double, drawn with runif() between 0 and 2
#
# A garbage collection is triggered before any column is allocated.
createTable <- function(rowCount) {
    gc()

    letterCount <- length(letters)
    bucketIds <- seq_len(letterCount^2)
    # First letter cycles fastest, second letter slowest ("aa", "ba", ..., "zz"),
    # i.e. the same order as the rows of expand.grid(letters, letters).
    bucketLabels <- paste0(
        rep(letters, times = letterCount),
        rep(letters, each = letterCount)
    )

    # Columns are generated in this order so the random stream is consumed
    # as bucket -> qty -> risk -> weight.
    data.table(
        bucket = factor(
            sample(bucketIds, rowCount, replace = TRUE),
            levels = bucketIds,
            labels = bucketLabels
        ),
        qty = as.integer64(sample(1:100, rowCount, replace = TRUE)),
        risk = as.integer64(sample(1:10, rowCount, replace = TRUE)),
        weight = runif(rowCount, 0, 2)
    )
}

In [5]:
# Run the benchmark aggregation on `t`, returning one row per `bucket` with:
#   NR            - number of rows in the group
#   TOTAL_QTY     - sum of qty          AVG_QTY  - mean of qty
#   TOTAL_RISK    - sum of risk         AVG_RISK - mean of risk
#   WEIGHTED_QTY  - mean of qty weighted by `weight`
#   WEIGHTED_RISK - mean of risk weighted by `weight`
#
# NOTE(review): createTable() stores qty and risk as integer64. bit64's
# mean() and `*` methods appear to return integer64, so AVG_* may be
# truncated to whole numbers and the weighted means may be built from
# rounded products - confirm before relying on the values themselves.
executeQuery <- function(t) {
    t[,
        list(
            NR = .N,
            TOTAL_QTY = sum(qty),
            AVG_QTY = mean(qty),
            TOTAL_RISK = sum(risk),
            AVG_RISK = mean(risk),
            WEIGHTED_QTY = weighted.mean(qty, weight),
            WEIGHTED_RISK = weighted.mean(risk, weight)
        ),
        by = "bucket"
    ]
}

In [6]:
# Restrict data.table to a single thread for every benchmark below.
# NOTE(review): restore_after_fork = FALSE should, per data.table's docs, keep
# data.table from returning to multi-threaded mode after a fork - confirm
# against the installed version.
setDTthreads(threads = 1, restore_after_fork = FALSE)

## 10k

In [7]:
# Input table with 10,000 rows.
t <- createTable(10 * 1000)

In [8]:
# Time executeQuery(t) over 100 evaluations and show the summary statistics.
summary(microbenchmark(executeQuery(t), times = 100))

expr,min,lq,mean,median,uq,max,neval
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
executeQuery(t),95.53203,102.6646,104.8992,104.5066,106.0698,168.6395,100


## 100k

In [9]:
# Input table with 100,000 rows.
t <- createTable(100 * 1000)

In [10]:
# Time executeQuery(t) over 100 evaluations and show the summary statistics.
summary(microbenchmark(executeQuery(t), times = 100))

expr,min,lq,mean,median,uq,max,neval
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
executeQuery(t),150.5477,157.2284,160.0232,159.4093,161.2983,228.9737,100


## 1M

In [11]:
# Input table with 1,000,000 rows.
t <- createTable(1000 * 1000)

In [12]:
# Time executeQuery(t) over 100 evaluations and show the summary statistics.
summary(microbenchmark(executeQuery(t), times = 100))

expr,min,lq,mean,median,uq,max,neval
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
executeQuery(t),685.9177,697.522,703.3271,700.578,706.1533,766.8386,100


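## 10M
Note: `microbenchmark` chooses the reporting unit automatically and the rendered table does not show it. From this size onward the timings appear to be in seconds, whereas the smaller sizes above appear to be in milliseconds.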

In [13]:
# Input table with 10,000,000 rows.
t <- createTable(10 * 1000 * 1000)

In [14]:
# Time executeQuery(t) over 100 evaluations and show the summary statistics.
summary(microbenchmark(executeQuery(t), times = 100))

expr,min,lq,mean,median,uq,max,neval
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
executeQuery(t),5.866679,5.894065,5.972583,5.912521,5.934756,9.375695,100


## 100M
We execute the tests ten times only!

In [15]:
# Input table with 100,000,000 rows.
t <- createTable(100 * 1000 * 1000)

In [16]:
# Time executeQuery(t) over only 10 evaluations (reduced from 100 at this size).
summary(microbenchmark(executeQuery(t), times = 10))

expr,min,lq,mean,median,uq,max,neval
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
executeQuery(t),58.70234,58.843,59.47993,59.13116,59.26086,61.46067,10


## 1B
We execute the tests ten times only!

In [17]:
# Input table with 1,000,000,000 rows.
t <- createTable(1000 * 1000 * 1000)

In [19]:
# Time executeQuery(t) over only 10 evaluations (reduced from 100 at this size).
summary(microbenchmark(executeQuery(t), times = 10))

expr,min,lq,mean,median,uq,max,neval
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
executeQuery(t),594.6486,596.3934,607.8454,598.3287,603.7581,681.8925,10
