## Hardware Details
[GCP](https://cloud.google.com/) VM: [n1-highmem-16](https://cloud.google.com/compute/docs/machine-types#n1_machine_types) (16 vCPUs, 104 GB memory)

In [1]:
# Print the `lscpu` report, one captured output line per row.
cat(system("lscpu", intern = TRUE), sep = "\n")

Architecture:          x86_64
CPU op-mode(s):        32-bit, 64-bit
Byte Order:            Little Endian
CPU(s):                16
On-line CPU(s) list:   0-15
Thread(s) per core:    2
Core(s) per socket:    8
Socket(s):             1
NUMA node(s):          1
Vendor ID:             GenuineIntel
CPU family:            6
Model:                 63
Model name:            Intel(R) Xeon(R) CPU @ 2.30GHz
Stepping:              0
CPU MHz:               2300.000
BogoMIPS:              4600.00
Hypervisor vendor:     KVM
Virtualization type:   full
L1d cache:             32K
L1i cache:             32K
L2 cache:              256K
L3 cache:              46080K
NUMA node0 CPU(s):     0-15
Flags:                 fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc eagerfpu pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hyperviso

In [2]:
# Print the first line of /proc/meminfo (Linux only).
# Read it directly in R instead of spawning a `cat | head` shell pipeline
# just to fetch a single line.
cat(readLines("/proc/meminfo", n = 1L), sep = "\n")

MemTotal:       107091244 kB


## Basic functions

In [3]:
library(data.table)
library(stringi)
library(microbenchmark)

In [4]:
# Build a random table with `rowCount` rows for the benchmarks.
#
# Columns:
#   bucket - random two-character string drawn from the pattern "[a-z]"
#   qty    - integer sampled with replacement from 1..100
#   risk   - integer sampled with replacement from 1..10
#   weight - double drawn by runif() between 0 and 2
#
# Args:
#   rowCount: number of rows to generate; a single non-negative number.
#
# Returns:
#   A data.table with the four columns listed above.
createTable <- function(rowCount) {
    stopifnot(
        is.numeric(rowCount),
        length(rowCount) == 1L,
        !is.na(rowCount),
        rowCount >= 0
    )

    # Run a garbage collection before the large allocations below.
    gc()

    data.table(
        bucket = stri_rand_strings(rowCount, 2, pattern = "[a-z]"),
        # sample.int(K, n) yields the same draws as sample(1:K, n), but
        # sample() on a vector first draws an index vector and then subsets
        # with it, i.e. two length-n integer vectors instead of one.
        qty = sample.int(100L, rowCount, replace = TRUE),
        risk = sample.int(10L, rowCount, replace = TRUE),
        weight = runif(rowCount, 0, 2)
    )
}

In [5]:
# Aggregate the table `t` by its `bucket` column.
#
# For every bucket the result holds:
#   NR            - number of rows
#   TOTAL_QTY     - sum of qty
#   AVG_QTY       - mean of qty
#   TOTAL_RISK    - sum of risk
#   AVG_RISK      - mean of risk
#   WEIGHTED_QTY  - mean of qty weighted by weight
#   WEIGHTED_RISK - mean of risk weighted by weight
#
# Args:
#   t: a data.table with columns bucket, qty, risk and weight.
#
# Returns:
#   A data.table with one row per bucket and the columns above.
executeQuery <- function(t) {
    perBucket <- t[
        ,
        list(
            NR = .N,
            TOTAL_QTY = sum(qty),
            AVG_QTY = mean(qty),
            TOTAL_RISK = sum(risk),
            AVG_RISK = mean(risk),
            WEIGHTED_QTY = weighted.mean(qty, weight),
            WEIGHTED_RISK = weighted.mean(risk, weight)
        ),
        by = "bucket"
    ]
    perBucket
}

## 10k

In [6]:
# Benchmark input: 10,000 rows.
t <- createTable(10 * 1000)

In [7]:
# Time 100 evaluations of the query on the current `t` and tabulate them.
# NOTE(review): summary() is called without `unit`, so the unit of the table
# below is not stated in the output; presumably milliseconds here -- confirm.
summary(microbenchmark(executeQuery(t), times = 100))

expr,min,lq,mean,median,uq,max,neval
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
executeQuery(t),10.70515,10.95544,11.96342,11.30507,11.77354,19.66042,100


## 100k

In [8]:
# Benchmark input: 100,000 rows.
t <- createTable(100 * 1000)

In [9]:
# Time 100 evaluations of the query on the current `t` and tabulate them.
# NOTE(review): summary() is called without `unit`, so the unit of the table
# below is not stated in the output; presumably milliseconds here -- confirm.
summary(microbenchmark(executeQuery(t), times = 100))

expr,min,lq,mean,median,uq,max,neval
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
executeQuery(t),17.46591,18.02288,19.91307,18.43539,18.93765,31.8212,100


## 1M

In [10]:
# Benchmark input: 1,000,000 rows.
t <- createTable(1000 * 1000)

In [11]:
# Time 100 evaluations of the query on the current `t` and tabulate them.
# NOTE(review): summary() is called without `unit`, so the unit of the table
# below is not stated in the output; presumably milliseconds here -- confirm.
summary(microbenchmark(executeQuery(t), times = 100))

expr,min,lq,mean,median,uq,max,neval
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
executeQuery(t),107.3113,115.1284,121.1965,118.0327,123.2412,191.4626,100


## 10M

In [12]:
# Benchmark input: 10,000,000 rows.
t <- createTable(10 * 1000 * 1000)

In [13]:
# Time 100 evaluations of the query on the current `t` and tabulate them.
# NOTE(review): summary() is called without `unit`, so the unit of the table
# below is not stated in the output. The recorded values (about 1.0) look like
# seconds, not the milliseconds of the smaller sizes -- confirm before
# comparing across sections.
summary(microbenchmark(executeQuery(t), times = 100))

expr,min,lq,mean,median,uq,max,neval
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
executeQuery(t),1.015818,1.028831,1.054474,1.038424,1.052872,1.332118,100


## 100M
At this size the benchmark is executed only 10 times (instead of 100).

In [14]:
# Benchmark input: 100,000,000 rows.
t <- createTable(100 * 1000 * 1000)

In [15]:
# Time 10 evaluations of the query on the current `t` and tabulate them.
# NOTE(review): summary() is called without `unit`, so the unit of the table
# below is not stated in the output. The recorded values (about 12) look like
# seconds, not milliseconds -- confirm before comparing across sections.
summary(microbenchmark(executeQuery(t), times = 10))

expr,min,lq,mean,median,uq,max,neval
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
executeQuery(t),11.57054,11.73743,12.1193,11.85043,12.10259,14.44422,10


## 1B
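The benchmark was meant to run only 10 times at this size as well, but the table could not be created: the allocation below fails, so there are no timings for 1B rows.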

In [16]:
# Benchmark input: 1,000,000,000 rows. In the recorded run this call aborts
# with a memory allocation error (see the output below).
# NOTE(review): `t` is only rebound after createTable() returns, so the table
# from the previous section stays referenced while the new one is built.
# Calling rm(t) first would let it be reclaimed; unverified whether that alone
# is enough for this size.
t <- createTable(1000 * 1000 * 1000)

ERROR: Error: cannot allocate vector of size 3.7 Gb


In [None]:
# Not executed in the recorded run (the cell has no execution count).
# NOTE(review): the createTable() call for this section failed, so `t` was not
# reassigned. If it still holds the table from the previous section, running
# this cell would time that table rather than 1B rows -- only run it after the
# allocation succeeds.
summary(microbenchmark(executeQuery(t), times = 10))