## Hardware Details
[GCP](https://cloud.google.com/) VM: [n1-highmem-16](https://cloud.google.com/compute/docs/machine-types#n1_machine_types) (16 vCPUs, 104 GB memory)

In [1]:
# Print the CPU description reported by the `lscpu` utility, one output line per row.
cat(
    system("lscpu", intern = TRUE),
    sep = '\n'
)

Architecture:          x86_64
CPU op-mode(s):        32-bit, 64-bit
Byte Order:            Little Endian
CPU(s):                16
On-line CPU(s) list:   0-15
Thread(s) per core:    2
Core(s) per socket:    8
Socket(s):             1
NUMA node(s):          1
Vendor ID:             GenuineIntel
CPU family:            6
Model:                 63
Model name:            Intel(R) Xeon(R) CPU @ 2.30GHz
Stepping:              0
CPU MHz:               2300.000
BogoMIPS:              4600.00
Hypervisor vendor:     KVM
Virtualization type:   full
L1d cache:             32K
L1i cache:             32K
L2 cache:              256K
L3 cache:              46080K
NUMA node0 CPU(s):     0-15
Flags:                 fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc eagerfpu pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hyperviso

In [2]:
# Report the machine's total memory: print the first line of /proc/meminfo
# (the MemTotal entry). The file is read directly with readLines() instead of
# spawning a shell pipeline (`cat ... | head -n1`) just to fetch one line.
cat(readLines("/proc/meminfo", n = 1L), sep = '\n')

MemTotal:       107091244 kB


In [3]:
# Show the platform and version details of the running R interpreter.
R.version

               _                           
platform       x86_64-redhat-linux-gnu     
arch           x86_64                      
os             linux-gnu                   
system         x86_64, linux-gnu           
status                                     
major          3                           
minor          6.0                         
year           2019                        
month          04                          
day            26                          
svn rev        76424                       
language       R                           
version.string R version 3.6.0 (2019-04-26)
nickname       Planting of a Tree          

## Basic functions

In [4]:
library(microbenchmark)
library(dplyr)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [5]:
# Build a random benchmark table with `rowCount` rows.
#
# Columns of the returned data.frame:
#   bucket - factor with 26^2 levels labelled by two-letter codes
#   qty    - integers sampled with replacement from 1..100
#   risk   - integers sampled with replacement from 1..10
#   weight - uniform random doubles between 0 and 2
#
# A garbage collection is requested before anything is allocated.
createTable <- function(rowCount) {
    gc()

    bucket_ids <- 1:26^2
    # Two-letter labels in the order expand.grid(letters, letters) yields them:
    # the first letter varies fastest ("aa", "ba", "ca", ...).
    bucket_names <- paste0(rep(letters, times = 26), rep(letters, each = 26))

    # The random draws happen in column order: bucket, qty, risk, weight.
    bucket <- factor(
        sample(bucket_ids, rowCount, replace = TRUE),
        levels = bucket_ids,
        labels = bucket_names
    )
    qty <- sample(1:100, rowCount, replace = TRUE)
    risk <- sample(1:10, rowCount, replace = TRUE)
    weight <- runif(rowCount, 0, 2)

    data.frame(bucket = bucket, qty = qty, risk = risk, weight = weight)
}

In [6]:
# Run the benchmark aggregation: one output row per `bucket` holding the row
# count, the sum and mean of `qty` and `risk`, and the means of `qty` and
# `risk` weighted by `weight`.
executeQuery <- function(t) {
    by_bucket <- group_by(t, bucket)
    summarise(
        by_bucket,
        NR = n(),
        TOTAL_QTY = sum(qty),
        AVG_QTY = mean(qty),
        TOTAL_RISK = sum(risk),
        AVG_RISK = mean(risk),
        WEIGHTED_QTY = weighted.mean(qty, weight),
        WEIGHTED_RISK = weighted.mean(risk, weight)
    )
}

## 10k

In [7]:
# Build the 10,000-row input table used by the next benchmark.
t <- createTable(10 * 1000)

In [8]:
# Time 100 evaluations of the aggregation on the 10,000-row table and show the summary statistics.
summary(microbenchmark(executeQuery(t), times = 100))

expr,min,lq,mean,median,uq,max,neval
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
executeQuery(t),12.15123,12.48152,14.16519,14.38958,14.82958,19.84698,100


## 100k

In [9]:
# Remove the previous table before allocating the next one, so it is no longer
# referenced when createTable() calls gc().
rm(t)
# Build the 100,000-row input table.
t <- createTable(100 * 1000)

In [10]:
# Time 100 evaluations of the aggregation on the 100,000-row table and show the summary statistics.
summary(microbenchmark(executeQuery(t), times = 100))

expr,min,lq,mean,median,uq,max,neval
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
executeQuery(t),24.70136,26.44561,29.08584,27.66485,28.81909,44.70791,100


## 1M

In [11]:
# Remove the previous table before allocating the next one, so it is no longer
# referenced when createTable() calls gc().
rm(t)
# Build the 1,000,000-row input table.
t <- createTable(1000 * 1000)

In [12]:
# Time 100 evaluations of the aggregation on the 1,000,000-row table and show the summary statistics.
summary(microbenchmark(executeQuery(t), times = 100))

expr,min,lq,mean,median,uq,max,neval
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
executeQuery(t),149.2304,165.0563,182.0598,185.3958,190.0563,251.8039,100


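## 10M

*Note: from this table size onward the timings appear to be reported in seconds rather than milliseconds; `summary()` of a `microbenchmark` result picks the time unit automatically, and the unit is not shown in the tables below.*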

In [13]:
# Remove the previous table before allocating the next one, so it is no longer
# referenced when createTable() calls gc().
rm(t)
# Build the 10,000,000-row input table.
t <- createTable(10 * 1000 * 1000)

In [14]:
# Time 100 evaluations of the aggregation on the 10,000,000-row table and show the summary statistics.
summary(microbenchmark(executeQuery(t), times = 100))

expr,min,lq,mean,median,uq,max,neval
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
executeQuery(t),2.000388,2.140791,2.192599,2.197837,2.23085,2.414765,100


## 100M
Note: for this table size the benchmark is repeated only 10 times instead of 100.

In [15]:
# Remove the previous table before allocating the next one, so it is no longer
# referenced when createTable() calls gc().
rm(t)
# Build the 100,000,000-row input table.
t <- createTable(100 * 1000 * 1000)

In [16]:
# Time only 10 evaluations of the aggregation on the 100,000,000-row table and show the summary statistics.
summary(microbenchmark(executeQuery(t), times = 10))

expr,min,lq,mean,median,uq,max,neval
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
executeQuery(t),23.98742,24.11272,24.33816,24.27295,24.36662,25.45767,10


## 1B
Note: for this table size the benchmark is repeated only 10 times instead of 100.

In [17]:
# Remove the previous table before allocating the next one, so it is no longer
# referenced when createTable() calls gc().
rm(t)
# Build the 1,000,000,000-row input table.
t <- createTable(1000 * 1000 * 1000)

In [18]:
# Time only 10 evaluations of the aggregation on the 1,000,000,000-row table and show the summary statistics.
summary(microbenchmark(executeQuery(t), times = 10))

expr,min,lq,mean,median,uq,max,neval
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
executeQuery(t),240.7534,241.226,242.945,241.7437,242.8427,250.8354,10
