# Analyse results of benchmark

And because i love it, we will do it using SQL & duckdb, yeah!

In [2]:
import duckdb

In [8]:
duckdb.sql("""
SELECT 
    function,
    round(avg(time_in_s),4) as avg_time_in_s,
    round(avg(memory_in_mb), 4) as avg_memory_in_mb,
    round(max(memory_in_mb), 4) as max_memory_in_mb
FROM read_json('data/benchmark_results.json')
GROUP BY 1
ORDER BY max_memory_in_mb 
""")

┌─────────────────────────┬───────────────┬──────────────────┬──────────────────┐
│        function         │ avg_time_in_s │ avg_memory_in_mb │ max_memory_in_mb │
│         varchar         │    double     │      double      │      double      │
├─────────────────────────┼───────────────┼──────────────────┼──────────────────┤
│ dlt_flatten             │        5.0815 │          18.0576 │          77.6918 │
│ unpack_operator_flatten │        0.0294 │          18.6702 │          84.0011 │
│ manual_flatten          │        0.4781 │           18.671 │          84.0019 │
│ generator_flatten       │        0.4128 │          18.6827 │           84.012 │
│ flatdict_flatten        │        5.5198 │          42.1399 │         189.5238 │
│ pandas_flatten          │         2.864 │          81.4875 │         366.5389 │
└─────────────────────────┴───────────────┴──────────────────┴──────────────────┘

In [4]:
duckdb.sql("""
SELECT 
    function,
    round(avg(time_in_s),4) as avg_time_in_s,
    round(avg(memory_in_mb), 4) as avg_memory_in_mb,
    round(median(memory_in_mb), 4) as median_memory_in_mb
FROM read_json('data/benchmark_results.json')
GROUP BY 1
ORDER BY median_memory_in_mb 
""")

┌─────────────────────────┬───────────────┬──────────────────┬─────────────────────┐
│        function         │ avg_time_in_s │ avg_memory_in_mb │ median_memory_in_mb │
│         varchar         │    double     │      double      │       double        │
├─────────────────────────┼───────────────┼──────────────────┼─────────────────────┤
│ unpack_operator_flatten │        0.0294 │          18.6702 │              0.8409 │
│ manual_flatten          │        0.4781 │           18.671 │              0.8417 │
│ generator_flatten       │        0.4128 │          18.6827 │              0.8519 │
│ flatdict_flatten        │        5.5198 │          42.1399 │              1.9187 │
│ pandas_flatten          │         2.864 │          81.4875 │              3.6984 │
│ dlt_flatten             │        5.0815 │          18.0576 │               3.735 │
└─────────────────────────┴───────────────┴──────────────────┴─────────────────────┘

In [6]:
duckdb.sql("""
SELECT 
    num_players,
    function,
    min(time_in_s) OVER(PARTITION BY num_players ORDER BY time_in_s ) as min_time,
    max(time_in_s) OVER(PARTITION BY num_players ORDER BY time_in_s ) as max_time,
    avg(time_in_s) OVER(PARTITION BY num_players ORDER BY time_in_s ) as avg_time_in_s,
FROM read_json('data/benchmark_results.json')
ORDER BY max_time DESC;
""")

┌─────────────┬─────────────────────────┬─────────────┬──────────────┬───────────────────────┐
│ num_players │        function         │  min_time   │   max_time   │     avg_time_in_s     │
│    int64    │         varchar         │   double    │    double    │        double         │
├─────────────┼─────────────────────────┼─────────────┼──────────────┼───────────────────────┤
│      100000 │ flatdict_flatten        │ 0.132087625 │  24.74514025 │    10.761152715166666 │
│      100000 │ dlt_flatten             │ 0.132087625 │ 22.509564916 │          7.9643552082 │
│      100000 │ pandas_flatten          │ 0.132087625 │ 13.189360875 │         4.32805278125 │
│       10000 │ flatdict_flatten        │ 0.013400916 │  2.568076459 │    1.0571317568333332 │
│       10000 │ dlt_flatten             │ 0.013400916 │  2.316876333 │          0.7549428164 │
│      100000 │ manual_flatten          │ 0.132087625 │  2.142865917 │    1.3742834166666666 │
│      100000 │ generator_flatten       │ 0.132087

---

# Analyse the loop style

In [15]:
duckdb.sql("""
SELECT 
    function,
    round(avg(time_in_s),4) as avg_time_in_s,
    round(median(time_in_s),4) as median_time_in_s,
    round(avg(memory_in_mb), 4) as avg_memory_in_mb,
    round(median(memory_in_mb), 4) as median_memory

FROM read_json('data/compare_loops_results.json')
GROUP BY 1
ORDER BY avg_time_in_s 
""")

┌─────────────────────────────────────┬───────────────┬──────────────────┬──────────────────┬───────────────┐
│              function               │ avg_time_in_s │ median_time_in_s │ avg_memory_in_mb │ median_memory │
│               varchar               │    double     │      double      │      double      │    double     │
├─────────────────────────────────────┼───────────────┼──────────────────┼──────────────────┼───────────────┤
│ flatdict_flatten_gen_comprehension  │         0.119 │           0.0047 │          18.6758 │         0.847 │
│ flatdict_flatten_list_comprehension │        0.1341 │           0.0055 │          18.6754 │        0.8465 │
│ flatdict_flatten                    │        5.1494 │           0.2287 │          42.1424 │        1.9194 │
└─────────────────────────────────────┴───────────────┴──────────────────┴──────────────────┴───────────────┘