# Analyse results of benchmark

And because i love it, we will do it using SQL & duckdb, yeah!

In [2]:
import duckdb

In [3]:
duckdb.sql("""
SELECT 
    function,
    round(avg(time_in_s),4) as avg_time_in_s,
    round(avg(memory_in_mb), 4) as avg_memory_in_mb
FROM read_json('data/benchmark_results.json')
GROUP BY 1
ORDER BY avg_time_in_s 
""")

┌───────────────────┬───────────────┬──────────────────┐
│     function      │ avg_time_in_s │ avg_memory_in_mb │
│      varchar      │    double     │      double      │
├───────────────────┼───────────────┼──────────────────┤
│ manual_flatten    │         0.042 │          18.6702 │
│ generator_flatten │        0.3768 │          17.2552 │
│ pandas_flatten    │        2.8282 │          81.4875 │
│ dlt_flatten       │        5.0465 │          18.0576 │
│ flatdict_flatten  │         5.568 │          42.1465 │
└───────────────────┴───────────────┴──────────────────┘

In [5]:
duckdb.sql("""
SELECT 
    function,
    round(avg(time_in_s),4) as avg_time_in_s,
    round(avg(memory_in_mb), 4) as avg_memory_in_mb,
    round(median(memory_in_mb), 4) as median_memory_in_mb
FROM read_json('data/benchmark_results.json')
GROUP BY 1
ORDER BY median_memory_in_mb 
""")

┌───────────────────┬───────────────┬──────────────────┬─────────────────────┐
│     function      │ avg_time_in_s │ avg_memory_in_mb │ median_memory_in_mb │
│      varchar      │    double     │      double      │       double        │
├───────────────────┼───────────────┼──────────────────┼─────────────────────┤
│ generator_flatten │        0.3768 │          17.2552 │              0.7817 │
│ manual_flatten    │         0.042 │          18.6702 │              0.8409 │
│ flatdict_flatten  │         5.568 │          42.1465 │              1.9252 │
│ pandas_flatten    │        2.8282 │          81.4875 │              3.6983 │
│ dlt_flatten       │        5.0465 │          18.0576 │              3.7351 │
└───────────────────┴───────────────┴──────────────────┴─────────────────────┘

In [10]:
duckdb.sql("""
SELECT 
    num_players,
    function,
    min(time_in_s) OVER(PARTITION BY num_players ORDER BY time_in_s ) as min_time,
    max(time_in_s) OVER(PARTITION BY num_players ORDER BY time_in_s ) as max_time,
    avg(time_in_s) OVER(PARTITION BY num_players ORDER BY time_in_s ) as avg_time_in_s,
FROM read_json('data/benchmark_results.json')
ORDER BY avg_time_in_s;
""")

┌─────────────┬───────────────────┬─────────────┬──────────────┬────────────────────────┐
│ num_players │     function      │  min_time   │   max_time   │     avg_time_in_s      │
│    int64    │      varchar      │   double    │    double    │         double         │
├─────────────┼───────────────────┼─────────────┼──────────────┼────────────────────────┤
│          23 │ manual_flatten    │  2.1334e-05 │   2.1334e-05 │             2.1334e-05 │
│          23 │ flatdict_flatten  │  2.1334e-05 │   0.00010425 │             6.2792e-05 │
│         100 │ manual_flatten    │  9.3084e-05 │   9.3084e-05 │             9.3084e-05 │
│          23 │ generator_flatten │  2.1334e-05 │  0.000502708 │ 0.00020943066666666667 │
│         100 │ flatdict_flatten  │  9.3084e-05 │  0.000417708 │ 0.00025539599999999997 │
│         100 │ generator_flatten │  9.3084e-05 │   0.00169725 │            0.000736014 │
│        1000 │ manual_flatten    │ 0.001213833 │  0.001213833 │            0.001213833 │
│         

---

# Analyse the loop style

In [15]:
duckdb.sql("""
SELECT 
    function,
    round(avg(time_in_s),4) as avg_time_in_s,
    round(median(time_in_s),4) as median_time_in_s,
    round(avg(memory_in_mb), 4) as avg_memory_in_mb,
    round(median(memory_in_mb), 4) as median_memory

FROM read_json('data/compare_loops_results.json')
GROUP BY 1
ORDER BY avg_time_in_s 
""")

┌─────────────────────────────────────┬───────────────┬──────────────────┬──────────────────┬───────────────┐
│              function               │ avg_time_in_s │ median_time_in_s │ avg_memory_in_mb │ median_memory │
│               varchar               │    double     │      double      │      double      │    double     │
├─────────────────────────────────────┼───────────────┼──────────────────┼──────────────────┼───────────────┤
│ flatdict_flatten_gen_comprehension  │         0.119 │           0.0047 │          18.6758 │         0.847 │
│ flatdict_flatten_list_comprehension │        0.1341 │           0.0055 │          18.6754 │        0.8465 │
│ flatdict_flatten                    │        5.1494 │           0.2287 │          42.1424 │        1.9194 │
└─────────────────────────────────────┴───────────────┴──────────────────┴──────────────────┴───────────────┘