In [2]:
:dep polars = {version = "0.40.0", features = ["lazy",]}
use polars::prelude::*;

In [3]:
let q = LazyCsvReader::new("src/20231206_deaths_final_2014_2022_state_month_sup.csv")
    .with_has_header(true)
    .finish()?;
let df = q.collect()?;
println!("{}", df);

shape: (40_032, 12)
┌──────┬───────┬──────────────┬──────────────┬───┬───────┬─────────────┬─────────────┬─────────────┐
│ Year ┆ Month ┆ Geography_Ty ┆ Strata       ┆ … ┆ Count ┆ Annotation_ ┆ Annotation_ ┆ Data_Revisi │
│ ---  ┆ ---   ┆ pe           ┆ ---          ┆   ┆ ---   ┆ Code        ┆ Desc        ┆ on_Date     │
│ i64  ┆ i64   ┆ ---          ┆ str          ┆   ┆ i64   ┆ ---         ┆ ---         ┆ ---         │
│      ┆       ┆ str          ┆              ┆   ┆       ┆ i64         ┆ str         ┆ str         │
╞══════╪═══════╪══════════════╪══════════════╪═══╪═══════╪═════════════╪═════════════╪═════════════╡


In [4]:
let filtered_df = df.clone().lazy()
    .select(&[col("Strata"), col("Strata_Name"), col("Year"), col("Month"), col("Count")]).collect()?;
println!("{}", filtered_df)

│ 2014 ┆ 1     ┆ Occurrence   ┆ Total        ┆ … ┆ 23896 ┆ null        ┆ null        ┆ 12/06/2023  │
│      ┆       ┆              ┆ Population   ┆   ┆       ┆             ┆             ┆             │
│ 2014 ┆ 1     ┆ Occurrence   ┆ Age          ┆ … ┆ 185   ┆ null        ┆ null        ┆ 12/06/2023  │
│ 2014 ┆ 1     ┆ Occurrence   ┆ Age          ┆ … ┆ 30    ┆ null        ┆ null        ┆ 12/06/2023  │
│ 2014 ┆ 1     ┆ Occurrence   ┆ Age          ┆ … ┆ 47    ┆ null        ┆ null        ┆ 12/06/2023  │
│ 2014 ┆ 1     ┆ Occurrence   ┆ Age          ┆ … ┆ 268   ┆ null        ┆ null        ┆ 12/06/2023  │
│ …    ┆ …     ┆ …            ┆ …            ┆ … ┆ …     ┆ …           ┆ …           ┆ …           │
│ 2022 ┆ 12    ┆ Residence    ┆ Race-Ethnici ┆ … ┆ null  ┆ 1           ┆ Cell        ┆ 12/06/2023  │
│      ┆       ┆              ┆ ty           ┆   ┆       ┆             ┆ suppressed  ┆             │
│      ┆       ┆              ┆              ┆   ┆       ┆             ┆ for small   ┆     

()

In [5]:
let further_filter = filtered_df.clone().lazy().filter(
    col("Strata")
    .gt_eq(lit("Total Population"))
    )
    .collect()?;
println!("{}", further_filter)

shape: (3_240, 5)


()

┌──────────────────┬──────────────────┬──────┬───────┬───────┐
│ Strata           ┆ Strata_Name      ┆ Year ┆ Month ┆ Count │
│ ---              ┆ ---              ┆ ---  ┆ ---   ┆ ---   │
│ str              ┆ str              ┆ i64  ┆ i64   ┆ i64   │
╞══════════════════╪══════════════════╪══════╪═══════╪═══════╡
│ Total Population ┆ Total Population ┆ 2014 ┆ 1     ┆ 23896 │
│ Total Population ┆ Total Population ┆ 2014 ┆ 1     ┆ 1158  │
│ Total Population ┆ Total Population ┆ 2014 ┆ 1     ┆ 5187  │
│ Total Population ┆ Total Population ┆ 2014 ┆ 1     ┆ 1506  │
│ Total Population ┆ Total Population ┆ 2014 ┆ 1     ┆ 831   │
│ …                ┆ …                ┆ …    ┆ …     ┆ …     │
│ Total Population ┆ Total Population ┆ 2022 ┆ 12    ┆ 459   │
│ Total Population ┆ Total Population ┆ 2022 ┆ 12    ┆ 399   │
│ Total Population ┆ Total Population ┆ 2022 ┆ 12    ┆ 906   │
│ Total Population ┆ Total Population ┆ 2022 ┆ 12    ┆ 1750  │
│ Total Population ┆ Total Population ┆ 2022 ┆ 12    ┆ 

In [10]:
let grouped_filter = further_filter.clone().lazy()
    .select([col("Year"), col("Month"), col("Count")])
    .group_by(["Year", "Month"])
    .agg(
        [col("*").sum().alias("Count")]
    )
    .collect()?;

println!("{:?}", grouped_filter)

shape: (108, 3)


()

┌──────┬───────┬────────┐
│ Year ┆ Month ┆ Count  │
│ ---  ┆ ---   ┆ ---    │
│ i64  ┆ i64   ┆ i64    │
╞══════╪═══════╪════════╡
│ 2021 ┆ 4     ┆ 83757  │
│ 2019 ┆ 9     ┆ 75933  │
│ 2015 ┆ 2     ┆ 79127  │
│ 2019 ┆ 10    ┆ 81756  │
│ 2020 ┆ 3     ┆ 91333  │
│ …    ┆ …     ┆ …      │
│ 2019 ┆ 6     ┆ 78765  │
│ 2016 ┆ 3     ┆ 86800  │
│ 2018 ┆ 6     ┆ 76746  │
│ 2015 ┆ 3     ┆ 83479  │
│ 2018 ┆ 1     ┆ 104670 │
└──────┴───────┴────────┘


In [13]:
let stats = grouped_filter.describe()?;

println!("{:?}", stats);

Error: no method named `describe` found for struct `polars::prelude::DataFrame` in the current scope