In [1]:
import polars as pl
from datetime import date, timedelta
import random
import string

# Function to generate a date range from 2000 to 2024
def generate_date_range(start_year, end_year):
    start_date = date(start_year, 1, 1)
    end_date = date(end_year, 12, 31)
    delta = end_date - start_date
    return [start_date + timedelta(days=i) for i in range(delta.days + 1)]

# Generate a list of dates from 2000 to 2024
dates = generate_date_range(2, 9999)

# Generate random groups, values, and set ctr to 1 for each entry
groups = [random.choice(string.ascii_uppercase) for _ in dates]
values = [random.randint(1, 5000000) for _ in dates]
ctrs = [1 for _ in dates]

# Create the DataFrame
df = pl.DataFrame({
    "group": groups,
    "values": values,
    "date": dates,
    "ctr": ctrs
})

df.describe()

statistic,group,values,date,ctr
str,str,f64,str,f64
"""count""","""3651694""",3651694.0,"""3651694""",3651694.0
"""null_count""","""0""",0.0,"""0""",0.0
"""mean""",,2499700.0,"""5000-12-31""",1.0
"""std""",,1443800.0,,0.0
"""min""","""A""",8.0,"""0002-01-01""",1.0
"""25%""",,1248650.0,"""2501-07-02""",1.0
"""50%""",,2498380.0,"""5001-01-01""",1.0
"""75%""",,3750577.0,"""7500-07-02""",1.0
"""max""","""Z""",5000000.0,"""9999-12-31""",1.0


In [2]:
`# Define a custom function for rolling sum operation on a DataFrame grouped by 'group'
def rolling_sum(group_df):
    # Selects specific columns and applies a rolling sum on the 'ctr' column with a window of 3
    return group_df.select([
        pl.col("group"),                     # Include the 'group' column
        pl.col("date"),                      # Include the 'date' column
        pl.col("ctr").rolling_sum(window_size="1y",by="date", min_periods=1)  # Apply rolling sum on 'ctr'
    ])

# Group the DataFrame by 'group', apply the custom rolling_sum function, and sort the result
result = df.group_by("group").map_groups(rolling_sum).sort(by="date", descending=False)

# Display the first 6 rows of the resulting DataFrame to showcase the rolling sum operation
result.tail(100)


- sorting your data by your `by` column beforehand;
- setting `.set_sorted()` if you already know your data is sorted;
  (this is known to happen when combining rolling aggregations with `over`);

before passing calling the rolling aggregation function.



group,date,ctr
str,date,i64
"""E""",9999-09-23,12
"""G""",9999-09-24,15
"""Z""",9999-09-25,12
"""U""",9999-09-26,12
"""R""",9999-09-27,12
"""Y""",9999-09-28,18
"""E""",9999-09-29,12
"""N""",9999-09-30,21
"""C""",9999-10-01,15
"""B""",9999-10-02,13
