In [1]:
import polars as pl

In [9]:
# Load the raw transactions. try_parse_dates asks polars to infer date/datetime
# columns while reading rather than leaving them as plain strings.
df = pl.read_csv(
    'data/bank_transactions.csv',
    try_parse_dates=True,
)

# Preview the first five rows to check the inferred schema.
df.head(5)

Transaction Code,Value,Customer Code,Online or In-Person,Transaction Date
str,i64,i64,i64,datetime[μs]
"""DTB-716-679-57…",1448,100001,2,2023-03-20 00:00:00
"""DS-795-814-303…",7839,100001,2,2023-11-15 00:00:00
"""DSB-807-592-40…",5520,100005,1,2023-07-14 00:00:00
"""DS-367-545-264…",7957,100007,2,2023-08-18 00:00:00
"""DSB-474-374-85…",5375,100000,2,2023-08-26 00:00:00


In [37]:
# Calendar month number (1-12) -> English month name.
month_dict = dict(enumerate(
    [
        "January", "February", "March", "April", "May", "June",
        "July", "August", "September", "October", "November", "December",
    ],
    start=1,
))

# Total transaction value per bank per month, with each bank ranked inside its
# month and two summary columns derived from those ranks.
output_df = (
    df
    .with_columns(
        # The bank code is the part of the transaction code before the first '-'.
        pl.col('Transaction Code').str.split('-').list.get(0).alias('Bank'),
        # '%B' formats the date as its full month name ("January" ... "December").
        # This replaces Expr.map_dict, which is deprecated in newer polars
        # releases, and needs no month-number lookup table.
        pl.col('Transaction Date').dt.strftime('%B').alias('Transaction Month'),
    )
    .group_by('Bank', 'Transaction Month')
    .agg(pl.col('Value').sum())
    .with_columns(
        # Dense rank within each month; descending, so rank 1 is the bank with
        # the largest total value that month.
        pl.col('Value')
        .rank("dense", descending=True)
        .over('Transaction Month')
        .alias('Bank Rank per Month')
    )
    .with_columns(
        # Mean of a bank's monthly ranks, rounded to 2 decimal places.
        pl.col('Bank Rank per Month')
        .mean()
        .over('Bank')
        .round(2)
        .alias('Avg Rank per Bank'),
        # Mean monthly total across all rows sharing the same rank, rounded to
        # a whole number and stored as Int32.
        pl.col('Value')
        .mean()
        .over('Bank Rank per Month')
        .round()
        .cast(pl.Int32)
        .alias('Avg Transaction Value per Rank'),
    )
)
output_df

Bank,Transaction Month,Value,Bank Rank per Month,Avg Rank per Bank,Avg Transaction Value per Rank
str,str,i64,u32,f64,i32
"""DSB""","""July""",71515,1,2.33,66968
"""DSB""","""April""",30317,3,2.33,34621
"""DTB""","""October""",58924,2,1.75,48634
"""DSB""","""January""",45036,3,2.33,34621
"""DSB""","""February""",28622,3,2.33,34621
"""DTB""","""March""",80358,1,1.75,66968
"""DS""","""November""",96868,1,1.92,66968
"""DS""","""May""",38715,3,1.92,34621
"""DS""","""September""",41996,1,1.92,66968
"""DSB""","""December""",20785,3,2.33,34621
