In [1]:
import polars as pl

file='https://drive.google.com/u/0/uc?id=1oln2ri6nu1wDQfT3gQMLLNlmQ2h6B9d9&export=download'

df=pl.read_csv(file,try_parse_dates=True)

pl.Config.set_fmt_str_lengths(50)

polars.config.Config

In [2]:
df.head()

Transaction Code,Value,Customer Code,Online or In-Person,Transaction Date
str,i64,i64,i64,datetime[μs]
"""DTB-716-679-576""",1448,100001,2,2023-03-20 00:00:00
"""DS-795-814-303""",7839,100001,2,2023-11-15 00:00:00
"""DSB-807-592-406""",5520,100005,1,2023-07-14 00:00:00
"""DS-367-545-264""",7957,100007,2,2023-08-18 00:00:00
"""DSB-474-374-857""",5375,100000,2,2023-08-26 00:00:00


In [3]:
# Split Transaction Code into Bank and Transaction Number
# Bank is leters before first -
# Transaction Code is everything after first -

df=df.with_columns(
    pl.col('Transaction Code').str.splitn('-',2)
)

df=df.unnest('Transaction Code')
df=df.rename({'field_0':'Bank','field_1':'Transaction Code'})

df.head()

Bank,Transaction Code,Value,Customer Code,Online or In-Person,Transaction Date
str,str,i64,i64,i64,datetime[μs]
"""DTB""","""716-679-576""",1448,100001,2,2023-03-20 00:00:00
"""DS""","""795-814-303""",7839,100001,2,2023-11-15 00:00:00
"""DSB""","""807-592-406""",5520,100005,1,2023-07-14 00:00:00
"""DS""","""367-545-264""",7957,100007,2,2023-08-18 00:00:00
"""DSB""","""474-374-857""",5375,100000,2,2023-08-26 00:00:00


In [4]:
df=df.with_columns(
    pl.col('Transaction Date').dt.strftime("%B")
    )

df.head()

Bank,Transaction Code,Value,Customer Code,Online or In-Person,Transaction Date
str,str,i64,i64,i64,str
"""DTB""","""716-679-576""",1448,100001,2,"""March"""
"""DS""","""795-814-303""",7839,100001,2,"""November"""
"""DSB""","""807-592-406""",5520,100005,1,"""July"""
"""DS""","""367-545-264""",7957,100007,2,"""August"""
"""DSB""","""474-374-857""",5375,100000,2,"""August"""


In [5]:
df2=df.groupby(['Bank','Transaction Date']).agg(pl.col('Value').sum())

In [6]:
df2=df2.with_columns(
    pl.col('Value').rank(descending=True).over('Transaction Date').alias('Rank')
)

# df2.filter(pl.col('Transaction Date') == 'August')

In [11]:
# Group by 'bank' and calculate the average rank
avg_rank_df = df2.groupby('Bank').agg(pl.col('Rank').mean().alias('Avg Rank per Bank'))
avg_value_df=df2.groupby('Rank').agg(pl.col('Value').mean().alias('Average Transaction Value per Rank'))

# Join the result back to the original DataFrame on the 'bank' column
result_df = df2.join(avg_rank_df, on='Bank')
result_df = result_df.join(avg_value_df, on='Rank')

print(result_df)


shape: (36, 6)
┌──────┬──────────────────┬────────┬──────┬───────────────────┬────────────────────────────────────┐
│ Bank ┆ Transaction Date ┆ Value  ┆ Rank ┆ Avg Rank per Bank ┆ Average Transaction Value per Rank │
│ ---  ┆ ---              ┆ ---    ┆ ---  ┆ ---               ┆ ---                                │
│ str  ┆ str              ┆ i64    ┆ f32  ┆ f32               ┆ f64                                │
╞══════╪══════════════════╪════════╪══════╪═══════════════════╪════════════════════════════════════╡
│ DTB  ┆ April            ┆ 42360  ┆ 1.0  ┆ 1.75              ┆ 66967.75                           │
│ DS   ┆ April            ┆ 40785  ┆ 2.0  ┆ 1.916667          ┆ 48633.666667                       │
│ DSB  ┆ October          ┆ 41707  ┆ 3.0  ┆ 2.333333          ┆ 34620.833333                       │
│ DS   ┆ August           ┆ 102237 ┆ 1.0  ┆ 1.916667          ┆ 66967.75                           │
│ …    ┆ …                ┆ …      ┆ …    ┆ …                 ┆ …           