In [1]:
import pandas as pd

In [15]:
# Load the raw transaction export and preview the first rows.
input_path = './data/input/PD 2023 Wk 1 Input.csv'
df = pd.read_csv(input_path)
df.head()

Unnamed: 0,Transaction Code,Value,Customer Code,Online or In-Person,Transaction Date
0,DTB-716-679-576,1448,100001,2,20/03/2023 00:00:00
1,DS-795-814-303,7839,100001,2,15/11/2023 00:00:00
2,DSB-807-592-406,5520,100005,1,14/07/2023 00:00:00
3,DS-367-545-264,7957,100007,2,18/08/2023 00:00:00
4,DSB-474-374-857,5375,100000,2,26/08/2023 00:00:00


# Preprocessing

In [26]:
# Aggregate the raw transactions into one row per bank per month.
#
# Reads `df` (the raw transactions loaded above) and produces `monthly_transactions`
# with the columns Bank, Month, Month_Num and Value (total transaction value).
monthly_transactions = (
    df
    # The bank code is the part of the transaction code before the first '-'
    .assign(Bank=lambda x: x['Transaction Code'].str.split('-').str[0])
    # The input dates are day-first (e.g. '20/03/2023 00:00:00'). The format is given
    # explicitly so that ambiguous dates such as '05/06/2023' can never be read
    # month-first, which would silently move transactions into the wrong month.
    .assign(Date=lambda x: pd.to_datetime(x['Transaction Date'], format='%d/%m/%Y %H:%M:%S'))
    # Month name for display, month number for chronological sorting
    .assign(Month=lambda x: x['Date'].dt.month_name())
    .assign(Month_Num=lambda x: x['Date'].dt.month)
    # Total value of transactions per bank per month
    .groupby(['Bank', 'Month', 'Month_Num'], as_index=False)
    ['Value'].sum()
    # Sort by bank, then chronologically within each bank
    .sort_values(['Bank', 'Month_Num'])
    .reset_index(drop=True)
    # Keep only the required columns, in a fixed order
    [['Bank', 'Month', 'Month_Num', 'Value']]
)

# Show the first 5 rows of the prepared data
monthly_transactions.head()

  .assign(Date = lambda x: pd.to_datetime(x['Transaction Date']))


Unnamed: 0,Bank,Month,Month_Num,Value
0,DS,January,1,50207
1,DS,February,2,31204
2,DS,March,3,36799
3,DS,April,4,40785
4,DS,May,5,38715


# Calculations

In [35]:
# Rank the banks within each month, add the two summary averages and export the result.
#
# Reads `monthly_transactions` (one row per bank per month) and writes a
# semicolon-separated CSV; the expression itself evaluates to None.
( monthly_transactions
    # Rank the banks within each month by total value (1 = highest value);
    # method='first' breaks ties by row order so ranks are always distinct integers
    .assign(Rank=lambda x: x.groupby('Month')['Value'].rank(method='first', ascending=False).astype(int))
    # Average monthly value across all rows that share the same rank
    .assign(Avg_Transaction_Value=lambda x: x.groupby('Rank')['Value'].transform('mean'))
    # Average rank each bank achieved across all months
    .assign(Avg_Rank=lambda x: x.groupby('Bank')['Rank'].transform('mean'))
    # Sort chronologically, then by rank within each month
    .sort_values(['Month_Num', 'Rank'])
    .reset_index(drop=True)
    # Keep only the required columns
    [['Month', 'Bank', 'Value', 'Rank', 'Avg_Rank', 'Avg_Transaction_Value']]
    # Rename the columns to their output names
    .rename(columns={
        'Rank': 'Bank Rank per Month',
        'Month': 'Transaction Date',
        # This average is grouped by Rank, so the label says "per Rank"
        'Avg_Transaction_Value': 'Avg Transaction Value per Rank',
        'Avg_Rank': 'Avg Rank by Bank'
    })
    # quoting=1 is csv.QUOTE_ALL: every field is wrapped in double quotes
    .to_csv('./data/output/output_2023_05.csv', index=False, quoting=1, quotechar='"', sep=';')
)