Requirements

1. Input data
2. Create the bank code by splitting off the letters from the Transaction Code, call this field 'Bank'
3. Change the Transaction Date to be just the month of the transaction
4. Total up the transaction values so you have one row for each bank and month combination
5. Rank each bank for their value of transactions each month against the other banks. 1st is the highest value of transactions, 3rd the lowest. 
6. Without losing any of the other data fields, find:
    The average rank a bank has across all of the months, call this field 'Avg Rank per Bank'
    The average transaction value per rank, call this field 'Avg Transaction Value per Rank'
7. Output the data

In [2]:
import pandas as pd

In [3]:
# Input data: read the raw transactions file into a DataFrame.
df = pd.read_csv(filepath_or_buffer='Preppin Data Inputs/Transactions wk1.csv')

In [4]:
# Display the transactions as loaded from the CSV.
df

Unnamed: 0,Transaction Code,Value,Customer Code,Online or In-Person,Transaction Date
0,DTB-716-679-576,1448,100001,2,20/03/2023 00:00:00
1,DS-795-814-303,7839,100001,2,15/11/2023 00:00:00
2,DSB-807-592-406,5520,100005,1,14/07/2023 00:00:00
3,DS-367-545-264,7957,100007,2,18/08/2023 00:00:00
4,DSB-474-374-857,5375,100000,2,26/08/2023 00:00:00
...,...,...,...,...,...
360,DTB-116-439-102,6708,100001,1,29/01/2023 00:00:00
361,DS-849-981-514,8500,100000,2,29/10/2023 00:00:00
362,DS-726-686-279,9455,100006,2,10/08/2023 00:00:00
363,DS-551-937-380,475,100002,1,11/10/2023 00:00:00


In [5]:
# Create the 'Bank' field: the letters of 'Transaction Code' that precede the first hyphen.

df['Bank'] = df['Transaction Code'].str.split('-').str[0]

In [6]:
# Display the frame to check the new 'Bank' column.
df

Unnamed: 0,Transaction Code,Value,Customer Code,Online or In-Person,Transaction Date,Bank
0,DTB-716-679-576,1448,100001,2,20/03/2023 00:00:00,DTB
1,DS-795-814-303,7839,100001,2,15/11/2023 00:00:00,DS
2,DSB-807-592-406,5520,100005,1,14/07/2023 00:00:00,DSB
3,DS-367-545-264,7957,100007,2,18/08/2023 00:00:00,DS
4,DSB-474-374-857,5375,100000,2,26/08/2023 00:00:00,DSB
...,...,...,...,...,...,...
360,DTB-116-439-102,6708,100001,1,29/01/2023 00:00:00,DTB
361,DS-849-981-514,8500,100000,2,29/10/2023 00:00:00,DS
362,DS-726-686-279,9455,100006,2,10/08/2023 00:00:00,DS
363,DS-551-937-380,475,100002,1,11/10/2023 00:00:00,DS


In [7]:
# Change 'Transaction Date' to be just the month name of the transaction.
# The source strings are day-first (dd/mm/yyyy), hence dayfirst=True.

transaction_dates = pd.to_datetime(df['Transaction Date'], dayfirst=True)
df['Transaction Date'] = transaction_dates.dt.month_name()

In [8]:
# Display the frame; 'Transaction Date' now holds month names.
df

Unnamed: 0,Transaction Code,Value,Customer Code,Online or In-Person,Transaction Date,Bank
0,DTB-716-679-576,1448,100001,2,March,DTB
1,DS-795-814-303,7839,100001,2,November,DS
2,DSB-807-592-406,5520,100005,1,July,DSB
3,DS-367-545-264,7957,100007,2,August,DS
4,DSB-474-374-857,5375,100000,2,August,DSB
...,...,...,...,...,...,...
360,DTB-116-439-102,6708,100001,1,January,DTB
361,DS-849-981-514,8500,100000,2,October,DS
362,DS-726-686-279,9455,100006,2,August,DS
363,DS-551-937-380,475,100002,1,October,DS


In [9]:
# Total the transaction values so there is one row per bank and month.
# 'Value' is selected explicitly before aggregating: the first positional
# parameter of GroupBy.sum is `numeric_only`, not a column name, so passing
# 'Value' to sum() would not pick the column.
df = (
    df.groupby(['Bank', 'Transaction Date'])['Value']
    .sum()
    .reset_index()
)

In [10]:
# Display the summed 'Value' for each bank and month combination.
df

Unnamed: 0,Bank,Transaction Date,Value
0,DS,April,40785
1,DS,August,102237
2,DS,December,33952
3,DS,February,31204
4,DS,January,50207
5,DS,July,55002
6,DS,June,55182
7,DS,March,36799
8,DS,May,38715
9,DS,November,96868


In [11]:
# Rank each bank's total value within each month against the other banks.
# ascending=False makes rank 1 the highest total for that month.

value_by_month = df.groupby('Transaction Date')['Value']
df['Bank Rank'] = value_by_month.rank(ascending=False)

In [12]:
# Order rows by month name (alphabetical, since the months are stored as
# names), then by descending value within each month.
sort_columns = ['Transaction Date', 'Value']
sort_ascending = [True, False]
df = df.sort_values(by=sort_columns, ascending=sort_ascending)
df

Unnamed: 0,Bank,Transaction Date,Value,Bank Rank
24,DTB,April,42360,1.0
0,DS,April,40785,2.0
12,DSB,April,30317,3.0
1,DS,August,102237,1.0
25,DTB,August,66063,2.0
13,DSB,August,38167,3.0
26,DTB,December,63797,1.0
2,DS,December,33952,2.0
14,DSB,December,20785,3.0
27,DTB,February,43400,1.0


In [13]:
# Without losing the other data fields, find:
#   - the average rank a bank has across all of the months ('Avg Rank per Bank')
#   - the average transaction value per rank ('Avg Transaction Value per Rank')

# Mean rank per bank, indexed by 'Bank'. 'Bank Rank' is selected explicitly
# because the first positional parameter of GroupBy.mean is `numeric_only`,
# not a column name; the double brackets keep the result a one-column DataFrame.
df1 = df.groupby('Bank')[['Bank Rank']].mean()

In [14]:
# Give the per-bank mean rank its output field name, then display it.
avg_rank_labels = {'Bank Rank': 'Avg Rank per Bank'}
df1 = df1.rename(columns=avg_rank_labels)
df1

Unnamed: 0_level_0,Avg Rank per Bank
Bank,Unnamed: 1_level_1
DS,1.916667
DSB,2.333333
DTB,1.75


In [15]:
# Mean transaction value for each rank, indexed by 'Bank Rank'.
# 'Value' is selected explicitly because the first positional parameter of
# GroupBy.mean is `numeric_only`, not a column name; selecting it also keeps
# any other numeric column in df out of the result.
df2 = (
    df.groupby('Bank Rank')[['Value']]
    .mean()
    .rename(columns={'Value': 'Avg Transaction Value per Rank'})
)

In [16]:
# Display the mean transaction value for each rank.
df2

Unnamed: 0_level_0,Avg Transaction Value per Rank
Bank Rank,Unnamed: 1_level_1
1.0,66967.75
2.0,48633.666667
3.0,34620.833333


In [17]:
# Attach each bank's average rank to every row for that bank.
# df1 carries 'Bank' as its index name; merge keys may name an index level.
df = df.merge(df1, how='left', on='Bank')

In [18]:
# Attach the average transaction value for each row's rank.
# df2 carries 'Bank Rank' as its index name; merge keys may name an index level.
df = df.merge(df2, how='left', on='Bank Rank')

In [19]:
# Display the final frame with both averages merged in.
df

Unnamed: 0,Bank,Transaction Date,Value,Bank Rank,Avg Rank per Bank,Avg Transaction Value per Rank
0,DTB,April,42360,1.0,1.75,66967.75
1,DS,April,40785,2.0,1.916667,48633.666667
2,DSB,April,30317,3.0,2.333333,34620.833333
3,DS,August,102237,1.0,1.916667,66967.75
4,DTB,August,66063,2.0,1.75,48633.666667
5,DSB,August,38167,3.0,2.333333,34620.833333
6,DTB,December,63797,1.0,1.75,66967.75
7,DS,December,33952,2.0,1.916667,48633.666667
8,DSB,December,20785,3.0,2.333333,34620.833333
9,DTB,February,43400,1.0,1.75,66967.75


In [20]:
# Output the data as CSV; index=False leaves the row index out of the file.

df.to_csv(path_or_buf='Preppin Data Outputs/pd2023wk5_output.csv', index=False)