In [46]:
'''
Challenge link
https://preppindata.blogspot.com/2023/02/2023-week-5-dsb-ranking.html
'''

import pandas as pd
import numpy as np
import datetime

In [47]:
# Load the raw transaction records from the challenge input file and preview them.
df = pd.read_csv(filepath_or_buffer='PD 2023 Wk 1 Input.csv')
df.head(n=5)

Unnamed: 0,Transaction Code,Value,Customer Code,Online or In-Person,Transaction Date
0,DTB-716-679-576,1448,100001,2,20/03/2023 00:00:00
1,DS-795-814-303,7839,100001,2,15/11/2023 00:00:00
2,DSB-807-592-406,5520,100005,1,14/07/2023 00:00:00
3,DS-367-545-264,7957,100007,2,18/08/2023 00:00:00
4,DSB-474-374-857,5375,100000,2,26/08/2023 00:00:00


In [48]:
# Create the bank code by splitting off the letters from the Transaction Code; call this field 'Bank'
# Derive the bank code (the text before the first '-') from 'Transaction Code'.
# The vectorised .str accessor leaves a missing code as a missing 'Bank' value.
bank_codes = df['Transaction Code'].str.split('-').str[0]
if 'Bank' in df.columns:
    # The cell has already been run: overwrite in place, because
    # DataFrame.insert raises ValueError when the column name already exists.
    df['Bank'] = bank_codes
else:
    # First run: place 'Bank' as the leading column.
    df.insert(0, 'Bank', bank_codes)
df.head(5)

Unnamed: 0,Bank,Transaction Code,Value,Customer Code,Online or In-Person,Transaction Date
0,DTB,DTB-716-679-576,1448,100001,2,20/03/2023 00:00:00
1,DS,DS-795-814-303,7839,100001,2,15/11/2023 00:00:00
2,DSB,DSB-807-592-406,5520,100005,1,14/07/2023 00:00:00
3,DS,DS-367-545-264,7957,100007,2,18/08/2023 00:00:00
4,DSB,DSB-474-374-857,5375,100000,2,26/08/2023 00:00:00


In [49]:
# Change the transaction date to be just the month of the transaction

def date2month(x):
    """Return the full month name for a 'DD/MM/YYYY[ HH:MM:SS]' date string.

    Only the text before the first space is parsed; any time-of-day part
    after it is ignored.

    Args:
        x: Date string with day, month and year separated by '/'.

    Returns:
        The month name as produced by ``strftime('%B')`` (e.g. 'March').

    Raises:
        ValueError: If the date part does not have exactly three integer
            fields or they do not form a valid calendar date.
    """
    date_part = x.partition(' ')[0]
    day, month, year = [int(field) for field in date_part.split('/')]
    return datetime.datetime(year, month, day).strftime('%B')

# Overwrite each transaction's date string with the name of its month.
df['Transaction Date'] = df['Transaction Date'].map(date2month)
df.head()

Unnamed: 0,Bank,Transaction Code,Value,Customer Code,Online or In-Person,Transaction Date
0,DTB,DTB-716-679-576,1448,100001,2,March
1,DS,DS-795-814-303,7839,100001,2,November
2,DSB,DSB-807-592-406,5520,100005,1,July
3,DS,DS-367-545-264,7957,100007,2,August
4,DSB,DSB-474-374-857,5375,100000,2,August


In [50]:
# total up the transaction values so you have one row for each bank and month combination

# Collapse the transactions to one row per (month, bank) pair holding the summed value.
total_df = (
    df.groupby(by=['Transaction Date', 'Bank'])['Value']
    .agg('sum')
    .reset_index()
)
total_df.head()

Unnamed: 0,Transaction Date,Bank,Value
0,April,DS,40785
1,April,DSB,30317
2,April,DTB,42360
3,August,DS,102237
4,August,DSB,38167


In [51]:
# Rank each bank for their value of transactions each month against the other banks. 1st is the highest value of transactions, 3rd the lowest. 

# Rank the banks within each month by total value (1 = highest value).
# method='min' gives tied totals the same whole-number rank. The default
# method='average' would produce fractional ranks (e.g. 1.5) that the integer
# cast below would silently truncate.
total_df['Rank'] = (
    total_df.groupby('Transaction Date')['Value']
    .rank(method='min', ascending=False)
    .astype(int)  # rank() returns floats; the values are whole numbers here
)
total_df.head(5)

Unnamed: 0,Transaction Date,Bank,Value,Rank
0,April,DS,40785,2
1,April,DSB,30317,3
2,April,DTB,42360,1
3,August,DS,102237,1
4,August,DSB,38167,3


In [52]:
# Find the average rank each bank has across all of the months; call this field 'Avg Rank per Bank'
# Mean of the monthly ranks for each bank, as a two-column lookup table
# ('Bank', 'Avg Rank per Bank').
avg_df = (
    total_df.groupby('Bank')['Rank']
    .mean()
    .reset_index(name='Avg Rank per Bank')
)
avg_df.head()

Unnamed: 0,Bank,Avg Rank per Bank
0,DS,1.916667
1,DSB,2.333333
2,DTB,1.75


In [53]:
# Attach each bank's average rank to every one of its monthly rows.
total_df = pd.merge(total_df, avg_df, how='left', on='Bank')
total_df.head()

Unnamed: 0,Transaction Date,Bank,Value,Rank,Avg Rank per Bank
0,April,DS,40785,2,1.916667
1,April,DSB,30317,3,2.333333
2,April,DTB,42360,1,1.75
3,August,DS,102237,1,1.916667
4,August,DSB,38167,3,2.333333


In [54]:
# The average transaction value per rank, call this field 'Avg Transaction Value per Rank'
# Mean monthly transaction value for each rank position, as a two-column
# lookup table ('Rank', 'Avg Transaction Value per Rank').
avg_df = (
    total_df.groupby('Rank')['Value']
    .mean()
    .reset_index(name='Avg Transaction Value per Rank')
)
avg_df.head()


Unnamed: 0,Rank,Avg Transaction Value per Rank
0,1,66967.75
1,2,48633.666667
2,3,34620.833333


In [55]:
# Attach the per-rank average transaction value to every row with that rank.
total_df = pd.merge(total_df, avg_df, how='left', on='Rank')
total_df.head()

Unnamed: 0,Transaction Date,Bank,Value,Rank,Avg Rank per Bank,Avg Transaction Value per Rank
0,April,DS,40785,2,1.916667,48633.666667
1,April,DSB,30317,3,2.333333,34620.833333
2,April,DTB,42360,1,1.75,66967.75
3,August,DS,102237,1,1.916667,66967.75
4,August,DSB,38167,3,2.333333,34620.833333


In [56]:
# Print a summary of the final frame: its columns, non-null counts and dtypes.
total_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 36 entries, 0 to 35
Data columns (total 6 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   Transaction Date                36 non-null     object 
 1   Bank                            36 non-null     object 
 2   Value                           36 non-null     int64  
 3   Rank                            36 non-null     int32  
 4   Avg Rank per Bank               36 non-null     float64
 5   Avg Transaction Value per Rank  36 non-null     float64
dtypes: float64(2), int32(1), int64(1), object(2)
memory usage: 1.8+ KB


In [None]:
# Write the final frame to 'output.csv'; index=False leaves the row index out of the file.
total_df.to_csv('output.csv', index=False)