In [19]:
import pandas as pd

In [20]:
# Load the raw transaction records; the path is relative to the notebook's working directory.
transactions = pd.read_csv(
    filepath_or_buffer='./data/input/PD 2023 Wk 1 Input.csv',
)
# Preview the first five rows as a quick sanity check of the columns.
transactions.head(n=5)

Unnamed: 0,Transaction Code,Value,Customer Code,Online or In-Person,Transaction Date
0,DTB-716-679-576,1448,100001,2,20/03/2023 00:00:00
1,DS-795-814-303,7839,100001,2,15/11/2023 00:00:00
2,DSB-807-592-406,5520,100005,1,14/07/2023 00:00:00
3,DS-367-545-264,7957,100007,2,18/08/2023 00:00:00
4,DSB-474-374-857,5375,100000,2,26/08/2023 00:00:00


In [21]:
# Load the quarterly targets table; the path is relative to the notebook's working directory.
targets = pd.read_csv(
    filepath_or_buffer='./data/input/Targets.csv',
)
# Preview the first rows to confirm the layout that was read.
targets.head(n=5)

Unnamed: 0,Online or In-Person,Q1,Q2,Q3,Q4
0,Online,72500,70000,60000,60000
1,In-Person,75000,70000,70000,60000


# Preprocessing

In [22]:
# Build the prepared transactions table: keep DSB rows only, decode the
# Online / In-Person flag into labels, and derive the quarter of each transaction.
transactions_prep = ( transactions
    # Keep only the transactions whose bank is DSB (the first 3 characters of the Transaction Code column)
    .loc[lambda x: x['Transaction Code'].str[:3] == 'DSB']
    # Recode the values of the Online or In-Person column into labels (1 = Online, 2 = In-Person)
    .replace(
        {
            'Online or In-Person': {1: 'Online', 2: 'In-Person'}
        }
    )
    # Parse the Transaction Date column with an explicit day-first format.
    # The values look like '14/07/2023 00:00:00'; without a format pandas has to
    # guess between day-first and month-first, which emits a UserWarning and can
    # mis-read ambiguous dates such as '05/03/2023' (and therefore the quarter).
    .assign(Date = lambda x: pd.to_datetime(x['Transaction Date'], format='%d/%m/%Y %H:%M:%S'))
    # Quarter number (1-4) taken from the parsed date
    .assign(Quarter = lambda x: x['Date'].dt.quarter)
)

transactions_prep.head()

  .assign(Date = lambda x: pd.to_datetime(x['Transaction Date']))


Unnamed: 0,Transaction Code,Value,Customer Code,Online or In-Person,Transaction Date,Date,Quarter
2,DSB-807-592-406,5520,100005,Online,14/07/2023 00:00:00,2023-07-14,3
4,DSB-474-374-857,5375,100000,In-Person,26/08/2023 00:00:00,2023-08-26,3
5,DSB-448-546-348,4525,100009,Online,27/05/2023 00:00:00,2023-05-27,2
11,DSB-422-218-322,118,100010,Online,12/05/2023 00:00:00,2023-05-12,2
12,DSB-669-227-170,830,100001,Online,15/04/2023 00:00:00,2023-04-15,2


In [23]:
# Reshape the targets from one column per quarter into one row per
# (Online or In-Person, Quarter) pair.
targets_prep = (
    targets
    # Unpivot every non-id column; the former column labels land in 'Quarter'
    # and the cell values are stored directly under their final name.
    .melt(
        id_vars=['Online or In-Person'],
        var_name='Quarter',
        value_name='Quarterly Targets',
    )
    # Drop the first character of the quarter label (the 'Q') and cast the rest to an integer
    .assign(Quarter=lambda long_df: long_df['Quarter'].str.slice(start=1).astype(int))
)

targets_prep.head()

Unnamed: 0,Online or In-Person,Quarter,Quarterly Targets
0,Online,1,72500
1,In-Person,1,75000
2,Online,2,70000
3,In-Person,2,70000
4,Online,3,60000


# Calculations

In [24]:
# Total transaction value per (Quarter, Online or In-Person) combination.
# as_index=False keeps the grouping keys as ordinary columns, so no
# reset_index step is required afterwards.
transactions_agg = (
    transactions_prep
    .groupby(by=['Quarter', 'Online or In-Person'], as_index=False)
    .agg(Value=('Value', 'sum'))
)
transactions_agg.head()

Unnamed: 0,Quarter,Online or In-Person,Value
0,1,In-Person,77576
1,1,Online,74562
2,2,In-Person,70634
3,2,Online,69325
4,3,In-Person,74189


In [26]:
# Attach the aggregated transaction values to every target row (left join, so
# each target row is kept), compute the variance and write the result to disk.
(
    pd.merge(
        targets_prep,
        transactions_agg,
        how='left',
        on=['Quarter', 'Online or In-Person'],
    )
    # Variance = summed transaction value minus the quarterly target
    .assign(Variance_To_Target=lambda joined: joined['Value'].sub(joined['Quarterly Targets']))
    # quoting=1 is the numeric value of csv.QUOTE_ALL: every field is wrapped in quotechar.
    .to_csv(
        path_or_buf='./data/output/output_2023_03.csv',
        sep=';',
        quotechar='"',
        quoting=1,
        index=False,
    )
)