In [2]:
import pandas as pd

https://preppindata.blogspot.com/2023/01/2023-week-3-targets-for-dsb.html

Requirements
- Input the data
- For the transactions file:
    - Filter the transactions to just look at DSB 
    - These will be transactions that contain DSB in the Transaction Code field
    - Rename the values in the Online or In-person field, Online of the 1 values and In-Person for the 2 values
    - Change the date to be the quarter 
- Sum the transaction values for each quarter and for each Type of Transaction (Online or In-Person) 
- For the targets file:
    - Pivot the quarterly targets so we have a row for each Type of Transaction and each Quarter 
    - Rename the fields
- Remove the 'Q' from the quarter field and make the data type numeric 

- Join the two datasets together 
- You may need more than one join clause!
- Remove unnecessary fields
Calculate the Variance to Target for each row 

In [87]:
targets = pd.read_csv("Targets.csv")
transactions = pd.read_csv(r"C:\Users\Dell\Documents\Python Scripts\preppindata\Week 1\PD 2023 Wk 1 Input.csv")

In [88]:
#Filter to only DSB Bank and replace 1 with Online and 2 with In-Person
transactions = transactions[transactions['Transaction Code'].str.startswith('DSB')]
transactions['Online or In-Person'] = transactions['Online or In-Person'].replace({1: 'Online', 2: 'In-Person'})

In [89]:
transactions

Unnamed: 0,Transaction Code,Value,Customer Code,Online or In-Person,Transaction Date
2,DSB-807-592-406,5520,100005,Online,14/07/2023 00:00:00
4,DSB-474-374-857,5375,100000,In-Person,26/08/2023 00:00:00
5,DSB-448-546-348,4525,100009,Online,27/05/2023 00:00:00
11,DSB-422-218-322,118,100010,Online,12/05/2023 00:00:00
12,DSB-669-227-170,830,100001,Online,15/04/2023 00:00:00
...,...,...,...,...,...
350,DSB-618-298-395,9280,100008,Online,10/03/2023 00:00:00
351,DSB-637-369-281,6060,100002,In-Person,09/08/2023 00:00:00
353,DSB-322-596-206,900,100001,Online,04/02/2023 00:00:00
354,DSB-384-247-358,6446,100003,Online,28/06/2023 00:00:00


In [90]:
#Change Transaction Date to Quater
transactions['Transaction Date'] = transactions['Transaction Date'].astype('datetime64[D]')
transactions['Quarter'] = transactions['Transaction Date'].dt.to_period('Q').dt.strftime('Q%q')


In [91]:
transactions

Unnamed: 0,Transaction Code,Value,Customer Code,Online or In-Person,Transaction Date,Quarter
2,DSB-807-592-406,5520,100005,Online,2023-07-14,Q3
4,DSB-474-374-857,5375,100000,In-Person,2023-08-26,Q3
5,DSB-448-546-348,4525,100009,Online,2023-05-27,Q2
11,DSB-422-218-322,118,100010,Online,2023-12-05,Q4
12,DSB-669-227-170,830,100001,Online,2023-04-15,Q2
...,...,...,...,...,...,...
350,DSB-618-298-395,9280,100008,Online,2023-10-03,Q4
351,DSB-637-369-281,6060,100002,In-Person,2023-09-08,Q3
353,DSB-322-596-206,900,100001,Online,2023-04-02,Q2
354,DSB-384-247-358,6446,100003,Online,2023-06-28,Q2


In [97]:
transactions[transactions['Quarter'] == 'Q2']

Unnamed: 0,Transaction Code,Value,Customer Code,Online or In-Person,Transaction Date,Quarter
5,DSB-448-546-348,4525,100009,Online,2023-05-27,Q2
12,DSB-669-227-170,830,100001,Online,2023-04-15,Q2
25,DSB-736-207-386,5726,100001,In-Person,2023-05-23,Q2
28,DSB-460-438-644,1873,100002,Online,2023-05-06,Q2
38,DSB-839-525-960,5285,100003,In-Person,2023-04-07,Q2
65,DSB-960-872-814,7234,100009,Online,2023-04-03,Q2
89,DSB-717-342-793,2943,100008,In-Person,2023-04-23,Q2
106,DSB-939-176-839,1661,100001,Online,2023-05-02,Q2
119,DSB-687-427-946,4548,100008,Online,2023-05-21,Q2
125,DSB-863-337-664,6877,100008,Online,2023-04-06,Q2


In [96]:
df = transactions.pivot_table(index = ['Quarter', 'Online or In-Person'], values= ['Value'], aggfunc= 'sum')
df = df.reset_index()
df

Unnamed: 0,Quarter,Online or In-Person,Value
0,Q1,In-Person,47115
1,Q1,Online,55870
2,Q2,In-Person,81627
3,Q2,Online,94860
4,Q3,In-Person,84048
5,Q3,Online,56617
6,Q4,In-Person,52832
7,Q4,Online,57520


In [98]:
targets = targets.melt(id_vars=["Online or In-Person"], 
        var_name="Quarter", 
        value_name="Target")
targets

Unnamed: 0,Online or In-Person,Quarter,Target
0,Online,Q1,72500
1,In-Person,Q1,75000
2,Online,Q2,70000
3,In-Person,Q2,70000
4,Online,Q3,60000
5,In-Person,Q3,70000
6,Online,Q4,60000
7,In-Person,Q4,60000


In [102]:
final_answer = pd.merge(df, targets, on=['Quarter', 'Online or In-Person'])
final_answer['Variance To Target'] = final_answer['Value'] - final_answer['Target']
final_answer

Unnamed: 0,Quarter,Online or In-Person,Value,Target,Variance To Target
0,Q1,In-Person,47115,75000,-27885
1,Q1,Online,55870,72500,-16630
2,Q2,In-Person,81627,70000,11627
3,Q2,Online,94860,70000,24860
4,Q3,In-Person,84048,70000,14048
5,Q3,Online,56617,60000,-3383
6,Q4,In-Person,52832,60000,-7168
7,Q4,Online,57520,60000,-2480
