# Extra Metrics

Gorodetskaya et al. (2021) resaltan la importancia del rolling mean

In [89]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import seaborn as sns
import matplotlib.pyplot as plt

# Let pandas print up to 500 rows before it truncates a displayed frame.
pd.options.display.max_rows = 500

In [90]:
# Source table for every metric below; the path is relative to the notebook's
# working directory.
data = pd.read_csv(filepath_or_buffer='../data/aggregated_data_payment.csv')

# Preview the first rows to check the columns that were loaded.
data.head()

Unnamed: 0,ATM Name,Transaction Date,No Of Withdrawals,No Of XYZ Card Withdrawals,No Of Other Card Withdrawals,Total amount Withdrawn,Amount withdrawn XYZ Card,Amount withdrawn Other Card,Type,Weekday,Holiday Sequence,isYesterdayHoliday,isHoliday,isTomorrowHoliday,isYesterdayWeekday,isTomorrowWeekday,isWeekday,isPaymentDay,isPayweek
0,Big Street ATM,2020-01-01,125,26,99,429200,75500,353700,National holiday,WEDNESDAY,WHH,False,True,True,True,True,True,False,True
1,Mount Road ATM,2020-01-01,144,49,95,377900,155200,222700,National holiday,WEDNESDAY,WHH,False,True,True,True,True,True,False,True
2,Airport ATM,2020-01-01,78,58,20,315400,269100,46300,National holiday,WEDNESDAY,WHH,False,True,True,True,True,True,False,True
3,KK Nagar ATM,2020-01-01,268,176,92,1290200,927700,362500,National holiday,WEDNESDAY,WHH,False,True,True,True,True,True,False,True
4,Christ College ATM,2020-01-01,95,39,56,427100,235700,191400,National holiday,WEDNESDAY,WHH,False,True,True,True,True,True,False,True


In [91]:
# Preview the withdrawal series of two ATMs (first 20 matching rows).
preview_columns = ['ATM Name', 'Transaction Date', 'Weekday', 'Total amount Withdrawn']
is_previewed_atm = data["ATM Name"].isin(["Big Street ATM", "Mount Road ATM"])

data.loc[is_previewed_atm, preview_columns].head(20)

Unnamed: 0,ATM Name,Transaction Date,Weekday,Total amount Withdrawn
0,Big Street ATM,2020-01-01,WEDNESDAY,429200
1,Mount Road ATM,2020-01-01,WEDNESDAY,377900
5,Big Street ATM,2020-01-02,THURSDAY,401000
6,Mount Road ATM,2020-01-02,THURSDAY,772300
10,Big Street ATM,2020-01-03,FRIDAY,314800
11,Mount Road ATM,2020-01-03,FRIDAY,738100
15,Big Street ATM,2020-01-04,SATURDAY,356900
16,Mount Road ATM,2020-01-04,SATURDAY,633100
20,Big Street ATM,2020-01-05,SUNDAY,148800
21,Mount Road ATM,2020-01-05,SUNDAY,983100


## Cálculo de rolling_mean_weekday

The average demand over the two most recent past occurrences of the same weekday (e.g. for a Tuesday, the previous two Tuesdays), computed per ATM.

In [92]:
def _mean_of_two_previous(amounts):
    """Return, for each position, the mean of up to two preceding values.

    ``closed='left'`` excludes the current row from the window, so only earlier
    observations contribute; ``min_periods=1`` produces a value as soon as one
    earlier observation exists. The first position has no history and is NaN.
    """
    return amounts.rolling(window=2, min_periods=1, closed='left').mean()


# Independent copy with real datetimes, ordered chronologically within each ATM
# so that "previous" rows in a group really are earlier dates.
data_rolling_mean_weekday = data.copy(deep=True)
data_rolling_mean_weekday['Transaction Date'] = pd.to_datetime(
    data_rolling_mean_weekday['Transaction Date']
)
data_rolling_mean_weekday = data_rolling_mean_weekday.sort_values(
    by=['ATM Name', 'Transaction Date']
)

# One group per (ATM, weekday): consecutive rows of a group are consecutive
# occurrences of that weekday for that ATM.
same_weekday_amounts = data_rolling_mean_weekday.groupby(['ATM Name', 'Weekday'])['Total amount Withdrawn']
data_rolling_mean_weekday['rolling_mean_weekday'] = same_weekday_amounts.transform(_mean_of_two_previous)

# The first occurrence of each weekday has no history (NaN); fall back to the
# row's own withdrawn amount.
data_rolling_mean_weekday['rolling_mean_weekday'] = data_rolling_mean_weekday['rolling_mean_weekday'].fillna(
    data_rolling_mean_weekday['Total amount Withdrawn']
)

# Preview the result for two ATMs.
data_rolling_mean_weekday.loc[
    data_rolling_mean_weekday["ATM Name"].isin(["Big Street ATM", "Mount Road ATM"]),
    ['ATM Name', 'Transaction Date', 'Weekday', 'Total amount Withdrawn', 'rolling_mean_weekday'],
].head(20)

Unnamed: 0,ATM Name,Transaction Date,Weekday,Total amount Withdrawn,rolling_mean_weekday
0,Big Street ATM,2020-01-01,WEDNESDAY,429200,429200.0
5,Big Street ATM,2020-01-02,THURSDAY,401000,401000.0
10,Big Street ATM,2020-01-03,FRIDAY,314800,314800.0
15,Big Street ATM,2020-01-04,SATURDAY,356900,356900.0
20,Big Street ATM,2020-01-05,SUNDAY,148800,148800.0
25,Big Street ATM,2020-01-06,MONDAY,307200,307200.0
30,Big Street ATM,2020-01-07,TUESDAY,273500,273500.0
35,Big Street ATM,2020-01-08,WEDNESDAY,242600,429200.0
40,Big Street ATM,2020-01-09,THURSDAY,215300,401000.0
45,Big Street ATM,2020-01-10,FRIDAY,336000,314800.0


## Cálculo de rolling_max

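Maximum demand value within the week of the transaction date, per ATM. Despite the name, this is a per-week group maximum rather than a trailing window, so every day of a given week receives the same value.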

In [101]:
# Work on an independent copy. `pd.DataFrame(data)` is not guaranteed to copy:
# depending on the pandas version the result can share storage with `data`,
# so adding columns here could silently modify the shared input frame.
data_rolling_max = data.copy(deep=True)

# Convert 'Transaction Date' to datetime
data_rolling_max['Transaction Date'] = pd.to_datetime(data_rolling_max['Transaction Date'])

# ISO calendar components (year, week, day) of every transaction date
iso_calendar = data_rolling_max['Transaction Date'].dt.isocalendar()

# Calendar year, kept for reference only. It must NOT be combined with the ISO
# week number: e.g. 2022-01-01 belongs to ISO week 52 of ISO year 2021, so
# grouping on (calendar year, ISO week) would put it in the same group as the
# last week of December 2022 and split weeks that straddle New Year.
data_rolling_max['Year'] = data_rolling_max['Transaction Date'].dt.year

# ISO year and ISO week together identify one Monday-to-Sunday week
data_rolling_max['ISO Year'] = iso_calendar['year']
data_rolling_max['Week'] = iso_calendar['week']

# Group by ATM and ISO week, then broadcast the weekly maximum to every row of that week
data_rolling_max['rolling_max'] = (
    data_rolling_max
    .groupby(['ATM Name', 'ISO Year', 'Week'])['Total amount Withdrawn']
    .transform('max')
)

data_rolling_max[['ATM Name', 'Transaction Date', 'Weekday', 'Total amount Withdrawn', 'rolling_max', 'Week']].loc[
    (data_rolling_max["ATM Name"] == "Big Street ATM")
    ].head()

Unnamed: 0,ATM Name,Transaction Date,Weekday,Total amount Withdrawn,rolling_max,Week
0,Big Street ATM,2020-01-01,WEDNESDAY,429200,429200,1
5,Big Street ATM,2020-01-02,THURSDAY,401000,429200,1
10,Big Street ATM,2020-01-03,FRIDAY,314800,429200,1
15,Big Street ATM,2020-01-04,SATURDAY,356900,429200,1
20,Big Street ATM,2020-01-05,SUNDAY,148800,429200,1


## Cálculo de lag_6 

The amount of demand six observations earlier for the same ATM (i.e. six days ago when the ATM has no missing dates).

In [95]:
# Work on an independent copy. `pd.DataFrame(data)` is not guaranteed to copy:
# depending on the pandas version the result can share storage with `data`,
# so adding columns here could silently modify the shared input frame.
data_lag_6 = data.copy(deep=True)

# Convert 'Transaction Date' to datetime so the sort below is chronological
data_lag_6['Transaction Date'] = pd.to_datetime(data_lag_6['Transaction Date'])

# `shift` works on row positions, so order each ATM's rows by date explicitly
# instead of relying on the order of the CSV file. The original row labels are
# kept, so the column can still be aligned back onto `data` by index.
data_lag_6 = data_lag_6.sort_values(by=['ATM Name', 'Transaction Date'])

# Value of the same ATM six rows earlier.
# NOTE(review): this is a row lag, not a calendar lag. If an ATM has missing
# dates, the value comes from more than six days back. Confirm whether a true
# six-day lag is required.
data_lag_6['lag_6'] = data_lag_6.groupby('ATM Name')['Total amount Withdrawn'].shift(6)

# The first six rows of every ATM have no history (NaN); fall back to the
# row's own withdrawn amount.
data_lag_6['lag_6'] = data_lag_6['lag_6'].fillna(data_lag_6['Total amount Withdrawn'])

data_lag_6[['ATM Name', 'Transaction Date', 'Weekday', 'Total amount Withdrawn', 'lag_6']].loc[
    (data_lag_6["ATM Name"] == "Big Street ATM")
    ].head()

Unnamed: 0,ATM Name,Transaction Date,Weekday,Total amount Withdrawn,lag_6
0,Big Street ATM,2020-01-01,WEDNESDAY,429200,429200.0
5,Big Street ATM,2020-01-02,THURSDAY,401000,401000.0
10,Big Street ATM,2020-01-03,FRIDAY,314800,314800.0
15,Big Street ATM,2020-01-04,SATURDAY,356900,356900.0
20,Big Street ATM,2020-01-05,SUNDAY,148800,148800.0


## Cálculo de rolling_max_weekday

The maximum demand value over the two most recent past occurrences of the same weekday (e.g. for a Tuesday, the previous two Tuesdays), computed per ATM.

In [96]:
def _max_of_two_previous(amounts):
    """Return, for each position, the maximum of up to two preceding values.

    ``closed='left'`` excludes the current row from the window, so only earlier
    observations contribute; ``min_periods=1`` produces a value as soon as one
    earlier observation exists. The first position has no history and is NaN.
    """
    return amounts.rolling(window=2, min_periods=1, closed='left').max()


# Independent copy with real datetimes, ordered chronologically within each ATM
# so that "previous" rows in a group really are earlier dates.
data_rolling_max_weekday = data.copy(deep=True)
data_rolling_max_weekday['Transaction Date'] = pd.to_datetime(
    data_rolling_max_weekday['Transaction Date']
)
data_rolling_max_weekday = data_rolling_max_weekday.sort_values(
    by=['ATM Name', 'Transaction Date']
)

# One group per (ATM, weekday); take the rolling MAX (not the mean) of the two
# previous occurrences of that weekday.
same_weekday_amounts = data_rolling_max_weekday.groupby(['ATM Name', 'Weekday'])['Total amount Withdrawn']
data_rolling_max_weekday['rolling_max_weekday'] = same_weekday_amounts.transform(_max_of_two_previous)

# The first occurrence of each weekday has no history (NaN); fall back to the
# row's own withdrawn amount.
data_rolling_max_weekday['rolling_max_weekday'] = data_rolling_max_weekday['rolling_max_weekday'].fillna(
    data_rolling_max_weekday['Total amount Withdrawn']
)

# Preview the result for two ATMs.
data_rolling_max_weekday.loc[
    data_rolling_max_weekday["ATM Name"].isin(["Big Street ATM", "Mount Road ATM"]),
    ['ATM Name', 'Transaction Date', 'Weekday', 'Total amount Withdrawn', 'rolling_max_weekday'],
].head()

Unnamed: 0,ATM Name,Transaction Date,Weekday,Total amount Withdrawn,rolling_max_weekday
0,Big Street ATM,2020-01-01,WEDNESDAY,429200,429200.0
5,Big Street ATM,2020-01-02,THURSDAY,401000,401000.0
10,Big Street ATM,2020-01-03,FRIDAY,314800,314800.0
15,Big Street ATM,2020-01-04,SATURDAY,356900,356900.0
20,Big Street ATM,2020-01-05,SUNDAY,148800,148800.0


## Cálculo de rolling_std

Weekly demand standard deviation

In [98]:
# Work on an independent copy. `pd.DataFrame(data)` is not guaranteed to copy:
# depending on the pandas version the result can share storage with `data`,
# so adding columns here could silently modify the shared input frame.
data_rolling_std = data.copy(deep=True)

# Convert 'Transaction Date' to datetime
data_rolling_std['Transaction Date'] = pd.to_datetime(data_rolling_std['Transaction Date'])

# ISO calendar components (year, week, day) of every transaction date
iso_calendar = data_rolling_std['Transaction Date'].dt.isocalendar()

# Calendar year, kept for reference only. It must NOT be combined with the ISO
# week number: e.g. 2022-01-01 belongs to ISO week 52 of ISO year 2021, so
# grouping on (calendar year, ISO week) would put it in the same group as the
# last week of December 2022 and split weeks that straddle New Year.
data_rolling_std['Year'] = data_rolling_std['Transaction Date'].dt.year

# ISO year and ISO week together identify one Monday-to-Sunday week
data_rolling_std['ISO Year'] = iso_calendar['year']
data_rolling_std['Week'] = iso_calendar['week']

# Group by ATM and ISO week, then broadcast the weekly standard deviation of
# the withdrawn amount to every row of that week.
# NOTE(review): 'std' is the sample standard deviation, so a week with a single
# row (e.g. a partial week at the edge of the data) yields NaN. Confirm how
# downstream steps should treat those rows.
data_rolling_std['rolling_std'] = (
    data_rolling_std
    .groupby(['ATM Name', 'ISO Year', 'Week'])['Total amount Withdrawn']
    .transform('std')
)

data_rolling_std[['ATM Name', 'Transaction Date', 'Weekday', 'Total amount Withdrawn', 'rolling_std', 'Week']].loc[
    (data_rolling_std["ATM Name"] == "Big Street ATM")
    ].head()

Unnamed: 0,ATM Name,Transaction Date,Weekday,Total amount Withdrawn,rolling_std,Week
0,Big Street ATM,2020-01-01,WEDNESDAY,429200,110305.974453,1
5,Big Street ATM,2020-01-02,THURSDAY,401000,110305.974453,1
10,Big Street ATM,2020-01-03,FRIDAY,314800,110305.974453,1
15,Big Street ATM,2020-01-04,SATURDAY,356900,110305.974453,1
20,Big Street ATM,2020-01-05,SUNDAY,148800,110305.974453,1


## Cálculo de rolling_mean

Average monthly demand value

In [99]:
# Work on an independent copy. `pd.DataFrame(data)` is not guaranteed to copy:
# depending on the pandas version the result can share storage with `data`,
# so adding columns here could silently modify the shared input frame.
data_rolling_mean = data.copy(deep=True)

# Convert 'Transaction Date' to datetime
data_rolling_mean['Transaction Date'] = pd.to_datetime(data_rolling_mean['Transaction Date'])

# Calendar year and month together identify one calendar month; the year keeps
# the same month of different years in separate groups.
data_rolling_mean['Year'] = data_rolling_mean['Transaction Date'].dt.year
data_rolling_mean['Month'] = data_rolling_mean['Transaction Date'].dt.month

# Group by ATM and calendar month, then broadcast the monthly mean of the
# withdrawn amount to every row of that month.
data_rolling_mean['rolling_mean'] = (
    data_rolling_mean
    .groupby(['ATM Name', 'Year', 'Month'])['Total amount Withdrawn']
    .transform('mean')
)

data_rolling_mean[['ATM Name', 'Transaction Date', 'Weekday', 'Total amount Withdrawn', 'rolling_mean']].loc[
    (data_rolling_mean["ATM Name"] == "Big Street ATM") &
    (data_rolling_mean["Month"] == 1)
    ].head()

Unnamed: 0,ATM Name,Transaction Date,Weekday,Total amount Withdrawn,rolling_mean
0,Big Street ATM,2020-01-01,WEDNESDAY,429200,298800.0
5,Big Street ATM,2020-01-02,THURSDAY,401000,298800.0
10,Big Street ATM,2020-01-03,FRIDAY,314800,298800.0
15,Big Street ATM,2020-01-04,SATURDAY,356900,298800.0
20,Big Street ATM,2020-01-05,SUNDAY,148800,298800.0


## Combinando las métricas

In [102]:
# Metric column -> frame in which that metric was computed.
# The order here is the order in which the columns are appended.
metric_sources = {
    'rolling_mean_weekday': data_rolling_mean_weekday,
    'rolling_max': data_rolling_max,
    'lag_6': data_lag_6,
    'rolling_max_weekday': data_rolling_max_weekday,
    'rolling_std': data_rolling_std,
    'rolling_mean': data_rolling_mean,
}

# Start from a copy of the original frame
agg_data = data.copy(deep=True)

# Append every metric. Assigning a Series aligns on the row labels, so the
# frames that were re-sorted by ATM and date still land on the right rows.
for metric_name, metric_frame in metric_sources.items():
    agg_data[metric_name] = metric_frame[metric_name]

# Show the ATM name next to all the new metric columns
agg_data[['ATM Name', *metric_sources]]

Unnamed: 0,ATM Name,rolling_mean_weekday,rolling_max,lag_6,rolling_max_weekday,rolling_std,rolling_mean
0,Big Street ATM,429200.0,429200,429200.0,429200.0,110305.974453,2.988000e+05
1,Mount Road ATM,377900.0,983100,377900.0,377900.0,220819.202064,4.999467e+05
2,Airport ATM,315400.0,1034000,315400.0,315400.0,272711.985435,5.019806e+05
3,KK Nagar ATM,1290200.0,1710500,1290200.0,1290200.0,289313.606662,1.059403e+06
4,Christ College ATM,427100.0,748100,427100.0,427100.0,146317.422066,4.707613e+05
...,...,...,...,...,...,...,...
7929,Big Street ATM,472600.0,513800,558900.0,558900.0,43164.819780,4.492571e+05
7930,Mount Road ATM,263500.0,447400,251100.0,337100.0,105204.449480,2.632172e+05
7931,Airport ATM,446150.0,709900,659400.0,462600.0,145034.454035,5.251759e+05
7932,KK Nagar ATM,609100.0,1175200,583700.0,611800.0,355719.502926,6.707793e+05


In [103]:
# Persist the enriched table; index=False keeps the row labels out of the file.
agg_data.to_csv(path_or_buf='../data/aggregated_data_extra_metrics.csv', index=False)