# Payment day

Según las investigaciones de Asad et al. (2020), Gorodetskaya et al. (2021) y Riabykh et al. (2022) los anticipos y fechas de pago son variables que influyen en la demanda de los ATM. Por lo que en este archivo se calcularán y agregarán estas variables.

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:
data = pd.read_csv('../data/aggregated_data_cleaning_complete.csv')
data.head()

Unnamed: 0,ATM Name,Transaction Date,No Of Withdrawals,No Of XYZ Card Withdrawals,No Of Other Card Withdrawals,Total amount Withdrawn,Amount withdrawn XYZ Card,Amount withdrawn Other Card,Type,Weekday,Holiday Sequence,isYesterdayHoliday,isHoliday,isTomorrowHoliday,isYesterdayWeekday,isTomorrowWeekday,isWeekday
0,Big Street ATM,2020-01-01,125,26,99,429200,75500,353700,Restricted Holiday,WEDNESDAY,WHH,False,True,True,True,True,True
1,Mount Road ATM,2020-01-01,144,49,95,377900,155200,222700,Restricted Holiday,WEDNESDAY,WHH,False,True,True,True,True,True
2,Airport ATM,2020-01-01,78,58,20,315400,269100,46300,Restricted Holiday,WEDNESDAY,WHH,False,True,True,True,True,True
3,KK Nagar ATM,2020-01-01,268,176,92,1290200,927700,362500,Restricted Holiday,WEDNESDAY,WHH,False,True,True,True,True,True
4,Christ College ATM,2020-01-01,95,39,56,427100,235700,191400,Restricted Holiday,WEDNESDAY,WHH,False,True,True,True,True,True


In [4]:
data[['ATM Name', 'Transaction Date', 'isWeekday']].loc[
    (data["ATM Name"] == "Big Street ATM") | 
    (data["ATM Name"] == "Mount Road ATM")
    ].head(20)

Unnamed: 0,ATM Name,Transaction Date,isWeekday
0,Big Street ATM,2020-01-01,True
1,Mount Road ATM,2020-01-01,True
5,Big Street ATM,2020-01-02,True
6,Mount Road ATM,2020-01-02,True
10,Big Street ATM,2020-01-03,True
11,Mount Road ATM,2020-01-03,True
15,Big Street ATM,2020-01-04,False
16,Mount Road ATM,2020-01-04,False
20,Big Street ATM,2020-01-05,False
21,Mount Road ATM,2020-01-05,False


## Cálculo de día de pago (Fin de mes)

En perú, generalmente, la fecha de pago se realiza el último día del mes, siempre y cuando, sea un día laborable (día de semana desde lunes hasta viernes) y no sea feriado. En caso se incumpla estos criterios, se calcula la fecha anterior más cercana que sí cumpla.

In [5]:
payment_data_end_month = data.copy(deep=True)

# Convertimos la columna 'Transaction Date' a tipo datetime
payment_data_end_month['Transaction Date'] = pd.to_datetime(payment_data_end_month['Transaction Date'])
def calculate_payment_day(group):
    # Obtenemos el último día del mes
    last_day_of_month = group['Transaction Date'].max()

    # Verificamos si el último día del mes existe en el grupo
    if not group[group['Transaction Date'] == last_day_of_month].empty:
        # Si el último día del mes es laborable y no es feriado, ese es el día de pago
        if group.loc[group['Transaction Date'] == last_day_of_month, 'isWeekday'].values[0] and not group.loc[group['Transaction Date'] == last_day_of_month, 'isHoliday'].values[0]:
            group['isPaymentDay'] = group['Transaction Date'] == last_day_of_month
        else:
            # Si no es laborable o es feriado, buscamos hacia atrás hasta encontrar un día laborable y que no sea feriado
            for i in range(1, len(group)):
                check_date = last_day_of_month - pd.Timedelta(days=i)
                if not group[group['Transaction Date'] == check_date].empty:
                    if group.loc[group['Transaction Date'] == check_date, 'isWeekday'].values[0] and not group.loc[group['Transaction Date'] == check_date, 'isHoliday'].values[0]:
                        group['isPaymentDay'] = group['Transaction Date'] == check_date
                        break
            else:
                group['isPaymentDay'] = False
    else:
        group['isPaymentDay'] = False

    return group

# Agrupamos por mes y ATM para aplicar la lógica de cálculo de día de pago
payment_data_end_month['isPaymentDay'] = False

payment_data_end_month = payment_data_end_month.groupby([payment_data_end_month['Transaction Date'].dt.to_period('M'), 'ATM Name']).apply(calculate_payment_day)

# Convertimos de nuevo 'Transaction Date' a tipo string si es necesario
payment_data_end_month['Transaction Date'] = payment_data_end_month['Transaction Date'].dt.strftime('%Y-%m-%d')

# Mostramos el dataframe resultante
payment_data_end_month = payment_data_end_month.reset_index(drop=True)

pd.set_option('display.max_rows', 500)

payment_data_end_month[['ATM Name', 'Transaction Date', 'isWeekday', 'isHoliday', 'isPaymentDay']].loc[
    (payment_data_end_month['ATM Name'] == 'Airport ATM') &
    (pd.to_datetime(payment_data_end_month['Transaction Date']).dt.day > 27)
    ].head(10)


  payment_data_end_month = payment_data_end_month.groupby([payment_data_end_month['Transaction Date'].dt.to_period('M'), 'ATM Name']).apply(calculate_payment_day)


Unnamed: 0,ATM Name,Transaction Date,isWeekday,isHoliday,isPaymentDay
27,Airport ATM,2020-01-28,True,False,False
28,Airport ATM,2020-01-29,True,False,False
29,Airport ATM,2020-01-30,True,False,False
30,Airport ATM,2020-01-31,True,False,True
181,Airport ATM,2020-02-28,True,False,True
321,Airport ATM,2020-03-28,False,False,False
322,Airport ATM,2020-03-29,False,True,False
323,Airport ATM,2020-03-30,True,False,True
324,Airport ATM,2020-03-31,True,True,False
475,Airport ATM,2020-04-28,True,False,False


## Cálculo de día de pago (Quincena)

Ocasionalmente en Perú, los pagos se realizan en dos fechas, a quincena y a fin de mes, por lo que se replicará el funcionamiento del cálculo de dia de pago a fin de mes, pero en quincena.

In [6]:
payment_data_fortnight = data.copy(deep=True)

# Convertimos la columna 'Transaction Date' a tipo datetime
payment_data_fortnight['Transaction Date'] = pd.to_datetime(payment_data_fortnight['Transaction Date'])
def calculate_payment_day(group):
    # Obtenemos el primer día del mes para luego calcular el 15
    first_day_of_month = group['Transaction Date'].min().replace(day=1)
    
    # Calculamos el día 15 de ese mes
    fifteenth_day_of_month = first_day_of_month + pd.DateOffset(days=14)

    # Verificamos si el último día del mes existe en el grupo
    if not group[group['Transaction Date'] == fifteenth_day_of_month].empty:
        # Si el último día del mes es laborable y no es feriado, ese es el día de pago
        if group.loc[group['Transaction Date'] == fifteenth_day_of_month, 'isWeekday'].values[0] and not group.loc[group['Transaction Date'] == fifteenth_day_of_month, 'isHoliday'].values[0]:
            group['isPaymentDay'] = group['Transaction Date'] == fifteenth_day_of_month
        else:
            # Si no es laborable o es feriado, buscamos hacia atrás hasta encontrar un día laborable y que no sea feriado
            for i in range(1, len(group)):
                check_date = fifteenth_day_of_month - pd.Timedelta(days=i)
                if not group[group['Transaction Date'] == check_date].empty:
                    if group.loc[group['Transaction Date'] == check_date, 'isWeekday'].values[0] and not group.loc[group['Transaction Date'] == check_date, 'isHoliday'].values[0]:
                        group['isPaymentDay'] = group['Transaction Date'] == check_date
                        break
            else:
                group['isPaymentDay'] = False
    else:
        group['isPaymentDay'] = False

    return group

# Agrupamos por mes y ATM para aplicar la lógica de cálculo de día de pago
payment_data_fortnight['isPaymentDay'] = False

payment_data_fortnight = payment_data_fortnight.groupby([payment_data_fortnight['Transaction Date'].dt.to_period('M'), 'ATM Name']).apply(calculate_payment_day)

# Convertimos de nuevo 'Transaction Date' a tipo string si es necesario
payment_data_fortnight['Transaction Date'] = payment_data_fortnight['Transaction Date'].dt.strftime('%Y-%m-%d')

# Mostramos el dataframe resultante
payment_data_fortnight = payment_data_fortnight.reset_index(drop=True)

pd.set_option('display.max_rows', 500)

payment_data_fortnight[['ATM Name', 'Transaction Date', 'isWeekday', 'isHoliday', 'isPaymentDay']].loc[
    (payment_data_fortnight['ATM Name'] == 'Airport ATM') &
    (pd.to_datetime(payment_data_fortnight['Transaction Date']).dt.day > 12) &
    (pd.to_datetime(payment_data_fortnight['Transaction Date']).dt.day < 16)
    ].head(10)


  payment_data_fortnight = payment_data_fortnight.groupby([payment_data_fortnight['Transaction Date'].dt.to_period('M'), 'ATM Name']).apply(calculate_payment_day)


Unnamed: 0,ATM Name,Transaction Date,isWeekday,isHoliday,isPaymentDay
12,Airport ATM,2020-01-13,True,True,False
13,Airport ATM,2020-01-14,True,True,False
14,Airport ATM,2020-01-15,True,False,True
166,Airport ATM,2020-02-13,True,False,True
167,Airport ATM,2020-02-14,True,True,False
168,Airport ATM,2020-02-15,False,False,False
306,Airport ATM,2020-03-13,True,False,True
307,Airport ATM,2020-03-14,False,False,False
308,Airport ATM,2020-03-15,False,False,False
460,Airport ATM,2020-04-13,True,False,False


## Calculando semana de pago

Asad et al. (2020) indica que la semana de pago también podría ser más precisa que un día en específico, como fue calculado anteriormente, por lo que para calcular la semana de pago, en base al último día del mes y quincena, se capturará los 3 días anteriores y los 3 días en adelante, todos ellos serán considerados como la semana de pago.

In [7]:
data[['ATM Name', 'Transaction Date']]

Unnamed: 0,ATM Name,Transaction Date
0,Big Street ATM,2020-01-01
1,Mount Road ATM,2020-01-01
2,Airport ATM,2020-01-01
3,KK Nagar ATM,2020-01-01
4,Christ College ATM,2020-01-01
...,...,...
7929,Big Street ATM,2024-09-29
7930,Mount Road ATM,2024-09-29
7931,Airport ATM,2024-09-29
7932,KK Nagar ATM,2024-09-29


In [8]:
payment_data_week = data.copy(deep=True)

# Convertimos 'Transaction Date' a datetime si no lo es
payment_data_week['Transaction Date'] = pd.to_datetime(payment_data_week['Transaction Date'])

# Función para calcular si una fecha cae dentro de la ventana de fin de mes o quincena
def is_payweek(transaction_date):
    # Calculamos el fin del mes de la fecha actual, independientemente del día
    start_of_month =  pd.Timestamp(transaction_date.year, transaction_date.month, 1) + pd.offsets.MonthBegin(0)
    end_of_month = pd.Timestamp(transaction_date.year, transaction_date.month, 1) + pd.offsets.MonthEnd(0)
    mid_month = transaction_date.replace(day=15)
    
    # Rango de 3 días antes y 3 días después para quincena y fin de mes, incluyendo días del mes siguiente
    payweek_days_start = pd.date_range(start=start_of_month, periods=3)
    payweek_days_end = pd.date_range(end=end_of_month, periods=3)
    payweek_days_mid = pd.date_range(mid_month - pd.DateOffset(days=3), mid_month + pd.DateOffset(days=3), freq='D')

    # Verificamos si la fecha de transacción está en cualquiera de los dos rangos
    return transaction_date in payweek_days_end or transaction_date in payweek_days_mid or transaction_date in payweek_days_start


# Creamos la nueva columna 'isPayweek' usando la función
payment_data_week['isPayweek'] = payment_data_week['Transaction Date'].apply(is_payweek)

payment_data_week[['ATM Name', 'Transaction Date', 'isWeekday', 'isHoliday', 'isPayweek']].loc[
    (payment_data_week['ATM Name'] == 'Airport ATM')
    ].head(10)


Unnamed: 0,ATM Name,Transaction Date,isWeekday,isHoliday,isPayweek
2,Airport ATM,2020-01-01,True,True,True
7,Airport ATM,2020-01-02,True,False,True
12,Airport ATM,2020-01-03,True,False,True
17,Airport ATM,2020-01-04,False,False,False
22,Airport ATM,2020-01-05,False,False,False
27,Airport ATM,2020-01-06,True,False,False
32,Airport ATM,2020-01-07,True,False,False
37,Airport ATM,2020-01-08,True,False,False
42,Airport ATM,2020-01-09,True,False,False
47,Airport ATM,2020-01-10,True,False,False


## Combinando Pago a fin de mes, quincena y semana de pago

In [9]:
# Pago a fin de mes y quincena
temp_payment_data_end_month = payment_data_end_month['isPaymentDay']
temp_payment_data_fortnight = payment_data_fortnight['isPaymentDay']

agg_data = data.copy(deep = True)
agg_data['isPaymentDay'] = temp_payment_data_end_month | temp_payment_data_fortnight
agg_data[['ATM Name', 'isWeekday', 'isHoliday', 'Transaction Date', 'isPaymentDay']]

Unnamed: 0,ATM Name,isWeekday,isHoliday,Transaction Date,isPaymentDay
0,Big Street ATM,True,True,2020-01-01,False
1,Mount Road ATM,True,True,2020-01-01,False
2,Airport ATM,True,True,2020-01-01,False
3,KK Nagar ATM,True,True,2020-01-01,False
4,Christ College ATM,True,True,2020-01-01,False
...,...,...,...,...,...
7929,Big Street ATM,False,True,2024-09-29,False
7930,Mount Road ATM,False,True,2024-09-29,False
7931,Airport ATM,False,True,2024-09-29,True
7932,KK Nagar ATM,False,True,2024-09-29,False


In [10]:
# Agregando semana de pago
agg_data['isPayweek'] = payment_data_week['isPayweek']
agg_data[['ATM Name', 'isWeekday', 'isHoliday', 'Transaction Date', 'isPaymentDay', 'isPayweek']]

Unnamed: 0,ATM Name,isWeekday,isHoliday,Transaction Date,isPaymentDay,isPayweek
0,Big Street ATM,True,True,2020-01-01,False,True
1,Mount Road ATM,True,True,2020-01-01,False,True
2,Airport ATM,True,True,2020-01-01,False,True
3,KK Nagar ATM,True,True,2020-01-01,False,True
4,Christ College ATM,True,True,2020-01-01,False,True
...,...,...,...,...,...,...
7929,Big Street ATM,False,True,2024-09-29,False,True
7930,Mount Road ATM,False,True,2024-09-29,False,True
7931,Airport ATM,False,True,2024-09-29,True,True
7932,KK Nagar ATM,False,True,2024-09-29,False,True


In [11]:
agg_data.to_csv('../data/aggregated_data_payment.csv', index=False)