In [12]:
import pandas as pd

# Load the data from CSV files
dim_data = pd.read_csv('dim.csv')
fact_data = pd.read_csv('fact.csv')

# Define the constants for transaction codes
PAYMENT_TYPE_CODE = 'OPTP0028'  # Code for payment transactions
SPENDING_TYPE_CODE = 'OPTP0000'  # Code for spending transactions

# Convert transaction time to datetime object for easier manipulation
fact_data['TXN_TM'] = pd.to_datetime(fact_data['TXN_TM'])

# Sort the transactions by time to ensure the order is correct for subsequent operations
fact_data_sorted = fact_data.sort_values(by='TXN_TM')

# Helper function to get the statement period based on a transaction date
def get_statement_period(txn_date):
    # Statement period starts on the 22nd of the previous month and ends on the 21st of the transaction month
    statement_start = txn_date.replace(day=22, hour=00, minute=00, second=00) - pd.DateOffset(months=1)
    statement_end = txn_date.replace(day=21, hour=00, minute=00, second=00)
    return statement_start, statement_end

# Helper function to calculate the due date of payment
def get_payment_due_date(txn_date):
    # Payment due date is the 5th of the month following the transaction month
    due_date = txn_date.replace(day=5) + pd.DateOffset(months=1)
    return due_date

# Add a column for the statement period start and end, and the payment due date to the payments dataframe
payments = fact_data_sorted[fact_data_sorted['TML_WEB_AP_NM'] == PAYMENT_TYPE_CODE].copy()
payments[['STATEMENT_START', 'STATEMENT_END']] = payments['TXN_TM'].apply(
    lambda x: pd.Series(get_statement_period(x))
)
payments['PAYMENT_DUE_DATE'] = payments['TXN_TM'].apply(get_payment_due_date)


In [13]:
payments

Unnamed: 0,CIF hash,CARD_NBR hash,TXN_TM,TML_WEB_AP_NM,DSC,NET_CASH_FLOW_AMT_LCY,TXN_SRC_ID,CARD_CLASSCIFICATION,STATEMENT_START,STATEMENT_END,PAYMENT_DUE_DATE
6379,815e4858d422f45f27ff703fce8acfed,af446dd0dce35b7c0d687b32466726c89239b37dd8fcf9...,2022-05-31 21:23:35,OPTP0028,TT QUA TPBANK EBANKING 44 LE NGOC HAN HA NOI V...,1520565.0,272976284.0,VC,2022-04-22,2022-05-21,2022-06-05 21:23:35
18669,73842a366de67e8d76320590e6a6ced8,5001c0d5c425bdfdcd5108671045068a43e3012dc4faf7...,2022-06-01 12:54:21,OPTP0028,TT QUA TPBANK EBANKING 44 LE NGOC HAN HA NOI V...,3500000.0,273092292.0,VC,2022-05-22,2022-06-21,2022-07-05 12:54:21
623,0828e14ddfd5dcfe9b2fc7a54eeba5f8,f0b80252f29a51ae72a1e4a259ca6857824f00e62cc868...,2022-06-06 15:57:12,OPTP0028,TT QUA TPBANK EBANKING 44 LE NGOC HAN HA NOI V...,143912.0,274393715.0,VC,2022-05-22,2022-06-21,2022-07-05 15:57:12
478,815e4858d422f45f27ff703fce8acfed,af446dd0dce35b7c0d687b32466726c89239b37dd8fcf9...,2022-06-17 09:54:55,OPTP0028,TT QUA TPBANK EBANKING 44 LE NGOC HAN HA NOI V...,737193.0,277175045.0,VC,2022-05-22,2022-06-21,2022-07-05 09:54:55
163,ae8bbf4b121fef182aae98c767899e83,582e14d16ee8c857a5df5d450fe3dcdb189bf214c4e0bd...,2022-06-18 12:34:21,OPTP0028,TT QUA TPBANK EBANKING 44 LE NGOC HAN HA NOI V...,1000000.0,277483815.0,VC,2022-05-22,2022-06-21,2022-07-05 12:34:21
...,...,...,...,...,...,...,...,...,...,...,...
4570,4245fc82e1c81cecfc821752609a8289,7bc66484f2e2db4c181a076a11450b7d0359478c57dd21...,2023-08-21 11:36:58,OPTP0028,TT QUA TPBANK EBANKING 44 LE NGOC HAN HA NOI V...,492411.0,410460621.0,VC,2023-07-22,2023-08-21,2023-09-05 11:36:58
19531,26345247ea23ba0d58eba8853a75658d,4db6066839d92a38d7a82a66516964ceecc86581318339...,2023-08-21 18:46:23,OPTP0028,TT QUA TPBANK EBANKING 44 LE NGOC HAN HA NOI V...,2000000.0,410653981.0,VC,2023-07-22,2023-08-21,2023-09-05 18:46:23
9036,626ef5a51add74b6f85c885fc3063527,e2b6431a93ca2afde0c086e989446eb9db12b42673fe0c...,2023-08-21 20:00:50,OPTP0028,TT QUA TPBANK EBANKING 44 LE NGOC HAN HA NOI V...,291000.0,410687567.0,VC,2023-07-22,2023-08-21,2023-09-05 20:00:50
18488,badd4c90538da80a6d854ae5db51e292,2590c054745a1a0676d2f60b3145622445f49c97994548...,2023-08-21 20:13:44,OPTP0028,TT QUA TPBANK EBANKING 44 LE NGOC HAN HA NOI V...,1512439.0,410693054.0,VC,2023-07-22,2023-08-21,2023-09-05 20:13:44


In [22]:
# Calculate the statement balance for each customer-card-statement period
statement_balances = payments.groupby(['CIF hash', 'CARD_NBR hash', 'STATEMENT_START', 'STATEMENT_END'])
statement_balances = statement_balances['NET_CASH_FLOW_AMT_LCY'].sum().reset_index(name='STATEMENT_BALANCE')

# Merge the statement balances with the credit limit information
# statement_balances = statement_balances.merge(dim_data, on=['CIF hash', 'CARD_NBR hash'])

In [23]:
statement_balances

Unnamed: 0,CIF hash,CARD_NBR hash,STATEMENT_START,STATEMENT_END,STATEMENT_BALANCE
0,0340ab3837f34a1aa87d5b5a8a25a07e,8a310933f4b7f835b19a0a8a2ab43ef865e49111d3fb0a...,2022-10-22,2022-11-21,16060000.0
1,0340ab3837f34a1aa87d5b5a8a25a07e,8a310933f4b7f835b19a0a8a2ab43ef865e49111d3fb0a...,2022-12-22,2023-01-21,17000000.0
2,0340ab3837f34a1aa87d5b5a8a25a07e,8a310933f4b7f835b19a0a8a2ab43ef865e49111d3fb0a...,2023-01-22,2023-02-21,17000000.0
3,0340ab3837f34a1aa87d5b5a8a25a07e,8a310933f4b7f835b19a0a8a2ab43ef865e49111d3fb0a...,2023-03-22,2023-04-21,16995716.0
4,0340ab3837f34a1aa87d5b5a8a25a07e,8a310933f4b7f835b19a0a8a2ab43ef865e49111d3fb0a...,2023-04-22,2023-05-21,17000000.0
...,...,...,...,...,...
676,fabbc6f82889b39ea2529bcd3134ede2,a8ea8bc7dd239166636d3626765c69e5a056c57b0ffd1e...,2023-01-22,2023-02-21,4188933.0
677,fabbc6f82889b39ea2529bcd3134ede2,a8ea8bc7dd239166636d3626765c69e5a056c57b0ffd1e...,2023-03-22,2023-04-21,2253937.0
678,fabbc6f82889b39ea2529bcd3134ede2,a8ea8bc7dd239166636d3626765c69e5a056c57b0ffd1e...,2023-05-22,2023-06-21,4692171.0
679,fb57dffb7f96a780b33ba00ba0f2d8c3,a466359e5b3742757bbbf27f9e33386f0f0c746fd6bdc8...,2023-06-22,2023-07-21,13740851.0


In [19]:
statement_balances = payments.groupby(['CIF hash', 'CARD_NBR hash', 'STATEMENT_START', 'STATEMENT_END'])
statement_balances.head(5)

Unnamed: 0,CIF hash,CARD_NBR hash,TXN_TM,TML_WEB_AP_NM,DSC,NET_CASH_FLOW_AMT_LCY,TXN_SRC_ID,CARD_CLASSCIFICATION,STATEMENT_START,STATEMENT_END,PAYMENT_DUE_DATE
6379,815e4858d422f45f27ff703fce8acfed,af446dd0dce35b7c0d687b32466726c89239b37dd8fcf9...,2022-05-31 21:23:35,OPTP0028,TT QUA TPBANK EBANKING 44 LE NGOC HAN HA NOI V...,1520565.0,272976284.0,VC,2022-04-22,2022-05-21,2022-06-05 21:23:35
18669,73842a366de67e8d76320590e6a6ced8,5001c0d5c425bdfdcd5108671045068a43e3012dc4faf7...,2022-06-01 12:54:21,OPTP0028,TT QUA TPBANK EBANKING 44 LE NGOC HAN HA NOI V...,3500000.0,273092292.0,VC,2022-05-22,2022-06-21,2022-07-05 12:54:21
623,0828e14ddfd5dcfe9b2fc7a54eeba5f8,f0b80252f29a51ae72a1e4a259ca6857824f00e62cc868...,2022-06-06 15:57:12,OPTP0028,TT QUA TPBANK EBANKING 44 LE NGOC HAN HA NOI V...,143912.0,274393715.0,VC,2022-05-22,2022-06-21,2022-07-05 15:57:12
478,815e4858d422f45f27ff703fce8acfed,af446dd0dce35b7c0d687b32466726c89239b37dd8fcf9...,2022-06-17 09:54:55,OPTP0028,TT QUA TPBANK EBANKING 44 LE NGOC HAN HA NOI V...,737193.0,277175045.0,VC,2022-05-22,2022-06-21,2022-07-05 09:54:55
163,ae8bbf4b121fef182aae98c767899e83,582e14d16ee8c857a5df5d450fe3dcdb189bf214c4e0bd...,2022-06-18 12:34:21,OPTP0028,TT QUA TPBANK EBANKING 44 LE NGOC HAN HA NOI V...,1000000.0,277483815.0,VC,2022-05-22,2022-06-21,2022-07-05 12:34:21
...,...,...,...,...,...,...,...,...,...,...,...
11378,815e4858d422f45f27ff703fce8acfed,af446dd0dce35b7c0d687b32466726c89239b37dd8fcf9...,2023-08-21 10:51:24,OPTP0028,TT QUA TPBANK EBANKING 44 LE NGOC HAN HA NOI V...,1000000.0,410442059.0,VC,2023-07-22,2023-08-21,2023-09-05 10:51:24
19531,26345247ea23ba0d58eba8853a75658d,4db6066839d92a38d7a82a66516964ceecc86581318339...,2023-08-21 18:46:23,OPTP0028,TT QUA TPBANK EBANKING 44 LE NGOC HAN HA NOI V...,2000000.0,410653981.0,VC,2023-07-22,2023-08-21,2023-09-05 18:46:23
9036,626ef5a51add74b6f85c885fc3063527,e2b6431a93ca2afde0c086e989446eb9db12b42673fe0c...,2023-08-21 20:00:50,OPTP0028,TT QUA TPBANK EBANKING 44 LE NGOC HAN HA NOI V...,291000.0,410687567.0,VC,2023-07-22,2023-08-21,2023-09-05 20:00:50
18488,badd4c90538da80a6d854ae5db51e292,2590c054745a1a0676d2f60b3145622445f49c97994548...,2023-08-21 20:13:44,OPTP0028,TT QUA TPBANK EBANKING 44 LE NGOC HAN HA NOI V...,1512439.0,410693054.0,VC,2023-07-22,2023-08-21,2023-09-05 20:13:44
