In [73]:
import pandas as pd

# Load the data from CSV files
dim_data = pd.read_csv('dim.csv')
fact_data = pd.read_csv('fact.csv')

# Define the constants for transaction codes
PAYMENT_TYPE_CODE = 'OPTP0028'  # Code for payment transactions
SPENDING_TYPE_CODE = 'OPTP0000'  # Code for spending transactions

# Convert transaction time to datetime object for easier manipulation
fact_data['TXN_TM'] = pd.to_datetime(fact_data['TXN_TM'])

# Sort the transactions by time to ensure the order is correct for subsequent operations
fact_data_sorted = fact_data.sort_values(by='TXN_TM', ignore_index=True)

# Filter out invalid transaction
fact_data_sorted = fact_data_sorted.loc[~fact_data_sorted['TXN_SRC_ID'].isna()]

# Helper function to get the statement period based on a transaction date
def get_statement_period(txn_date):
    # Statement period starts on the 22nd of the previous month and ends on the 21st of the transaction month
    statement_start = txn_date.replace(day=22, hour=00, minute=00, second=00) - pd.DateOffset(months=1)
    statement_end = txn_date.replace(day=21, hour=00, minute=00, second=00)
    return statement_start, statement_end

# Helper function to calculate the due date of payment
def get_payment_due_date(txn_date):
    # Payment due date is the 5th of the month following the transaction month
    due_date = txn_date.replace(day=5, hour=00, minute=00, second=00) + pd.DateOffset(months=1)
    return due_date

# Add a column for the statement period start and end, and the payment due date to the payments dataframe
payments = fact_data_sorted[fact_data_sorted['TML_WEB_AP_NM'] == PAYMENT_TYPE_CODE].copy()
payments[['STATEMENT_START', 'STATEMENT_END']] = payments['TXN_TM'].apply(
    lambda x: pd.Series(get_statement_period(x))
)
payments['PAYMENT_DUE_DATE'] = payments['TXN_TM'].apply(get_payment_due_date)


In [36]:
payments.head(10)

Unnamed: 0,CIF hash,CARD_NBR hash,TXN_TM,TML_WEB_AP_NM,DSC,NET_CASH_FLOW_AMT_LCY,TXN_SRC_ID,CARD_CLASSCIFICATION,STATEMENT_START,STATEMENT_END,PAYMENT_DUE_DATE
88,815e4858d422f45f27ff703fce8acfed,af446dd0dce35b7c0d687b32466726c89239b37dd8fcf9...,2022-05-31 21:23:35,OPTP0028,TT QUA TPBANK EBANKING 44 LE NGOC HAN HA NOI V...,1520565.0,272976284.0,VC,2022-04-22,2022-05-21,2022-06-05
91,73842a366de67e8d76320590e6a6ced8,5001c0d5c425bdfdcd5108671045068a43e3012dc4faf7...,2022-06-01 12:54:21,OPTP0028,TT QUA TPBANK EBANKING 44 LE NGOC HAN HA NOI V...,3500000.0,273092292.0,VC,2022-05-22,2022-06-21,2022-07-05
118,0828e14ddfd5dcfe9b2fc7a54eeba5f8,f0b80252f29a51ae72a1e4a259ca6857824f00e62cc868...,2022-06-06 15:57:12,OPTP0028,TT QUA TPBANK EBANKING 44 LE NGOC HAN HA NOI V...,143912.0,274393715.0,VC,2022-05-22,2022-06-21,2022-07-05
286,815e4858d422f45f27ff703fce8acfed,af446dd0dce35b7c0d687b32466726c89239b37dd8fcf9...,2022-06-17 09:54:55,OPTP0028,TT QUA TPBANK EBANKING 44 LE NGOC HAN HA NOI V...,737193.0,277175045.0,VC,2022-05-22,2022-06-21,2022-07-05
306,ae8bbf4b121fef182aae98c767899e83,582e14d16ee8c857a5df5d450fe3dcdb189bf214c4e0bd...,2022-06-18 12:34:21,OPTP0028,TT QUA TPBANK EBANKING 44 LE NGOC HAN HA NOI V...,1000000.0,277483815.0,VC,2022-05-22,2022-06-21,2022-07-05
361,8005882b634ca522bf44b3e07d8752eb,419d53a19c0f2c9cca0cc4488430ac2469506cc1a9c429...,2022-06-21 19:01:07,OPTP0028,TT QUA TPBANK EBANKING 44 LE NGOC HAN HA NOI V...,1365940.0,278355313.0,VC,2022-05-22,2022-06-21,2022-07-05
491,ef6af2ae19ebeed14db05a8a262ac7de,eaf6483f18ac769868e8ee4361f7fdb47106f4a5c1db06...,2022-06-26 17:11:10,OPTP0028,TT QUA TPBANK EBANKING 44 LE NGOC HAN HA NOI V...,7097128.0,279678052.0,VC,2022-05-22,2022-06-21,2022-07-05
544,e64642c9ee2f4087a4297dbe2a3ce536,fa12763c857c9a7f96d93c7ee58509bdd007fe8d70d6c0...,2022-06-28 11:31:21,OPTP0028,TT QUA TPBANK EBANKING 44 LE NGOC HAN HA NOI V...,2100224.0,280127863.0,VC,2022-05-22,2022-06-21,2022-07-05
551,73842a366de67e8d76320590e6a6ced8,5001c0d5c425bdfdcd5108671045068a43e3012dc4faf7...,2022-06-28 14:29:07,OPTP0028,TT QUA TPBANK EBANKING 44 LE NGOC HAN HA NOI V...,9230113.0,280170464.0,VC,2022-05-22,2022-06-21,2022-07-05
574,73842a366de67e8d76320590e6a6ced8,5001c0d5c425bdfdcd5108671045068a43e3012dc4faf7...,2022-06-29 13:22:21,OPTP0028,TT QUA TPBANK EBANKING 44 LE NGOC HAN HA NOI V...,5111111.0,280419929.0,VC,2022-05-22,2022-06-21,2022-07-05


In [74]:
# Calculate the statement balance for each customer-card-statement period
statement_balances = payments.groupby(['CIF hash', 'CARD_NBR hash', 'STATEMENT_START', 'STATEMENT_END'])
statement_balances = statement_balances['NET_CASH_FLOW_AMT_LCY'].sum().reset_index(name='STATEMENT_BALANCE')
print(statement_balances.shape)
# Merge the statement balances with the credit limit information
statement_balances = statement_balances.merge(dim_data, on=['CIF hash', 'CARD_NBR hash'])
print(statement_balances.shape)

(681, 5)
(681, 6)


In [38]:
dim_data.shape

(110, 3)

In [79]:
statement_balances

Unnamed: 0,CIF hash,CARD_NBR hash,STATEMENT_START,STATEMENT_END,STATEMENT_BALANCE,CREDIT_LIMIT
0,0340ab3837f34a1aa87d5b5a8a25a07e,8a310933f4b7f835b19a0a8a2ab43ef865e49111d3fb0a...,2022-10-22,2022-11-21,16060000.0,17000000
1,0340ab3837f34a1aa87d5b5a8a25a07e,8a310933f4b7f835b19a0a8a2ab43ef865e49111d3fb0a...,2022-12-22,2023-01-21,17000000.0,17000000
2,0340ab3837f34a1aa87d5b5a8a25a07e,8a310933f4b7f835b19a0a8a2ab43ef865e49111d3fb0a...,2023-01-22,2023-02-21,17000000.0,17000000
3,0340ab3837f34a1aa87d5b5a8a25a07e,8a310933f4b7f835b19a0a8a2ab43ef865e49111d3fb0a...,2023-03-22,2023-04-21,16995716.0,17000000
4,0340ab3837f34a1aa87d5b5a8a25a07e,8a310933f4b7f835b19a0a8a2ab43ef865e49111d3fb0a...,2023-04-22,2023-05-21,17000000.0,17000000
...,...,...,...,...,...,...
676,fabbc6f82889b39ea2529bcd3134ede2,a8ea8bc7dd239166636d3626765c69e5a056c57b0ffd1e...,2023-01-22,2023-02-21,4188933.0,8000000
677,fabbc6f82889b39ea2529bcd3134ede2,a8ea8bc7dd239166636d3626765c69e5a056c57b0ffd1e...,2023-03-22,2023-04-21,2253937.0,8000000
678,fabbc6f82889b39ea2529bcd3134ede2,a8ea8bc7dd239166636d3626765c69e5a056c57b0ffd1e...,2023-05-22,2023-06-21,4692171.0,8000000
679,fb57dffb7f96a780b33ba00ba0f2d8c3,a466359e5b3742757bbbf27f9e33386f0f0c746fd6bdc8...,2023-06-22,2023-07-21,13740851.0,50000000


In [75]:
spendings = fact_data_sorted[fact_data_sorted['TML_WEB_AP_NM'] == SPENDING_TYPE_CODE].copy()
spendings[['STATEMENT_START', 'STATEMENT_END']] = spendings['TXN_TM'].apply(
    lambda x: pd.Series(get_statement_period(x))
)
spendings['PAYMENT_DUE_DATE'] = spendings['TXN_TM'].apply(get_payment_due_date)


In [77]:
spendings.loc[spendings['CIF hash']=='0340ab3837f34a1aa87d5b5a8a25a07e']

Unnamed: 0,CIF hash,CARD_NBR hash,TXN_TM,TML_WEB_AP_NM,DSC,NET_CASH_FLOW_AMT_LCY,TXN_SRC_ID,CARD_CLASSCIFICATION,STATEMENT_START,STATEMENT_END,PAYMENT_DUE_DATE
2589,0340ab3837f34a1aa87d5b5a8a25a07e,8a310933f4b7f835b19a0a8a2ab43ef865e49111d3fb0a...,2022-09-19 21:52:59,OPTP0000,CTYVIMO*HOKINHDO 0824853333 704,15728000.0,303376660.0,VC,2022-08-22,2022-09-21,2022-10-05
2627,0340ab3837f34a1aa87d5b5a8a25a07e,8a310933f4b7f835b19a0a8a2ab43ef865e49111d3fb0a...,2022-09-21 18:13:42,OPTP0000,Foody 19002042 704,1000.0,303934425.0,VC,2022-08-22,2022-09-21,2022-10-05
2628,0340ab3837f34a1aa87d5b5a8a25a07e,8a310933f4b7f835b19a0a8a2ab43ef865e49111d3fb0a...,2022-09-21 18:13:45,OPTP0000,Foody 19002042 704,1000.0,303934425.0,VC,2022-08-22,2022-09-21,2022-10-05
2629,0340ab3837f34a1aa87d5b5a8a25a07e,8a310933f4b7f835b19a0a8a2ab43ef865e49111d3fb0a...,2022-09-21 18:14:36,OPTP0000,Foody 19002042 704,78000.0,303934767.0,VC,2022-08-22,2022-09-21,2022-10-05
2970,0340ab3837f34a1aa87d5b5a8a25a07e,8a310933f4b7f835b19a0a8a2ab43ef865e49111d3fb0a...,2022-09-27 13:53:45,OPTP0000,Foody 19002042 704,253000.0,305578899.0,VC,2022-08-22,2022-09-21,2022-10-05
4993,0340ab3837f34a1aa87d5b5a8a25a07e,8a310933f4b7f835b19a0a8a2ab43ef865e49111d3fb0a...,2022-11-04 17:58:46,OPTP0000,TPBANK QPAY 57 LY THUONG KIET HA NOI VNM 704 0...,16000000.0,316781459.0,VC,2022-10-22,2022-11-21,2022-12-05
5339,0340ab3837f34a1aa87d5b5a8a25a07e,8a310933f4b7f835b19a0a8a2ab43ef865e49111d3fb0a...,2022-11-11 11:24:05,OPTP0000,Foody 19002042 704,103000.0,318841726.0,VC,2022-10-22,2022-11-21,2022-12-05
5651,0340ab3837f34a1aa87d5b5a8a25a07e,8a310933f4b7f835b19a0a8a2ab43ef865e49111d3fb0a...,2022-11-18 12:17:13,OPTP0000,Foody 19002042 704,183000.0,321000870.0,VC,2022-10-22,2022-11-21,2022-12-05
5724,0340ab3837f34a1aa87d5b5a8a25a07e,8a310933f4b7f835b19a0a8a2ab43ef865e49111d3fb0a...,2022-11-19 11:54:08,OPTP0000,Foody 19002042 704,57900.0,321311937.0,VC,2022-10-22,2022-11-21,2022-12-05
5725,0340ab3837f34a1aa87d5b5a8a25a07e,8a310933f4b7f835b19a0a8a2ab43ef865e49111d3fb0a...,2022-11-19 11:54:09,OPTP0000,Foody 19002042 704,57900.0,321311937.0,VC,2022-10-22,2022-11-21,2022-12-05


In [76]:
debt_balances = spendings.groupby(['CIF hash', 'CARD_NBR hash', 'STATEMENT_START', 'STATEMENT_END'])
debt_balances = debt_balances['NET_CASH_FLOW_AMT_LCY'].sum().reset_index(name='STATEMENT_BALANCE')
debt_balances = debt_balances.merge(dim_data, on=['CIF hash', 'CARD_NBR hash'])
print(debt_balances.shape)


(837, 6)


In [78]:
debt_balances

Unnamed: 0,CIF hash,CARD_NBR hash,STATEMENT_START,STATEMENT_END,STATEMENT_BALANCE,CREDIT_LIMIT
0,0340ab3837f34a1aa87d5b5a8a25a07e,8a310933f4b7f835b19a0a8a2ab43ef865e49111d3fb0a...,2022-08-22,2022-09-21,16061000.0,17000000
1,0340ab3837f34a1aa87d5b5a8a25a07e,8a310933f4b7f835b19a0a8a2ab43ef865e49111d3fb0a...,2022-10-22,2022-11-21,16884600.0,17000000
2,0340ab3837f34a1aa87d5b5a8a25a07e,8a310933f4b7f835b19a0a8a2ab43ef865e49111d3fb0a...,2022-11-22,2022-12-21,44000.0,17000000
3,0340ab3837f34a1aa87d5b5a8a25a07e,8a310933f4b7f835b19a0a8a2ab43ef865e49111d3fb0a...,2022-12-22,2023-01-21,17000000.0,17000000
4,0340ab3837f34a1aa87d5b5a8a25a07e,8a310933f4b7f835b19a0a8a2ab43ef865e49111d3fb0a...,2023-01-22,2023-02-21,17000000.0,17000000
...,...,...,...,...,...,...
832,fabbc6f82889b39ea2529bcd3134ede2,a8ea8bc7dd239166636d3626765c69e5a056c57b0ffd1e...,2023-06-22,2023-07-21,1090000.0,8000000
833,fabbc6f82889b39ea2529bcd3134ede2,a8ea8bc7dd239166636d3626765c69e5a056c57b0ffd1e...,2023-07-22,2023-08-21,338800.0,8000000
834,fb57dffb7f96a780b33ba00ba0f2d8c3,a466359e5b3742757bbbf27f9e33386f0f0c746fd6bdc8...,2023-05-22,2023-06-21,2967122.0,50000000
835,fb57dffb7f96a780b33ba00ba0f2d8c3,a466359e5b3742757bbbf27f9e33386f0f0c746fd6bdc8...,2023-06-22,2023-07-21,18048132.0,50000000


In [72]:
statement_balances.merge(debt_balances)

Unnamed: 0,CIF hash,CARD_NBR hash,TXN_TM,TML_WEB_AP_NM,DSC,NET_CASH_FLOW_AMT_LCY,TXN_SRC_ID,CARD_CLASSCIFICATION,STATEMENT_START,STATEMENT_END,PAYMENT_DUE_DATE
11,d46f9418182bc00b4d12d4eee5058c41,e3ec0045c51675ba5bd927204be7a68ae0e7dbb38d9493...,2022-05-22 02:54:08,OPTP0000,"TKTT MOBILE BANKING 57 LY THUONG KIET, HOAN KI...",200000.0,270573626.0,VC,2022-04-22,2022-05-21,2022-06-05
12,0deb82c7391489aa02c771fec3b67f0f,41874f1bdd5736ee9e3c884326e3137839484f3ab3a86b...,2022-05-22 08:45:26,OPTP0000,MOMO VN 1900545441 704,512200.0,270596125.0,VC,2022-04-22,2022-05-21,2022-06-05
13,0deb82c7391489aa02c771fec3b67f0f,41874f1bdd5736ee9e3c884326e3137839484f3ab3a86b...,2022-05-22 08:51:55,OPTP0000,MOMO VN 1900545441 704,512200.0,270597152.0,VC,2022-04-22,2022-05-21,2022-06-05
14,0deb82c7391489aa02c771fec3b67f0f,41874f1bdd5736ee9e3c884326e3137839484f3ab3a86b...,2022-05-22 08:56:27,OPTP0000,MOMO VN 1900545441 704,308200.0,270597964.0,VC,2022-04-22,2022-05-21,2022-06-05
15,4f5a39e1204e049da7edf68b8f5f3414,27b838994beccba6db7ddc83a6b75f97c0a8bcadb8f815...,2022-05-22 09:08:59,OPTP0000,TIKI 842873058454 704,610000.0,270600572.0,VC,2022-04-22,2022-05-21,2022-06-05
...,...,...,...,...,...,...,...,...,...,...,...
19555,8b0cd88c599475377a006f9535233825,fddd87622ba07539c2b1a1689ce4fa67725ceda0e70fdb...,2023-08-21 22:48:15,OPTP0000,THE GIOI DI DONG 0838125960 704,30000.0,410757699.0,VC,2023-07-22,2023-08-21,2023-09-05
19556,8b0cd88c599475377a006f9535233825,fddd87622ba07539c2b1a1689ce4fa67725ceda0e70fdb...,2023-08-21 23:03:08,OPTP0000,THE GIOI DI DONG 0838125960 704,30000.0,410760486.0,VC,2023-07-22,2023-08-21,2023-09-05
19558,626ef5a51add74b6f85c885fc3063527,e2b6431a93ca2afde0c086e989446eb9db12b42673fe0c...,2023-08-21 23:25:09,OPTP0000,"TKTT MOBILE BANKING 57 LY THUONG KIET, HOAN KI...",136850.0,410770114.0,VC,2023-07-22,2023-08-21,2023-09-05
19559,626ef5a51add74b6f85c885fc3063527,e2b6431a93ca2afde0c086e989446eb9db12b42673fe0c...,2023-08-21 23:26:21,OPTP0000,MPOS*HKDTOPTEPMART Q BA DINH 704,3070000.0,410770270.0,VC,2023-07-22,2023-08-21,2023-09-05
