**Init**

In [None]:
# Mount Google Drive at /content/drive; the parquet inputs read below live under
# /content/drive/MyDrive. google.colab is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
from pathlib import Path
import os
import datetime
import time
import warnings
from sklearn.preprocessing import LabelEncoder
warnings.filterwarnings('ignore')

**Read Data**

In [None]:

# Load the merged transaction table from Drive and preview its first rows.
merged_data_path = '/content/drive/MyDrive/merged_data.parquet'
df1 = pd.read_parquet(merged_data_path)
df1.head()


Unnamed: 0,CUST_CUSTNO,ACC_BUSINESSTYPE,ACC_ACCNO,ACC_BUSINESSNO,ACC_CURRENCYISO,BUSINESSNO_TRANS,FK_CURRENCY,AMOUNT,AMOUNTORIG,BRANCH_OFFICE,...,REASON4,TR_FLAG_01,ANALYTICAL_TRANS_CODE,TR_SP_01,TR_SP_02,TR_SP_03,TR_SP_04,TR_SP_05,TR_SP_10,VALUEDATE
0,1655bf1b772d90df34c51ba9240f683d3847700b330295...,CASA,0c643de92622a5f3bdb856d4471510b234be53fb0ab018...,0553d33b8bbc8e3a944ff438a51a17cfb38fd0241667af...,VND,2023120391980769,VND,-650.0,-650.0,110000,...,,N,1|2,e4180c1cc088ebea7f7a882144f55159a8dddad215334f...,MOB,HOI SO SHB,FP,,,2023-12-03
1,1655bf1b772d90df34c51ba9240f683d3847700b330295...,CASA,0c643de92622a5f3bdb856d4471510b234be53fb0ab018...,0553d33b8bbc8e3a944ff438a51a17cfb38fd0241667af...,VND,2023120391467506,VND,-34.0,-34.0,110000,...,,N,1|2,b403758c4a7b9fbc934464efaaaab53c7e24f0b68891f7...,MOB,HOI SO SHB,FP,,,2023-12-03
2,1b781f92156f274627a131e707e7a890548aea921b84f5...,CASA,df774ebe64129d3a6cf3af75e7de8d01ca85e92b2f2c1c...,0553d33b8bbc8e3a944ff438a51a17cfb38fd0241667af...,VND,2023120391730083,VND,120000.0,120000.0,110000,...,,Y,1|1,22527ba60412b7170a3dfcd831805f966b244b354d1607...,NET,HOI SO SHB,FP,,,2023-12-03
3,53048c1afff7f2cff6e1469fd1659f99ec93696df7a3ce...,CASA,2715ec2c8fc66cc32a6accc328d63843fb0dcd5565cd59...,0553d33b8bbc8e3a944ff438a51a17cfb38fd0241667af...,VND,2023120391677652,VND,-62.0,-62.0,110000,...,,N,1|2,fb05380821892df14964a30cbe3fd795e0cb225c8da13b...,MOB,HOI SO SHB,FP,,,2023-12-03
4,5abad7746ef69e998247ba7848ca6224dd106e30018237...,CASA,a5dc6ed17504c973d61d1ea548eb1945ae698847f3cb5c...,0553d33b8bbc8e3a944ff438a51a17cfb38fd0241667af...,VND,2023120391519660,VND,-1500.0,-1500.0,110000,...,,N,1|2,54ff7b7a4f04b0a6e7de5abd7a4473ec7831e5579431a3...,MOB,HOI SO SHB,FP,,,2023-12-03


In [None]:
# Rule definition: names of the antecedent and consequent columns
antecedents = {"ACC_BUSINESSTYPE"}
consequents = {
    "TR_SP_02",
    "AMOUNTORIG",
    "REASON1",
    "AMOUNT",
    "ACC_CURRENCYISO",
}

# All columns taking part in the rule, as a list usable for column selection
columns_to_show = [*(antecedents | consequents)]

# Context columns shown next to the rule columns
additional_columns = ["VALUEDATE", "CUST_CUSTNO"]


**Validate the identified rules against criteria**

In [None]:

# Combine antecedents, consequents, and additional columns into one list for column selection
columns_to_show = list(antecedents | consequents) + additional_columns

# Fail loudly when any selected column is absent. Merely printing a message would leave
# consequent_examples_head undefined (or stale from an earlier run), so the following
# cells would either raise NameError or silently reuse old data. The additional columns
# are checked too, because drop_duplicates and the final selection both need them.
missing_columns = [col for col in columns_to_show if col not in df1.columns]
if missing_columns:
    raise KeyError(
        f"Some antecedents, consequents or additional columns are not present in the DataFrame columns: {missing_columns}"
    )

# Rows in which every antecedent column is populated
antecedent_examples = df1[df1[list(antecedents)].notnull().all(axis=1)]

# Of those, rows in which every consequent column is populated as well
consequent_examples = antecedent_examples[antecedent_examples[list(consequents)].notnull().all(axis=1)]

# Keep the first occurrence of each distinct combination of the selected columns
# (duplicates are judged on columns_to_show only, not on every column of df1)
consequent_examples_unique = consequent_examples.drop_duplicates(subset=columns_to_show)

# Narrow to the selected columns. Despite the name, this is the whole de-duplicated
# frame, not just its head.
consequent_examples_head = consequent_examples_unique[columns_to_show]

In [None]:
# Display the de-duplicated rule examples restricted to columns_to_show.
consequent_examples_head

Unnamed: 0,AMOUNTORIG,TR_SP_02,AMOUNT,REASON1,ACC_BUSINESSTYPE,ACC_CURRENCYISO,VALUEDATE,CUST_CUSTNO
0,-650.00,MOB,-650.00,X ngay don dep XXh XXp,CASA,VND,2023-12-03,1655bf1b772d90df34c51ba9240f683d3847700b330295...
1,-34.00,MOB,-34.00,xit kinh khan lau kinh,CASA,VND,2023-12-03,1655bf1b772d90df34c51ba9240f683d3847700b330295...
2,120000.00,NET,120000.00,Co mai chuyen(GD#XXXXXX),CASA,VND,2023-12-03,1b781f92156f274627a131e707e7a890548aea921b84f5...
3,-62.00,MOB,-62.00,CHUYEN KHOAN TA CONG BANG,CASA,VND,2023-12-03,53048c1afff7f2cff6e1469fd1659f99ec93696df7a3ce...
4,-1500.00,MOB,-1500.00,tam ung XXX,CASA,VND,2023-12-03,5abad7746ef69e998247ba7848ca6224dd106e30018237...
...,...,...,...,...,...,...,...,...
12306256,-90884.71,BRC,-90884.71,tdXXXpt: Debit to PRIN acco,TIDE,VND,2024-01-18,3cc57965cd0814da93ad54bcebb030a1535daeeabb57f6...
12306257,90884.71,BRC,90884.71,Deposit Booking TD-M? tai k,TIDE,VND,2024-01-18,3cc57965cd0814da93ad54bcebb030a1535daeeabb57f6...
12306258,-90000.00,BRC,-90000.00,Liquidation Principal Payme,TIDE,VND,2024-01-18,3cc57965cd0814da93ad54bcebb030a1535daeeabb57f6...
12306259,-0.04,BRC,-0.04,Application Entry Accrual A,TIDE,VND,2024-01-18,3cc57965cd0814da93ad54bcebb030a1535daeeabb57f6...


In [None]:
# Order the rule examples from the largest to the smallest AMOUNT
consequent_examples_sorted = consequent_examples_head.sort_values(by='AMOUNT', ascending=False)

# Restrict to the transactions whose VALUEDATE is 2023-12-05
on_target_date = consequent_examples_sorted['VALUEDATE'].eq('2023-12-05')
consequent_examples_filtered = consequent_examples_sorted.loc[on_target_date]

# One row per CUST_CUSTNO: the first row is kept, which after the descending sort
# is that customer's largest AMOUNT on the selected date
unique_cust_custno = consequent_examples_filtered.drop_duplicates(subset=['CUST_CUSTNO'])
unique_cust_custno.head(10)

Unnamed: 0,AMOUNTORIG,TR_SP_02,AMOUNT,REASON1,ACC_BUSINESSTYPE,ACC_CURRENCYISO,VALUEDATE,CUST_CUSTNO
1686369,2500000000.0,BRC,2500000000.0,NGAVTX/DUONGNH - Y/C BAN VO,CASA,VND,2023-12-05,4c9a21532c77256b35197dc53db54243139fbc39261024...
1797000,2000000000.0,BRC,2000000000.0,CT KIM LONG VIET CK CT AN H,CASA,VND,2023-12-05,84da7ef354174252021e10431da64dae7b513b660a0bcb...
2310452,2000000000.0,BRC,2000000000.0,XXXXXXXXXXXXXXXX,LOAN,VND,2023-12-05,f82a9d25673478a95ae762bb6ee313385eb3c2da73da25...
2184980,2000000000.0,BRC,2000000000.0,- GIAI NGAN XXXXXXXXXXXXXXX,CASA,VND,2023-12-05,6567a448ec70441baea13fb4f8be538144ef16cb92fabd...
1721116,985000000.0,BRC,985000000.0,CT THANH HIEP PHAT CT CTY P,CASA,VND,2023-12-05,7af6d692c1b382f16bb13079687250a8e6a4980fb02b34...
2064220,985000000.0,BRC,985000000.0,- GIAI NGAN XXXXXXXXXXXXXXX,CASA,VND,2023-12-05,2befae32918c4416d9e4b018c10660d53ffddbba2129a3...
2312155,900000000.0,BRC,900000000.0,XXXXXXXXXXXXXXXX,LOAN,VND,2023-12-05,58dc7d3b59af303ad55105acd45068c8d73e3e3300f76e...
1549443,36000000.0,BRC,873360000.0,MM VIETINBANK XXXXX,CASA,USD,2023-12-05,3445fd72e0c2224374abdce982eeb1faa3430b6a7812d6...
2313313,650000000.0,BRC,650000000.0,|XXXXXXXXXX|XXX|XXX|VND|XXX,MMMM,VND,2023-12-05,86130ba867ffe0bc1d4e7fed99722b835b5fdbfc52dd92...
1742573,600000000.0,BRC,600000000.0,SHB CHUYEN TIEN THEO HD MM,CASA,VND,2023-12-05,6d681af72190882763ad5115560d38bb219881aa0c68f6...


In [None]:
# Load the CASE_MATCH_RULE table from Drive; later cells use its VALUEDATE and CUST_CUSTNO columns.
# NOTE(review): presumably the list of customers already flagged per date — confirm with the data owner.
case_match_rule = pd.read_parquet('/content/drive/MyDrive/CASE_MATCH_RULE.parquet')

In [None]:
# CUST_CUSTNO values of the case_match_rule rows whose VALUEDATE equals the integer 20231206
is_case_date = case_match_rule['VALUEDATE'] == 20231206
case_match_rule.loc[is_case_date, 'CUST_CUSTNO']


0     2364153489c634ae8a1de3d487cc4c0f2891b6123e730d...
1     a4cb361bfe2ab09dbe7fc0c30b87d83a46eccd1c8423c2...
2     5c8728aa4043558a10865b6408aeeb0dfa1b6d79ceed74...
3     19e62bc66e3ffa95948d1946cf3545a28f44e3ad85d3b3...
4     674f16ed9ca84049f9125a8b57db84b2ee6ddfc03a9d21...
5     30913de2ecd92e973fb8ca0d914450e557490588fced2a...
6     be2f8dea44c54015f5ab428f8643d3027f20ff2a1759d1...
7     e68e5a2d4ea30c9e5557026f378ead5e7d607d5084f682...
8     b3817c6ea313efe316f26a706114cf80818d0fd79c3ee3...
9     b9efe736a3ef082f84caaa40b3b38ed50613c264814970...
10    ea3471277b141c10817f14aebfa8fa3fab2f2931b74499...
11    1a025a3ce4c294b9e050e0b8e81d15b0cfbb5c7bd6fd7c...
12    26da03eb4339f280dda355fde43db842ee402abfe6d49d...
Name: CUST_CUSTNO, dtype: object

**RULE1**

In [None]:
# Check which case_match_rule customers (VALUEDATE 20231206) are present in unique_cust_custno.
# NOTE(review): unique_cust_custno was filtered to VALUEDATE '2023-12-05' while the case list
# is taken for 20231206 — confirm that the one-day offset is intended.
rule_customers = set(unique_cust_custno['CUST_CUSTNO'])
case_customers = set(case_match_rule.loc[case_match_rule['VALUEDATE'] == 20231206, 'CUST_CUSTNO'])
matched_customers = rule_customers & case_customers

if not matched_customers:
    print("No CUST_CUSTNO are present in both unique_cust_custno and case_match_rule.")
else:
    print("The following CUST_CUSTNO are present in both unique_cust_custno and case_match_rule:")
    print(matched_customers)


The following CUST_CUSTNO are present in both unique_cust_custno and case_match_rule:
{'30913de2ecd92e973fb8ca0d914450e557490588fced2a42344aabe1905fa220', '2364153489c634ae8a1de3d487cc4c0f2891b6123e730dbea66b764be5769c69', 'b3817c6ea313efe316f26a706114cf80818d0fd79c3ee3fce9d4233739ebd5d5', '5c8728aa4043558a10865b6408aeeb0dfa1b6d79ceed74040a45eb675bc6aaa0', 'a4cb361bfe2ab09dbe7fc0c30b87d83a46eccd1c8423c2b7a1a251792f665c5b', '26da03eb4339f280dda355fde43db842ee402abfe6d49dc16c00891d44c70cbe', 'ea3471277b141c10817f14aebfa8fa3fab2f2931b74499b2f71e298769a2ae91', 'b9efe736a3ef082f84caaa40b3b38ed50613c264814970f97c591f30ac250974', 'be2f8dea44c54015f5ab428f8643d3027f20ff2a1759d1e9647b01aaaba6f130', 'e68e5a2d4ea30c9e5557026f378ead5e7d607d5084f682397f68a020ae3876f6', '674f16ed9ca84049f9125a8b57db84b2ee6ddfc03a9d218252ad68408dad05f8', '19e62bc66e3ffa95948d1946cf3545a28f44e3ad85d3b3e2d30738b1087e80dc', '1a025a3ce4c294b9e050e0b8e81d15b0cfbb5c7bd6fd7cf4a893f790c5df54b2'}


In [None]:


# Rows of unique_cust_custno that belong to one of the matched customers
is_matched = unique_cust_custno['CUST_CUSTNO'].isin(matched_customers)
matched_customers_df = unique_cust_custno.loc[is_matched]

matched_customers_df




Unnamed: 0,AMOUNTORIG,TR_SP_02,AMOUNT,REASON1,ACC_BUSINESSTYPE,ACC_CURRENCYISO,VALUEDATE,CUST_CUSTNO
2347947,417046.58,BRC,417046.58,Deposit Booking TD-M? tai k,TIDE,VND,2023-12-05,ea3471277b141c10817f14aebfa8fa3fab2f2931b74499...
1926883,249893.0,BRC,249893.0,TIEN CUA NGUYEN THI THANH T,CASA,VND,2023-12-05,2364153489c634ae8a1de3d487cc4c0f2891b6123e730d...
2315621,150000.0,BRC,150000.0,PHAN THI HUONG GUI TIET KI,TIDE,VND,2023-12-05,26da03eb4339f280dda355fde43db842ee402abfe6d49d...
2346957,120000.0,BRC,120000.0,NGUYEN THI PHUONG LIEN GUI,TIDE,VND,2023-12-05,1a025a3ce4c294b9e050e0b8e81d15b0cfbb5c7bd6fd7c...
2314324,104211.51,BRC,104211.51,Deposit Booking TD-M? tai k,TIDE,VND,2023-12-05,a4cb361bfe2ab09dbe7fc0c30b87d83a46eccd1c8423c2...
1872741,58100.0,NET,58100.0,DVXXXXX chup anh studio vip,CASA,VND,2023-12-05,674f16ed9ca84049f9125a8b57db84b2ee6ddfc03a9d21...
1677357,55000.0,NET,55000.0,Thanh toan QR HOANG CHI PHU,CASA,VND,2023-12-05,be2f8dea44c54015f5ab428f8643d3027f20ff2a1759d1...
1777129,5000.0,NET,5000.0,NGUYEN HOANG QUY chuyen kho,CASA,VND,2023-12-05,e68e5a2d4ea30c9e5557026f378ead5e7d607d5084f682...
2304820,640.0,NET,640.0,QR - chuyen tien(GD#XXXXXX),CASA,VND,2023-12-05,30913de2ecd92e973fb8ca0d914450e557490588fced2a...
1783398,640.0,NET,640.0,IBVCB.XXXXXXXXXX.XXXXXX.ckX,CASA,VND,2023-12-05,b3817c6ea313efe316f26a706114cf80818d0fd79c3ee3...


**RULE3**

In [None]:
# Rule definition: names of the antecedent and consequent columns.
# NOTE(review): AMOUNTORIG and ACC_BUSINESSTYPE appear on both sides of the rule — confirm this is intended.
antecedents = {
    "AMOUNTORIG",
    "VALUEDATE",
    "ACC_BUSINESSTYPE",
}
consequents = {
    "AMOUNT",
    "BUSINESSNO_TRANS",
    "ACC_CURRENCYISO",
    "AMOUNTORIG",
    "TR_SP_01",
    "ACC_BUSINESSTYPE",
}

In [None]:
# Combine antecedents and consequents into one list for column selection
columns_to_show = list(antecedents | consequents)

# Fail loudly when a rule column is absent. Skipping the block silently would leave
# antecedent_examples / consequent_examples / consequent_examples_unique bound to the
# values computed for the previous rule, and every later cell would run on the wrong frame.
missing_columns = [col for col in columns_to_show if col not in df1.columns]
if missing_columns:
    raise KeyError(f"Rule columns are not present in the DataFrame columns: {missing_columns}")

# Rows in which every antecedent column is populated
antecedent_examples = df1[df1[list(antecedents)].notnull().all(axis=1)]

# Of those, rows in which every consequent column is populated as well
consequent_examples = antecedent_examples[antecedent_examples[list(consequents)].notnull().all(axis=1)]

# Keep the first occurrence of each distinct combination of the rule columns.
# All df1 columns are retained here; later cells rely on columns outside columns_to_show.
consequent_examples_unique = consequent_examples.drop_duplicates(subset=columns_to_show)

In [None]:
# Display the de-duplicated rule examples restricted to the rule columns.
consequent_examples_unique[columns_to_show]

Unnamed: 0,AMOUNTORIG,BUSINESSNO_TRANS,VALUEDATE,ACC_CURRENCYISO,ACC_BUSINESSTYPE,TR_SP_01,AMOUNT
0,-650.00,2023120391980769,2023-12-03,VND,CASA,e4180c1cc088ebea7f7a882144f55159a8dddad215334f...,-650.00
1,-34.00,2023120391467506,2023-12-03,VND,CASA,b403758c4a7b9fbc934464efaaaab53c7e24f0b68891f7...,-34.00
2,120000.00,2023120391730083,2023-12-03,VND,CASA,22527ba60412b7170a3dfcd831805f966b244b354d1607...,120000.00
3,-62.00,2023120391677652,2023-12-03,VND,CASA,fb05380821892df14964a30cbe3fd795e0cb225c8da13b...,-62.00
4,-1500.00,2023120391519660,2023-12-03,VND,CASA,54ff7b7a4f04b0a6e7de5abd7a4473ec7831e5579431a3...,-1500.00
...,...,...,...,...,...,...,...
12306257,90884.71,2024011831142753,2024-01-18,VND,TIDE,c0f1dd790cf90366ba9557e61edc1a2ab19516b333f519...,90884.71
12306258,-90000.00,2024011831142237,2024-01-18,VND,TIDE,08346e77718dea7f1b8a19d74ef66c637c46217a1b902a...,-90000.00
12306259,-0.04,2024011831176514,2024-01-18,VND,TIDE,08346e77718dea7f1b8a19d74ef66c637c46217a1b902a...,-0.04
12306260,0.01,2024011831178989,2024-01-18,VND,TIDE,58671ba3498b8bbe7caf3270969015054d4b1f7fbce69c...,0.01


In [None]:
 # Convert VALUEDATE to datetime
# The column is overwritten in place on consequent_examples_unique; a datetime column is
# what the later resample(on='VALUEDATE') call operates on.
# NOTE(review): consequent_examples_unique was derived from df1 by filtering, so this
# assignment may emit SettingWithCopyWarning; warnings.filterwarnings('ignore') in the
# import cell would hide it. Verify the conversion took effect (dtype datetime64[ns]).
consequent_examples_unique['VALUEDATE'] = pd.to_datetime(consequent_examples_unique['VALUEDATE'])

In [None]:
# Inspect the converted column to confirm its dtype.
consequent_examples_unique['VALUEDATE']

0          2023-12-03
1          2023-12-03
2          2023-12-03
3          2023-12-03
4          2023-12-03
              ...    
12306257   2024-01-18
12306258   2024-01-18
12306259   2024-01-18
12306260   2024-01-18
12306261   2024-01-18
Name: VALUEDATE, Length: 12156187, dtype: datetime64[ns]

In [None]:

# Count rows per ACC_BUSINESSTYPE in consecutive 3-day bins of VALUEDATE.
# NOTE(review): the grouping key is the business type, not an individual account or
# customer — confirm this is the granularity the rule is meant to be checked at.
transactions_by_type = consequent_examples_unique.groupby('ACC_BUSINESSTYPE')
account_transaction_counts = (
    transactions_by_type
    .resample('3D', on='VALUEDATE')
    .size()
    .reset_index(name='transaction_count')
)

In [None]:
account_transaction_counts

Unnamed: 0,ACC_BUSINESSTYPE,VALUEDATE,transaction_count
0,CARD,2023-10-04,7555
1,CARD,2023-10-07,273
2,CARD,2023-10-10,0
3,CARD,2023-10-13,0
4,CARD,2023-10-16,0
...,...,...,...
211,TIDE,2024-01-05,0
212,TIDE,2024-01-08,0
213,TIDE,2024-01-11,0
214,TIDE,2024-01-14,107842


In [None]:
# Keep the (business type, 3-day bin) rows that hold 300 or more transactions
meets_threshold = account_transaction_counts['transaction_count'].ge(300)
high_transaction_accounts = account_transaction_counts.loc[meets_threshold]


In [None]:
# Display the (business type, 3-day bin) rows that met the 300-transaction threshold.
high_transaction_accounts

Unnamed: 0,ACC_BUSINESSTYPE,VALUEDATE,transaction_count
0,CARD,2023-10-04,7555
20,CARD,2023-12-03,7539
21,CARD,2023-12-06,1440
34,CARD,2024-01-14,2220
35,CARD,2024-01-17,3083
36,CASA,2023-10-04,2329541
37,CASA,2023-10-07,1215898
56,CASA,2023-12-03,2222015
57,CASA,2023-12-06,1529000
70,CASA,2024-01-14,2553200


In [None]:


# The three busiest (business type, 3-day bin) rows, busiest first
(
    high_transaction_accounts
    .sort_values(by='transaction_count', ascending=False)
    .head(3)
)


Unnamed: 0,ACC_BUSINESSTYPE,VALUEDATE,transaction_count
70,CASA,2024-01-14,2553200
36,CASA,2023-10-04,2329541
56,CASA,2023-12-03,2222015


In [None]:
# For every business type that appears in high_transaction_accounts, gather the list of
# BUSINESSNO_TRANS values of all its rows in consequent_examples_unique.
# NOTE(review): rows are selected by business type only, not by the 3-day bins that met
# the threshold, so each list covers the type's entire history — confirm this is intended.
high_volume_types = high_transaction_accounts['ACC_BUSINESSTYPE']
in_high_volume_type = consequent_examples_unique['ACC_BUSINESSTYPE'].isin(high_volume_types)
high_transaction_accounts_businessno = (
    consequent_examples_unique.loc[in_high_volume_type]
    .groupby('ACC_BUSINESSTYPE')['BUSINESSNO_TRANS']
    .apply(list)
    .reset_index(name='BUSINESSNO_TRANS')
)
high_transaction_accounts_businessno


Unnamed: 0,ACC_BUSINESSTYPE,BUSINESSNO_TRANS
0,CARD,"[2023120392081199, 2023120392081200, 202312039..."
1,CASA,"[2023120391980769, 2023120391467506, 202312039..."
2,LOAN,"[2023120392080607, 2023120392080590, 202312039..."
3,MMMM,"[2023120392081074, 2023120392080991, 202312039..."
4,TIDE,"[2023120392069331, 2023120392079690, 202312039..."


Error: Runtime no longer has a reference to this dataframe, please re-run this cell and try again.


In [None]:
 # Display  from consequent_examples_unique  unique CUST_CUSTNO for high transaction accounts

# Distinct CUST_CUSTNO values among the rows whose business type appears in high_transaction_accounts
type_is_high_volume = consequent_examples_unique['ACC_BUSINESSTYPE'].isin(high_transaction_accounts['ACC_BUSINESSTYPE'])
unique_cust_custno_high_transactions = consequent_examples_unique.loc[type_is_high_volume, 'CUST_CUSTNO'].unique()

print("Unique CUST_CUSTNO for high transaction accounts:")
unique_cust_custno_high_transactions


Unique CUST_CUSTNO for high transaction accounts:


<StringArray>
['1655bf1b772d90df34c51ba9240f683d3847700b330295a077d17e40491d1c28',
 '1b781f92156f274627a131e707e7a890548aea921b84f5ddec59322ebc61e86d',
 '53048c1afff7f2cff6e1469fd1659f99ec93696df7a3ce309aef278bfed89856',
 '5abad7746ef69e998247ba7848ca6224dd106e300182370090564f0cae691746',
 'bd9cdd7aec72f3ebbc490454b8c5d091a49e8e96fe4469f91233b7296e018ef7',
 'f8b0aee94f5aed9ffdc703e91f74898fe4aba2cfff5c444ace301849074c540b',
 '376c930170d7358228e8c449a5c03f45ef062ffc68f2bf5120a51343d6486a1e',
 '48271b5c6301ee628b43e19d684dacc20e7faad21bf89cea8f256c1419f9632a',
 'd8c66dd860da8f007bd693d2c46bd463d121e3631fb1fd3481f124047f53cff2',
 'b9d057750d1ffa2969f3a5b50de16e5a24604e2779009e3e575e3183189721b2',
 ...
 'c5a25940b20cb40f65de97686a6530c7557cfc368abf5ce47e1ce72f968e4095',
 'ec78abd0407155f3de726be0b921ce55bfe97c1b80d900e2f3b8aa00b16be748',
 'b87198be732a249cea6d97db44ca63f122ec2210858aaa2330112da83c0cd776',
 '76c9a174b1833566b0d4e9a498f74ae916fc64c02455dbbae698787a60b44bc2',
 'c3ec3b83e85b9

In [None]:

# Total number of rows per CUST_CUSTNO, restricted to the customers selected above
is_selected_customer = consequent_examples_unique['CUST_CUSTNO'].isin(unique_cust_custno_high_transactions)
transaction_count_by_customer = (
    consequent_examples_unique.loc[is_selected_customer]
    .groupby('CUST_CUSTNO')
    .size()
    .reset_index(name='transaction_count')
)

# Attach the VALUEDATE of every transaction to its customer's count
# (the result has one row per transaction, not one per customer)
customer_dates = consequent_examples_unique[['CUST_CUSTNO', 'VALUEDATE']]
transaction_count_by_customer_with_date = transaction_count_by_customer.merge(customer_dates, on='CUST_CUSTNO')

# Keep customers whose total count is 300 or more.
# NOTE(review): transaction_count covers all dates in the frame, not a 3-day window —
# confirm whether a per-window count was intended here.
has_many_transactions = transaction_count_by_customer_with_date['transaction_count'] >= 300
high_transaction_customers = transaction_count_by_customer_with_date.loc[has_many_transactions]

# Show CUST_CUSTNO, transaction_count and VALUEDATE for those customers
print("Unique CUST_CUSTNO for high transaction accounts with transaction count and VALUEDATE:")
high_transaction_customers


Unique CUST_CUSTNO for high transaction accounts with transaction count and VALUEDATE:


Unnamed: 0,CUST_CUSTNO,transaction_count,VALUEDATE
1189,000975b0ee367856502b174994023821120d5730b3fcd3...,320,2023-12-04
1190,000975b0ee367856502b174994023821120d5730b3fcd3...,320,2023-12-04
1191,000975b0ee367856502b174994023821120d5730b3fcd3...,320,2023-12-04
1192,000975b0ee367856502b174994023821120d5730b3fcd3...,320,2023-12-04
1193,000975b0ee367856502b174994023821120d5730b3fcd3...,320,2023-12-05
...,...,...,...
12154603,fff4e2d27dec94ba868aa82f51e97511392058b7efd6c7...,1389,2023-10-08
12154604,fff4e2d27dec94ba868aa82f51e97511392058b7efd6c7...,1389,2023-10-08
12154605,fff4e2d27dec94ba868aa82f51e97511392058b7efd6c7...,1389,2023-10-08
12154606,fff4e2d27dec94ba868aa82f51e97511392058b7efd6c7...,1389,2023-10-08


In [None]:
# prompt: from high_transaction_customers  print unique CUST_CUSTNO,ACC_BUSINESSTYPE,VALUEDATE
# Collapse fully identical rows, leaving one row per distinct
# (CUST_CUSTNO, transaction_count, VALUEDATE) combination.
# NOTE(review): high_transaction_customers is built without ACC_BUSINESSTYPE, so that
# column requested in the prompt is not part of the result.

high_transaction_customers.drop_duplicates()


Unnamed: 0,CUST_CUSTNO,transaction_count,VALUEDATE
1189,000975b0ee367856502b174994023821120d5730b3fcd3...,320,2023-12-04
1193,000975b0ee367856502b174994023821120d5730b3fcd3...,320,2023-12-05
1225,000975b0ee367856502b174994023821120d5730b3fcd3...,320,2023-12-06
1254,000975b0ee367856502b174994023821120d5730b3fcd3...,320,2023-12-07
1268,000975b0ee367856502b174994023821120d5730b3fcd3...,320,2023-10-04
...,...,...,...
12154136,fff4e2d27dec94ba868aa82f51e97511392058b7efd6c7...,1389,2023-10-05
12154257,fff4e2d27dec94ba868aa82f51e97511392058b7efd6c7...,1389,2023-10-06
12154379,fff4e2d27dec94ba868aa82f51e97511392058b7efd6c7...,1389,2023-10-07
12154523,fff4e2d27dec94ba868aa82f51e97511392058b7efd6c7...,1389,2023-10-08


In [None]:

# CUST_CUSTNO values of the case_match_rule rows whose VALUEDATE equals the integer 20240116
is_case_date = case_match_rule['VALUEDATE'] == 20240116
case_match_rule.loc[is_case_date, 'CUST_CUSTNO']


15    503072fa3980f522f81d5720f25142f2d2960b41643475...
16    a0e3688d92237235a399c098a48c13c0507c1b336d341c...
17    e17d2ae1ad00907c508e04d988904b89d62875e6bb841e...
18    617196c7b93c981791bfb6892e7a2bb63a240e75d1cdba...
19    024ef66ebd2fac218811de275110b6c793b776b34dd5a1...
20    4971502cb38fcc7ee07b17bf1c5772ac2fc144c6e5deac...
21    4572fd1b7a4f96c6c13882b898e91d519405ef748c5492...
22    ba0757b1ce434c24bfa044b53ce3fc6a9d50beb0c27318...
23    77b8bc8841aba5fb1e2c6cfb3aa8bceceb15b069cf40da...
24    4c7f66d9a38e746b89ddfde1a25f709940d3e72cf0aab4...
25    c84307d18ee26d92a41e172cedc5631a7821e711a5a199...
26    7c02571f395592ccd2f78f5bbca300cb3888370a83ca4e...
27    4054910a516adefb5cc4bafc34eac6ab77eda11e1474cb...
28    bc68f19f9dc7f0ca233743261ae1f9cb36082b56de5037...
29    48decbc75f75851069a5ae8f189a7a72af132d438512cc...
30    da5f804a88dc5c4e282d74f343ccaad88bf41da471bef7...
31    e238c07c52cf23f5aad8cf1bf3151f5c2d0eee74e5ed3d...
32    a7a63c851e93a1a423225d6509a3326e2f126c9ca7

In [None]:
# Check which case_match_rule customers (VALUEDATE 20240116) are present in
# unique_cust_custno_high_transactions
high_volume_customers = set(unique_cust_custno_high_transactions)
case_customers = set(case_match_rule.loc[case_match_rule['VALUEDATE'] == 20240116, 'CUST_CUSTNO'])
matched_customers_high_transactions = high_volume_customers & case_customers

if not matched_customers_high_transactions:
    print("No CUST_CUSTNO are present in both unique_cust_custno_high_transactions and case_match_rule for 20240116.")
else:
    print("The following CUST_CUSTNO are present in both unique_cust_custno_high_transactions and case_match_rule for 20240116:")
    print(matched_customers_high_transactions)


The following CUST_CUSTNO are present in both unique_cust_custno_high_transactions and case_match_rule for 20240116:
{'c84307d18ee26d92a41e172cedc5631a7821e711a5a19990b9011442f89ae39e', '4054910a516adefb5cc4bafc34eac6ab77eda11e1474cb3236bb129ee6e2fc83', 'e238c07c52cf23f5aad8cf1bf3151f5c2d0eee74e5ed3d87f6f14ac33436263e', '4c7f66d9a38e746b89ddfde1a25f709940d3e72cf0aab4d176b2c9be99f1103c', '503072fa3980f522f81d5720f25142f2d2960b41643475ee618a0ae9eac3f390', 'd6f0ee33b751b64709e9baf70aa51f52016a56333c779b2f9647cff832ecc873', '617196c7b93c981791bfb6892e7a2bb63a240e75d1cdba473cac0a4459494177', 'da5f804a88dc5c4e282d74f343ccaad88bf41da471bef7706c0c3484cd2d9d51', '77b8bc8841aba5fb1e2c6cfb3aa8bceceb15b069cf40da10ec0b42301e116e02', 'a7a63c851e93a1a423225d6509a3326e2f126c9ca77c0abcb5c007922f7ac877', '7c02571f395592ccd2f78f5bbca300cb3888370a83ca4e7a2c1c0223265d19f1', '48decbc75f75851069a5ae8f189a7a72af132d438512cc84ed6667c908773004', '4572fd1b7a4f96c6c13882b898e91d519405ef748c5492f8be1e3934ba5c6574'

In [None]:

# Display the set of customers found in both the high-transaction list and the 20240116 case list.
matched_customers_high_transactions

{'024ef66ebd2fac218811de275110b6c793b776b34dd5a1fcb898819c9cd0ba47',
 '4054910a516adefb5cc4bafc34eac6ab77eda11e1474cb3236bb129ee6e2fc83',
 '4572fd1b7a4f96c6c13882b898e91d519405ef748c5492f8be1e3934ba5c6574',
 '48decbc75f75851069a5ae8f189a7a72af132d438512cc84ed6667c908773004',
 '4971502cb38fcc7ee07b17bf1c5772ac2fc144c6e5deacd6e82fd820f91818bd',
 '4c7f66d9a38e746b89ddfde1a25f709940d3e72cf0aab4d176b2c9be99f1103c',
 '503072fa3980f522f81d5720f25142f2d2960b41643475ee618a0ae9eac3f390',
 '617196c7b93c981791bfb6892e7a2bb63a240e75d1cdba473cac0a4459494177',
 '77b8bc8841aba5fb1e2c6cfb3aa8bceceb15b069cf40da10ec0b42301e116e02',
 '7c02571f395592ccd2f78f5bbca300cb3888370a83ca4e7a2c1c0223265d19f1',
 'a0e3688d92237235a399c098a48c13c0507c1b336d341c309604a1481c43c202',
 'a7a63c851e93a1a423225d6509a3326e2f126c9ca77c0abcb5c007922f7ac877',
 'ba0757b1ce434c24bfa044b53ce3fc6a9d50beb0c27318bfa08769a7f4c29ca1',
 'bc68f19f9dc7f0ca233743261ae1f9cb36082b56de50375e8ccedec06a92e313',
 'c84307d18ee26d92a41e172cedc5631a