In [1]:
import pandas as pd

In [2]:
audit = pd.read_csv('audit.csv')
bank_activity = pd.read_csv('bank_activity.csv')


def _parse_amount(raw):
    """Convert an accounting-formatted amount to a float.

    Dollar signs and thousands separators are removed, surrounding
    whitespace is stripped, and a value containing '(' is treated as
    negative, e.g. ' $1,234.56 ' -> 1234.56 and '(12.00)' -> -12.0.

    The value is passed through str() first, so a missing value (NaN)
    becomes NaN instead of raising, and a value that is already numeric
    is accepted as well.

    Raises:
        ValueError: if the remaining text is not a valid number.
    """
    text = str(raw).replace('$', '').replace(',', '').strip()
    if '(' in text:
        return -float(text.strip('()'))
    return float(text)


# Clean audit data
# .copy() gives an independent frame so the column assignments below do not
# trigger SettingWithCopyWarning.
audit = audit[['Code', 'Description', 'Date Effective', ' Amount ']].copy()
audit['Code'] = audit['Code'].astype(str)
audit['Description'] = audit['Description'].astype(str)
audit['Date Effective'] = pd.to_datetime(audit['Date Effective'])

# Convert Amount to float, handling () for negatives
audit['Amount'] = audit[' Amount '].apply(_parse_amount)
audit = audit.drop(" Amount ", axis=1)

# Clean bank activity data (same amount handling as the audit data)
bank_activity = bank_activity[['Date', ' Amount ', 'Transaction Detail']].copy()
bank_activity['Date'] = pd.to_datetime(bank_activity['Date'])
bank_activity["Amount"] = bank_activity[" Amount "].apply(_parse_amount)
bank_activity = bank_activity.drop(" Amount ", axis=1)
# Classify a bank 'Transaction Detail' string into a credit-card provider label
def get_cc_provider(detail):
    """Classify a transaction detail value as a card provider.

    The value is converted to text and searched case-sensitively:
    'shift4' when it contains both 'SHIFT4' and 'PYMT', otherwise 'amex'
    when it contains 'AMERICAN EXPRESSSETTLEMENT', otherwise None.
    """
    text = str(detail)
    is_shift4_payment = 'SHIFT4' in text and 'PYMT' in text
    if is_shift4_payment:
        return 'shift4'
    # The shift4 check takes precedence when both patterns are present.
    return 'amex' if 'AMERICAN EXPRESSSETTLEMENT' in text else None

# Audit codes for these card brands are relabelled as 'shift4'
shift4_codes = ['visa', 'mc', 'svis', 'disc']
is_shift4_code = audit['Code'].str.lower().isin(shift4_codes)
audit.loc[is_shift4_code, 'Code'] = 'shift4'

# Label every bank row with its provider, then discard the raw detail text
provider_labels = bank_activity['Transaction Detail'].apply(get_cc_provider)
bank_activity['cc_provider'] = provider_labels
bank_activity = bank_activity.drop(columns=["Transaction Detail"])

In [None]:
# Group audit data by Code and Date
audit_grouped = audit.groupby(['Code', 'Date Effective'])['Amount'].sum().reset_index()
# Group bank activity by cc_provider and Date.
# NOTE: this rebinds bank_activity to the per-provider daily totals, so any
# cell run after this one sees the grouped frame. groupby excludes rows with a
# missing key by default, so rows whose cc_provider is missing are not kept.
bank_activity = bank_activity.groupby(["cc_provider", "Date"])['Amount'].sum().reset_index()

# Amounts are compared at cent precision. Two values rounded to 2 decimals are
# either equal or at least one cent apart, so a half-cent tolerance tells them
# apart. A "< 0.01" test does not: float error makes e.g. 10.01 - 10.00 come out
# slightly below 0.01, which would accept amounts that differ by a cent.
CENT_TOLERANCE = 0.005
MATCH_COLUMNS = ['Audit_Date', 'Bank_Date', 'Code', 'Amount', 'Days_to_Clear']

# Loop-invariant: round the bank amounts once instead of once per audit row
bank_amounts = bank_activity['Amount'].round(2)

# For each audit record, keep the first bank row on or after the audit date
# with the same provider and the same amount
matches = []
for _, audit_row in audit_grouped.iterrows():
    code = audit_row['Code']
    date_effective = audit_row['Date Effective']
    audit_amount = audit_row['Amount']

    date_matches = bank_activity['Date'] >= date_effective
    code_matches = bank_activity['cc_provider'] == code.lower()
    amount_matches = (bank_amounts - round(audit_amount, 2)).abs() < CENT_TOLERANCE

    potential_matches = bank_activity[date_matches & code_matches & amount_matches]
    if potential_matches.empty:
        continue

    # The groupby above sorts by (cc_provider, Date), so the first row is the
    # earliest qualifying bank date for this provider.
    match = potential_matches.iloc[0]
    matches.append({
        'Audit_Date': date_effective,
        'Bank_Date': match['Date'],
        'Code': code,
        'Amount': audit_amount,
        'Days_to_Clear': (match['Date'] - date_effective).days
    })

# Convert matches to DataFrame; explicit columns keep the schema when nothing matched
matches_df = pd.DataFrame(matches, columns=MATCH_COLUMNS)


In [5]:
# Show each matched audit/bank pair together with its Days_to_Clear
print(matches_df)

   Audit_Date  Bank_Date    Code    Amount  Days_to_Clear
0  2024-12-01 2024-12-03    amex   4305.94              2
1  2024-12-03 2024-12-05    amex  11107.54              2
2  2024-12-04 2024-12-06    amex  57545.91              2
3  2024-12-06 2024-12-09    amex   1467.31              3
4  2024-12-07 2024-12-09    amex   3728.45              2
5  2024-12-10 2024-12-12    amex  10613.37              2
6  2024-12-12 2024-12-16    amex   5958.50              4
7  2024-12-15 2024-12-17    amex  10114.18              2
8  2024-12-20 2024-12-23    amex   3449.04              3
9  2024-12-21 2024-12-23    amex  16758.05              2
10 2024-12-22 2024-12-24    amex   4059.58              2
11 2024-12-23 2024-12-26    amex   2039.08              3
12 2024-12-26 2024-12-30    amex   1020.70              4
13 2024-12-27 2024-12-30    amex  12498.16              3
14 2024-12-29 2024-12-31    amex   2144.26              2
15 2024-12-03 2024-12-05  shift4  22882.14              2
16 2024-12-04 

In [8]:
# Pair every grouped audit record with the bank rows of the same provider dated
# a fixed number of days later, and report the absolute difference in amounts.
CLEARING_LAG_DAYS = 2  # bank date is expected to be audit date + this many days
PAIR_COLUMNS = ['Audit_Date', 'Bank_Date', 'Code', 'Audit_Amount', 'Bank_Amount', 'variance']

audit_bank_pairs = []

for _, audit_row in audit_grouped.iterrows():
    code = audit_row['Code']
    audit_date = audit_row['Date Effective']
    audit_amount = audit_row['Amount']

    # Look for bank transactions exactly CLEARING_LAG_DAYS after the audit date
    expected_bank_date = audit_date + pd.Timedelta(days=CLEARING_LAG_DAYS)
    bank_matches = bank_activity[
        (bank_activity['Date'] == expected_bank_date) &
        (bank_activity['cc_provider'] == code.lower())
    ]

    # Iterating an empty selection is a no-op, so no separate emptiness check is needed
    for _, bank_row in bank_matches.iterrows():
        audit_bank_pairs.append({
            'Audit_Date': audit_date,
            'Bank_Date': bank_row['Date'],
            'Code': code,
            'Audit_Amount': float(audit_amount),
            'Bank_Amount': float(bank_row['Amount']),
            'variance': round(float(abs(audit_amount - bank_row['Amount'])), 2)
        })

# Explicit columns keep the schema stable even when no pair was found
pairs_df = pd.DataFrame(audit_bank_pairs, columns=PAIR_COLUMNS)
print(f"\nAudit and Bank Transactions (Bank Date = Audit Date + {CLEARING_LAG_DAYS} days):")
print(pairs_df)



Audit and Bank Transactions (Bank Date = Audit Date + 2 days):
   Audit_Date  Bank_Date    Code  Audit_Amount  Bank_Amount  variance
0  2024-12-01 2024-12-03    amex       4305.94      4305.94      0.00
1  2024-12-02 2024-12-04    amex       3979.17      2190.36   1788.81
2  2024-12-03 2024-12-05    amex      11107.54     11107.54      0.00
3  2024-12-04 2024-12-06    amex      57545.91     57545.91      0.00
4  2024-12-07 2024-12-09    amex       3728.45      1467.31   2261.14
5  2024-12-07 2024-12-09    amex       3728.45      3728.45      0.00
6  2024-12-07 2024-12-09    amex       3728.45     19133.72  15405.27
7  2024-12-08 2024-12-10    amex      11508.48     15256.20   3747.72
8  2024-12-09 2024-12-11    amex      16526.56     12778.84   3747.72
9  2024-12-10 2024-12-12    amex      10613.37     10613.37      0.00
10 2024-12-11 2024-12-13    amex       2206.51      2688.46    481.95
11 2024-12-14 2024-12-16    amex       1027.12      1020.21      6.91
12 2024-12-14 2024-12-16  