In [8]:
# Step 1: Import required libraries
import pandas as pd
import numpy as np
import re

# Step 2: Simulated financial transaction data
# Five sample rows that deliberately include a negative amount, a zero amount,
# a repeated account number and an unparseable date so every check has
# something to report.
data = dict(
    TransactionID=[1001, 1002, 1003, 1004, 1005],
    AccountNumber=['AC123', 'AC124', 'AC125', 'AC126', 'AC124'],
    Amount=[1500.00, -200.00, 300.00, 0.00, 500.00],
    TransactionDate=['2024-05-01', '2024-05-02', '2024-05-03', '2024-05-03', 'InvalidDate'],
    Currency=['USD', 'USD', 'EUR', 'USD', 'USD'],
)
df = pd.DataFrame.from_dict(data)

# Step 3: Define accuracy checks

def check_positive_amounts(df):
    """Return the rows of ``df`` whose 'Amount' is zero or negative.

    Despite the name, the result contains the *offending* rows (those that
    fail the "amount must be positive" rule), not the valid ones.
    """
    non_positive = df['Amount'].le(0)
    return df.loc[non_positive]

def check_valid_dates(df):
    """Return a boolean mask that is True where 'TransactionDate' is invalid.

    Despite the name, True marks the rows that FAIL the check: values that
    ``pd.to_datetime`` cannot parse (coerced to NaT) as well as values that
    are already missing.

    Parameters:
        df: DataFrame expected to contain a 'TransactionDate' column.

    Returns:
        A boolean Series indexed like ``df``. If the column is missing or the
        conversion itself fails, every row is reported as invalid.
    """
    try:
        parsed = pd.to_datetime(df['TransactionDate'], errors='coerce')
    except (KeyError, TypeError, ValueError, OverflowError):
        # Best-effort fallback: treat every row as invalid. The mask is built
        # on df.index (not a fresh 0..n-1 index) so that df[mask] still aligns
        # when the frame has a non-default index.
        return pd.Series(True, index=df.index, dtype=bool)
    return parsed.isna()

def check_duplicate_transactions(df):
    """Return rows whose 'TransactionID' repeats an earlier row's ID.

    The first occurrence of each ID is kept out of the result; only the
    second and later occurrences are returned.
    """
    repeated = df.duplicated(subset=['TransactionID'], keep='first')
    return df.loc[repeated]

def check_duplicate_accounts(df):
    """Return every row whose 'AccountNumber' appears more than once.

    Unlike a first-occurrence-only duplicate check, all members of each
    repeated group are returned, including the first one.
    """
    shared_account = df.duplicated(subset=['AccountNumber'], keep=False)
    return df.loc[shared_account]

def check_valid_currency(df, valid_currencies=None):
    """Return a boolean mask that is True where 'Currency' is NOT allowed.

    Despite the name, True marks the rows that FAIL the check. Matching is
    exact (case-sensitive), as performed by ``Series.isin``.

    Parameters:
        df: DataFrame containing a 'Currency' column.
        valid_currencies: list-like of accepted currency codes. When omitted
            (None), 'USD', 'EUR' and 'GBP' are accepted.

    Returns:
        A boolean Series indexed like ``df``.
    """
    # None sentinel instead of a list literal in the signature: a mutable
    # default is shared across calls and is a well-known hazard.
    if valid_currencies is None:
        valid_currencies = ('USD', 'EUR', 'GBP')
    return ~df['Currency'].isin(valid_currencies)

# Step 4: Apply checks
# The date and currency checks return boolean masks (True = failing row), so
# they are used to filter df; the amount and account checks already return
# the failing rows themselves.
invalid_dates_mask = check_valid_dates(df)
invalid_dates = df.loc[invalid_dates_mask]
invalid_currencies = df.loc[check_valid_currency(df)]
invalid_amounts = check_positive_amounts(df)
duplicate_accounts = check_duplicate_accounts(df)

# Step 5: Print the results
# Each section is a heading followed by the offending rows. Every heading
# except the first starts with a newline so sections are separated by a
# blank line.
_report_sections = (
    ("Invalid Amounts (non-positive):", invalid_amounts),
    ("\nInvalid Dates:", invalid_dates),
    ("\nDuplicate Accounts:", duplicate_accounts),
    ("\nInvalid Currencies:", invalid_currencies),
)
for _heading, _rows in _report_sections:
    print(_heading)
    print(_rows)

# Step 6: Accuracy Summary
# A single row can fail more than one check (e.g. a non-positive amount AND
# an unparseable date). Subtracting each issue count from the row total
# independently would count such a row twice and could even go negative, so
# the number of valid rows is derived from the union of all flagged rows.
# Duplicate accounts are reported as "potential" issues only and do not make
# a transaction invalid.
total_checks = len(df)

# NOTE: membership is tested by index label; this assumes df's index labels
# are unique (true for the default RangeIndex used here).
flagged_rows_mask = (
    df.index.isin(invalid_amounts.index)
    | invalid_dates_mask.to_numpy()
    | df.index.isin(invalid_currencies.index)
)

results = {
    "Valid Transactions": total_checks - int(flagged_rows_mask.sum()),
    "Invalid Amounts": len(invalid_amounts),
    "Invalid Dates": int(invalid_dates_mask.sum()),
    "Invalid Currencies": len(invalid_currencies),
    "Potential Duplicate Accounts": len(duplicate_accounts),
}

summary_df = pd.DataFrame(list(results.items()), columns=["Issue", "Count"])
print("\nAccuracy Summary:")
print(summary_df)

Invalid Amounts (non-positive):
   TransactionID AccountNumber  Amount TransactionDate Currency
1           1002         AC124  -200.0      2024-05-02      USD
3           1004         AC126     0.0      2024-05-03      USD

Invalid Dates:
   TransactionID AccountNumber  Amount TransactionDate Currency
4           1005         AC124   500.0     InvalidDate      USD

Duplicate Accounts:
   TransactionID AccountNumber  Amount TransactionDate Currency
1           1002         AC124  -200.0      2024-05-02      USD
4           1005         AC124   500.0     InvalidDate      USD

Invalid Currencies:
Empty DataFrame
Columns: [TransactionID, AccountNumber, Amount, TransactionDate, Currency]
Index: []

Accuracy Summary:
                          Issue  Count
0            Valid Transactions      2
1               Invalid Amounts      2
2                 Invalid Dates      1
3            Invalid Currencies      0
4  Potential Duplicate Accounts      2
