# A) Payment Dataflow

Goals:

Verify that tokenized_payments.csv exists.

Look for Primary Account Number (PAN)-like sequences (13–19 consecutive digits) in all columns.

Check for token uniqueness.

Display counts by status.

Write outputs/validation_report.csv with the results.

In [4]:
from pathlib import Path
import re, datetime, csv, hashlib, json
import pandas as pd
# Base directory for this notebook: the parent of the current working
# directory, resolved to an absolute path. Later cells build paths from it.
BASE = Path('..').resolve()
# Echo the resolved path so the run shows which directory is being used.
print(BASE)

C:\Project\SafePay\01_Case\A_Payment_Dataflow


In [None]:
from pathlib import Path

# Validate the tokenized payment output and write a CSV report of the checks.
#
# Checks performed (one report row each):
#   token_file_exists  - tokenized_payments.csv is present under outputs/
#   pan_leak_detected  - result is True when NO PAN-like value was found
#   token_uniqueness   - every row carries a distinct, non-missing token
#   status_counts      - informational row with counts per status value
out_dir = BASE / 'outputs'
# Make sure the report destination exists; otherwise the "file not found"
# path below would crash in to_csv instead of producing a report.
out_dir.mkdir(parents=True, exist_ok=True)
reports = []

# Load tokenized outputs
tok_file = out_dir / 'tokenized_payments.csv'
# A PAN-like value is a standalone run of 13 to 19 digits.
pan_pattern = re.compile(r'\b\d{13,19}\b')

if tok_file.exists():
    # Read every column as text. With dtype inference, a numeric column that
    # contains a blank cell becomes float64 and a 16-digit number is rendered
    # as e.g. '4.111111111111111e+15', which the digit pattern cannot match.
    tok = pd.read_csv(tok_file, dtype=str)
    reports.append({'check':'token_file_exists','result':True,'details':f'rows={len(tok)}'})

    # 1) Check PAN-like sequences
    def contains_pan_like(v):
        """Return True if the text form of ``v`` contains a 13-19 digit run."""
        return bool(pan_pattern.search(str(v)))

    pan_found_cols = [
        col for col in tok.columns
        if tok[col].astype(str).apply(contains_pan_like).any()
    ]
    if pan_found_cols:
        reports.append({'check':'pan_leak_detected','result':False,'details':f'PAN-like string found in columns: {pan_found_cols}'})
    else:
        reports.append({'check':'pan_leak_detected','result':True,'details':'No PAN-like sequences found in tokenized file'})

    # 2) token uniqueness
    total = len(tok)
    if 'token' in tok.columns:
        # nunique() ignores missing values, so blank tokens also make
        # unique < total and fail the check.
        unique_tokens = int(tok['token'].nunique())
        reports.append({'check':'token_uniqueness','result': unique_tokens==total,'details':f'total={total}, unique={unique_tokens}'})
    else:
        # Report the real cause instead of a misleading "unique=0"
        # (which would even pass on an empty file).
        reports.append({'check':'token_uniqueness','result':False,'details':f"column 'token' not found; total={total}"})

    # 3) basic counts by status...if present
    if 'status' in tok.columns:
        counts = tok['status'].value_counts().to_dict()
        reports.append({'check':'status_counts','result':True,'details':json.dumps(counts)})
else:
    reports.append({'check':'token_file_exists','result':False,'details':'tokenized_payments.csv not found'})

# validation report
vr = pd.DataFrame(reports)
out_file = out_dir / 'validation_report.csv'
vr.to_csv(out_file, index=False)
print('Validation report written to', out_file)
print(vr.to_string(index=False))


Validation report written to C:\Project\SafePay\01_Case\A_Payment_Dataflow\outputs\validation_report.csv
            check  result                                          details
token_file_exists    True                                        rows=1200
pan_leak_detected    True    No PAN-like sequences found in tokenized file
 token_uniqueness    True                          total=1200, unique=1200
    status_counts    True {"failed": 423, "success": 398, "declined": 379}
