In [1]:
import os, json, csv, datetime, time
from pathlib import Path

# Configuration is read from environment variables.
# In GitHub Actions these come from Secrets; locally you can still keep a .env if you want,
# but nothing in this cell loads it, so the variables must already be exported.
PLAID_ENV = os.getenv("PLAID_ENV", "production").lower()
PLAID_CLIENT_ID = os.getenv("PLAID_CLIENT_ID")
PLAID_SECRET = os.getenv("PLAID_SECRET")
ACCESS_TOKENS_RAW = os.getenv("PLAID_ACCESS_TOKENS")  # JSON string mapping bank->access_token

if PLAID_ENV not in ("production", "sandbox"):
    raise SystemExit(f"PLAID_ENV must be 'production' or 'sandbox' (got '{PLAID_ENV}')")

if not (PLAID_CLIENT_ID and PLAID_SECRET and ACCESS_TOKENS_RAW):
    raise SystemExit("Missing PLAID_CLIENT_ID / PLAID_SECRET / PLAID_ACCESS_TOKENS env vars")

# Validate with explicit checks instead of `assert`: assertions are removed when
# Python runs with -O, and a failed assert carries no message for the user.
try:
    ACCESS_TOKENS = json.loads(ACCESS_TOKENS_RAW)
except ValueError as e:  # json.JSONDecodeError is a ValueError subclass
    raise SystemExit(f"PLAID_ACCESS_TOKENS must be JSON object, e.g. {{\"Bank1\":\"access-...\"}}. Error: {e}")
if not (isinstance(ACCESS_TOKENS, dict) and ACCESS_TOKENS):
    raise SystemExit(
        "PLAID_ACCESS_TOKENS must be JSON object, e.g. {\"Bank1\":\"access-...\"}. "
        "Error: expected a non-empty JSON object"
    )

# Folders/state (created relative to the current working directory)
ROOT = Path(".")
RAW_DIR = ROOT / "data" / "raw"
STATE_DIR = ROOT / ".state"
RAW_DIR.mkdir(parents=True, exist_ok=True)
STATE_DIR.mkdir(parents=True, exist_ok=True)

# Per-bank sync cursors saved by a previous run; an absent file means no bank has one yet.
CURSOR_PATH = STATE_DIR / "plaid_cursors.json"
if CURSOR_PATH.exists():
    try:
        # Read with the same encoding the file is written with.
        cursors = json.loads(CURSOR_PATH.read_text(encoding="utf-8"))
    except ValueError as e:
        # Stop rather than silently falling back to an empty mapping.
        raise SystemExit(f"Cursor state file {CURSOR_PATH} is not valid JSON: {e}")
    if not isinstance(cursors, dict):
        raise SystemExit(f"Cursor state file {CURSOR_PATH} must contain a JSON object mapping bank->cursor")
else:
    cursors = {}

# ISO date (local clock) used as the prefix of the CSV files written for this run.
today = datetime.date.today().isoformat()
print("Env OK. Banks:", list(ACCESS_TOKENS.keys()))

Env OK. Banks: ['Discover', 'SSSCU', 'Petal']


In [2]:
from plaid import ApiClient, Configuration
from plaid.api.plaid_api import PlaidApi
from plaid.model.transactions_sync_request import TransactionsSyncRequest
from plaid.model.transactions_get_request import TransactionsGetRequest
from plaid.model.transactions_get_request_options import TransactionsGetRequestOptions

# Select the API host for the configured environment; any value other than
# "production" resolves to the sandbox host.
if PLAID_ENV == "production":
    BASE_URL = "https://production.plaid.com"
else:
    BASE_URL = "https://sandbox.plaid.com"

# The client id and secret are handed to the SDK configuration under the
# "clientId" / "secret" api_key entries.
config = Configuration(
    host=BASE_URL,
    api_key={"clientId": PLAID_CLIENT_ID, "secret": PLAID_SECRET},
)
client = PlaidApi(ApiClient(config))
print("Plaid client ready:", BASE_URL)


Plaid client ready: https://production.plaid.com


In [3]:
def write_csv(bank: str, rows: list):
    if not rows:
        return None
    cols = [
        "account_id","transaction_id","authorized_date","date","name","merchant_name",
        "amount","iso_currency_code","pending","payment_channel","category","category_id"
    ]
    out_path = RAW_DIR / f"{today}_{bank}.csv"
    with out_path.open("w", newline="", encoding="utf-8") as f:
        w = csv.DictWriter(f, fieldnames=cols)
        w.writeheader()
        for t in rows:
            w.writerow({
                "account_id": t.get("account_id"),
                "transaction_id": t.get("transaction_id"),
                "authorized_date": t.get("authorized_date"),
                "date": t.get("date"),
                "name": t.get("name"),
                "merchant_name": t.get("merchant_name"),
                "amount": t.get("amount"),
                "iso_currency_code": t.get("iso_currency_code"),
                "pending": t.get("pending"),
                "payment_channel": t.get("payment_channel"),
                "category": "|".join(t.get("category") or []),
                "category_id": t.get("category_id"),
            })
    return out_path

def fetch_sync(access_token: str, cursor: str | None):
    """Page through transactions/sync starting from ``cursor`` and collect every change.

    When ``cursor`` is None (first run) the cursor field is left out of the
    request entirely. Requests are repeated with each response's next_cursor
    until a response no longer reports has_more.

    Returns:
        A 4-tuple ``(added, modified, removed, next_cursor)``: three lists
        accumulated across all pages and the cursor from the last response.

    NOTE(review): exceptions raised by the client call are not caught here and
    propagate to the caller, including (per Plaid's docs, to be confirmed) the
    error returned when data changes mid-pagination.
    """
    from plaid.model.transactions_sync_request import TransactionsSyncRequest

    collected = {"added": [], "modified": [], "removed": []}
    position = cursor

    while True:
        # Only include the cursor key once there is an actual cursor value
        params = {"access_token": access_token}
        if position is not None:
            params["cursor"] = position

        page = client.transactions_sync(TransactionsSyncRequest(**params)).to_dict()

        for key, bucket in collected.items():
            bucket.extend(page.get(key, []))

        position = page.get("next_cursor")
        more_pages = bool(page.get("has_more"))
        time.sleep(0.2)  # short pause after every page request
        if not more_pages:
            break

    return collected["added"], collected["modified"], collected["removed"], position

In [4]:
# Sync every configured bank: fetch changes since its saved cursor, write them to
# a dated CSV, then persist the advanced cursor.
any_changes = False

for bank, token in ACCESS_TOKENS.items():
    print(f"\n=== {bank} ===")
    cursor = cursors.get(bank)
    if cursor:
        print("Using existing cursor…")
    else:
        print("No cursor yet. Doing initial sync (last ~30 days via /sync).")
    # A missing or empty saved cursor is passed as None so fetch_sync omits the field.
    added, modified, removed, next_cursor = fetch_sync(token, cursor or None)

    print(f"Added: {len(added)}, Modified: {len(modified)}, Removed: {len(removed)}")
    # NOTE(review): `removed` is only counted here; removals are not written anywhere.
    out_path = write_csv(bank, added + modified)
    if out_path:
        print(f"Wrote: {out_path}")
        any_changes = True
    else:
        print("No new/modified transactions to write.")

    if next_cursor and next_cursor != cursor:
        cursors[bank] = next_cursor
        # Persist right away: if a later bank raises, this bank's CSV is already
        # on disk, and losing its advanced cursor would make the next run fetch
        # and write the same transactions again.
        CURSOR_PATH.write_text(json.dumps(cursors, indent=2), encoding="utf-8")

# save cursors (also creates the state file on a run where no cursor advanced)
CURSOR_PATH.write_text(json.dumps(cursors, indent=2), encoding="utf-8")
print(f"\nSaved cursors -> {CURSOR_PATH}")
print("Done. Changes:", any_changes)


=== Discover ===
Using existing cursor…


Added: 0, Modified: 0, Removed: 0
No new/modified transactions to write.

=== SSSCU ===
Using existing cursor…


Added: 0, Modified: 0, Removed: 0
No new/modified transactions to write.

=== Petal ===
Using existing cursor…


Added: 0, Modified: 0, Removed: 0
No new/modified transactions to write.

Saved cursors -> .state/plaid_cursors.json
Done. Changes: False
