In [1]:
# --- Cell 1: Env, tokens, Plaid client (works with v10+ or legacy SDK) ---
import os, json
from pathlib import Path
from datetime import date, timedelta
import pandas as pd

# dotenv is optional but recommended (already in requirements.txt)
try:
    from dotenv import load_dotenv, find_dotenv
except Exception:
    load_dotenv = None
    find_dotenv = None

def mask(s: str | None) -> str:
    if not s: return "<missing>"
    return (s[:4] + "…" + s[-4:]) if len(s) > 8 else "***"

# --- Resolve repo root (works from /, /scripts, notebooks, etc.) ---
# Walk from the current directory up through its ancestors and take the first
# directory that either is named "spending-dashboard" or contains a ".git"
# entry. If none matches, the current directory itself is used.
cwd = Path.cwd().resolve()
candidates = [cwd] + list(cwd.parents)

repo_root = cwd
for candidate in candidates:
    if candidate.name == "spending-dashboard" or (candidate / ".git").exists():
        repo_root = candidate
        break

# --- Load .envs: absolute override first, then fallbacks ---
def load_envs():
    """Load every .env file that can be found, in a fixed priority order.

    Order of loading:
      1. The path in the ``ENV_PATH`` environment variable, defaulting to
         ``<repo_root>/scripts/.env`` when the variable is unset.
      2. ``scripts/.env``, ``.env`` and ``config/.env`` under the repo root,
         then ``.env`` in the current working directory.
      3. Whatever ``find_dotenv(usecwd=True)`` discovers, if anything.

    Every file is loaded with ``override=False``. The function is a no-op
    when python-dotenv could not be imported.
    """
    if load_dotenv is None:
        return

    env_override = os.getenv("ENV_PATH", str(repo_root / "scripts" / ".env"))
    if env_override and Path(env_override).exists():
        load_dotenv(env_override, override=False)

    fallback_files = (
        repo_root / "scripts" / ".env",
        repo_root / ".env",
        repo_root / "config" / ".env",
        cwd / ".env",
    )
    for env_file in fallback_files:
        if Path(env_file).exists():
            load_dotenv(env_file, override=False)

    # Last resort: let python-dotenv search for a .env on its own.
    discovered = find_dotenv(usecwd=True) if find_dotenv else None
    if discovered:
        load_dotenv(discovered, override=False)


load_envs()

# Credentials come straight from the environment (None when unset).
PLAID_CLIENT_ID = os.getenv("PLAID_CLIENT_ID")
PLAID_SECRET = os.getenv("PLAID_SECRET")

# Normalise PLAID_ENV: an unset or empty value means "production"; the value
# is trimmed and lower-cased, short aliases are expanded, and anything that is
# still not a recognised environment name falls back to "production".
alias = {"prod":"production", "live":"production", "dev":"development", "devel":"development", "sb":"sandbox"}
_requested_env = os.getenv("PLAID_ENV", "production") or "production"
PLAID_ENV = _requested_env.strip().lower()
PLAID_ENV = alias.get(PLAID_ENV, PLAID_ENV)
PLAID_ENV = PLAID_ENV if PLAID_ENV in {"production", "development", "sandbox"} else "production"

# Filesystem locations, each overridable through an environment variable of
# the same name. Defaults live under the repo root.
OUTPUT_DIR = Path(os.getenv("OUTPUT_DIR", str(repo_root / "data" / "raw")))
STATE_DIR  = Path(os.getenv("STATE_DIR",  str(repo_root / ".state")))
TOKENS_PATH = Path(os.getenv("TOKENS_PATH", str(STATE_DIR / "access_tokens.json")))

# Fail fast, before any network call, if the basic configuration is missing.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
assert PLAID_CLIENT_ID and PLAID_SECRET, "Missing PLAID_CLIENT_ID or PLAID_SECRET (check scripts/.env or GitHub Secrets)."

example_json = '{"Issuer":"access-..."}'
assert TOKENS_PATH.exists(), f"Missing tokens file at {TOKENS_PATH}. Expect JSON like: {example_json}"

# Decode explicitly instead of relying on the platform default encoding, which
# is locale-dependent on Windows. "utf-8-sig" reads plain UTF-8 and also strips
# a leading byte-order mark that would otherwise make json.load fail.
with open(TOKENS_PATH, "r", encoding="utf-8-sig") as f:
    ACCESS_TOKENS = json.load(f)
assert isinstance(ACCESS_TOKENS, dict) and ACCESS_TOKENS, "access_tokens.json is empty/invalid (expect issuer→token map)."

# One-line configuration summary; the two credentials are passed through
# mask() rather than printed verbatim.
env_report = (
    "Env OK →",
    "PLAID_CLIENT_ID:", mask(PLAID_CLIENT_ID),
    "| PLAID_SECRET:", mask(PLAID_SECRET),
    "| PLAID_ENV:", PLAID_ENV,
    "| OUTPUT_DIR:", str(OUTPUT_DIR),
    "| TOKENS_PATH:", str(TOKENS_PATH),
)
print(*env_report)

# --- Plaid client init with SDK auto-detection ---
# Outcome of this section: `client` is bound to a Plaid client object and
# USE_PLAID_V10 records which SDK flavour produced it, so later code can pick
# the matching call style. If neither SDK can be initialised, ImportError is
# raised.
USE_PLAID_V10 = False
client = None

try:
    # Try modern SDK (v10+)
    from plaid.api import plaid_api
    from plaid.configuration import Configuration
    try:
        from plaid.configuration import Environment  # newer style
        # All three attributes are evaluated when the dict is built, so an SDK
        # that lacks any one of them raises here and drops to the URL fallback.
        env_host = {
            "production":  Environment.Production,
            "development": Environment.Development,
            "sandbox":     Environment.Sandbox,
        }[PLAID_ENV]
        config = Configuration(host=env_host)
    except Exception:
        # Fallback if Environment enum not present
        # NOTE(review): `except Exception` is broader than ImportError, so any
        # failure in the block above (not only a missing enum) ends up here.
        host_url = {
            "production":  "https://production.plaid.com",
            "development": "https://development.plaid.com",
            "sandbox":     "https://sandbox.plaid.com",
        }[PLAID_ENV]
        config = Configuration(host=host_url)

    from plaid.api_client import ApiClient
    # Credentials are attached to the configuration before the API client is built.
    config.api_key["clientId"] = PLAID_CLIENT_ID
    config.api_key["secret"]   = PLAID_SECRET
    api_client = ApiClient(config)
    client = plaid_api.PlaidApi(api_client)
    USE_PLAID_V10 = True
    print("Plaid SDK: v10+ (plaid_api)")
except Exception as e_v10:
    # Any failure on the modern path (import error or otherwise) lands here;
    # the exception is kept so it can be reported if the legacy path fails too.
    # Try legacy SDK
    try:
        from plaid import Client as LegacyClient
        client = LegacyClient(
            client_id=PLAID_CLIENT_ID,
            secret=PLAID_SECRET,
            environment=PLAID_ENV
        )
        USE_PLAID_V10 = False
        print("Plaid SDK: legacy Client()")
    except Exception as e_legacy:
        # Both attempts failed: surface both error messages in one exception.
        # ImportError is raised regardless of what the underlying failures were.
        raise ImportError(
            "Could not initialize Plaid client. Ensure 'plaid-python' is installed. "
            f"v10 error: {e_v10}\nlegacy error: {e_legacy}"
        )


Env OK → PLAID_CLIENT_ID: 68bb…6689 | PLAID_SECRET: a605…7df5 | PLAID_ENV: production | OUTPUT_DIR: C:\Users\kosis\Downloads\Automation\spending-dashboard\data\raw | TOKENS_PATH: C:\Users\kosis\Downloads\Automation\spending-dashboard\.state\access_tokens.json
Plaid SDK: v10+ (plaid_api)


In [2]:
# --- Cell 2: Pull & consolidate transactions across all banks ---
from datetime import date, timedelta

# Look-back window: DAYS_BACK days (env-configurable, default 90) ending today.
DAYS_BACK = int(os.environ.get("DAYS_BACK", "90"))
end_date = date.today()
start_date = end_date + timedelta(days=-DAYS_BACK)

# Collects one DataFrame per bank as the fetch loop runs.
all_frames = list()

def _collect_all_pages(fetch_page, bank_name: str, max_rows: int = 50_000) -> list:
    """Drain an offset-paginated transaction listing into a single list.

    Args:
        fetch_page: Callable taking an integer offset and returning a
            ``(transactions, total_transactions)`` pair for that page.
        bank_name: Label used only in error messages.
        max_rows: Upper bound on the offset before the loop is abandoned.

    Returns:
        All transactions received, in the order the pages returned them.

    Raises:
        RuntimeError: If a page comes back empty while the reported total has
            not been reached, or if the offset exceeds ``max_rows``.
    """
    txns = []
    offset = 0
    while True:
        page, total = fetch_page(offset)
        txns.extend(page)
        if len(txns) >= total:
            break

        # An empty page leaves the offset unchanged, so without this check the
        # same request would be repeated forever and the size guard below
        # would never trigger.
        if not page:
            raise RuntimeError(
                f"Pagination stalled for {bank_name}: empty page at offset {offset} "
                f"with {len(txns)} of {total} transactions received"
            )

        offset = len(txns)

        # Safety: avoid infinite loops if API misreports
        if offset > max_rows:
            raise RuntimeError(f"Pagination runaway for {bank_name}")
    return txns


def fetch_transactions(bank_name: str, access_token: str) -> pd.DataFrame:
    """Fetch every transaction for one bank between ``start_date`` and ``end_date``.

    Uses the request-object call style when ``USE_PLAID_V10`` is true and the
    legacy ``client.Transactions.get`` style otherwise; both request pages of
    500 rows.

    Args:
        bank_name: Label stored in the ``bank_name`` column of the result.
        access_token: Plaid access token for the institution.

    Returns:
        A DataFrame with one row per transaction plus a ``bank_name`` column,
        or an empty DataFrame (without that column) when nothing was returned.

    Raises:
        RuntimeError: If pagination stalls or runs past the safety limit.
    """
    if USE_PLAID_V10:
        # Imported here so the legacy-SDK path never needs this module.
        from plaid.model.transactions_get_request import TransactionsGetRequest

        def fetch_page(offset: int):
            req = TransactionsGetRequest(
                access_token=access_token,
                start_date=start_date,
                end_date=end_date,
                options={"count": 500, "offset": offset},
            )
            resp = client.transactions_get(req).to_dict()
            return resp.get("transactions", []), resp.get("total_transactions", 0)
    else:
        def fetch_page(offset: int):
            resp = client.Transactions.get(
                access_token=access_token,
                start_date=start_date,
                end_date=end_date,
                options={"count": 500, "offset": offset},
            )
            return resp["transactions"], resp["total_transactions"]

    df = pd.DataFrame(_collect_all_pages(fetch_page, bank_name))
    if not df.empty:
        df["bank_name"] = bank_name
    return df

# Pull all banks
# ACCESS_TOKENS maps a bank label to its access token; each bank's frame is
# appended to all_frames, with a progress line printed before and after.
for issuer, issuer_token in ACCESS_TOKENS.items():
    print(f"🔄 Fetching {issuer}…")
    issuer_frame = fetch_transactions(issuer, issuer_token)
    print(f"   → {len(issuer_frame):,} rows")
    all_frames.append(issuer_frame)

# Combine & light schema standardization for Power BI
# Drop missing/empty frames first and guard on the *filtered* list: pd.concat
# raises ValueError when given nothing to concatenate, and checking all_frames
# alone would not cover the case where every bank returned zero rows.
non_empty_frames = [df for df in all_frames if df is not None and not df.empty]
combined = pd.concat(non_empty_frames, ignore_index=True) if non_empty_frames else pd.DataFrame()

# Columns kept for the dashboard, in output order; any that are absent from
# the pulled data are skipped.
keep_cols = [
    "date",
    "name",
    "merchant_name",
    "category",
    "amount",
    "payment_channel",
    "pending",
    "account_id",
    "transaction_id",
    "bank_name",
]
available = [c for c in keep_cols if c in combined.columns]
if not combined.empty:
    # Newest transactions first, with a clean 0..n-1 index.
    combined = combined[available].sort_values("date", ascending=False).reset_index(drop=True)

print(f"✅ Pulled total {0 if combined.empty else len(combined):,} transactions across {len(ACCESS_TOKENS)} banks.")
combined.head(3)


🔄 Fetching Discover…
   → 29 rows
🔄 Fetching SSSCU…
   → 93 rows
🔄 Fetching Petal…
   → 26 rows
✅ Pulled total 148 transactions across 3 banks.


  combined = pd.concat([df for df in all_frames if df is not None and not df.empty], ignore_index=True) if all_frames else pd.DataFrame()


Unnamed: 0,date,name,merchant_name,category,amount,payment_channel,pending,account_id,transaction_id,bank_name
0,2025-09-08,Withdrawal AMEX EPAYMENT / TYPE: ACH PMT ID: 0...,,,777.78,other,False,wrJyLQqMX7TpyAXwRMKrimXrogEnMDiRzbxxm,Pk8ogy3JrxtMebqmOAE9fqAYO7ZaxXiaEnOOv,SSSCU
1,2025-09-08,Deposit Kiosk / WEALTHFRONT BROKERAGE LLC/WELL...,Wealthfront,,-500.0,other,False,wrJyLQqMX7TpyAXwRMKrimXrogEnMDiRzbxxm,6xnVwvbJYETqXgOk93x7uJb07JzjPrFV8mbKa,SSSCU
2,2025-09-07,Withdrawal Signature base / APPLE CASH SENT MO...,,,9.0,other,False,wrJyLQqMX7TpyAXwRMKrimXrogEnMDiRzbxxm,jDB5KQAE48cABg3n8rjqUKQkBVj1QYuvDkLom,SSSCU


In [3]:
# --- Cell 3: Light cleaning / types ---
if not combined.empty:
    # Keep a tidy subset if present
    keep_cols = [
        "date", "name", "merchant_name", "category", "amount",
        "payment_channel", "pending", "account_id", "transaction_id", "bank_name",
    ]
    present_cols = [c for c in keep_cols if c in combined.columns]

    combined = (
        combined
        # Parse the date column (unparseable values are coerced rather than
        # raising) and keep only the calendar-date part.
        .assign(date=pd.to_datetime(combined["date"], errors="coerce").dt.date)
        .loc[:, present_cols]
        # Sort newest first
        .sort_values("date", ascending=False)
        .reset_index(drop=True)
    )

print(f"Columns → {list(combined.columns)}")
print(f"Rows → {len(combined):,}")


Columns → ['date', 'name', 'merchant_name', 'category', 'amount', 'payment_channel', 'pending', 'account_id', 'transaction_id', 'bank_name']
Rows → 148


In [4]:
# --- Cell 4: Write latest.csv ---
# Target file is <OUTPUT_DIR>/latest.csv; create the directory if it is missing.
latest_path = OUTPUT_DIR / "latest.csv"
latest_path.parent.mkdir(parents=True, exist_ok=True)

# Write CSV (without the DataFrame index column)
combined.to_csv(latest_path, index=False)

print("✅ Saved:", latest_path)
print("🔎 Preview:")
display(combined.head(10))


✅ Saved: C:\Users\kosis\Downloads\Automation\spending-dashboard\data\raw\latest.csv
🔎 Preview:


Unnamed: 0,date,name,merchant_name,category,amount,payment_channel,pending,account_id,transaction_id,bank_name
0,2025-09-08,Withdrawal AMEX EPAYMENT / TYPE: ACH PMT ID: 0...,,,777.78,other,False,wrJyLQqMX7TpyAXwRMKrimXrogEnMDiRzbxxm,Pk8ogy3JrxtMebqmOAE9fqAYO7ZaxXiaEnOOv,SSSCU
1,2025-09-08,Deposit Kiosk / WEALTHFRONT BROKERAGE LLC/WELL...,Wealthfront,,-500.0,other,False,wrJyLQqMX7TpyAXwRMKrimXrogEnMDiRzbxxm,6xnVwvbJYETqXgOk93x7uJb07JzjPrFV8mbKa,SSSCU
2,2025-09-07,Withdrawal Signature base / APPLE CASH SENT MO...,,,9.0,other,False,wrJyLQqMX7TpyAXwRMKrimXrogEnMDiRzbxxm,jDB5KQAE48cABg3n8rjqUKQkBVj1QYuvDkLom,SSSCU
3,2025-09-06,Plaid,,,-1.0,other,True,M7aKOL7DqqtbbNRaemv1T3ozK1D87pcBjEdvx,km3MnPmDQQIvv38pV5douOJyQVNV66cp8mA9w,Discover
4,2025-09-06,SP DILOHOME.COM 2156459070,Dilohome,,21.36,online,False,M7aKOL7DqqtbbNRaemv1T3ozK1D87pcBjEdvx,ZNMm7jNbxxU55x19VD44SRVwDkVZE4UV8RyD7,Discover
5,2025-09-06,Plaid,Plaid Technologies Inc,,1.0,in store,True,M7aKOL7DqqtbbNRaemv1T3ozK1D87pcBjEdvx,Kbr3awb611FMMV3r8kg4tEL7Yexe88fLPjBZD,Discover
6,2025-09-06,Plaid,Plaid Technologies Inc,,1.0,in store,True,M7aKOL7DqqtbbNRaemv1T3ozK1D87pcBjEdvx,AjqYEMjDooUzzVDqAov9tJ6d3101YYHZLaENx,Discover
7,2025-09-06,Plaid,,,-1.0,other,True,M7aKOL7DqqtbbNRaemv1T3ozK1D87pcBjEdvx,yzZq9xzAQQt88ZywzOx4CEYbvwAwJJfD5VnLr,Discover
8,2025-09-05,Withdrawal Wealthfront / TYPE: EDI PYMNTS ID: ...,Wealthfront,,500.0,other,False,wrJyLQqMX7TpyAXwRMKrimXrogEnMDiRzbxxm,4mRVxkNaY0u16LeVwqaKIw37VMOj3BTDdzXpZ,SSSCU
9,2025-09-05,Withdrawal Signature base / APPLE CASH SENT MO...,,,4.74,other,False,wrJyLQqMX7TpyAXwRMKrimXrogEnMDiRzbxxm,rkj1aYXMN6tAPKJk0Mm8Ux0LdqMV0zFr1jvMg,SSSCU
