In [1]:
# --- Cell 1: Env, paths, tokens ---
import os, json, re
from pathlib import Path
from datetime import datetime
import pandas as pd
import numpy as np

# Optional dotenv
try:
    from dotenv import load_dotenv, find_dotenv
except Exception:
    load_dotenv = None
    find_dotenv = None

def load_envs():
    """Load variables from any .env files near the repo into the process env.

    Does nothing when python-dotenv could not be imported. Files are read in
    this order, always with ``override=False`` so a variable that is already
    set (by the shell or by an earlier file) is never replaced:

      1. ``<repo>/scripts/.env``
      2. ``<repo>/.env``
      3. ``<cwd>/.env``
      4. whatever ``find_dotenv(usecwd=True)`` locates

    ``<repo>`` is the closest directory containing ``.git`` when walking up
    from ``GITHUB_WORKSPACE`` (if set) or from the current directory; when no
    such directory exists the starting directory itself is used.
    """
    if load_dotenv is None:
        return

    def _load_one(env_file):
        # Retry without `encoding` on TypeError -- presumably for python-dotenv
        # versions whose load_dotenv() does not accept that keyword.
        try:
            load_dotenv(env_file, override=False, encoding="utf-8")
        except TypeError:
            load_dotenv(env_file, override=False)

    here = Path.cwd().resolve()
    gw = os.getenv("GITHUB_WORKSPACE")
    start = Path(gw).resolve() if gw else here

    # Walk upward until a directory holding `.git` is found.
    repo = start
    for ancestor in (start, *start.parents):
        if (ancestor / ".git").exists():
            repo = ancestor
            break

    # Prefer scripts/.env if present: it is loaded first and later files
    # cannot override what it set.
    candidates = (repo / "scripts" / ".env", repo / ".env", here / ".env")
    for env_file in candidates:
        if env_file.exists():
            _load_one(str(env_file))

    if find_dotenv:
        found = find_dotenv(usecwd=True)
        if found:
            _load_one(found)

load_envs()

# Resolve repo root
cwd = Path.cwd().resolve()
gw = os.getenv("GITHUB_WORKSPACE")
REPO = next((p for p in [Path(gw).resolve() if gw else cwd, *cwd.parents] if (p / ".git").exists()), cwd)

# Paths
CONFIG_DIR = REPO / "config"
STATE_DIR  = REPO / ".state"
OUT_PATH   = CONFIG_DIR / "accounts_dim.csv"
STATE_DIR.mkdir(parents=True, exist_ok=True)
CONFIG_DIR.mkdir(parents=True, exist_ok=True)

# Env
# Credentials are read as-is; either may be None when the variable is unset.
PLAID_CLIENT_ID = os.environ.get("PLAID_CLIENT_ID")
PLAID_SECRET = os.environ.get("PLAID_SECRET")

# Shorthand spellings accepted for PLAID_ENV, mapped to canonical names.
alias = {"prod":"production","live":"production","dev":"development","devel":"development","sb":"sandbox"}

# Unset/empty -> "production"; otherwise trim + lowercase, expand any alias,
# and fall back to "production" for anything that is still unrecognised.
_requested_env = (os.environ.get("PLAID_ENV") or "production").strip().lower()
_canonical_env = alias.get(_requested_env, _requested_env)
PLAID_ENV = _canonical_env if _canonical_env in {"production","development","sandbox"} else "production"

# Access tokens (from env or file)
def _normalize_tokens(obj) -> dict:
    if isinstance(obj, dict):
        return {str(k): str(v).strip() for k,v in obj.items()}
    if isinstance(obj, list):
        out = {}
        for item in obj:
            if isinstance(item, dict):
                name = item.get("issuer") or item.get("bank") or item.get("name")
                token = item.get("access_token") or item.get("token")
                if name and token:
                    out[str(name)] = str(token).strip()
        return out
    if isinstance(obj, str):
        s = obj.lstrip("\ufeff").strip()
        try:
            return _normalize_tokens(json.loads(s))
        except Exception:
            parts = {}
            for seg in re.split(r"[|,\n;]+", s):
                if "=" in seg:
                    k, v = seg.split("=", 1)
                elif ":" in seg:
                    k, v = seg.split(":", 1)
                else:
                    continue
                k = k.strip().strip('"').strip("'")
                v = v.strip().strip('"').strip("'")
                if k and v:
                    parts[k] = v
            return parts
    return {}

# Token source precedence: the PLAID_ACCESS_TOKENS env var wins; the JSON/text
# file at TOKENS_PATH is consulted only when that variable is unset or blank.
TOKENS_PATH = Path(os.getenv("TOKENS_PATH", str(STATE_DIR / "access_tokens.json")))
blob = os.getenv("PLAID_ACCESS_TOKENS", "").strip()
if blob:
    ACCESS_TOKENS = _normalize_tokens(blob)
elif TOKENS_PATH.exists():
    ACCESS_TOKENS = _normalize_tokens(TOKENS_PATH.read_text(encoding="utf-8", errors="ignore"))
else:
    raise AssertionError("No Plaid access tokens found in env or .state/access_tokens.json")

# A source that exists but yields no usable pairs (malformed blob, empty file)
# must stop the run here: continuing would fetch nothing and the save cell
# would overwrite accounts_dim.csv with a header-only file. The source's
# contents are deliberately not echoed because they may hold secrets.
if not ACCESS_TOKENS:
    _token_source = "PLAID_ACCESS_TOKENS" if blob else str(TOKENS_PATH)
    raise AssertionError(f"No usable Plaid access tokens could be parsed from {_token_source}")

print(f"Accounts dim will be written to {OUT_PATH}")
print(f"Found {len(ACCESS_TOKENS)} token(s).")


Accounts dim will be written to C:\Users\kosis\Downloads\Automation\spending-dashboard\config\accounts_dim.csv
Found 3 token(s).


In [2]:
# --- Cell 2: Plaid client init ---
# Build the Plaid API client used by the fetch cell.
#
# Reads PLAID_ENV, PLAID_CLIENT_ID and PLAID_SECRET from the setup cell and
# leaves two names behind:
#   client         -- the constructed client object
#   USE_PLAID_V10  -- True when the plaid_api/Configuration style SDK was used,
#                     False when the legacy `plaid.Client` was used; the fetch
#                     cell branches on this flag to pick the matching call.
USE_PLAID_V10 = False
client = None

try:
    # v10+
    from plaid.api import plaid_api
    from plaid.configuration import Configuration
    try:
        # Preferred: take the host from the SDK's Environment constants.
        from plaid.configuration import Environment
        env_host = {
            "production":  Environment.Production,
            "development": Environment.Development,
            "sandbox":     Environment.Sandbox,
        }[PLAID_ENV]
        config = Configuration(host=env_host)
    except Exception:
        # Any failure above (import error, a missing Environment attribute, ...)
        # falls back to spelling the host URL out literally.
        host_url = {
            "production":  "https://production.plaid.com",
            "development": "https://development.plaid.com",
            "sandbox":     "https://sandbox.plaid.com",
        }[PLAID_ENV]
        config = Configuration(host=host_url)

    from plaid.api_client import ApiClient
    # Credentials are attached to the configuration before the client is built.
    config.api_key["clientId"] = PLAID_CLIENT_ID
    config.api_key["secret"]   = PLAID_SECRET
    api_client = ApiClient(config)
    client = plaid_api.PlaidApi(api_client)
    USE_PLAID_V10 = True
    print("Plaid SDK: v10+")
except Exception as e:
    # Anything that went wrong in the block above -- not only a missing
    # module -- routes here, and the legacy client is attempted instead.
    try:
        from plaid import Client as LegacyClient
        client = LegacyClient(client_id=PLAID_CLIENT_ID, secret=PLAID_SECRET, environment=PLAID_ENV)
        USE_PLAID_V10 = False
        print("Plaid SDK: legacy")
    except Exception as e2:
        # Both styles failed: surface both error messages in one ImportError.
        raise ImportError(f"Failed to init Plaid client: v10 error={e}, legacy error={e2}")


Plaid SDK: v10+


In [3]:
# --- Cell 3: Fetch accounts & build accounts_dim.csv ---
def best_card_name(bank_name, official_name, name, subtype, mask):
    """Pick the most descriptive display label available for an account.

    Preference order: ``official_name``, then ``name`` (each used only when it
    is non-blank after trimming). If neither is usable the label is assembled
    as ``"<bank_name> <SUBTYPE> ••<mask>"``, where the subtype is upper-cased
    (``"CARD"`` when absent) and the mask suffix is omitted when there is no
    mask.
    """
    # prefer official_name > name > subtype + mask
    for candidate in (official_name, name):
        if candidate:
            cleaned = str(candidate).strip()
            if cleaned:
                return cleaned

    mask_text = str(mask).strip() if mask else ""
    suffix = f"••{mask_text}" if mask_text else ""
    kind = str(subtype).upper().strip() if subtype else "CARD"
    prefix = f"{bank_name} {kind}".strip()
    return f"{prefix} {suffix}".strip()

def _account_row(bank_name, acct, fetched_at):
    """Build one accounts_dim row from a single account mapping.

    ``acct`` is read with ``.get`` for account_id, official_name, name, mask,
    type and subtype; ``card_name`` is derived via ``best_card_name`` and
    ``fetched_at`` is stored as ``last_updated``.
    """
    official_name = acct.get("official_name")
    name = acct.get("name")
    mask = acct.get("mask")
    subtype = acct.get("subtype")
    return {
        "account_id": acct.get("account_id"),
        "bank_name": bank_name,
        "official_name": official_name,
        "name": name,
        "mask": mask,
        "type": acct.get("type"),
        "subtype": subtype,
        "card_name": best_card_name(bank_name, official_name, name, subtype, mask),
        "last_updated": fetched_at,
    }

rows = []
# datetime.utcnow() is deprecated; take an aware UTC "now" and drop the tzinfo
# so the ISO string keeps the same offset-free format already written to the CSV.
from datetime import timezone
now = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

if USE_PLAID_V10:
    from plaid.model.accounts_get_request import AccountsGetRequest
    for bank_name, token in ACCESS_TOKENS.items():
        req = AccountsGetRequest(access_token=token)
        resp = client.accounts_get(req).to_dict()
        rows.extend(_account_row(bank_name, acct, now) for acct in resp.get("accounts", []))
else:
    for bank_name, token in ACCESS_TOKENS.items():
        resp = client.Accounts.get(access_token=token)
        rows.extend(_account_row(bank_name, acct, now) for acct in resp["accounts"])

df = pd.DataFrame(rows, columns=[
    "account_id","bank_name","card_name","official_name","name","mask","type","subtype","last_updated"
])

# Deduplicate (keep most recent). Every row in one run shares the same
# `last_updated`, so a stable sort is used: ties keep fetch order and the
# last-fetched duplicate of an account_id is the one retained.
if not df.empty:
    df = (df.sort_values("last_updated", kind="mergesort")
            .drop_duplicates(subset=["account_id"], keep="last")
            .reset_index(drop=True))

print(f"Accounts fetched: {len(df)}")
df.head(5)


  now = datetime.utcnow().isoformat()


Accounts fetched: 5


Unnamed: 0,account_id,bank_name,card_name,official_name,name,mask,type,subtype,last_updated
0,MeB44vqbEwfQ5YJEbVR8UqrD3J9VKwFge99waB,Discover,Discover it Card,Discover it Card,Discover it Card,8759,credit,credit card,2025-09-10T08:16:18.067434
1,gNvLDRDj5jt8kzgKweR5UBRg5rVrKDU6dgO1x,Petal,Petal,Petal,Petal,9307,credit,credit card,2025-09-10T08:16:18.067434
2,gN6YZzQRNzSopryjzYN1IBPAD8pzr1t8D5wwJ,Silver State Schools Credit Union,CHECKING,CHECKING,CHECKING,6212,depository,checking,2025-09-10T08:16:18.067434
3,9BMy3wr7BwSQO3KnBVRAC5b7azA8X4SgKmjjE,Silver State Schools Credit Union,CREDIT CARD,CREDIT CARD,CREDIT CARD,6212,credit,credit card,2025-09-10T08:16:18.067434
4,7PzBqm05PmTZ6kDPz0vmS7O4p1eBwRt0qxMML,Silver State Schools Credit Union,SAVINGS,SAVINGS,SAVINGS,6212,depository,savings,2025-09-10T08:16:18.067434


In [4]:
# --- Cell 4: Save accounts_dim.csv ---
# Make sure the destination folder exists, then write the table without the
# index column and report where it went.
out_dir = OUT_PATH.parent
out_dir.mkdir(parents=True, exist_ok=True)
df.to_csv(OUT_PATH, encoding="utf-8", index=False)
print(f"✅ Saved → {OUT_PATH}  rows={len(df)}")


✅ Saved → C:\Users\kosis\Downloads\Automation\spending-dashboard\config\accounts_dim.csv  rows=5
