## Connection to API and data extraction

In [41]:
import os
import calendar, requests
from datetime import datetime
from pathlib import Path
from dotenv import load_dotenv
from pathlib import Path
import json
import pandas as pd

In [36]:
#Load config
env = Path.cwd().parent/"config"/"config.env"
load_dotenv(env, override=True)
API_KEY  = os.getenv("POS_API_KEY")
BASE_URL = os.getenv("BASE_URL")
headers  = {"Authorization": f"Bearer {API_KEY}"}

In [None]:
#Bounds
#Formating date for requests 

now = datetime.utcnow() #UTC

# first instant of month
first = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
# last instant of month
last_day = calendar.monthrange(now.year, now.month)[1]
last     = now.replace(day=last_day, hour=23, minute=59, second=59, microsecond=0)

#Include '.sss' milliseconds
created_min = first.isoformat(timespec="milliseconds") + "Z"
created_max = last.isoformat(timespec="milliseconds")  + "Z"

print(created_min, created_max)

2025-07-01T00:00:00.000Z 2025-07-31T23:59:59.000Z


In [39]:
#Request for the time bound receipts
r = requests.get(f"{BASE_URL}/receipts?",
                 headers=headers,
                 params={"created_at_min": created_min, "created_at_max": created_max})
print("Status:", r.status_code)
print("Receipts status:", r.status_code, r.headers.get("Content-Type"))
receipts = r.json().get("receipts", [])
print("→ Got receipts:", len(receipts))

#Request for items
i = requests.get(f"{BASE_URL}/items", headers=headers)
print("Items status:", i.status_code, i.headers.get("Content-Type"))
items = i.json().get("items", [])
print("→ Got items:", len(items))


Status: 200
Receipts status: 200 application/json
→ Got receipts: 50
Items status: 200 application/json
→ Got items: 23


#### Save the data

In [None]:
# 1️⃣ Add data folders path
repo_root = Path.cwd().parent
raw_dir   = repo_root / "data" / "raw"
cur_dir   = repo_root / "data" / "curated"

# 2️⃣ Create them if they don’t exist
raw_dir.mkdir(parents=True, exist_ok=True)
cur_dir.mkdir(parents=True, exist_ok=True)

# 3️⃣ Save raw receipts JSON
receipts_path = raw_dir / f"receipts_{created_min[:7]}.json"
with receipts_path.open("w", encoding="utf-8") as f:
    json.dump(receipts, f, ensure_ascii=False, indent=2)
print(f"→ Wrote {len(receipts)} raw receipts to {receipts_path}")

# 4️⃣ Save raw items JSON (optional)
items_path = raw_dir / f"items_{created_min[:7]}.json"
with items_path.open("w", encoding="utf-8") as f:
    json.dump(items, f, ensure_ascii=False, indent=2)
print(f"→ Wrote {len(items)} items to {items_path}")


→ Wrote 50 raw receipts to c:\Users\abner\Desktop\POS_API_Pipeline\data\raw\receipts_2025-07.json
→ Wrote 23 items to c:\Users\abner\Desktop\POS_API_Pipeline\data\raw\items.json
