
# 🧾 RapidAPI → MongoDB: Transactions Only

This notebook creates **transaction documents only** in MongoDB.  
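It pulls product ASINs (and prices when available) from a RapidAPI Amazon-style API, then generates **synthetic transactions** per ASIN: quantity, timestamp, payment method, and customer details are randomly generated (customer fields via Faker), and a price is synthesized whenever the API returns none.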

**Output (DB = `Assignment3`)**
- Collection: `transactions`

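> Set `RAPID_API_KEY`, `RAPID_API_HOST`, and `MONGODB_URI` as environment variables before running; the Config cell reads them with `os.getenv`. Do not commit real credentials in the notebook.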


In [None]:
%pip install -q pymongo faker tenacity requests

Note: you may need to restart the kernel to use updated packages.


In [None]:
import os, time, random, uuid, hashlib
from datetime import datetime, timedelta, timezone
from typing import List, Optional

import requests
from pymongo import MongoClient
from faker import Faker
from tenacity import retry, stop_after_attempt, wait_exponential


# --- Credentials and endpoints ----------------------------------------------
# Secrets come from the environment only: a key or connection string embedded
# here would be committed with the notebook and echoed into its saved outputs.
RAPID_API_KEY  = os.getenv("RAPID_API_KEY",  "")
RAPID_API_HOST = os.getenv("RAPID_API_HOST", "real-time-amazon-data.p.rapidapi.com")
COUNTRY        = os.getenv("COUNTRY",        "US")

MONGODB_URI = os.getenv("MONGODB_URI", "")
MONGO_DB    = os.getenv("MONGO_DB",    "Assignment3")
COLL_TXN    = os.getenv("COLL_TXN",    "transactions")

_missing_settings = [
    name
    for name, value in (("RAPID_API_KEY", RAPID_API_KEY), ("MONGODB_URI", MONGODB_URI))
    if not value
]
if _missing_settings:
    # Fail fast with an actionable message instead of a confusing auth error later.
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_settings)
        + ". Set them before starting the kernel (or with %env in an earlier cell)."
    )

# --- Run parameters -----------------------------------------------------------
KEYWORDS          = ["laptop", "smart tv", "smartphone", "headphone", "dslr"]  # search terms, one API search each
MAX_PER_KEYWORD   = 5      # products taken from each keyword's search results
TX_PER_PRODUCT    = 5      # transactions generated per ASIN
REQUEST_SLEEP_SEC = 1.5    # pause after each product (and after a failed search)


def _redact_uri(uri: str) -> str:
    """Return ``uri`` with any ``user:password@`` part masked, safe to print."""
    scheme, sep, rest = uri.partition("://")
    netloc, slash, tail = rest.partition("/")
    if sep and "@" in netloc:
        host = netloc.rpartition("@")[2]
        return f"{scheme}{sep}***:***@{host}{slash}{tail}"
    return uri


print("RAPID_API_HOST:", RAPID_API_HOST)
print("COUNTRY:", COUNTRY)
# Cell outputs are saved with the notebook, so never print the raw URI.
print("MongoDB:", _redact_uri(MONGODB_URI), "DB:", MONGO_DB, "Collection:", COLL_TXN)


client = MongoClient(MONGODB_URI)
col_txn = client[MONGO_DB][COLL_TXN]
# Compound index on (asin, txn_time), both ascending.
col_txn.create_index([("asin", 1), ("txn_time", 1)])


RAPID_API_HOST: real-time-amazon-data.p.rapidapi.com
COUNTRY: US
MongoDB: mongodb+srv://***:***@assignment3.dhfn7vh.mongodb.net/?retryWrites=true&w=majority&appName=Assignment3 DB: Assignment3 Collection: transactions


'asin_1_txn_time_1'

In [None]:
class RapidAPIError(Exception):
    """Raised when a RapidAPI response has a non-200 status or an undecodable JSON body."""

def _ensure_ok(resp: requests.Response):
    """Return the decoded JSON body of a successful RapidAPI response.

    Args:
        resp: The HTTP response object returned by ``requests``.

    Returns:
        Whatever ``resp.json()`` yields for the response body.

    Raises:
        RapidAPIError: If the status code is not 200, or the body cannot be
            decoded as JSON (the decoding error is chained as ``__cause__``).
    """
    if resp.status_code != 200:
        # Only the first 300 characters of the body go into the message so a
        # large error page does not flood the notebook output.
        raise RapidAPIError(f"HTTP {resp.status_code}: {resp.text[:300]}")
    try:
        return resp.json()
    except ValueError as exc:
        # requests reports an undecodable body with a ValueError subclass;
        # chain it so the original decoding error stays visible in tracebacks.
        raise RapidAPIError("Invalid JSON in response") from exc

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=1, max=8), reraise=True)
def rapid_get(path: str, params: dict) -> dict:
    """GET ``path`` on the configured RapidAPI host and return the JSON payload.

    The call is wrapped by tenacity: up to 3 attempts with exponential
    wait bounded to 1-8 seconds, re-raising the underlying error at the end.

    Args:
        path: Endpoint path appended to ``https://{RAPID_API_HOST}``.
        params: Query-string parameters for the request.

    Raises:
        RapidAPIError: Propagated from ``_ensure_ok`` on a bad status or body.
    """
    response = requests.get(
        f"https://{RAPID_API_HOST}{path}",
        headers={
            "x-rapidapi-key": RAPID_API_KEY,
            "x-rapidapi-host": RAPID_API_HOST,
        },
        params=params,
        timeout=30,  # seconds
    )
    return _ensure_ok(response)


# Endpoint paths; rapid_get appends them to https://{RAPID_API_HOST}.
SEARCH_PATH, OFFERS_PATH = "/search", "/product-offers"

def product_search(keyword: str, country: str, page: int = 1) -> dict:
    """Call the search endpoint for ``keyword`` and return its JSON payload."""
    query_params = {"query": keyword, "page": page, "country": country}
    return rapid_get(SEARCH_PATH, query_params)

def product_offers(asin: str, country: str, page: int = 1) -> dict:
    """Call the offers endpoint for ``asin`` and return its JSON payload."""
    query_params = {"asin": asin, "page": page, "country": country}
    return rapid_get(OFFERS_PATH, query_params)


In [None]:

def choose_lowest_price(offers_payload: dict):
    """Pick the cheapest parseable offer from an offers payload.

    Offers are looked up, in order, under ``data.offers``,
    ``data.product_offers`` and top-level ``results``. Each offer's price is
    read from ``price`` (a scalar, or a dict with ``value``/``currency``) and,
    failing that, from ``product_price``.
    NOTE(review): ``product_offers`` / ``product_price`` are assumed key names
    for the configured provider - confirm against a live response.

    Args:
        offers_payload: Decoded JSON from the offers endpoint.

    Returns:
        ``(price, currency)`` for the lowest valid price, or ``(None, "USD")``
        when no offer carries a usable price. The currency is the one attached
        to the winning offer, defaulting to ``"USD"``.
    """
    default_currency = "USD"
    if not isinstance(offers_payload, dict):
        return None, default_currency

    data = offers_payload.get("data")
    if not isinstance(data, dict):
        data = {}
    candidates = (
        data.get("offers")
        or data.get("product_offers")
        or offers_payload.get("results")
        or []
    )
    if not isinstance(candidates, list):
        candidates = []

    best_price = None
    best_currency = default_currency
    for offer in candidates:
        if not isinstance(offer, dict):
            continue  # tolerate junk entries instead of failing the whole lookup

        raw = offer.get("price")
        if raw is None:
            raw = offer.get("product_price")

        if isinstance(raw, dict):
            val = raw.get("value")
            cur = raw.get("currency")
        else:
            val = raw
            cur = None
        # Fall back to the fixed default, never to the running best's currency,
        # so one offer's currency cannot leak onto another offer's price.
        cur = cur or offer.get("currency") or default_currency

        try:
            price_f = float(str(val).replace("$", "").replace(",", "").strip())
        except (TypeError, ValueError):
            continue
        # Rejects NaN (all comparisons false), infinities and negative prices.
        if not (0 <= price_f < float("inf")):
            continue

        if best_price is None or price_f < best_price:
            best_price, best_currency = price_f, cur
    return best_price, best_currency


In [None]:

RANDOM_SEED = 101

fake = Faker()
Faker.seed(RANDOM_SEED)
# make_transaction also draws quantity, price jitter, time offset and payment
# method from the stdlib RNG, so seed it too for repeatable runs.
# (uuid4 ids and the now-based txn_time still differ between runs.)
random.seed(RANDOM_SEED)

def make_transaction(asin: str, price: Optional[float], currency: str = "USD") -> dict:
    """Build one synthetic transaction document for ``asin``.

    Args:
        asin: Product identifier stored on the transaction.
        price: Unit price; when ``None`` a price is synthesized around a base
            derived from the SHA-1 of the ASIN (50-449), jittered by +/-20%.
        currency: Currency code; an empty value falls back to ``"USD"``.

    Returns:
        A dict with ids, quantity, pricing, a UTC ``txn_time`` up to roughly
        46 days in the past, a Faker-generated customer, and a payment method.
    """
    if price is None:
        digest = hashlib.sha1(asin.encode()).hexdigest()
        anchor = 50 + int(digest, 16) % 400
        price = round(random.uniform(anchor * 0.8, anchor * 1.2), 2)

    # Quantity is weighted towards single-unit purchases.
    units = random.choice([1, 1, 1, 2, 3])
    line_total = round(price * units, 2)

    days_back = random.randint(0, 45)
    minutes_back = random.randint(0, 24 * 60)
    occurred_at = datetime.now(timezone.utc) - timedelta(days=days_back, minutes=minutes_back)

    txn_id = str(uuid.uuid4())
    buyer = {
        "id": str(uuid.uuid4()),
        "name": fake.name(),
        "email": fake.email(),
        "city": fake.city(),
        "state": fake.state_abbr(),
        "zip": fake.postcode(),
    }
    method = random.choice(["VISA", "MASTERCARD", "AMEX", "PAYPAL", "APPLE_PAY"])

    return {
        "txn_id": txn_id,
        "asin": asin,
        "quantity": units,
        "unit_price": float(price),
        "currency": currency or "USD",
        "total_amount": float(line_total),
        "txn_time": occurred_at,
        "customer": buyer,
        "payment_method": method,
        "source": "rapidapi_txn_only",
    }


In [None]:

def _insert_transactions(txns: List[dict]) -> int:
    """Insert ``txns`` into ``col_txn`` and return how many were actually stored."""
    from pymongo.errors import BulkWriteError  # local import keeps this cell self-contained

    if not txns:
        return 0
    try:
        result = col_txn.insert_many(txns, ordered=False)
        return len(result.inserted_ids)
    except BulkWriteError as exc:
        # With ordered=False every document was already attempted, so a
        # per-document retry would only hit duplicate _id errors; use the
        # count reported with the error instead.
        stored = int((exc.details or {}).get("nInserted", 0))
        print(f"[WARN] bulk insert stored {stored}/{len(txns)} txns: {str(exc)[:300]}")
        return stored
    except Exception as exc:
        # Best effort for any other failure: retry one document at a time.
        # This count is a lower bound, since documents the failed bulk attempt
        # already stored are rejected as duplicates here and not counted.
        print(f"[WARN] bulk insert failed ({exc}); retrying one document at a time")
        stored = 0
        for doc in txns:
            try:
                col_txn.insert_one(doc)
                stored += 1
            except Exception as doc_exc:
                print(f"[WARN] insert failed txn_id={doc.get('txn_id')}: {doc_exc}")
        return stored


def stream_transactions_only(keywords: List[str], country: str = COUNTRY, max_per_keyword: int = MAX_PER_KEYWORD, tx_per_product: int = TX_PER_PRODUCT):
    """Search each keyword, price each product, and store synthetic transactions.

    For every keyword the first page of search results is fetched; for up to
    ``max_per_keyword`` products with an ASIN, the lowest offer price is looked
    up (falling back to a synthesized price on failure) and ``tx_per_product``
    transactions are generated and inserted into ``col_txn``.

    Args:
        keywords: Search terms to query.
        country: Country code forwarded to the API.
        max_per_keyword: Maximum number of search results used per keyword.
        tx_per_product: Number of transactions generated per ASIN.

    Returns:
        None. Progress and the final inserted total are printed.
    """
    total_inserted = 0
    for kw in keywords:
        try:
            payload = product_search(kw, country=country, page=1)
        except Exception as e:
            print(f"[WARN] search fail '{kw}': {e}")
            time.sleep(REQUEST_SLEEP_SEC)
            continue

        # Accept either {"data": {"products": [...]}} or {"results": [...]};
        # anything else is treated as "no items" instead of crashing the run.
        if not isinstance(payload, dict):
            payload = {}
        data = payload.get("data")
        if not isinstance(data, dict):
            data = {}
        items = data.get("products") or payload.get("results") or []
        if not isinstance(items, list):
            items = []

        print(f"[INFO] keyword='{kw}' -> {len(items)} items (taking up to {max_per_keyword})")
        for it in items[:max_per_keyword]:
            asin = it.get("asin") if isinstance(it, dict) else None
            if not asin:
                continue

            # Defaults survive a failed offers lookup; make_transaction then
            # synthesizes a price for this ASIN.
            price, currency = None, "USD"
            try:
                offers = product_offers(asin, country=country, page=1)
                price, currency = choose_lowest_price(offers)
            except Exception as e:
                print(f"[INFO] offers fail ASIN={asin}: {e}")

            txns = [make_transaction(asin, price, currency) for _ in range(tx_per_product)]
            inserted = _insert_transactions(txns)
            total_inserted += inserted

            # Report what was actually stored, not what was generated.
            print(f"  - ASIN={asin} -> +{inserted} txns • price={price} {currency}")
            time.sleep(REQUEST_SLEEP_SEC)

    print(f"[DONE] total transactions inserted: {total_inserted}")


In [None]:

# Run the ingestion with the configured keywords and limits.
# This performs live API requests and writes to MongoDB.
stream_transactions_only(
    KEYWORDS,
    country=COUNTRY,
    max_per_keyword=MAX_PER_KEYWORD,
    tx_per_product=TX_PER_PRODUCT,
)


[INFO] keyword='laptop' -> 16 items (taking up to 5)
  - ASIN=B0FG4MGVJP -> +5 txns • price=None USD
  - ASIN=B0DZD9S5GC -> +5 txns • price=None USD
  - ASIN=B0F83ZXKXQ -> +5 txns • price=None USD
  - ASIN=B0FLKNZJ1H -> +5 txns • price=None USD
  - ASIN=B0947BJ67M -> +5 txns • price=None USD
[INFO] keyword='smart tv' -> 16 items (taking up to 5)
  - ASIN=B0CXG3HMX1 -> +5 txns • price=None USD
  - ASIN=B0F19KLHG3 -> +5 txns • price=None USD
  - ASIN=B0F7RZ1MS9 -> +5 txns • price=None USD


In [None]:
# Spot-check one stored document (selected fields only, without _id).
print(
    "One sample txn:",
    col_txn.find_one(
        {},
        {"_id": 0, "asin": 1, "unit_price": 1, "quantity": 1, "txn_time": 1, "customer": 1},
    ),
)
# estimated_document_count() is an estimate; use count_documents({}) for an exact figure.
print("Total count:", col_txn.estimated_document_count())