In [None]:
import os
import requests
import pandas as pd
import dotenv

# Load variables from a .env file (when one is present) before reading the key.
dotenv.load_dotenv()

# ─── CONFIG ─────────────────────────────────────────────────────────────────────
# The subscription token must come from the environment; fail early without it.
API_KEY = os.environ.get("BRAVE_SEARCH_API_KEY")
if API_KEY is None or API_KEY == "":
    raise ValueError("Please set BRAVE_SEARCH_API_KEY as an environment variable.")

# What to search for, and where the flattened results are written.
QUERY = "Predicting Customer Lifetime Value"
OUTPUT_CSV = "clv_search_results.csv"

# Endpoint plus the headers sent with every request.
BASE_URL = "https://api.search.brave.com/res/v1/web/search"
HEADERS = {"Accept": "application/json", "Accept-Encoding": "gzip"}
HEADERS["X-Subscription-Token"] = API_KEY

# ─── 1) SEND ONE REQUEST ─────────────────────────────────────────────────────────
params = {
    "q": QUERY,
    # you can also specify `count` (max 20 for web) and `offset` here if you want pagination:
    # "count": 20,
    # "offset": 0,
    # "result_filter": "web,videos"
}

# `requests` waits indefinitely by default; an explicit timeout (seconds) makes a
# stalled connection raise instead of hanging the kernel.
response = requests.get(BASE_URL, headers=HEADERS, params=params, timeout=30)
# Turn any 4xx/5xx answer into an exception before the body is parsed.
response.raise_for_status()
# Parsed JSON payload; the later sections read its "videos" and "web" entries.
result = response.json()


# ─── 2) EXTRACT "videos" RESULTS ────────────────────────────────────────────────
# Look the list up exactly once and iterate over that same object, so a "videos"
# section that is null, not a mapping, or has no "results" key simply yields no
# rows instead of raising KeyError/AttributeError.
video_section = result.get("videos")
video_entries = video_section.get("results") if isinstance(video_section, dict) else None
if not isinstance(video_entries, list):
    video_entries = []

video_rows = []
for entry in video_entries:
    # Brave documents `thumbnail` as a nested object whose `src` is the image URL.
    # Keep only that URL so the CSV cell is not a Python dict repr; any non-dict
    # value (string, None) is passed through unchanged.
    thumbnail = entry.get("thumbnail")
    if isinstance(thumbnail, dict):
        thumbnail = thumbnail.get("src")

    video_rows.append({
        "type": "video",
        # you can pick whichever fields you care about; these are examples:
        "title": entry.get("title"),
        "url": entry.get("url"),
        "description": entry.get("description"),
        "thumbnail": thumbnail,
        # if you want more fields in the CSV, just add them here:
        # e.g. entry.get("video", {}).get("duration"), etc.
    })


# ─── 3) EXTRACT "web" RESULTS ───────────────────────────────────────────────────
# Same single-lookup pattern as any other section: a "web" entry that is null,
# not a mapping, or lacks a "results" list produces no rows rather than an error.
web_section = result.get("web")
web_entries = web_section.get("results") if isinstance(web_section, dict) else None
if not isinstance(web_entries, list):
    web_entries = []

web_rows = []
for entry in web_entries:
    web_rows.append({
        "type": "web",
        "title": entry.get("title"),
        "url": entry.get("url"),
        "description": entry.get("description"),
        # you can pull in any of the other keys, for example:
        "language": entry.get("language"),
        "page_age": entry.get("page_age"),
    })


# ─── 4) COMBINE + SAVE TO CSV ───────────────────────────────────────────────────
# Videos first, then web hits; columns missing from one kind are left empty.
all_rows = video_rows + web_rows
df = pd.DataFrame(all_rows)

# Re-order columns so "type" is first. `reindex` is used instead of `df[cols]`
# because a search with no rows gives a frame with no columns at all, and
# selecting "type" from it would raise KeyError; `reindex` creates the column,
# so an empty search still writes a header-only CSV.
cols = ["type"] + [c for c in df.columns if c != "type"]
df = df.reindex(columns=cols)

df.to_csv(OUTPUT_CSV, index=False)
print(f"Saved {len(df)} rows to {OUTPUT_CSV!r}")


In [2]:
import os
import time
import requests
import pandas as pd
import dotenv
# Load variables from a .env file (when one is present) before reading the key.
dotenv.load_dotenv()

# ─── CONFIG ─────────────────────────────────────────────────────────────────────
API_KEY = os.getenv("BRAVE_SEARCH_API_KEY")
if not API_KEY:
    raise ValueError("Please set BRAVE_SEARCH_API_KEY as an environment variable.")

QUERY = "Predicting Customer Lifetime Value"
# Brave's web-search endpoint accepts at most 20 results per request (`count`)
# and at most 10 pages (`offset` 0-9), so one query can yield 200 results at most.
RESULTS_PER_PAGE = 20     # max allowed by this endpoint
TOTAL_RESULTS = 200       # total desired results (endpoint ceiling: 20 x 10 pages)
OUTPUT_CSV = "clv_search_results.csv"

BASE_URL = "https://api.search.brave.com/res/v1/web/search"
HEADERS = {
    "Accept": "application/json",
    "Accept-Encoding": "gzip",
    "X-Subscription-Token": API_KEY
}

In [None]:
# Minimal probe request: only the query string, everything else left unset.
params = {
    "q": QUERY
}

# `requests` waits indefinitely by default; an explicit timeout (seconds) makes a
# stalled connection raise instead of hanging the kernel.
response = requests.get(BASE_URL, headers=HEADERS, params=params, timeout=30)
# Bare expression so the notebook shows the response status as the cell output.
response


<Response [200]>

In [5]:
# Decode the JSON body of the response fetched above; the following cells
# inspect this object's keys and build CSV rows from it.
result = response.json()

In [23]:
# Top-level sections seen in the response: query, mixed, type, videos, web
list(result)

['query', 'mixed', 'type', 'videos', 'web']

In [26]:
# Keys carried by a "videos" result (taken from the first entry):
#   type, url, title, description, video, meta_url, thumbnail
list(result["videos"]["results"][0])

['type', 'url', 'title', 'description', 'video', 'meta_url', 'thumbnail']

In [27]:
# Keys carried by a "web" result (taken from the first entry):
#   title, url, is_source_local, is_source_both, description, page_age,
#   profile, language, family_friendly, type, subtype, is_live, meta_url,
#   thumbnail, age, extra_snippets
list(result["web"]["results"][0])


['title',
 'url',
 'is_source_local',
 'is_source_both',
 'description',
 'page_age',
 'profile',
 'language',
 'family_friendly',
 'type',
 'subtype',
 'is_live',
 'meta_url',
 'thumbnail',
 'age',
 'extra_snippets']

In [28]:


def _section_results(payload, section):
    """Return the list stored at ``payload[section]["results"]``.

    An empty list is returned when the section is absent, is not a mapping,
    or its "results" value is missing or not a list, so callers can iterate
    unconditionally without risking KeyError/AttributeError.
    """
    block = payload.get(section)
    if not isinstance(block, dict):
        return []
    entries = block.get("results")
    return entries if isinstance(entries, list) else []


def _thumbnail_url(thumbnail):
    """Reduce a thumbnail value to something that fits a single CSV cell.

    Brave documents ``thumbnail`` as a nested object whose ``src`` is the image
    URL; for a dict that URL is returned. Any other value (string, None) is
    returned unchanged.
    """
    if isinstance(thumbnail, dict):
        return thumbnail.get("src")
    return thumbnail


# ─── 2) EXTRACT "videos" RESULTS ────────────────────────────────────────────────
video_rows = [
    {
        "type": "video",
        # you can pick whichever fields you care about; these are examples:
        "title": entry.get("title"),
        "url": entry.get("url"),
        "description": entry.get("description"),
        "thumbnail": _thumbnail_url(entry.get("thumbnail")),
        # if you want more fields in the CSV, just add them here:
        # e.g. entry.get("video", {}).get("duration"), etc.
    }
    for entry in _section_results(result, "videos")
]


# ─── 3) EXTRACT "web" RESULTS ───────────────────────────────────────────────────
web_rows = [
    {
        "type": "web",
        "title": entry.get("title"),
        "url": entry.get("url"),
        "description": entry.get("description"),
        # you can pull in any of the other keys, for example:
        "language": entry.get("language"),
        "page_age": entry.get("page_age"),
    }
    for entry in _section_results(result, "web")
]


# ─── 4) COMBINE + SAVE TO CSV ───────────────────────────────────────────────────
all_rows = video_rows + web_rows
df = pd.DataFrame(all_rows)

# Re-order columns so "type" is first. `reindex` is used instead of `df[cols]`
# because a search with no rows gives a frame with no columns at all, and
# selecting "type" from it would raise KeyError; `reindex` creates the column,
# so an empty search still writes a header-only CSV.
cols = ["type"] + [c for c in df.columns if c != "type"]
df = df.reindex(columns=cols)

df.to_csv(OUTPUT_CSV, index=False)
print(f"Saved {len(df)} rows to {OUTPUT_CSV!r}")


Saved 25 rows to 'clv_search_results.csv'


In [None]:

# ─── FETCH RESULTS ──────────────────────────────────────────────────────────────
# Limits of Brave's web-search endpoint: `count` may not exceed 20, and `offset`
# is a zero-based *page* index (number of pages of `count` results to skip) that
# may not exceed 9. Sending a result index such as 50 or 100 as `offset`, or a
# `count` above 20, is rejected by the API.
MAX_COUNT_PER_REQUEST = 20
MAX_PAGE_OFFSET = 9
REQUEST_TIMEOUT_S = 30  # `requests` otherwise waits forever on a stalled connection

# Clamp the configured page size so this cell works whatever RESULTS_PER_PAGE says.
page_size = max(1, min(RESULTS_PER_PAGE, MAX_COUNT_PER_REQUEST))

all_hits = []
offset = 0  # page index sent to the API, not a result index

# Keep requesting pages until enough results have been asked for.
while offset * page_size < TOTAL_RESULTS:
    if offset > MAX_PAGE_OFFSET:
        print(f"Reached the API's last page (offset {MAX_PAGE_OFFSET}); stopping.")
        break

    params = {
        "q": QUERY,
        "count": page_size,
        "offset": offset
    }

    response = requests.get(
        BASE_URL, headers=HEADERS, params=params, timeout=REQUEST_TIMEOUT_S
    )
    if response.status_code != 200:
        raise RuntimeError(f"Brave API error {response.status_code}: {response.text}")

    data = response.json()
    # `or` guards cover a "web" section (or its "results") that is present but null.
    results = (data.get("web") or {}).get("results") or []

    if not results:
        print(f"No more results at offset {offset}; stopping.")
        break

    for res in results:
        # Web results observed from this API carry no "source"/"published" keys,
        # so fall back to the fields they do have: the host name inside `meta_url`
        # (per Brave's documented MetaUrl object) and `page_age`.
        meta_url = res.get("meta_url")
        hostname = meta_url.get("hostname") if isinstance(meta_url, dict) else None
        all_hits.append({
            "title": res.get("title"),
            "url": res.get("url"),
            "description": res.get("description"),
            "source": res.get("source") or hostname,
            "published": res.get("published") or res.get("page_age"),
            "type": res.get("type")
        })

    offset += 1
    time.sleep(1.0)  # Respect Brave API rate limits (1 req/sec on free plan)

print(f"Fetched {len(all_hits)} results total.")

# ─── SAVE TO CSV ────────────────────────────────────────────────────────────────
# One row per collected hit; the index is left out of the file.
df = pd.DataFrame(data=all_hits)
df.to_csv(path_or_buf=OUTPUT_CSV, index=False)
print("Saved to {}".format(OUTPUT_CSV))
