# API Settings

In [1]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment *before*
# reading the key; otherwise a key that is defined only in .env is read as None.
load_dotenv()

# API key used for the "authorization" request header; None if the variable is unset.
api_key = os.environ.get("SONAR_API_KEY")

True

In [2]:
# Chat-completions endpoint that the search request is posted to.
url = "https://api.perplexity.ai/chat/completions"

# HTTP headers for the request: JSON request/response bodies plus
# bearer-token authentication built from the previously loaded api_key.
headers = dict(
    [
        ("accept", "application/json"),
        ("authorization", f"Bearer {api_key}"),
        ("content-type", "application/json"),
    ]
)

# Payload

In [3]:
# Normal
payload = {
    "model": "sonar",
    "messages": [
        {"role": "system", "content":"""You are a helpful AI assistant.
Your task is to review all retrieved search results and synthesize them into a single, coherent answer.
Rules:
1. Base your answer only on the provided search results.
2. Merge overlapping information and resolve any contradictions using the most credible sources.
3. Present the answer in a clear, well-structured format with concise language.
4. If relevant, include key facts, dates, figures, and context to support the explanation.
5. If search results do not fully answer the question, explicitly state the missing information.
"""},
        {"role": "user", "content": "What is dark vessel detection?"}
    ],
    "stream": False,
}

# Academic
# payload = {
#     "model": "sonar",
#     "messages": [{"role": "user", "content": "What is dark vessel detection?"}],
#     "stream": False,
#     "search_mode": "academic",
#     "search_after_date_filter": "8/1/2023",
#     "web_search_options": {"search_context_size": "high"},
#     # "search_domain_filter": [
#     #     "ieee.org",
#     # ]
# }

# Search

In [4]:
import requests
import json

# Send the chat-completion request. A timeout keeps the cell from hanging
# forever on a stalled connection, and raise_for_status() surfaces HTTP errors
# (e.g. a missing/invalid API key) here instead of as a confusing
# KeyError on 'choices' further down.
http_response = requests.post(url, headers=headers, json=payload, timeout=120)
http_response.raise_for_status()

# Parsed JSON body; later cells read `response`, `citations` and `search_results`.
response = http_response.json()
print(f"Response:\n{response['choices'][0]['message']['content']}")

# The "citations" entries are passed to the crawler later as URLs; the key is
# treated as optional, so default to an empty list.
citations = response.get("citations", [])
print(f"\nTotal {len(citations)} Citations:")
for citation in citations:
    print(citation)

# Search hits returned alongside the answer (also optional in the body).
search_results = response.get("search_results", [])
print(f"\nTotal {len(search_results)} Search Results:")
for search_result in search_results:
    # ensure_ascii=False keeps non-ASCII text readable in the printed JSON.
    print(json.dumps(search_result, indent=2, ensure_ascii=False))

Response:
**Dark vessel detection** is the process of identifying ships that intentionally turn off or disable their Automatic Identification System (AIS), making them invisible to conventional maritime tracking systems. These vessels, known as **dark vessels** or **ghost ships**, evade detection to conceal illegal or covert activities such as illegal fishing, smuggling, or unauthorized maritime presence.

Detection relies heavily on satellite technologies combined with advanced data processing techniques:

- **Satellite Sensors:** Optical imagers, Synthetic Aperture Radar (SAR), and radio frequency (RF) geolocation sensors scan vast ocean areas regularly from low-Earth orbit satellites to detect vessels independent of AIS signals[1][3]. SAR is particularly important because it penetrates cloud cover and works day or night[3].

- **Data Processing:** Satellite images undergo processing using feature detection algorithms and machine learning models to identify potential vessels by spott

In [10]:
# Save the answer text as a markdown file and the search results as a JSON file.
import json

# The output directory may not exist yet (e.g. on a fresh checkout); without
# this, the open() calls below fail with FileNotFoundError.
os.makedirs("result", exist_ok=True)

# Answer text exactly as returned in the first choice of the response.
with open("result/response.md", "w", encoding="utf-8") as f:
    f.write(response['choices'][0]['message']['content'])

# ensure_ascii=False keeps non-ASCII characters human-readable in the file.
with open("result/search_results.json", "w", encoding="utf-8") as f:
    json.dump(search_results, f, ensure_ascii=False, indent=2)

# Crawling

In [6]:
import shutil

# Start from an empty output directory so files from a previous crawl do not
# linger. On a first run the directory does not exist yet, which is fine.
try:
    shutil.rmtree("result/md_docs")
except FileNotFoundError:
    pass  # nothing to clear
os.makedirs("result/md_docs", exist_ok=True)

In [7]:
import asyncio
import nest_asyncio
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, CacheMode

# NOTE(review): presumably required so that the asyncio.run() call at the end of
# this cell works inside the notebook's already-running event loop — confirm.
nest_asyncio.apply()

async def quick_parallel_example(urls):
    """Crawl ``urls`` with one crawler and save each successful page as markdown.

    Results are collected all at once (``stream=False``) rather than streamed
    as they complete. Every successfully crawled page is written to
    ``result/md_docs/citation_<n>.md``, where ``n`` starts at 1 and counts
    successful pages only; failed pages are reported on stdout and skipped.

    Args:
        urls: URLs to crawl (the notebook passes the ``citations`` list).

    Returns:
        None. The effects are the files written and the progress lines printed.
    """
    # Bypass the crawler cache so pages are fetched fresh. stream=False is the
    # batch mode; with stream=True the results would instead be consumed via
    # `async for result in await crawler.arun_many(...)`.
    run_conf = CrawlerRunConfig(
        cache_mode=CacheMode.BYPASS,
        stream=False,
    )

    # Make the function usable even when the output directory was not prepared.
    os.makedirs("result/md_docs", exist_ok=True)

    async with AsyncWebCrawler() as crawler:
        results = await crawler.arun_many(urls, config=run_conf)

        index = 1
        for res in results:
            if res.success:
                print(f"[OK] {res.url}, length: {len(res.markdown.raw_markdown)}")
                # Explicit UTF-8: crawled pages routinely contain non-ASCII
                # text, and the platform default encoding may not handle it.
                with open(f"result/md_docs/citation_{index}.md", "w", encoding="utf-8") as f:
                    f.write(res.markdown.raw_markdown)
                index += 1
            else:
                print(f"[ERROR] {res.url} => {res.error_message}")

# Crawl every citation URL collected by the search cell and save the pages
# as markdown files.
asyncio.run(quick_parallel_example(citations))

[OK] https://www.esa.int/Enabling_Support/Space_Engineering_Technology/Shaping_the_Future/Machine-learning_system_can_detect_dark_vessels_faster, length: 9627
[OK] https://globalfishingwatch.org/research-project-dark-vessels/, length: 22751
[OK] https://insights.blurgs.ai/dark-vessel-detection-ai-maritime-security/, length: 10908
[OK] https://mda.space/dark-vessel, length: 3654
[OK] https://www.starboardintelligence.com/articles/satellite-dark-vessel-detection-for-maritime-domain-awareness, length: 33367
