# Scientific validation of policy proposals
---
Experimenting with web search APIs for scientific validation of policy proposals.

## Setup

### Import libraries

In [None]:
import os
from typing import Any, Dict
import json
import backoff
from dotenv import load_dotenv
from IPython.display import Markdown, display
from tqdm.auto import tqdm
from pydantic import BaseModel, Field, ValidationError
from openai import OpenAI
import requests

In [None]:
from polids.config import settings

### Set parameters

In [None]:
# System prompt sent with every request (OpenAI and Perplexity cells alike).
# It instructs the model to search the web, weigh source credibility and
# consensus, and return a validation outcome plus a justification.
system_prompt = """Search the web for credible scientific context related to a given policy proposal, and determine its scientific validation.

Identify and evaluate sources such as highly cited academic papers, randomized controlled trials (RCTs), and reputable news outlets with scientific grounding. Based on this research, provide a validation outcome and a justification.

# Steps

1. **Identify Keywords**: Extract main concepts and terms from the policy proposal to guide your search.
2. **Conduct Web Search**: Use the identified keywords to search for scientific literature, credible reports, and analyses related to the policy.
3. **Evaluate Sources**: Prioritize sources based on credibility, relevance, and citation count. Look for consensus among multiple credible sources to enhance reliability.
4. **Synthesize Information**: Summarize the findings clearly indicating whether the scientific evidence supports or refutes the proposal.
5. **Conclude Validation**: Determine if the policy is scientifically validated based on gathered evidence.
6. **Provide Reasoning**: Articulate the reasoning based on the findings, citing key evidence.

# Notes

- Regardless of the original language of the proposal, the search should be conducted in English and the results should be presented in English.
- Ensure the evaluation is grounded in current and credible scientific data.
- Consider the strength and consensus of evidence rather than anecdotal or single-study claims.
- If evidence is mixed, provide a balanced view in the reasoning string."""

In [None]:
# Maximum number of attempts the backoff decorator on `search_on_perplexity`
# makes when the model output fails schema validation.
max_retries = 5

In [None]:
# Load variables from a .env file into the process environment before reading the key.
load_dotenv()
# `os.getenv` returns None when PERPLEXITY_API_KEY is unset; this value is the
# default API key of `search_on_perplexity`.
perplexity_api_key = os.getenv("PERPLEXITY_API_KEY")

In [None]:
# Domain allow-list passed to Perplexity as `search_domain_filter` (see
# `search_on_perplexity`), so that searches only use these sources.
# NOTE(review): the API may cap how many domains a filter can contain — confirm
# against the Perplexity docs before extending this list.
allowed_sources = [
    # --- General Knowledge ---
    # Sources offering broad, encyclopedic information.
    "wikipedia.org",  # Crowd-sourced general knowledge encyclopedia
    # --- Data Aggregators ---
    # Platforms specializing in collecting, analyzing, and visualizing data on various topics.
    "ourworldindata.org",  # Accessible global data visualization & analysis
    # --- International Organizations ---
    # Official websites of major international bodies providing data, reports, and policy guidelines.
    "oecd.org",  # Organisation for Economic Co-operation and Development data & reports
    "un.org",  # United Nations reports & policy guidelines (global issues)
    "worldbank.org",  # World Bank global economic & development data
    # --- Research & Policy Analysis Institutes ---
    # Organizations focused on specific research areas, often influencing policy.
    "nber.org",  # National Bureau of Economic Research (influential economics)
    # --- Research Aggregators & Databases ---
    # Platforms providing access to collections of academic research papers.
    "core.ac.uk",  # Aggregator for open access research papers (multidisciplinary)
    "ncbi.nlm.nih.gov",  # National Center for Biotechnology Information (biomedical literature)
    "arxiv.org",  # Open access preprints (physics, math, CS, quantitative biology, etc.)
    "sci-hub.box",  # Tool for accessing paywalled scientific papers (legality varies)
]

## Load policies to validate
We're going to start from manually defined policies, so as to avoid dependencies on previous steps of the pipeline.

In [None]:
# Hand-written test policies, keyed by a short name. Every evaluation cell below
# iterates over this dict and stores its results under the same keys.
policies_to_validate = {
    "carbon_tax": "Implementing a carbon tax to reduce greenhouse gas emissions.",
    "vaccines": "Mandatory vaccination for all school-aged children to prevent outbreaks of infectious diseases.",
    "ubi": "Implementing universal basic income to address income inequality and support job displacement due to automation.",
    "immigration_jobs": "Reducing immigration quotas to improve job opportunities for native citizens.",
    "immigration_crime": "Blocking immigration from countries with different cultural backgrounds to reduce crime rates.",
}

## Define the output schema

In [None]:
# Structured verdict that the search models must return for a single policy.
# It is passed as `response_format` to the OpenAI client and, through
# `model_json_schema()`, as the JSON schema of the Perplexity request.
# NOTE(review): documented with `#` comments on purpose — a class docstring would
# presumably be emitted as the schema's description and change what is sent to
# the APIs; confirm before adding one.
class ScientificValidation(BaseModel):
    # True when the consulted sources back the policy proposal.
    is_policy_supported_by_sources: bool = Field(
        description="Indicates whether the policy proposal has scientific backing or not."
    )
    # True when the verdict rests on agreement between several reliable sources.
    is_validation_consensual_and_reliable: bool = Field(
        description="Indicates whether the validation is based on a consensus of multiple reliable sources."
    )
    # Free-text justification; rendered as Markdown in the display cells.
    reasoning: str = Field(
        description="A detailed explanation of the validation outcome, including key evidence and sources."
    )

## Test different search APIs

### OpenAI

In [None]:
# OpenAI client shared by the GPT-4o search cells below; the API key comes from
# the project settings (`polids.config.settings`).
client = OpenAI(api_key=settings.openai_api_key)

#### GPT 4o mini

##### Low search context

In [None]:
# Run every test policy through GPT-4o mini search (low search context), keeping
# the parsed verdict and the response annotations keyed by policy name.
policy_validation_results = dict.fromkeys(policies_to_validate)
citations = dict.fromkeys(policies_to_validate)
for example_name, example_policy in tqdm(policies_to_validate.items()):
    completion = client.beta.chat.completions.parse(
        model="gpt-4o-mini-search-preview",
        web_search_options={"search_context_size": "low"},
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": example_policy},
        ],
        # The pydantic model doubles as the structured-output schema.
        response_format=ScientificValidation,
    )
    policy_validation_results[example_name] = completion.choices[0].message.parsed
    # Stop as soon as a reply was not parsed into the expected schema.
    assert isinstance(
        policy_validation_results[example_name], ScientificValidation
    ), "Output does not match the expected schema."
    citations[example_name] = completion.choices[0].message.annotations
# Last expression of the cell: show the collected results.
policy_validation_results

In [None]:
citations

In [None]:
# Example of the reasoning, in an easier to read format
display(Markdown(policy_validation_results[example_name].reasoning))

GPT-4o mini with low search context often doesn't seem to use any search results at all. When it does, I'm only seeing two citations, which is not enough to validate a policy.

##### Medium search context

In [None]:
# Run every test policy through GPT-4o mini search (medium search context),
# keeping the parsed verdict and the response annotations keyed by policy name.
policy_validation_results = dict.fromkeys(policies_to_validate)
citations = dict.fromkeys(policies_to_validate)
for example_name, example_policy in tqdm(policies_to_validate.items()):
    completion = client.beta.chat.completions.parse(
        model="gpt-4o-mini-search-preview",
        web_search_options={"search_context_size": "medium"},
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": example_policy},
        ],
        # The pydantic model doubles as the structured-output schema.
        response_format=ScientificValidation,
    )
    policy_validation_results[example_name] = completion.choices[0].message.parsed
    # Stop as soon as a reply was not parsed into the expected schema.
    assert isinstance(
        policy_validation_results[example_name], ScientificValidation
    ), "Output does not match the expected schema."
    citations[example_name] = completion.choices[0].message.annotations
# Last expression of the cell: show the collected results.
policy_validation_results

In [None]:
citations

In [None]:
# Example of the reasoning, in an easier to read format
display(Markdown(policy_validation_results[example_name].reasoning))

Medium search context returns more sources for some of the samples, but others still come back without any citations. This is not enough to validate a policy.

##### High search context

In [None]:
# Run every test policy through GPT-4o mini search (high search context),
# keeping the parsed verdict and the response annotations keyed by policy name.
policy_validation_results = dict.fromkeys(policies_to_validate)
citations = dict.fromkeys(policies_to_validate)
for example_name, example_policy in tqdm(policies_to_validate.items()):
    completion = client.beta.chat.completions.parse(
        model="gpt-4o-mini-search-preview",
        web_search_options={"search_context_size": "high"},
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": example_policy},
        ],
        # The pydantic model doubles as the structured-output schema.
        response_format=ScientificValidation,
    )
    policy_validation_results[example_name] = completion.choices[0].message.parsed
    # Stop as soon as a reply was not parsed into the expected schema.
    assert isinstance(
        policy_validation_results[example_name], ScientificValidation
    ), "Output does not match the expected schema."
    citations[example_name] = completion.choices[0].message.annotations
# Last expression of the cell: show the collected results.
policy_validation_results

In [None]:
citations

In [None]:
# Example of the reasoning, in an easier to read format
display(Markdown(policy_validation_results[example_name].reasoning))

Still some samples without citations, even on high search context 👎🏻

#### GPT 4o (larger)

##### Low search context

In [None]:
# Run every test policy through the larger GPT-4o search model (low search
# context), keeping the parsed verdict and the annotations keyed by policy name.
policy_validation_results = dict.fromkeys(policies_to_validate)
citations = dict.fromkeys(policies_to_validate)
for example_name, example_policy in tqdm(policies_to_validate.items()):
    completion = client.beta.chat.completions.parse(
        model="gpt-4o-search-preview",
        web_search_options={"search_context_size": "low"},
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": example_policy},
        ],
        # The pydantic model doubles as the structured-output schema.
        response_format=ScientificValidation,
    )
    policy_validation_results[example_name] = completion.choices[0].message.parsed
    # Stop as soon as a reply was not parsed into the expected schema.
    assert isinstance(
        policy_validation_results[example_name], ScientificValidation
    ), "Output does not match the expected schema."
    citations[example_name] = completion.choices[0].message.annotations
# Last expression of the cell: show the collected results.
policy_validation_results

In [None]:
citations

In [None]:
# Example of the reasoning, in an easier to read format
display(Markdown(policy_validation_results[example_name].reasoning))

##### Medium search context

In [None]:
# Run every test policy through the larger GPT-4o search model (medium search
# context), keeping the parsed verdict and the annotations keyed by policy name.
policy_validation_results = dict.fromkeys(policies_to_validate)
citations = dict.fromkeys(policies_to_validate)
for example_name, example_policy in tqdm(policies_to_validate.items()):
    completion = client.beta.chat.completions.parse(
        model="gpt-4o-search-preview",
        web_search_options={"search_context_size": "medium"},
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": example_policy},
        ],
        # The pydantic model doubles as the structured-output schema.
        response_format=ScientificValidation,
    )
    policy_validation_results[example_name] = completion.choices[0].message.parsed
    # Stop as soon as a reply was not parsed into the expected schema.
    assert isinstance(
        policy_validation_results[example_name], ScientificValidation
    ), "Output does not match the expected schema."
    citations[example_name] = completion.choices[0].message.annotations
# Last expression of the cell: show the collected results.
policy_validation_results

In [None]:
citations

In [None]:
# Example of the reasoning, in an easier to read format
display(Markdown(policy_validation_results[example_name].reasoning))

##### High search context

In [None]:
# Run every test policy through the larger GPT-4o search model (high search
# context), keeping the parsed verdict and the annotations keyed by policy name.
policy_validation_results = dict.fromkeys(policies_to_validate)
citations = dict.fromkeys(policies_to_validate)
for example_name, example_policy in tqdm(policies_to_validate.items()):
    completion = client.beta.chat.completions.parse(
        model="gpt-4o-search-preview",
        web_search_options={"search_context_size": "high"},
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": example_policy},
        ],
        # The pydantic model doubles as the structured-output schema.
        response_format=ScientificValidation,
    )
    policy_validation_results[example_name] = completion.choices[0].message.parsed
    # Stop as soon as a reply was not parsed into the expected schema.
    assert isinstance(
        policy_validation_results[example_name], ScientificValidation
    ), "Output does not match the expected schema."
    citations[example_name] = completion.choices[0].message.annotations
# Last expression of the cell: show the collected results.
policy_validation_results

In [None]:
citations

In [None]:
# Example of the reasoning, in an easier to read format
display(Markdown(policy_validation_results[example_name].reasoning))

Still some samples without citations, even on high search context and with the larger GPT 4o 👎🏻

### Perplexity

In [None]:
def extract_valid_json(response: Dict[str, Any]) -> Dict[str, Any]:
    """
    Extract and parse the JSON payload embedded in a chat-completion response.

    The payload is read from the 'content' field of the first choice's message.
    When the content contains a closing "</think>" marker, only the text after
    the last marker is considered. Surrounding markdown code fences
    (e.g. ```json) are stripped whether or not a marker is present.

    Parameters:
        response (dict): The full API response object.

    Returns:
        dict: The parsed JSON object extracted from the content.

    Raises:
        ValueError: If no valid JSON can be parsed from the content, including
            when the response has no choices or no message content.
    """
    # The `or` fallbacks cover an empty `choices` list as well as `message` /
    # `content` being present but null, so those cases end in the documented
    # ValueError instead of an IndexError / AttributeError.
    choices = response.get("choices") or [{}]
    message = choices[0].get("message") or {}
    content = message.get("content") or ""

    # Reasoning models emit their chain of thought first; keep only what
    # follows the last closing tag.
    marker = "</think>"
    idx = content.rfind(marker)
    has_marker = idx != -1
    json_str = content[idx + len(marker) :] if has_marker else content
    json_str = json_str.strip()

    # Remove markdown code fence markers if present.
    if json_str.startswith("```json"):
        json_str = json_str[len("```json") :].strip()
    if json_str.startswith("```"):
        json_str = json_str[3:].strip()
    if json_str.endswith("```"):
        json_str = json_str[:-3].strip()

    try:
        return json.loads(json_str)
    except json.JSONDecodeError as e:
        if has_marker:
            raise ValueError("Failed to parse valid JSON from response content") from e
        raise ValueError(
            "No </think> marker found and content is not valid JSON"
        ) from e

In [None]:
@backoff.on_exception(
    backoff.expo,
    ValidationError,
    max_tries=max_retries,
    max_time=60,
)
def search_on_perplexity(
    policy: str,
    model_name: str,
    search_context_size: str = None,
    system_prompt: str = system_prompt,
    perplexity_api_key: str = perplexity_api_key,
    allowed_sources: list = None,
    request_timeout: float = None,
) -> tuple[ScientificValidation, list]:
    """
    Search for scientific validation of a policy proposal using Perplexity AI.

    The call is retried with exponential backoff whenever the model output
    fails validation against the `ScientificValidation` schema.

    Args:
        policy (str): The policy proposal to validate.
        model_name (str): The model name to use for the search.
        search_context_size (str, optional): The size of the search context (low, medium, high).
        system_prompt (str): The system prompt for the model.
        perplexity_api_key (str): The API key for Perplexity AI.
        allowed_sources (list, optional): A list of allowed sources for the search.
        request_timeout (float, optional): Timeout in seconds for the HTTP request.
            Defaults to None, i.e. wait indefinitely.

    Returns:
        tuple[ScientificValidation, list]: A tuple containing the validation result and citations.

    Raises:
        ValueError: If `search_context_size` is not one of low/medium/high, if no
            API key is available, or if a reasoning-style answer contains no
            parsable JSON.
        requests.HTTPError: If the API answers with an error status code.
        ValidationError: If the output still does not match the schema after
            all retries.
    """
    # Validate arguments with real exceptions: `assert` is stripped under
    # `python -O`, and these errors are not retried by the backoff decorator.
    if search_context_size and search_context_size not in ("low", "medium", "high"):
        raise ValueError(
            f"Invalid search context size: {search_context_size}. "
            "Must be one of: low, medium, high."
        )
    if not perplexity_api_key:
        # Otherwise the request would be sent with "Bearer None".
        raise ValueError("A Perplexity API key is required (set PERPLEXITY_API_KEY).")

    request_payload = {
        "model": model_name,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": policy},
        ],
        "response_format": {
            "type": "json_schema",
            "json_schema": {"schema": ScientificValidation.model_json_schema()},
        },
    }

    if allowed_sources:
        # Only search on the allowed sources
        request_payload["search_domain_filter"] = allowed_sources

    if search_context_size:
        # Define how many sources to use for the search
        request_payload["web_search_options"] = {
            "search_context_size": search_context_size
        }

    http_response = requests.post(
        "https://api.perplexity.ai/chat/completions",
        headers={"Authorization": f"Bearer {perplexity_api_key}"},
        json=request_payload,
        timeout=request_timeout,
    )
    # Surface API errors (bad key, rate limit, invalid model, ...) as an HTTP
    # error instead of an opaque KeyError on the missing "choices" field.
    http_response.raise_for_status()
    response = http_response.json()

    citations = response.get("citations", [])
    response_content = response["choices"][0]["message"]["content"]

    if ("reasoning" in model_name) or ("<think>" in response_content):
        # Reasoning-style output: the JSON follows the "</think>" section.
        json_content = extract_valid_json(response)
        # Parse the JSON content into the ScientificValidation model
        parsed_content = ScientificValidation.model_validate(json_content)
    else:
        # Parse the string content into the ScientificValidation model
        parsed_content = ScientificValidation.model_validate_json(response_content)

    return parsed_content, citations

#### Sonar

##### Low search context

In [None]:
# Validate every test policy with Perplexity's "sonar" model at low search
# context, restricted to the allow-listed domains.
policy_validation_results = dict.fromkeys(policies_to_validate)
citations = dict.fromkeys(policies_to_validate)
for example_name, example_policy in tqdm(policies_to_validate.items()):
    # `search_on_perplexity` returns a (validation, citations) pair.
    (
        policy_validation_results[example_name],
        citations[example_name],
    ) = search_on_perplexity(
        policy=example_policy,
        model_name="sonar",
        search_context_size="low",
        system_prompt=system_prompt,
        perplexity_api_key=perplexity_api_key,
        allowed_sources=allowed_sources,
    )
# Last expression of the cell: show the collected results.
policy_validation_results

In [None]:
citations

In [None]:
# Example of the reasoning, in an easier to read format
display(Markdown(policy_validation_results[example_name].reasoning))

Wow, this already worked very well, even in the cheapest setting!

##### Medium search context

In [None]:
# Validate every test policy with Perplexity's "sonar" model at medium search
# context, restricted to the allow-listed domains.
policy_validation_results = dict.fromkeys(policies_to_validate)
citations = dict.fromkeys(policies_to_validate)
for example_name, example_policy in tqdm(policies_to_validate.items()):
    # `search_on_perplexity` returns a (validation, citations) pair.
    (
        policy_validation_results[example_name],
        citations[example_name],
    ) = search_on_perplexity(
        policy=example_policy,
        model_name="sonar",
        search_context_size="medium",
        system_prompt=system_prompt,
        perplexity_api_key=perplexity_api_key,
        allowed_sources=allowed_sources,
    )
# Last expression of the cell: show the collected results.
policy_validation_results

In [None]:
citations

In [None]:
# Example of the reasoning, in an easier to read format
display(Markdown(policy_validation_results[example_name].reasoning))

##### High search context

In [None]:
# Validate every test policy with Perplexity's "sonar" model at high search
# context, restricted to the allow-listed domains.
policy_validation_results = dict.fromkeys(policies_to_validate)
citations = dict.fromkeys(policies_to_validate)
for example_name, example_policy in tqdm(policies_to_validate.items()):
    # `search_on_perplexity` returns a (validation, citations) pair.
    (
        policy_validation_results[example_name],
        citations[example_name],
    ) = search_on_perplexity(
        policy=example_policy,
        model_name="sonar",
        search_context_size="high",
        system_prompt=system_prompt,
        perplexity_api_key=perplexity_api_key,
        allowed_sources=allowed_sources,
    )
# Last expression of the cell: show the collected results.
policy_validation_results

In [None]:
citations

In [None]:
# Example of the reasoning, in an easier to read format
display(Markdown(policy_validation_results[example_name].reasoning))

#### Sonar Pro

##### Low search context

In [None]:
# Validate every test policy with Perplexity's "sonar-pro" model at low search
# context, restricted to the allow-listed domains.
policy_validation_results = dict.fromkeys(policies_to_validate)
citations = dict.fromkeys(policies_to_validate)
for example_name, example_policy in tqdm(policies_to_validate.items()):
    # `search_on_perplexity` returns a (validation, citations) pair.
    (
        policy_validation_results[example_name],
        citations[example_name],
    ) = search_on_perplexity(
        policy=example_policy,
        model_name="sonar-pro",
        search_context_size="low",
        system_prompt=system_prompt,
        perplexity_api_key=perplexity_api_key,
        allowed_sources=allowed_sources,
    )
# Last expression of the cell: show the collected results.
policy_validation_results

In [None]:
citations

In [None]:
# Example of the reasoning, in an easier to read format
display(Markdown(policy_validation_results[example_name].reasoning))

Wow, this already worked very well, even at Sonar Pro's lowest search context setting!

##### Medium search context

In [None]:
# Validate every test policy with Perplexity's "sonar-pro" model at medium
# search context, restricted to the allow-listed domains.
policy_validation_results = dict.fromkeys(policies_to_validate)
citations = dict.fromkeys(policies_to_validate)
for example_name, example_policy in tqdm(policies_to_validate.items()):
    # `search_on_perplexity` returns a (validation, citations) pair.
    (
        policy_validation_results[example_name],
        citations[example_name],
    ) = search_on_perplexity(
        policy=example_policy,
        model_name="sonar-pro",
        search_context_size="medium",
        system_prompt=system_prompt,
        perplexity_api_key=perplexity_api_key,
        allowed_sources=allowed_sources,
    )
# Last expression of the cell: show the collected results.
policy_validation_results

In [None]:
citations

In [None]:
# Example of the reasoning, in an easier to read format
display(Markdown(policy_validation_results[example_name].reasoning))

##### High search context

In [None]:
# Validate every test policy with Perplexity's "sonar-pro" model at high search
# context, restricted to the allow-listed domains.
policy_validation_results = dict.fromkeys(policies_to_validate)
citations = dict.fromkeys(policies_to_validate)
for example_name, example_policy in tqdm(policies_to_validate.items()):
    # `search_on_perplexity` returns a (validation, citations) pair.
    (
        policy_validation_results[example_name],
        citations[example_name],
    ) = search_on_perplexity(
        policy=example_policy,
        model_name="sonar-pro",
        search_context_size="high",
        system_prompt=system_prompt,
        perplexity_api_key=perplexity_api_key,
        allowed_sources=allowed_sources,
    )
# Last expression of the cell: show the collected results.
policy_validation_results

In [None]:
citations

In [None]:
# Example of the reasoning, in an easier to read format
display(Markdown(policy_validation_results[example_name].reasoning))

#### Sonar Reasoning

##### Low search context

In [None]:
# Validate every test policy with Perplexity's "sonar-reasoning" model at low
# search context, restricted to the allow-listed domains.
policy_validation_results = dict.fromkeys(policies_to_validate)
citations = dict.fromkeys(policies_to_validate)
for example_name, example_policy in tqdm(policies_to_validate.items()):
    # `search_on_perplexity` returns a (validation, citations) pair.
    (
        policy_validation_results[example_name],
        citations[example_name],
    ) = search_on_perplexity(
        policy=example_policy,
        model_name="sonar-reasoning",
        search_context_size="low",
        system_prompt=system_prompt,
        perplexity_api_key=perplexity_api_key,
        allowed_sources=allowed_sources,
    )
# Last expression of the cell: show the collected results.
policy_validation_results

In [None]:
citations

In [None]:
# Example of the reasoning, in an easier to read format
display(Markdown(policy_validation_results[example_name].reasoning))

##### Medium search context

In [None]:
# Validate every test policy with Perplexity's "sonar-reasoning" model at medium
# search context, restricted to the allow-listed domains.
policy_validation_results = dict.fromkeys(policies_to_validate)
citations = dict.fromkeys(policies_to_validate)
for example_name, example_policy in tqdm(policies_to_validate.items()):
    # `search_on_perplexity` returns a (validation, citations) pair.
    (
        policy_validation_results[example_name],
        citations[example_name],
    ) = search_on_perplexity(
        policy=example_policy,
        model_name="sonar-reasoning",
        search_context_size="medium",
        system_prompt=system_prompt,
        perplexity_api_key=perplexity_api_key,
        allowed_sources=allowed_sources,
    )
# Last expression of the cell: show the collected results.
policy_validation_results

In [None]:
citations

In [None]:
# Example of the reasoning, in an easier to read format
display(Markdown(policy_validation_results[example_name].reasoning))

##### High search context

In [None]:
# Validate every test policy with Perplexity's "sonar-reasoning" model at high
# search context, restricted to the allow-listed domains.
policy_validation_results = dict.fromkeys(policies_to_validate)
citations = dict.fromkeys(policies_to_validate)
for example_name, example_policy in tqdm(policies_to_validate.items()):
    # `search_on_perplexity` returns a (validation, citations) pair.
    (
        policy_validation_results[example_name],
        citations[example_name],
    ) = search_on_perplexity(
        policy=example_policy,
        model_name="sonar-reasoning",
        search_context_size="high",
        system_prompt=system_prompt,
        perplexity_api_key=perplexity_api_key,
        allowed_sources=allowed_sources,
    )
# Last expression of the cell: show the collected results.
policy_validation_results

In [None]:
citations

In [None]:
# Example of the reasoning, in an easier to read format
display(Markdown(policy_validation_results[example_name].reasoning))

#### Sonar Reasoning Pro

##### Low search context

In [None]:
# Validate every test policy with Perplexity's "sonar-reasoning-pro" model at
# low search context, restricted to the allow-listed domains.
policy_validation_results = dict.fromkeys(policies_to_validate)
citations = dict.fromkeys(policies_to_validate)
for example_name, example_policy in tqdm(policies_to_validate.items()):
    # `search_on_perplexity` returns a (validation, citations) pair.
    (
        policy_validation_results[example_name],
        citations[example_name],
    ) = search_on_perplexity(
        policy=example_policy,
        model_name="sonar-reasoning-pro",
        search_context_size="low",
        system_prompt=system_prompt,
        perplexity_api_key=perplexity_api_key,
        allowed_sources=allowed_sources,
    )
# Last expression of the cell: show the collected results.
policy_validation_results

In [None]:
citations

In [None]:
# Example of the reasoning, in an easier to read format
display(Markdown(policy_validation_results[example_name].reasoning))

##### Medium search context

In [None]:
# Validate every test policy with Perplexity's "sonar-reasoning-pro" model at
# medium search context, restricted to the allow-listed domains.
policy_validation_results = dict.fromkeys(policies_to_validate)
citations = dict.fromkeys(policies_to_validate)
for example_name, example_policy in tqdm(policies_to_validate.items()):
    # `search_on_perplexity` returns a (validation, citations) pair.
    (
        policy_validation_results[example_name],
        citations[example_name],
    ) = search_on_perplexity(
        policy=example_policy,
        model_name="sonar-reasoning-pro",
        search_context_size="medium",
        system_prompt=system_prompt,
        perplexity_api_key=perplexity_api_key,
        allowed_sources=allowed_sources,
    )
# Last expression of the cell: show the collected results.
policy_validation_results

In [None]:
citations

In [None]:
# Example of the reasoning, in an easier to read format
display(Markdown(policy_validation_results[example_name].reasoning))

##### High search context

In [None]:
# Validate every test policy with Perplexity's "sonar-reasoning-pro" model at
# high search context, restricted to the allow-listed domains.
policy_validation_results = dict.fromkeys(policies_to_validate)
citations = dict.fromkeys(policies_to_validate)
for example_name, example_policy in tqdm(policies_to_validate.items()):
    # `search_on_perplexity` returns a (validation, citations) pair.
    (
        policy_validation_results[example_name],
        citations[example_name],
    ) = search_on_perplexity(
        policy=example_policy,
        model_name="sonar-reasoning-pro",
        search_context_size="high",
        system_prompt=system_prompt,
        perplexity_api_key=perplexity_api_key,
        allowed_sources=allowed_sources,
    )
# Last expression of the cell: show the collected results.
policy_validation_results

In [None]:
citations

In [None]:
# Example of the reasoning, in an easier to read format
display(Markdown(policy_validation_results[example_name].reasoning))

#### Sonar Deep Research

In [None]:
# Validate every test policy with Perplexity's "sonar-deep-research" model.
# Unlike the other Perplexity cells, no search context size and no domain
# allow-list are passed here.
policy_validation_results = dict.fromkeys(policies_to_validate)
citations = dict.fromkeys(policies_to_validate)
for example_name, example_policy in tqdm(policies_to_validate.items()):
    # `search_on_perplexity` returns a (validation, citations) pair.
    (
        policy_validation_results[example_name],
        citations[example_name],
    ) = search_on_perplexity(
        policy=example_policy,
        model_name="sonar-deep-research",
        system_prompt=system_prompt,
        perplexity_api_key=perplexity_api_key,
    )
# Last expression of the cell: show the collected results.
policy_validation_results

In [None]:
citations

In [None]:
# Example of the reasoning, in an easier to read format
display(Markdown(policy_validation_results[example_name].reasoning))

### Implemented solution