In [1]:
# Check that the Poetry CLI is reachable from this kernel and show its version
import os
import subprocess

poetry_version_cmd = ["poetry", "--version"]
result = subprocess.run(poetry_version_cmd, text=True, capture_output=True)
print(result.stdout)

Poetry (version 2.1.1)



In [2]:
# Report the version of the interpreter that is running this kernel.
# `sys.executable` is used instead of a bare "python" because the first
# `python` on PATH may be a different interpreter (or may not exist at all on
# systems that only ship `python3`).
import subprocess
import sys

result = subprocess.run([sys.executable, "--version"], capture_output=True, text=True)
print(result.stdout)

Python 3.12.6



In [3]:
# Shell escape: run `poetry install` so the project's dependencies are installed before the imports below
!poetry install

Installing dependencies from lock file

No dependencies to install or update


In [4]:
# Step 1: Import necessary libraries
import argparse
import asyncio
import logging
import os
from datetime import datetime
from typing import Literal
from dotenv import load_dotenv

from forecasting_tools import (
    AskNewsSearcher,
    BinaryQuestion,
    ForecastBot,
    GeneralLlm,
    MetaculusApi,
    MetaculusQuestion,
    MultipleChoiceQuestion,
    NumericDistribution,
    NumericQuestion,
    PredictedOptionList,
    PredictionExtractor,
    ReasonedPrediction,
    SmartSearcher,
    ForecastReport,
    clean_indents,
)
import typeguard
import litellm

# Turn on LiteLLM's verbose mode.
# NOTE(review): the un-prefixed "RAW RESPONSE" / "ASYNC kwargs" dumps in the run
# output appear to come from this flag and are not affected by the logger
# settings further down - confirm whether verbose mode should stay enabled.
litellm.set_verbose = True

# Load variables from a .env file (python-dotenv) so the os.getenv lookups used later can see them
load_dotenv()

# Root logger: INFO and above, with timestamp / logger name / level in each record
LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
logger = logging.getLogger(__name__)

# Suppress LiteLLM logging: only WARNING and above, and keep its records away from the root handlers
litellm_logger = logging.getLogger("LiteLLM")
litellm_logger.propagate = False
litellm_logger.setLevel(logging.WARNING)


In [6]:
# Step 2: Define methods and classes
class QBot(ForecastBot):
    """Forecast bot built on ``forecasting_tools.ForecastBot``.

    For each question it gathers a research rundown (``run_research``) and then
    prompts the ``metaculus/gpt-4o`` model for a binary, multiple-choice or
    numeric forecast, parsing the model's reply with ``PredictionExtractor``.
    """

    # Number of questions to ask in parallel (to speed up the process)
    _max_concurrent_questions = 10
    # Class-level semaphore: a single limiter shared by every QBot instance,
    # capping how many `run_research` bodies run at the same time.
    _concurrency_limiter = asyncio.Semaphore(_max_concurrent_questions)

    async def run_research(self, question: MetaculusQuestion) -> str:
        """Collect background research text for ``question``.

        Provider selection, in order:
        1. AskNews, when both ``ASKNEWS_CLIENT_ID`` and ``ASKNEWS_SECRET`` are set.
        2. An LLM-written rundown via ``_call_free_model``, when
           ``METACULUS_TOKEN`` is set.
        3. Otherwise no research is performed; a warning is logged and an empty
           string is returned.

        Returns:
            The research text (possibly empty).
        """
        async with self._concurrency_limiter:
            research = ""
            print("Debug: Entered concurrency limiter")
            if os.getenv("ASKNEWS_CLIENT_ID") and os.getenv("ASKNEWS_SECRET"):
                print("Debug: Using AskNewsSearcher")
                # NOTE(review): this is a synchronous call inside a coroutine -
                # confirm it does not stall other questions while it runs.
                research = AskNewsSearcher().get_formatted_news(question.question_text)
            elif os.getenv("METACULUS_TOKEN"):
                print("Debug: Using MetaculusApi")
                research = await self._call_free_model(question.question_text, use_ext_api=True)
            else:
                # Make the "no provider configured" case visible instead of
                # silently forecasting on an empty research string.
                logger.warning(
                    f"No research provider found when processing question URL {question.page_url}. "
                    "Will pass back empty string."
                )
            logger.info(f"Found Research for {question.page_url}:\n{research}")
            return research

    async def _call_free_model(self, question_text: str, use_ext_api: bool = False) -> str:
        """Ask an LLM for a news rundown relevant to ``question_text``.

        Args:
            question_text: The question the forecaster intends to forecast on.
            use_ext_api: Must be True; selects the ``metaculus/gpt-4o`` model.
                No other model is currently configured.

        Returns:
            The model's response to the research prompt.

        Raises:
            ValueError: If ``use_ext_api`` is False, because no model is
                configured for that case (previously this surfaced as an
                ``UnboundLocalError`` on ``model_name``).
        """
        if not use_ext_api:
            raise ValueError(
                "No model configured for use_ext_api=False; "
                "call _call_free_model(..., use_ext_api=True)"
            )
        model_name = "metaculus/gpt-4o"
        prompt = clean_indents(
            f"""
            You are an assistant to a superforecaster.
            The superforecaster will give you a question they intend to forecast on.
            To be a great assistant, you generate a concise but detailed rundown of the most relevant news, including if the question would resolve Yes or No based on current information.
            You do not produce forecasts yourself.

            Question:
            {question_text}
            """
        )
        model = GeneralLlm(
            model=model_name,
            temperature=0.1,
        )
        response = await model.invoke(prompt)
        return response

    def _get_final_decision_llm(self) -> GeneralLlm:
        """Build the LLM used to produce the final forecast.

        Returns:
            A ``GeneralLlm`` for ``metaculus/gpt-4o`` at temperature 0.3.

        Raises:
            ValueError: If ``METACULUS_TOKEN`` is not set in the environment.
        """
        if not os.getenv("METACULUS_TOKEN"):
            raise ValueError("No API key found for final_decision_llm")
        return GeneralLlm(
            model="metaculus/gpt-4o",
            temperature=0.3,
        )

    async def _run_forecast_on_binary(
        self, question: BinaryQuestion, research: str
    ) -> ReasonedPrediction[float]:
        """Forecast a binary question.

        Builds a prompt from the question fields, the research text and today's
        date, sends it to the final-decision LLM, and parses the reply with
        ``PredictionExtractor.extract_last_percentage_value`` (bounds 0 and 1).

        Returns:
            A ``ReasonedPrediction`` holding the extracted value and the LLM's
            full reasoning text.
        """
        prompt = clean_indents(
            f"""
            You are a professional forecaster interviewing for a job.

            Your interview question is:
            {question.question_text}

            Question background:
            {question.background_info}


            This question's outcome will be determined by the specific criteria below. These criteria have not yet been satisfied:
            {question.resolution_criteria}

            {question.fine_print}


            Your research assistant says:
            {research}

            Today is {datetime.now().strftime("%Y-%m-%d")}.

            Before answering you write:
            (a) The time left until the outcome to the question is known.
            (b) The status quo outcome if nothing changed.
            (c) A brief description of a scenario that results in a No outcome.
            (d) A brief description of a scenario that results in a Yes outcome.

            You write your rationale remembering that good forecasters put extra weight on the status quo outcome since the world changes slowly most of the time.

            The last thing you write is your final answer as: "Probability: ZZ%", 0-100
            """
        )
        reasoning = await self._get_final_decision_llm().invoke(prompt)
        prediction: float = PredictionExtractor.extract_last_percentage_value(
            reasoning, max_prediction=1, min_prediction=0
        )
        logger.info(
            f"Forecasted {question.page_url} as {prediction} with reasoning:\n{reasoning}"
        )
        return ReasonedPrediction(prediction_value=prediction, reasoning=reasoning)

    async def _run_forecast_on_multiple_choice(
        self, question: MultipleChoiceQuestion, research: str
    ) -> ReasonedPrediction[PredictedOptionList]:
        """Forecast a multiple-choice question.

        Builds a prompt listing ``question.options``, sends it to the
        final-decision LLM, and parses the reply with
        ``PredictionExtractor.extract_option_list_with_percentage_afterwards``.

        Returns:
            A ``ReasonedPrediction`` holding the extracted option list and the
            LLM's full reasoning text.
        """
        prompt = clean_indents(
            f"""
            You are a professional forecaster interviewing for a job.

            Your interview question is:
            {question.question_text}

            The options are: {question.options}


            Background:
            {question.background_info}

            {question.resolution_criteria}

            {question.fine_print}


            Your research assistant says:
            {research}

            Today is {datetime.now().strftime("%Y-%m-%d")}.

            Before answering you write:
            (a) The time left until the outcome to the question is known.
            (b) The status quo outcome if nothing changed.
            (c) A description of an scenario that results in an unexpected outcome.

            You write your rationale remembering that (1) good forecasters put extra weight on the status quo outcome since the world changes slowly most of the time, and (2) good forecasters leave some moderate probability on most options to account for unexpected outcomes.

            The last thing you write is your final probabilities for the N options in this order {question.options} as:
            Option_A: Probability_A
            Option_B: Probability_B
            ...
            Option_N: Probability_N
            """
        )
        reasoning = await self._get_final_decision_llm().invoke(prompt)
        prediction: PredictedOptionList = (
            PredictionExtractor.extract_option_list_with_percentage_afterwards(
                reasoning, question.options
            )
        )
        logger.info(
            f"Forecasted {question.page_url} as {prediction} with reasoning:\n{reasoning}"
        )
        return ReasonedPrediction(prediction_value=prediction, reasoning=reasoning)

    async def _run_forecast_on_numeric(
        self, question: NumericQuestion, research: str
    ) -> ReasonedPrediction[NumericDistribution]:
        """Forecast a numeric question.

        Builds a prompt that includes any closed-bound messages, asks the
        final-decision LLM for a list of percentiles, and parses the reply with
        ``PredictionExtractor.extract_numeric_distribution_from_list_of_percentile_number_and_probability``.

        Returns:
            A ``ReasonedPrediction`` holding the extracted distribution and the
            LLM's full reasoning text.
        """
        # The helper returns (upper, lower) in that order.
        upper_bound_message, lower_bound_message = (
            self._create_upper_and_lower_bound_messages(question)
        )
        prompt = clean_indents(
            f"""
            You are a professional forecaster interviewing for a job.

            Your interview question is:
            {question.question_text}

            Background:
            {question.background_info}

            {question.resolution_criteria}

            {question.fine_print}


            Your research assistant says:
            {research}

            Today is {datetime.now().strftime("%Y-%m-%d")}.

            {lower_bound_message}
            {upper_bound_message}

            Formatting Instructions:
            - Please notice the units requested (e.g. whether you represent a number as 1,000,000 or 1m).
            - Never use scientific notation.
            - Always start with a smaller number (more negative if negative) and then increase from there

            Before answering you write:
            (a) The time left until the outcome to the question is known.
            (b) The outcome if nothing changed.
            (c) The outcome if the current trend continued.
            (d) The expectations of experts and markets.
            (e) A brief description of an unexpected scenario that results in a low outcome.
            (f) A brief description of an unexpected scenario that results in a high outcome.

            You remind yourself that good forecasters are humble and set wide 90/10 confidence intervals to account for unknown unknowns.

            The last thing you write is your final answer as:
            "
            Percentile 10: XX
            Percentile 20: XX
            Percentile 40: XX
            Percentile 60: XX
            Percentile 80: XX
            Percentile 90: XX
            "
            """
        )
        reasoning = await self._get_final_decision_llm().invoke(prompt)
        prediction: NumericDistribution = (
            PredictionExtractor.extract_numeric_distribution_from_list_of_percentile_number_and_probability(
                reasoning, question
            )
        )
        logger.info(
            f"Forecasted {question.page_url} as {prediction.declared_percentiles} with reasoning:\n{reasoning}"
        )
        return ReasonedPrediction(prediction_value=prediction, reasoning=reasoning)

    def _create_upper_and_lower_bound_messages(
        self, question: NumericQuestion
    ) -> tuple[str, str]:
        """Build the prompt sentences describing the question's closed bounds.

        A bound flagged as open (``open_upper_bound`` / ``open_lower_bound``)
        yields an empty string; otherwise the sentence states the bound value.

        Returns:
            ``(upper_bound_message, lower_bound_message)`` - upper first.
        """
        if question.open_upper_bound:
            upper_bound_message = ""
        else:
            upper_bound_message = (
                f"The outcome can not be higher than {question.upper_bound}."
            )
        if question.open_lower_bound:
            lower_bound_message = ""
        else:
            lower_bound_message = (
                f"The outcome can not be lower than {question.lower_bound}."
            )
        return upper_bound_message, lower_bound_message

In [12]:
# Step 3: Create sample data
# URL of the Metaculus question to forecast; the run cell passes it to
# MetaculusApi.get_question_by_url.
question_url = "https://www.metaculus.com/questions/22427/number-of-new-leading-ai-labs/"

In [8]:
# Evaluate the question
def summarize_reports(forecast_reports: list[ForecastReport | BaseException]) -> None:
    """Log and print a summary of a batch of forecast results.

    Entries that are ``ForecastReport`` instances are summarized one by one
    (URL, errors, summary). Entries that are exceptions are logged and then
    cause a ``RuntimeError``. Non-empty ``errors`` on otherwise valid reports
    are reported as "minor exceptions" when no hard exception was found.

    Args:
        forecast_reports: Mixed list of reports and exceptions.

    Raises:
        RuntimeError: If at least one entry is an exception.
    """
    print("Debug: Starting to summarize reports")

    # Split the mixed list into successful reports and raised exceptions.
    valid_reports = []
    exceptions = []
    for item in forecast_reports:
        if isinstance(item, ForecastReport):
            valid_reports.append(item)
        if isinstance(item, BaseException):
            exceptions.append(item)

    # Keep only the non-empty error collections attached to valid reports.
    minor_exceptions = list(filter(None, (item.errors for item in valid_reports)))

    print(f"Debug: Found {len(valid_reports)} valid reports")
    print(f"Debug: Found {len(exceptions)} exceptions")
    print(f"Debug: Found {len(minor_exceptions)} minor exceptions")

    for report in valid_reports:
        page_url = report.question.page_url
        question_summary = clean_indents(f"""
            URL: {page_url}
            Errors: {report.errors}
            Summary:
            {report.summary}
            ---------------------------------------------------------
        """)
        logger.info(question_summary)
        print(f"Debug: Processed report for URL: {report.question.page_url}")

    # Hard failures: report each one, then abort.
    for exception in exceptions:
        logger.error(f"Exception occurred: {exception}")
        print(f"Debug: Exception occurred: {exception}")
    if exceptions:
        raise RuntimeError(
            f"{len(exceptions)} errors occurred while forecasting. Check logs for details."
        )

    if not minor_exceptions:
        return
    minor_count = len(minor_exceptions)
    logger.error(
        f"{minor_count} minor exceptions occurred while forecasting: {minor_exceptions}"
    )
    print(f"Debug: {minor_count} minor exceptions occurred while forecasting: {minor_exceptions}")

In [13]:
# Step 4: Build the bot
# Reports are written to a "results" folder under the current working directory;
# nothing is published to Metaculus.
results_folder = os.path.join(os.getcwd(), "results")
template_bot = QBot(
    folder_to_save_reports_to=results_folder,
    publish_reports_to_metaculus=False,
    skip_previously_forecasted_questions=False,
    use_research_summary_to_forecast=False,
    research_reports_per_question=1,
    predictions_per_research_report=5,
)

print("Debug: QBot initialized")

# Step 5: Run the bot
# `question` is a one-element list because forecast_questions takes a list.
question = [MetaculusApi.get_question_by_url(question_url)]
print(f"Debug: Retrieved question: {question}")

forecast_coroutine = template_bot.forecast_questions(question, return_exceptions=True)
forecast_report = asyncio.run(forecast_coroutine)
print(f"Debug: Forecast report: {forecast_report}")

# Check that every entry is either a report or an exception before summarizing.
expected_report_type = list[ForecastReport | BaseException]
forecast_report = typeguard.check_type(forecast_report, expected_report_type)
print(f"Debug: Type checked forecast report: {forecast_report}")

summarize_reports(forecast_report)
print("Debug: Summarized reports")

2025-03-04 09:10:12,581 - forecasting_tools.forecast_helpers.metaculus_api - INFO - Retrieving question details for question 22427


Debug: QBot initialized


2025-03-04 09:10:13,917 - forecasting_tools.forecast_helpers.metaculus_api - INFO - Retrieved question details for question 22427


Debug: Retrieved question: [MultipleChoiceQuestion(question_text='Before 2030, how many new AI labs will be leading labs within 2 years of their founding?', id_of_post=22427, page_url='https://www.metaculus.com/questions/22427', id_of_question=22427, state=<QuestionState.OPEN: 'open'>, num_forecasters=31, num_predictions=158, resolution_criteria='This question resolves as the number of new AI labs that become leading labs within 2 years of their founding and before 2030.\n\nFor the purposes of this question:\n\n- A “new” AI lab is one that was founded after question launch.\n\n- A “leading” AI lab is a top 5 lab based on model ELO,* as measured by [Chatbot Arena’s](https://chat.lmsys.org/?leaderboard) ELO rating. As of April 15, 2024, for example, the leading AI labs by this metric are OpenAI, Anthropic, Google DeepMind, Cohere, and Mistral AI.\n\n- A lab’s founding date will be taken to be the “Founded” date quoted on its Wikipedia page. If a lab doesn’t have a Wikipedia page, or that

2025-03-04 09:10:16,389 - httpx - INFO - HTTP Request: POST https://auth.asknews.app/oauth2/token "HTTP/1.1 200 OK"
2025-03-04 09:10:17,221 - httpx - INFO - HTTP Request: GET https://api.asknews.app/v1/news/search?query=Before%202030%2C%20how%20many%20new%20AI%20labs%20will%20be%20leading%20labs%20within%202%20years%20of%20their%20founding%3F&n_articles=6&time_filter=crawl_date&return_type=both&method=kw&historical=false&offset=0&categories=All&similarity_score_threshold=0.5&strategy=latest%20news&hours_back=24&premium=false "HTTP/1.1 200 OK"
2025-03-04 09:10:19,296 - httpx - INFO - HTTP Request: GET https://api.asknews.app/v1/news/search?query=Before%202030%2C%20how%20many%20new%20AI%20labs%20will%20be%20leading%20labs%20within%202%20years%20of%20their%20founding%3F&n_articles=10&time_filter=crawl_date&return_type=both&method=kw&historical=false&offset=0&categories=All&similarity_score_threshold=0.5&strategy=news%20knowledge&hours_back=24&premium=false "HTTP/1.1 200 OK"
2025-03-04 09:

ASYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache'): None
Final returned optional params: {'temperature': 0.3, 'extra_body': {}}


2025-03-04 09:10:22,297 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


RAW RESPONSE:
{"id": "chatcmpl-B7Ghw5wYZ3ZFAfs9OWV04swhEMK7L", "choices": [{"finish_reason": "stop", "index": 0, "logprobs": null, "message": {"content": "The research encompasses a variety of reports and articles that collectively analyze the impact of artificial intelligence (AI) on the job market and technological advancements leading up to 2030. Key findings suggest that AI is expected to create approximately 170 million new jobs while displacing around 92 million roles, resulting in a net gain of 78 million jobs. The reports emphasize that technological advancements, particularly in AI and robotics, will significantly transform industries, requiring a shift in skills among the workforce. Employers are increasingly prioritizing skills related to AI, big data, and cybersecurity, with many planning to invest in employee training to adapt to these changes.\n\nAdditionally, the research highlights the emergence of new AI labs and innovations in various sectors, such as construction, wh

2025-03-04 09:10:36,534 - httpx - INFO - HTTP Request: POST https://llm-proxy.metaculus.com/proxy/openai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-04 09:10:36,539 - __main__ - INFO - Forecasted https://www.metaculus.com/questions/22427 as predicted_options=[PredictedOption(option_name='0 or 1', probability=0.5), PredictedOption(option_name='2 or 3', probability=0.25), PredictedOption(option_name='4 or 5', probability=0.15), PredictedOption(option_name='6 or 7', probability=0.05), PredictedOption(option_name='8 or 9', probability=0.03), PredictedOption(option_name='10 or more', probability=0.02)] with reasoning:
(a) The time left until the outcome to the question is known is approximately 5 years, as the question resolves by the end of 2029.

(b) The status quo outcome if nothing changed would likely be '0 or 1'. Currently, the leading AI labs are well-established entities with significant resources, expertise, and market presence. The barriers to becoming a leading AI lab are high,

RAW RESPONSE:
{"id": "chatcmpl-B7Gi0lfJoWfwuJxenpwnyQA2vCelg", "choices": [{"finish_reason": "stop", "index": 0, "logprobs": null, "message": {"content": "(a) The time left until the outcome to the question is known is approximately 5 years, as the question resolves by the end of 2029.\n\n(b) The status quo outcome if nothing changed would likely be '0 or 1'. Currently, the leading AI labs are well-established entities with significant resources, expertise, and market presence. The barriers to becoming a leading AI lab are high, and new entrants would need to achieve substantial technological advancements or breakthroughs to compete with existing leaders.\n\n(c) A scenario that results in an unexpected outcome could involve a major technological breakthrough in AI that significantly lowers the barriers to entry, allowing new labs to quickly develop competitive models. For example, a new open-source AI framework could emerge, enabling smaller teams to rapidly iterate and improve their m

2025-03-04 09:10:36,790 - httpx - INFO - HTTP Request: POST https://llm-proxy.metaculus.com/proxy/openai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-04 09:10:36,798 - __main__ - INFO - Forecasted https://www.metaculus.com/questions/22427 as predicted_options=[PredictedOption(option_name='0 or 1', probability=0.4), PredictedOption(option_name='2 or 3', probability=0.3), PredictedOption(option_name='4 or 5', probability=0.15), PredictedOption(option_name='6 or 7', probability=0.08), PredictedOption(option_name='8 or 9', probability=0.05), PredictedOption(option_name='10 or more', probability=0.02)] with reasoning:
(a) The time left until the outcome to the question is known: The question asks about events occurring before 2030, and today is March 4, 2025. Therefore, there are approximately 5 years and 10 months left until the outcome is known.

(b) The status quo outcome if nothing changed: As of now, the leading AI labs are OpenAI, Anthropic, Google DeepMind, Cohere, and Mistral AI. I

RAW RESPONSE:
{"id": "chatcmpl-B7Gi0sgoKagHx6ZeSaRig1HoQMj7z", "choices": [{"finish_reason": "stop", "index": 0, "logprobs": null, "message": {"content": "(a) The time left until the outcome to the question is known: The question asks about events occurring before 2030, and today is March 4, 2025. Therefore, there are approximately 5 years and 10 months left until the outcome is known.\n\n(b) The status quo outcome if nothing changed: As of now, the leading AI labs are OpenAI, Anthropic, Google DeepMind, Cohere, and Mistral AI. If no new AI labs were to emerge as leading labs within two years of their founding, the status quo outcome would be '0 or 1' new leading AI labs.\n\n(c) A description of a scenario that results in an unexpected outcome: An unexpected outcome could occur if there is a significant breakthrough in AI technology that lowers the barrier to entry for new labs to develop competitive models quickly. This could be facilitated by open-source advancements, a surge in AI t

2025-03-04 09:10:37,468 - httpx - INFO - HTTP Request: POST https://llm-proxy.metaculus.com/proxy/openai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-04 09:10:37,473 - __main__ - INFO - Forecasted https://www.metaculus.com/questions/22427 as predicted_options=[PredictedOption(option_name='0 or 1', probability=0.5), PredictedOption(option_name='2 or 3', probability=0.25), PredictedOption(option_name='4 or 5', probability=0.15), PredictedOption(option_name='6 or 7', probability=0.05), PredictedOption(option_name='8 or 9', probability=0.03), PredictedOption(option_name='10 or more', probability=0.02)] with reasoning:
(a) The time left until the outcome to the question is known: The question resolves by the end of 2030, so there are approximately 5 years and 9 months remaining until the outcome is known.

(b) The status quo outcome if nothing changed: As of now, there are no new AI labs that have become leading labs within 2 years of their founding since the question's launch. Therefore, 

RAW RESPONSE:
{"id": "chatcmpl-B7Gi0P5rN8pDpG5gSkYTKCOVweYFu", "choices": [{"finish_reason": "stop", "index": 0, "logprobs": null, "message": {"content": "(a) The time left until the outcome to the question is known: The question resolves by the end of 2030, so there are approximately 5 years and 9 months remaining until the outcome is known.\n\n(b) The status quo outcome if nothing changed: As of now, there are no new AI labs that have become leading labs within 2 years of their founding since the question's launch. Therefore, the status quo outcome would be '0 or 1'.\n\n(c) A description of a scenario that results in an unexpected outcome: An unexpected outcome could occur if there is a significant breakthrough in AI technology that lowers the barriers to entry for new AI labs. This could lead to a surge in the number of new labs being founded, some of which might quickly rise to prominence due to their innovative approaches or partnerships with major tech companies. Additionally, ge

2025-03-04 09:10:46,537 - httpx - INFO - HTTP Request: POST https://llm-proxy.metaculus.com/proxy/openai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-04 09:10:46,545 - __main__ - INFO - Forecasted https://www.metaculus.com/questions/22427 as predicted_options=[PredictedOption(option_name='0 or 1', probability=0.5), PredictedOption(option_name='2 or 3', probability=0.25), PredictedOption(option_name='4 or 5', probability=0.15), PredictedOption(option_name='6 or 7', probability=0.05), PredictedOption(option_name='8 or 9', probability=0.03), PredictedOption(option_name='10 or more', probability=0.02)] with reasoning:
(a) The time left until the outcome to the question is known is approximately 5 years, as the question resolves by the end of 2030.

(b) The status quo outcome if nothing changed would likely be '0 or 1'. This is based on the current landscape where a few key players dominate the AI space, and new labs becoming leading labs within two years of their founding is relatively ra

RAW RESPONSE:
{"id": "chatcmpl-B7GiCHZw2Uju8J5SC06grLtOgXM8K", "choices": [{"finish_reason": "stop", "index": 0, "logprobs": null, "message": {"content": "(a) The time left until the outcome to the question is known is approximately 5 years, as the question resolves by the end of 2030.\n\n(b) The status quo outcome if nothing changed would likely be '0 or 1'. This is based on the current landscape where a few key players dominate the AI space, and new labs becoming leading labs within two years of their founding is relatively rare. The existing top labs have significant resources, talent, and established reputations, making it challenging for new entrants to quickly rise to the top 5.\n\n(c) A scenario that results in an unexpected outcome could involve a major breakthrough in AI technology that significantly lowers the barriers to entry for new labs. For example, if a new open-source AI framework were developed that drastically reduces the time and cost required to develop competitive