In [8]:
# Check that Poetry is installed by printing its version
import os
import subprocess

# print(os.getcwd())

# Run the Poetry CLI and capture both output streams as text; only stdout is shown.
result = subprocess.run(
    ["poetry", "--version"],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True,
)
print(result.stdout)

Poetry (version 2.1.1)



In [9]:
import subprocess

import sys

# Report the version of the interpreter that runs this kernel.
# sys.executable is used instead of a bare "python" because the first "python"
# on PATH may be a different interpreter than the kernel's (or may not exist),
# which would make this check misleading or raise FileNotFoundError.
result = subprocess.run([sys.executable, "--version"], capture_output=True, text=True)
print(result.stdout)

Python 3.12.6



In [10]:
# Install the project's dependencies with Poetry through an IPython shell escape.
!poetry install

Installing dependencies from lock file

No dependencies to install or update


In [11]:
# Step 1: Import necessary libraries
import argparse
import asyncio
import logging
import os
from datetime import datetime
from typing import Literal
from dotenv import load_dotenv

from forecasting_tools import (
    AskNewsSearcher,
    BinaryQuestion,
    ForecastBot,
    GeneralLlm,
    MetaculusApi,
    MetaculusQuestion,
    MultipleChoiceQuestion,
    NumericDistribution,
    NumericQuestion,
    PredictedOptionList,
    PredictionExtractor,
    ReasonedPrediction,
    SmartSearcher,
    ForecastReport,
    clean_indents,
)
import typeguard
import litellm

# Switch on LiteLLM's verbose mode.
# NOTE(review): verbose mode is enabled here while the "LiteLLM" logger is
# quieted further down — confirm which of the two is actually intended.
litellm.set_verbose = True

# Load settings through python-dotenv before anything reads os.environ.
load_dotenv()

# Root logging: INFO and above, timestamped, tagged with logger name and level.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Suppress LiteLLM logging: keep only WARNING and above, and stop its records
# from propagating to the root logger's handlers.
litellm_logger = logging.getLogger("LiteLLM")
litellm_logger.propagate = False
litellm_logger.setLevel(logging.WARNING)


In [24]:
# Step 2: Define methods and classes
class QBot(ForecastBot):
    """
    Forecasting bot that gathers research for a question and then prompts an
    LLM for a binary, multiple-choice or numeric forecast.

    Research comes from AskNews when ``ASKNEWS_CLIENT_ID`` and
    ``ASKNEWS_SECRET`` are set, otherwise from an OpenRouter-hosted model when
    ``OPENROUTER_API_KEY`` is set. Forecast prompts are sent to the model
    built by ``_get_final_decision_llm``, which requires ``METACULUS_TOKEN``.
    """

    _max_concurrent_questions = (
        10  # Number of questions to ask in parallel (to speed up the process)
    )
    # Class-level semaphore, so the limit is shared by every QBot instance.
    _concurrency_limiter = asyncio.Semaphore(_max_concurrent_questions)

    async def run_research(self, question: MetaculusQuestion) -> str:
        """
        Collect research text for ``question``.

        Returns:
            The research text, or an empty string when neither the AskNews
            nor the OpenRouter credentials are present in the environment.
        """
        async with self._concurrency_limiter:
            research = ""
            print("Debug: Entered concurrency limiter")
            if os.getenv("ASKNEWS_CLIENT_ID") and os.getenv("ASKNEWS_SECRET"):
                print("Debug: Using AskNewsSearcher")
                # get_formatted_news is a synchronous call. Running it in a
                # worker thread keeps the event loop free, so other questions
                # admitted by the semaphore can make progress meanwhile.
                research = await asyncio.to_thread(
                    AskNewsSearcher().get_formatted_news, question.question_text
                )
            elif os.getenv("OPENROUTER_API_KEY"):
                print("Debug: Using OpenRouter")
                research = await self._call_free_model(question.question_text, use_open_router=True)
            else:
                # Make the "no provider" case visible instead of silently
                # forecasting with empty research.
                logger.warning(
                    f"No research provider configured for {question.page_url}; "
                    "continuing with empty research"
                )
            logger.info(f"Found Research for {question.page_url}:\n{research}")
            return research

    async def _call_free_model(self, question_text: str, use_open_router: bool = False) -> str:
        """
        Ask an LLM for a news rundown relevant to ``question_text``.

        Args:
            question_text: The question the forecaster intends to forecast on.
            use_open_router: Selects the OpenRouter-prefixed DeepSeek model
                when True, otherwise the Gemini model id below.

        Returns:
            The model's response to the research prompt.
        """
        prompt = clean_indents(
            f"""
            You are an assistant to a superforecaster.
            The superforecaster will give you a question they intend to forecast on.
            To be a great assistant, you generate a concise but detailed rundown of the most relevant news, including if the question would resolve Yes or No based on current information.
            You do not produce forecasts yourself.

            Question:
            {question_text}
            """
        )
        if use_open_router:
            model_name = "openrouter/deepseek/deepseek-r1"
        else:
            # NOTE(review): unlike the branch above, this id has no provider
            # prefix — confirm the LLM client can route it as written.
            model_name = "google/gemini-2.0-pro-exp-02-05:free"
        model = GeneralLlm(
            model=model_name,
            temperature=0.1,
        )
        response = await model.invoke(prompt)
        return response

    def _get_final_decision_llm(self) -> GeneralLlm:
        """
        Build the LLM that produces the final forecasts.

        Raises:
            ValueError: If ``METACULUS_TOKEN`` is not set in the environment.
        """
        if not os.getenv("METACULUS_TOKEN"):
            raise ValueError("No API key found for final_decision_llm")
        return GeneralLlm(
            model="metaculus/gpt-4o",
            temperature=0.3,
        )

    async def _run_forecast_on_binary(
        self, question: BinaryQuestion, research: str
    ) -> ReasonedPrediction[float]:
        """
        Forecast a binary question.

        Prompts the final-decision LLM with the question details and the
        research, then extracts the last percentage in the reply as the
        prediction (bounded to the 0-1 range passed to the extractor).
        """
        prompt = clean_indents(
            f"""
            You are a professional forecaster interviewing for a job.

            Your interview question is:
            {question.question_text}

            Question background:
            {question.background_info}


            This question's outcome will be determined by the specific criteria below. These criteria have not yet been satisfied:
            {question.resolution_criteria}

            {question.fine_print}


            Your research assistant says:
            {research}

            Today is {datetime.now().strftime("%Y-%m-%d")}.

            Before answering you write:
            (a) The time left until the outcome to the question is known.
            (b) The status quo outcome if nothing changed.
            (c) A brief description of a scenario that results in a No outcome.
            (d) A brief description of a scenario that results in a Yes outcome.

            You write your rationale remembering that good forecasters put extra weight on the status quo outcome since the world changes slowly most of the time.

            The last thing you write is your final answer as: "Probability: ZZ%", 0-100
            """
        )
        reasoning = await self._get_final_decision_llm().invoke(prompt)
        prediction: float = PredictionExtractor.extract_last_percentage_value(
            reasoning, max_prediction=1, min_prediction=0
        )
        logger.info(
            f"Forecasted {question.page_url} as {prediction} with reasoning:\n{reasoning}"
        )
        return ReasonedPrediction(prediction_value=prediction, reasoning=reasoning)

    async def _run_forecast_on_multiple_choice(
        self, question: MultipleChoiceQuestion, research: str
    ) -> ReasonedPrediction[PredictedOptionList]:
        """
        Forecast a multiple-choice question.

        Prompts the final-decision LLM for one probability per option and
        parses the reply into a ``PredictedOptionList`` keyed by
        ``question.options``.
        """
        prompt = clean_indents(
            f"""
            You are a professional forecaster interviewing for a job.

            Your interview question is:
            {question.question_text}

            The options are: {question.options}


            Background:
            {question.background_info}

            {question.resolution_criteria}

            {question.fine_print}


            Your research assistant says:
            {research}

            Today is {datetime.now().strftime("%Y-%m-%d")}.

            Before answering you write:
            (a) The time left until the outcome to the question is known.
            (b) The status quo outcome if nothing changed.
            (c) A description of an scenario that results in an unexpected outcome.

            You write your rationale remembering that (1) good forecasters put extra weight on the status quo outcome since the world changes slowly most of the time, and (2) good forecasters leave some moderate probability on most options to account for unexpected outcomes.

            The last thing you write is your final probabilities for the N options in this order {question.options} as:
            Option_A: Probability_A
            Option_B: Probability_B
            ...
            Option_N: Probability_N
            """
        )
        reasoning = await self._get_final_decision_llm().invoke(prompt)
        prediction: PredictedOptionList = (
            PredictionExtractor.extract_option_list_with_percentage_afterwards(
                reasoning, question.options
            )
        )
        logger.info(
            f"Forecasted {question.page_url} as {prediction} with reasoning:\n{reasoning}"
        )
        return ReasonedPrediction(prediction_value=prediction, reasoning=reasoning)

    async def _run_forecast_on_numeric(
        self, question: NumericQuestion, research: str
    ) -> ReasonedPrediction[NumericDistribution]:
        """
        Forecast a numeric question.

        Prompts the final-decision LLM for a list of percentiles, including
        bound hints for any closed bound, and parses the reply into a
        ``NumericDistribution``.
        """
        upper_bound_message, lower_bound_message = (
            self._create_upper_and_lower_bound_messages(question)
        )
        prompt = clean_indents(
            f"""
            You are a professional forecaster interviewing for a job.

            Your interview question is:
            {question.question_text}

            Background:
            {question.background_info}

            {question.resolution_criteria}

            {question.fine_print}


            Your research assistant says:
            {research}

            Today is {datetime.now().strftime("%Y-%m-%d")}.

            {lower_bound_message}
            {upper_bound_message}

            Formatting Instructions:
            - Please notice the units requested (e.g. whether you represent a number as 1,000,000 or 1m).
            - Never use scientific notation.
            - Always start with a smaller number (more negative if negative) and then increase from there

            Before answering you write:
            (a) The time left until the outcome to the question is known.
            (b) The outcome if nothing changed.
            (c) The outcome if the current trend continued.
            (d) The expectations of experts and markets.
            (e) A brief description of an unexpected scenario that results in a low outcome.
            (f) A brief description of an unexpected scenario that results in a high outcome.

            You remind yourself that good forecasters are humble and set wide 90/10 confidence intervals to account for unknown unknowns.

            The last thing you write is your final answer as:
            "
            Percentile 10: XX
            Percentile 20: XX
            Percentile 40: XX
            Percentile 60: XX
            Percentile 80: XX
            Percentile 90: XX
            "
            """
        )
        reasoning = await self._get_final_decision_llm().invoke(prompt)
        prediction: NumericDistribution = (
            PredictionExtractor.extract_numeric_distribution_from_list_of_percentile_number_and_probability(
                reasoning, question
            )
        )
        logger.info(
            f"Forecasted {question.page_url} as {prediction.declared_percentiles} with reasoning:\n{reasoning}"
        )
        return ReasonedPrediction(prediction_value=prediction, reasoning=reasoning)

    def _create_upper_and_lower_bound_messages(
        self, question: NumericQuestion
    ) -> tuple[str, str]:
        """
        Build the prompt sentences describing the question's closed bounds.

        Returns:
            ``(upper_bound_message, lower_bound_message)``, in that order.
            Each message is an empty string when the matching bound is open.
        """
        if question.open_upper_bound:
            upper_bound_message = ""
        else:
            upper_bound_message = (
                f"The outcome can not be higher than {question.upper_bound}."
            )
        if question.open_lower_bound:
            lower_bound_message = ""
        else:
            lower_bound_message = (
                f"The outcome can not be lower than {question.lower_bound}."
            )
        return upper_bound_message, lower_bound_message

In [13]:
# Step 3: Create sample data
# URL of the single Metaculus question that the bot is run against further down.
question_url = "https://www.metaculus.com/questions/578/human-extinction-by-2100/"

In [21]:
# Helper that summarizes the forecast reports (and raises if any forecast failed)
def summarize_reports(forecast_reports: list[ForecastReport | BaseException]) -> None:
    """
    Log and print a summary of a forecasting run.

    Args:
        forecast_reports: One entry per question, each either a
            ``ForecastReport`` or the exception raised while forecasting it.

    Raises:
        RuntimeError: If at least one entry is an exception. Raised only
            after every valid report and every exception has been logged.
    """
    print("Debug: Starting to summarize reports")

    # Split the mixed input into successful reports and outright failures.
    valid_reports = []
    exceptions = []
    for item in forecast_reports:
        if isinstance(item, ForecastReport):
            valid_reports.append(item)
        if isinstance(item, BaseException):
            exceptions.append(item)
    # Errors recorded on reports that were still produced.
    minor_exceptions = [
        finished.errors for finished in valid_reports if finished.errors
    ]

    print(f"Debug: Found {len(valid_reports)} valid reports")
    print(f"Debug: Found {len(exceptions)} exceptions")
    print(f"Debug: Found {len(minor_exceptions)} minor exceptions")

    for report in valid_reports:
        question_summary = clean_indents(f"""
            URL: {report.question.page_url}
            Errors: {report.errors}
            Summary:
            {report.summary}
            ---------------------------------------------------------
        """)
        logger.info(question_summary)
        print(f"Debug: Processed report for URL: {report.question.page_url}")

    # Report every hard failure before aborting.
    for exception in exceptions:
        logger.error(f"Exception occurred: {exception}")
        print(f"Debug: Exception occurred: {exception}")
    if exceptions:
        raise RuntimeError(
            f"{len(exceptions)} errors occurred while forecasting. Check logs for details."
        )

    if not minor_exceptions:
        return
    logger.error(
        f"{len(minor_exceptions)} minor exceptions occurred while forecasting: {minor_exceptions}"
    )
    print(f"Debug: {len(minor_exceptions)} minor exceptions occurred while forecasting: {minor_exceptions}")

In [25]:
# Step 4: Build the bot
# One research report per question, five predictions per report; nothing is
# published to Metaculus and reports are saved under ./results.
template_bot = QBot(
    folder_to_save_reports_to=os.path.join(os.getcwd(), "results"),
    publish_reports_to_metaculus=False,
    skip_previously_forecasted_questions=False,
    use_research_summary_to_forecast=False,
    research_reports_per_question=1,
    predictions_per_research_report=5,
)
print("Debug: QBot initialized")

# Step 5: Run the bot
# forecast_questions takes a list, so the single question is wrapped in one.
question = [
    MetaculusApi.get_question_by_url(question_url),
]
print(f"Debug: Retrieved question: {question}")

# return_exceptions=True: failures come back as list entries instead of raising here.
forecast_report = asyncio.run(
    template_bot.forecast_questions(question, return_exceptions=True)
)
print(f"Debug: Forecast report: {forecast_report}")

# Runtime check that the result is a list of reports and/or exceptions.
forecast_report = typeguard.check_type(
    forecast_report,
    list[ForecastReport | BaseException],
)
print(f"Debug: Type checked forecast report: {forecast_report}")

# Raises RuntimeError if any entry is an exception.
summarize_reports(forecast_report)
print("Debug: Summarized reports")

2025-03-03 10:22:50,821 - forecasting_tools.forecast_helpers.metaculus_api - INFO - Retrieving question details for question 578


Debug: QBot initialized


2025-03-03 10:22:52,834 - forecasting_tools.forecast_helpers.metaculus_api - INFO - Retrieved question details for question 578


Debug: Retrieved question: [BinaryQuestion(question_text='Will humans go extinct before 2100?', id_of_post=578, page_url='https://www.metaculus.com/questions/578', id_of_question=578, state=<QuestionState.OPEN: 'open'>, num_forecasters=1433, num_predictions=3032, resolution_criteria="This question will resolve as **Yes** if there are no known humans alive on January 1, 2100.  For these purposes we'll define humans as biological creatures who have as their ancestors – via a chain of live births from mothers –  circa 2000 humans OR who could mate with circa 2000 humans to produce viable offspring. (So AIs, ems, genetically engineered beings of a different species brought up in artificial wombs, etc. would not count).  Any living humans living anywhere in the observable universe (or multiverse) (who are known to the entities operating Metaculus) on that date will be sufficient to resolve the question negatively.\n\nN.B. Even though it is obviously the case that if human extinction occurs 

2025-03-03 10:22:55,203 - httpx - INFO - HTTP Request: POST https://auth.asknews.app/oauth2/token "HTTP/1.1 200 OK"
2025-03-03 10:22:56,014 - httpx - INFO - HTTP Request: GET https://api.asknews.app/v1/news/search?query=Will%20humans%20go%20extinct%20before%202100%3F&n_articles=6&time_filter=crawl_date&return_type=both&method=kw&historical=false&offset=0&categories=All&similarity_score_threshold=0.5&strategy=latest%20news&hours_back=24&premium=false "HTTP/1.1 200 OK"
2025-03-03 10:22:57,841 - httpx - INFO - HTTP Request: GET https://api.asknews.app/v1/news/search?query=Will%20humans%20go%20extinct%20before%202100%3F&n_articles=10&time_filter=crawl_date&return_type=both&method=kw&historical=false&offset=0&categories=All&similarity_score_threshold=0.5&strategy=news%20knowledge&hours_back=24&premium=false "HTTP/1.1 200 OK"
2025-03-03 10:22:58,270 - __main__ - INFO - Found Research for https://www.metaculus.com/questions/578:
Here are the relevant news articles:

**Elon Musk: AI Threat to 

ASYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache'): None
Final returned optional params: {'temperature': 0.3, 'extra_body': {}}

[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.





ASYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache'): None
Final returned optional params: {'temperature': 0.3, 'extra_body': {}}

[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.

ASYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache'): None
Final returned optional params: {'temperature': 0.3, 'extra_body': {}}
ASYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache'): None
Final returned optional params: {'temperature': 0.3, 'extra_body': {}}
ASYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache'): None
Final returned optional params: {'temperature': 0.3, 'extra_body': {}}
ASYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache'): None
Final returned optional params: {'temperature': 0.3, 'extra_body': {}}
ASYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache'): None

[1;31mGive Feedba



ASYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache'): None
Final returned optional params: {'temperature': 0.3, 'extra_body': {}}

[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.

ASYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache'): None
Final returned optional params: {'temperature': 0.3, 'extra_body': {}}

[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.





ASYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache'): None
Final returned optional params: {'temperature': 0.3, 'extra_body': {}}

[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.





ASYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache'): None
Final returned optional params: {'temperature': 0.3, 'extra_body': {}}

[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.



2025-03-03 10:23:13,064 - forecasting_tools.forecast_bots.forecast_bot - ERROR - Error while processing question url: 'https://www.metaculus.com/questions/578': All 1 research reports/predictions failed: Errors: ["ExceptionGroup: Error while running research and predictions: Errors: ['AuthenticationError: litellm.AuthenticationError: AuthenticationError: OpenAIException - The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable', 'AuthenticationError: litellm.AuthenticationError: AuthenticationError: OpenAIException - The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable', 'AuthenticationError: litellm.AuthenticationError: AuthenticationError: OpenAIException - The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable', 'AuthenticationError: litellm.Authenticatio

ASYNC kwargs[caching]: False; litellm.cache: None; kwargs.get('cache'): None
Final returned optional params: {'temperature': 0.3, 'extra_body': {}}

[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new[0m
LiteLLM.Info: If you need to debug this error, use `litellm._turn_on_debug()'.

Debug: Forecast report: [ExceptionGroup('Error while processing question url: \'https://www.metaculus.com/questions/578\': All 1 research reports/predictions failed: Errors: ["ExceptionGroup: Error while running research and predictions: Errors: [\'AuthenticationError: litellm.AuthenticationError: AuthenticationError: OpenAIException - The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable\', \'AuthenticationError: litellm.AuthenticationError: AuthenticationError: OpenAIException - The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment v

RuntimeError: 1 errors occurred while forecasting. Check logs for details.