In [2]:
from __future__ import annotations

import re
from typing import Tuple, Optional

import requests


DEFAULT_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/124.0 Safari/537.36"
    )
}


def fetch_page(url: str, timeout: float = 15.0) -> Tuple[Optional[str], str]:
    """Fetches a URL and returns (title, text). Best-effort, no external services.

    - Uses requests with a friendly UA and timeout.
    - Tries BeautifulSoup if available for better parsing; otherwise falls back to regex strip.
    """
    resp = requests.get(url, headers=DEFAULT_HEADERS, timeout=timeout)
    resp.raise_for_status()
    html = resp.text

    title = None
    text_content = None

    try:
        from bs4 import BeautifulSoup  # type: ignore

        soup = BeautifulSoup(html, "html.parser")
        title = soup.title.get_text(strip=True) if soup.title else None
        # Remove script/style
        for tag in soup(["script", "style", "noscript"]):
            tag.decompose()
        text_content = soup.get_text(" ", strip=True)
    except Exception:
        # Fallback: naive tag removal
        title_match = re.search(r"<title>(.*?)</title>", html, flags=re.I | re.S)
        if title_match:
            title = re.sub(r"\s+", " ", title_match.group(1)).strip()
        # Remove scripts/styles
        html = re.sub(r"<script[\s\S]*?</script>", " ", html, flags=re.I)
        html = re.sub(r"<style[\s\S]*?</style>", " ", html, flags=re.I)
        # Strip tags
        text_only = re.sub(r"<[^>]+>", " ", html)
        text_content = re.sub(r"\s+", " ", text_only).strip()

    return title, text_content or ""



In [2]:
url = 'https://www.cnbc.com/2025/10/31/cre-companies-ai-goals.html'
fetch_page(url)

("Few CRE companies have achieved their AI goals. Here's why",
 'Few CRE companies have achieved their AI goals. Here\'s why Skip Navigation Markets Pre-Markets U.S. Markets Europe Markets China Markets Asia Markets World Markets Currencies Cryptocurrency Futures & Commodities Bonds Funds & ETFs Business Economy Finance Health & Science Media Real Estate Energy Climate Transportation Industrials Retail Wealth Sports Life Small Business Investing Personal Finance Fintech Financial Advisors Options Action ETF Street Buffett Archive Earnings Trader Talk Tech Cybersecurity AI Enterprise Internet Media Mobile Social Media CNBC Disruptor 50 Tech Guide Politics White House Policy Defense Congress Expanding Opportunity Europe Politics China Politics Asia Politics World Politics Video Latest Video Full Episodes Livestream Top Video Live Audio Europe TV Asia TV CNBC Podcasts CEO Interviews Digital Originals Watchlist Investing Club Trust Portfolio Analysis Trade Alerts Meeting Videos Homestretch

In [None]:
title = "Few CRE companies have achieved their AI goals. Here's why"
text_content = '''Few CRE companies have achieved their AI goals. Here\'s why Skip Navigation Markets Pre-Markets U.S. Markets Europe Markets China Markets Asia Markets World Markets Currencies Cryptocurrency Futures & Commodities Bonds Funds & ETFs Business Economy Finance Health & Science Media Real Estate Energy Climate Transportation Industrials Retail Wealth Sports Life Small Business Investing Personal Finance Fintech Financial Advisors Options Action ETF Street Buffett Archive Earnings Trader Talk Tech Cybersecurity AI Enterprise Internet Media Mobile Social Media CNBC Disruptor 50 Tech Guide Politics White House Policy Defense Congress Expanding Opportunity Europe Politics China Politics Asia Politics World Politics Video Latest Video Full Episodes Livestream Top Video Live Audio Europe TV Asia TV CNBC Podcasts CEO Interviews Digital Originals Watchlist Investing Club Trust Portfolio Analysis Trade Alerts Meeting Videos Homestretch Jim\'s Columns Education Subscribe PRO Pro News Josh Brown Mike Santoli Calls of the Day My Portfolio Livestream Full Episodes Stock Screener Market Forecast Options Investing Chart Investing Subscribe Livestream Menu Make It select USA INTL Livestream Search quotes, news & videos Livestream Watchlist SIGN IN Create free account Markets Business Investing Tech Politics Video Watchlist Investing Club PRO Livestream Menu Real Estate Housing Construction REITs Rising Risks Newsletter Sign-up CNBC Property Play Just 5% of CRE companies have achieved their AI goals. Here\'s why Published Fri, Oct 31 2025 8:00 AM EDT Updated Fri, Oct 31 2025 8:11 AM EDT Diana Olick @in/dianaolick @DianaOlickCNBC @DianaOlick WATCH LIVE Key Points Real estate companies are moving beyond initial testing and exploration of AI into more targeted applications that aim to redefine value, according to a new survey from JLL. JLL found that 88% of investors, owners and landlords said they have started piloting AI, with most pursuing an average of five use cases simultaneously. Just 5% of respondents said they have achieved all their program goals, while close to half said they have achieved two to three goals. Diminishing perspective of downtown London skyscrapers Chunyip Wong | Istock | Getty Images A version of this article first appeared in the CNBC Property Play newsletter with Diana Olick. Property Play covers new and evolving opportunities for the real estate investor, from individuals to venture capitalists, private equity funds, family offices, institutional investors and large public companies. Sign up to receive future editions, straight to your inbox. The commercial real estate market has been historically slow to modernize, and yet it appears to be accelerating its adoption of artificial intelligence. Companies are moving beyond initial testing and exploration into more targeted applications that aim to redefine value, according to a new survey from JLL. The survey of more than 1,500 senior CRE investor and occupier decision-makers across various industries found that, while still in the early stages, organizations are making AI a priority in their technology budgets. They are also moving from using it just for efficiency to focusing on how it can grow their businesses. JLL found that 88% of investors, owners and landlords said they have started piloting AI, with most pursuing an average of five use cases simultaneously. And more than 90% of occupiers are running corporate real estate AI pilots, according to the report. Compare that with just 5% starting AI pilots two years ago. The adoption is fast, but not entirely easy. Just 5% of respondents said they have achieved all their program goals, while close to half said they have achieved two to three goals. Much of the efforts are still experimental, without much growth. "If you think about commercial real estate, traditionally, it is not a quick technology adopter, and it\'s usually skeptical," said Yao Morin, chief technology officer at JLL. "So the high number of adoptions is actually quite surprising to me. What is not surprising on the flip side is that only 5% actually thinks that they have achieved all the goals. This is pretty aligned with a lot of other industries as well." Get Property Play directly to your inbox CNBC\'s Property Play with Diana Olick covers new and evolving opportunities for the real estate investor, delivered weekly to your inbox. Subscribe here to get access today . The reason they\'re not hitting their goals is because the goal line has moved. Companies have gone beyond just wanting to do certain tasks faster, or so-called operational efficiencies. Now they are tying AI to their revenue goals. For example, some are using it to help them improve their investment risk models, making investment and portfolio decisions based on the output of AI. That will require big changes to the fundamental way they operate. "When you really start moving towards the revenue side, the margin expansion side, then it\'s going to require a lot more than just using a technology," Morin explained. "You can\'t just say, \'Well, I\'m saving you 10% to do this particular thing.\' Companies need to actually rethink their operating model, to rethink how they organize to actually achieve the savings." And so companies are investing heavily in AI, despite economic headwinds. More than half of investors surveyed by JLL have been able to get significant budget growth over the past two years in the space. Their No. 1 spend is on strategic advisory on technology or AI, and most report their budgets have increased solely due to AI. After that, the spending goes to upgrading both cyber- and data-security measures and infrastructure for AI integration. Morin said what she found really surprising is that while most think companies will start using AI for simple tasks, or, low-risk, low-hanging fruit, that was not at all the case. "Our survey showed the opposite. We are getting to a point of sophistication, beyond this initial skeptical phase, where companies are really focusing on the competitive advantage to pressing business problems, using AI to solve instead of [just] those simple low-risk operations." More In CNBC Property Play Why global investment firm Nuveen is betting on this niche real estate subsector Diana Olick Commercial real estate is finally embracing blockchain. Here\'s what investors should know Diana Olick Major real estate developers are fast becoming power brokers Diana Olick Read More Subscribe to CNBC PRO Subscribe to Investing Club Licensing & Reprints CNBC Councils Supply Chain Values CNBC on Peacock Join the CNBC Panel Digital Products News Releases Closed Captioning Corrections About CNBC Internships Site Map Ad Choices Careers Help Contact News Tips Got a confidential news tip? We want to hear from you. Get In Touch CNBC Newsletters Sign up for free newsletters and get more CNBC delivered to your inbox Sign Up Now Get this delivered to your inbox, and more info about our products and services. Advertise With Us Please Contact Us Privacy Policy Your Privacy Choices CA Notice Terms of Service © 2025 CNBC LLC. All Rights Reserved. A Division of NBCUniversal Data is a real-time snapshot *Data is delayed at least 15 minutes.\n      Global Business and Financial News, Stock Quotes, and Market Data\n      and Analysis. Market Data Terms of Use and Disclaimers Data also provided by'''

In [11]:
text_content

'Few CRE companies have achieved their AI goals. Here\'s why Skip Navigation Markets Pre-Markets U.S. Markets Europe Markets China Markets Asia Markets World Markets Currencies Cryptocurrency Futures & Commodities Bonds Funds & ETFs Business Economy Finance Health & Science Media Real Estate Energy Climate Transportation Industrials Retail Wealth Sports Life Small Business Investing Personal Finance Fintech Financial Advisors Options Action ETF Street Buffett Archive Earnings Trader Talk Tech Cybersecurity AI Enterprise Internet Media Mobile Social Media CNBC Disruptor 50 Tech Guide Politics White House Policy Defense Congress Expanding Opportunity Europe Politics China Politics Asia Politics World Politics Video Latest Video Full Episodes Livestream Top Video Live Audio Europe TV Asia TV CNBC Podcasts CEO Interviews Digital Originals Watchlist Investing Club Trust Portfolio Analysis Trade Alerts Meeting Videos Homestretch Jim\'s Columns Education Subscribe PRO Pro News Josh Brown Mike

# Test flow

In [None]:
import os
import sys
import re
import logging
from typing import Tuple, Optional
import requests

import google.generativeai as genai
from pydantic_ai import Agent
from pydantic_ai.models.google import GoogleModel
from pydantic_ai.providers.google import GoogleProvider

# Create and configure logger
logging.basicConfig(filename="./logs/newfile.log",
                    format='%(asctime)s %(message)s',
                    filemode='w')
logger = logging.getLogger()

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
from dotenv import load_dotenv
load_dotenv()

GEMINI_KEY = os.getenv('GOOGLE_API_KEY')
DEFAULT_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/124.0 Safari/537.36"
    )
}

# Import pydant
provider = GoogleProvider(api_key=GEMINI_KEY)
model = GoogleModel('gemini-2.5-flash', provider=provider)

In [6]:
import re
import os
import logging
import asyncio
import httpx
from typing import Tuple, Optional

from pydantic_ai import Agent
from pydantic_ai.models.google import GoogleModel, GoogleProvider

from models import ClassificationResultFromText
from dotenv import load_dotenv
load_dotenv()

# Configure logger
logging.basicConfig(filename="./logs/newfile.log",
                    format='%(asctime)s %(message)s',
                    filemode='w')
logger = logging.getLogger(__name__)

GEMINI_KEY = os.getenv('GOOGLE_API_KEY')
# Default headers for HTTP requests
DEFAULT_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}
TIMEOUT = 30


class NewsAnalyzer:
    def __init__(self, gemini_key: str):
        self.provider = GoogleProvider(api_key=gemini_key)
        self.model = GoogleModel('gemini-2.5-flash-lite', provider=self.provider)

        # Create agent with system prompt and output type
        self.agent = Agent(
            self.model,
            output_type=ClassificationResultFromText,
            system_prompt="""You are a professional news analyst specializing in financial and business reporting.
                Interpret a given news article and produce a concise, structured analysis.

                TASKS
                1) Financial Relevance
                - Decide if the news is about finance/economy/markets. Output: Yes/No.

                2) Sector Classification
                - Name the primary sector (e.g., Technology, Banking, F&B, Heavy Industry, Manufacturing, Energy, Healthcare, etc.).

                3) Companies Mentioned (TARGET COMPANIES ONLY)
                - List ONLY operating companies materially involved or affected in the article’s CONTENT.
                - INCLUDE: named operating companies and subsidiaries explicitly referenced as actors or impacted parties.
                - EXCLUDE (unless they are themselves the main subject of the story):
                • Media (Reuters, CNBC, etc.)
                • Data/survey/benchmark providers (S&P Global, PMI compilers, Markit, etc.)
                • Government bodies, regulators, NGOs, think tanks
                • Stock indices/ETFs (S&P 500, MSCI, etc.)
                • Generic groups with no named firms (“Chinese automakers”)
                - Deduplicate; use canonical names (e.g., “Apple Inc.”).
                - If no target companies are present, output: None.

                (OPTIONAL) Tickers — only if present in the article text
                - If the article explicitly shows a ticker next to a company (examples: AAPL, TSLA.O, 0700.HK, 600519.SS, NYSE:T, 000001.SZ), append it in parentheses after the company name.
                Example: Apple Inc. (AAPL), Tencent Holdings Ltd. (0700.HK)
                - Do NOT infer tickers; never guess.

                (OPTIONAL) Country/Region Tags — only if present in the article text
                - If the article explicitly states a company’s country/region (e.g., “U.S. chipmaker…”, “Chinese EV maker…”), add a short tag in square brackets after the company name or ticker.
                Example: BYD Co. Ltd. (1211.HK) [China]
                - Do NOT infer geography if not stated.

                4) Sentiment Analysis
                - Classify overall tone: Positive / Negative / Neutral.
                - Provide a confidence score between 1.0 and 10.0 (10.0 = highest confidence).

                5) Summaries
                - English summary: 2–3 sentences.
                - Turkish summary: 2–3 sentences conveying the same meaning.

                OUTPUT FORMAT (keys only; fill values)
                - Financial Check: Yes/No
                - Sector: <sector name>
                - Companies Mentioned: <comma-separated list or "None">
                - Sentiment: <Positive|Negative|Neutral> 
                — Confidence: <x.x>/10.0
                - English Summary: <2–3 sentences>
                - Turkish Summary: <2–3 sentences>

                CLARIFICATIONS & EXAMPLES
                - If S&P Global is only cited as a PMI data source, do NOT list it under Companies Mentioned.
                - “Tesla cuts prices in China…” → Companies Mentioned: Tesla, Inc. (TSLA) [U.S.]  (only if TSLA is actually in the text; otherwise omit ticker)
                - “Chinese smartphone makers raised prices…” with no firm names → Companies Mentioned: None"""
        )

        # Test the agent with a simple query
        logger.info("✓ Pydantic AI Agent configured successfully")

    async def extract_url(self, url: str, timeout: float = TIMEOUT) -> Tuple[Optional[str], str]:
        """Fetches a URL and returns (title, text). Best-effort, no external services.

        - Uses httpx with a friendly UA and timeout.
        - Tries BeautifulSoup if available for better parsing; otherwise falls back to regex strip.
        """
        async with httpx.AsyncClient(headers=DEFAULT_HEADERS, timeout=timeout) as client:
            resp = await client.get(url)
            resp.raise_for_status()
            html = resp.text

        title = None
        text_content = None

        try:
            from bs4 import BeautifulSoup  # type: ignore

            soup = BeautifulSoup(html, "html.parser")
            title = soup.title.get_text(strip=True) if soup.title else None
            # Remove script/style
            for tag in soup(["script", "style", "noscript"]):
                tag.decompose()
            text_content = soup.get_text(" ", strip=True)
            logger.info(
                f"Get the contents of the news with length: {len(text_content)}")
        except Exception as e:
            # Fallback: naive tag removal
            title_match = re.search(
                r"<title>(.*?)</title>", html, flags=re.I | re.S)
            if title_match:
                title = re.sub(r"\s+", " ", title_match.group(1)).strip()
            # Remove scripts/styles
            html = re.sub(r"<script[\s\S]*?</script>", " ", html, flags=re.I)
            html = re.sub(r"<style[\s\S]*?</style>", " ", html, flags=re.I)
            # Strip tags
            text_only = re.sub(r"<[^>]+>", " ", html)
            text_content = re.sub(r"\s+", " ", text_only).strip()
            logger.error(f" Cannot get the contents from the url: {e}")

        return title, text_content or ""

    async def llm_analyzer(
        self, contents: str, title: str, timeout: float = TIMEOUT
    ) -> ClassificationResultFromText:
        """Analyze news content with LLM. Includes timeout protection for long inference."""
        user_message = f"- Title: {title}\n- Contents: {contents}"

        try:
            logger.info(f"Analyzing {len(contents)} chars of text via LLM")

            response = await asyncio.wait_for(self.agent.run(user_message), timeout=timeout)

            return ClassificationResultFromText(
                page_title=title,
                is_financial=response.output.is_financial,
                country=getattr(response.output, "country", None),
                sector=response.output.sector,
                companies=response.output.companies,
                confident_score=response.output.confident_score,
                sentiment=response.output.sentiment,
                summary_en=response.output.summary_en,
                summary_tr=response.output.summary_tr,
                extracted_characters=len(contents or ""),
            )

        except asyncio.TimeoutError:
            logger.error("LLM analysis timed out.")
            raise TimeoutError(
                f"LLM analysis exceeded timeout of {timeout} seconds.")
        except Exception as e:
            logger.exception(f"Error during LLM analysis: {e}")
            raise

    async def analyze_with_url(self, url: str, timeout=TIMEOUT) -> ClassificationResultFromText:
        """Complete analysis pipeline: extract URL content and analyze with LLM."""
        title, text = await self.extract_url(url)
        llm_output = await self.llm_analyzer(contents=text, title=title, timeout=TIMEOUT)

        return llm_output

    async def analyze_with_contents(self, text: str, title: str, timeout=TIMEOUT) -> ClassificationResultFromText:
        """Complete analysis pipeline: analyze with text and title"""
        llm_output = await self.llm_analyzer(contents=text, title=title)

        return llm_output

In [7]:
def get_analyzer() -> NewsAnalyzer:
    """Get or create the NewsAnalyzer singleton instance."""
    api_key = os.getenv("GOOGLE_API_KEY") or os.getenv("GEMINI_API_KEY")
    if not api_key:
        raise ValueError(
            "Missing API key. Set GOOGLE_API_KEY or GEMINI_API_KEY environment variable."
        )
    analyzer = NewsAnalyzer(gemini_key=api_key)
    return analyzer

In [8]:
analyzer = get_analyzer()

In [None]:
title = "Few CRE companies have achieved their AI goals. Here's why"
text_content = '''Few CRE companies have achieved their AI goals. Here\'s why Skip Navigation Markets Pre-Markets U.S. Markets Europe Markets China Markets Asia Markets World Markets Currencies Cryptocurrency Futures & Commodities Bonds Funds & ETFs Business Economy Finance Health & Science Media Real Estate Energy Climate Transportation Industrials Retail Wealth Sports Life Small Business Investing Personal Finance Fintech Financial Advisors Options Action ETF Street Buffett Archive Earnings Trader Talk Tech Cybersecurity AI Enterprise Internet Media Mobile Social Media CNBC Disruptor 50 Tech Guide Politics White House Policy Defense Congress Expanding Opportunity Europe Politics China Politics Asia Politics World Politics Video Latest Video Full Episodes Livestream Top Video Live Audio Europe TV Asia TV CNBC Podcasts CEO Interviews Digital Originals Watchlist Investing Club Trust Portfolio Analysis Trade Alerts Meeting Videos Homestretch Jim\'s Columns Education Subscribe PRO Pro News Josh Brown Mike Santoli Calls of the Day My Portfolio Livestream Full Episodes Stock Screener Market Forecast Options Investing Chart Investing Subscribe Livestream Menu Make It select USA INTL Livestream Search quotes, news & videos Livestream Watchlist SIGN IN Create free account Markets Business Investing Tech Politics Video Watchlist Investing Club PRO Livestream Menu Real Estate Housing Construction REITs Rising Risks Newsletter Sign-up CNBC Property Play Just 5% of CRE companies have achieved their AI goals. Here\'s why Published Fri, Oct 31 2025 8:00 AM EDT Updated Fri, Oct 31 2025 8:11 AM EDT Diana Olick @in/dianaolick @DianaOlickCNBC @DianaOlick WATCH LIVE Key Points Real estate companies are moving beyond initial testing and exploration of AI into more targeted applications that aim to redefine value, according to a new survey from JLL. JLL found that 88% of investors, owners and landlords said they have started piloting AI, with most pursuing an average of five use cases simultaneously. Just 5% of respondents said they have achieved all their program goals, while close to half said they have achieved two to three goals. Diminishing perspective of downtown London skyscrapers Chunyip Wong | Istock | Getty Images A version of this article first appeared in the CNBC Property Play newsletter with Diana Olick. Property Play covers new and evolving opportunities for the real estate investor, from individuals to venture capitalists, private equity funds, family offices, institutional investors and large public companies. Sign up to receive future editions, straight to your inbox. The commercial real estate market has been historically slow to modernize, and yet it appears to be accelerating its adoption of artificial intelligence. Companies are moving beyond initial testing and exploration into more targeted applications that aim to redefine value, according to a new survey from JLL. The survey of more than 1,500 senior CRE investor and occupier decision-makers across various industries found that, while still in the early stages, organizations are making AI a priority in their technology budgets. They are also moving from using it just for efficiency to focusing on how it can grow their businesses. JLL found that 88% of investors, owners and landlords said they have started piloting AI, with most pursuing an average of five use cases simultaneously. And more than 90% of occupiers are running corporate real estate AI pilots, according to the report. Compare that with just 5% starting AI pilots two years ago. The adoption is fast, but not entirely easy. Just 5% of respondents said they have achieved all their program goals, while close to half said they have achieved two to three goals. Much of the efforts are still experimental, without much growth. "If you think about commercial real estate, traditionally, it is not a quick technology adopter, and it\'s usually skeptical," said Yao Morin, chief technology officer at JLL. "So the high number of adoptions is actually quite surprising to me. What is not surprising on the flip side is that only 5% actually thinks that they have achieved all the goals. This is pretty aligned with a lot of other industries as well." Get Property Play directly to your inbox CNBC\'s Property Play with Diana Olick covers new and evolving opportunities for the real estate investor, delivered weekly to your inbox. Subscribe here to get access today . The reason they\'re not hitting their goals is because the goal line has moved. Companies have gone beyond just wanting to do certain tasks faster, or so-called operational efficiencies. Now they are tying AI to their revenue goals. For example, some are using it to help them improve their investment risk models, making investment and portfolio decisions based on the output of AI. That will require big changes to the fundamental way they operate. "When you really start moving towards the revenue side, the margin expansion side, then it\'s going to require a lot more than just using a technology," Morin explained. "You can\'t just say, \'Well, I\'m saving you 10% to do this particular thing.\' Companies need to actually rethink their operating model, to rethink how they organize to actually achieve the savings." And so companies are investing heavily in AI, despite economic headwinds. More than half of investors surveyed by JLL have been able to get significant budget growth over the past two years in the space. Their No. 1 spend is on strategic advisory on technology or AI, and most report their budgets have increased solely due to AI. After that, the spending goes to upgrading both cyber- and data-security measures and infrastructure for AI integration. Morin said what she found really surprising is that while most think companies will start using AI for simple tasks, or, low-risk, low-hanging fruit, that was not at all the case. "Our survey showed the opposite. We are getting to a point of sophistication, beyond this initial skeptical phase, where companies are really focusing on the competitive advantage to pressing business problems, using AI to solve instead of [just] those simple low-risk operations." More In CNBC Property Play Why global investment firm Nuveen is betting on this niche real estate subsector Diana Olick Commercial real estate is finally embracing blockchain. Here\'s what investors should know Diana Olick Major real estate developers are fast becoming power brokers Diana Olick Read More Subscribe to CNBC PRO Subscribe to Investing Club Licensing & Reprints CNBC Councils Supply Chain Values CNBC on Peacock Join the CNBC Panel Digital Products News Releases Closed Captioning Corrections About CNBC Internships Site Map Ad Choices Careers Help Contact News Tips Got a confidential news tip? We want to hear from you. Get In Touch CNBC Newsletters Sign up for free newsletters and get more CNBC delivered to your inbox Sign Up Now Get this delivered to your inbox, and more info about our products and services. Advertise With Us Please Contact Us Privacy Policy Your Privacy Choices CA Notice Terms of Service © 2025 CNBC LLC. All Rights Reserved. A Division of NBCUniversal Data is a real-time snapshot *Data is delayed at least 15 minutes.\n      Global Business and Financial News, Stock Quotes, and Market Data\n      and Analysis. Market Data Terms of Use and Disclaimers Data also provided by'''
test = await analyzer.analyze_with_url(text=text_content, title=title)

In [10]:
output = test.model_dump()
output

{'page_title': "Few CRE companies have achieved their AI goals. Here's why",
 'is_financial': 'Yes',
 'country': [],
 'sector': [],
 'companies': [],
 'confident_score': 9.5,
 'sentiment': 'Neutral',
 'summary_en': 'A recent JLL survey indicates that while commercial real estate (CRE) companies are increasingly adopting AI, with 88% piloting it for an average of five use cases, only 5% have fully achieved their AI goals. This is attributed to the moving goalposts, as companies now aim to tie AI to revenue and business growth rather than just operational efficiencies, requiring fundamental changes to operating models.',
 'summary_tr': 'Yeni bir JLL anketine göre, ticari gayrimenkul (TG) şirketleri giderek daha fazla YZ benimsemesine rağmen (ortalamada beş kullanım durumu için pilot uygulama yapanların %88',
 'extracted_characters': 7365}

In [11]:
def count_total_characters(data) -> int:
    """
    Recursively count total characters across all string values in a nested dict/list structure.
    Non-string fields (numbers, booleans, None) are ignored.

    Example:
        count_total_characters({"a": "Hello", "b": ["World", 123]})  # -> 10
    """
    total_chars = 0

    if isinstance(data, str):
        total_chars += len(data)

    elif isinstance(data, dict):
        for value in data.values():
            total_chars += count_total_characters(value)

    elif isinstance(data, list):
        for item in data:
            total_chars += count_total_characters(item)

    return total_chars


count_total_characters(output)

648

In [12]:
from cost_estimation import calc_text_cost_usd

system_prompt="""You are a professional news analyst specializing in financial and business reporting.
    Interpret a given news article and produce a concise, structured analysis.

    TASKS
    1) Financial Relevance
    - Decide if the news is about finance/economy/markets. Output: Yes/No.

    2) Sector Classification
    - Name the primary sector (e.g., Technology, Banking, F&B, Heavy Industry, Manufacturing, Energy, Healthcare, etc.).

    3) Companies Mentioned (TARGET COMPANIES ONLY)
    - List ONLY operating companies materially involved or affected in the article’s CONTENT.
    - INCLUDE: named operating companies and subsidiaries explicitly referenced as actors or impacted parties.
    - EXCLUDE (unless they are themselves the main subject of the story):
    • Media (Reuters, CNBC, etc.)
    • Data/survey/benchmark providers (S&P Global, PMI compilers, Markit, etc.)
    • Government bodies, regulators, NGOs, think tanks
    • Stock indices/ETFs (S&P 500, MSCI, etc.)
    • Generic groups with no named firms (“Chinese automakers”)
    - Deduplicate; use canonical names (e.g., “Apple Inc.”).
    - If no target companies are present, output: None.

    (OPTIONAL) Tickers — only if present in the article text
    - If the article explicitly shows a ticker next to a company (examples: AAPL, TSLA.O, 0700.HK, 600519.SS, NYSE:T, 000001.SZ), append it in parentheses after the company name.
    Example: Apple Inc. (AAPL), Tencent Holdings Ltd. (0700.HK)
    - Do NOT infer tickers; never guess.

    (OPTIONAL) Country/Region Tags — only if present in the article text
    - If the article explicitly states a company’s country/region (e.g., “U.S. chipmaker…”, “Chinese EV maker…”), add a short tag in square brackets after the company name or ticker.
    Example: BYD Co. Ltd. (1211.HK) [China]
    - Do NOT infer geography if not stated.

    4) Sentiment Analysis
    - Classify overall tone: Positive / Negative / Neutral.
    - Provide a confidence score between 1.0 and 10.0 (10.0 = highest confidence).

    5) Summaries
    - English summary: 2–3 sentences.
    - Turkish summary: 2–3 sentences conveying the same meaning.

    OUTPUT FORMAT (keys only; fill values)
    - Financial Check: Yes/No
    - Sector: <sector name>
    - Companies Mentioned: <comma-separated list or "None">
    - Sentiment: <Positive|Negative|Neutral> 
    — Confidence: <x.x>/10.0
    - English Summary: <2–3 sentences>
    - Turkish Summary: <2–3 sentences>

    CLARIFICATIONS & EXAMPLES
    - If S&P Global is only cited as a PMI data source, do NOT list it under Companies Mentioned.
    - “Tesla cuts prices in China…” → Companies Mentioned: Tesla, Inc. (TSLA) [U.S.]  (only if TSLA is actually in the text; otherwise omit ticker)
    - “Chinese smartphone makers raised prices…” with no firm names → Companies Mentioned: None"""

    # system_prompt: str,
    # user_content: str,
    # model_output_chars: int,
    # chars_per_token: float = 4.0,
    # pricing: Pricing = Pricing(**GEMINI_25_FLASH_RATES),

# plan: Plan,                 # "batch" (your use-case) or "standard"
# prompt_tokens: int,         # total input tokens for ONE request (system + title + content + etc.)
# output_tokens: int,         # output tokens for ONE request
# cache_write_tokens: int = 0,
# cache_storage_tokens: int = 0,
# cache_storage_hours: float = 0.0,
# grounded_search_prompts_paid: int = 0,  # after subtracting free quota

result = calc_text_cost_usd(plan="standard", 
                            prompt_tokens=len(system_prompt+title+text_content),
                            output_tokens=count_total_characters(output))

result

{'plan': 'standard',
 'input_tokens': 10249,
 'output_tokens': 1080,
 'costs': {'input_usd': 0.003075,
  'output_usd': 0.0027,
  'cache_write_usd': 0.0,
  'cache_storage_usd': 0.0,
  'grounding_search_usd': 0.0},
 'total_usd': 0.005775,
 'rates_per_1M': {'input': 0.3,
  'output': 2.5,
  'cache_write': 0.03,
  'cache_storage_per_hour': 1.0,
  'grounding_search_per_1000': 35.0}}

In [13]:
0.003075*10000

30.75