<a href="https://colab.research.google.com/github/22Himanshu/KPMG_-_PwC_article_resarch_automation/blob/main/AI_web_scraper.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install selenium
!pip install webdriver-manager



# Kpmg article links

In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException, WebDriverException
from bs4 import BeautifulSoup
import time
import re
import tempfile
import os
import requests
from urllib.parse import urlparse, urljoin
import shutil
from concurrent.futures import ProcessPoolExecutor, as_completed

# Define the base pattern to find insights (or similar) links ending in .html, flexible on prefix
# This pattern gets candidates before applying the specific date logic
base_pattern = re.compile(r"^/.*?/insights.*\.html$")
domain = "https://kpmg.com"

# Define the specific date pattern for filtering
date_pattern_acceptable = re.compile(r"/2025/(03|04|05)/")

# Function to scrape insights for a single country code
def scrape_country_insights(country_code):
    """Scrapes the insights page for a given country code and returns potential links."""
    temp_profile = None
    driver = None
    country_potential_links = []

    try:
        # Each process needs its own temporary profile
        temp_profile = tempfile.mkdtemp()
        options = Options()
        options.add_argument("--headless")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-gpu")
        options.add_argument("--window-size=1920,1080")
        options.add_argument(f"--user-data-dir={temp_profile}")

        # Construct the URL
        url = f"{domain}/{country_code}/en/insights.html"
        print(f"Process for {country_code}: Visiting {url}")

        driver = webdriver.Chrome(options=options)
        driver.get(url)

        # Wait for initial load
        time.sleep(3)

        # Pagination
        print(f"Process for {country_code}: Starting pagination...")
        while True:
            try:
                time.sleep(4) # Wait between clicks
                next_button = driver.find_element(By.CSS_SELECTOR, 'button[data-analytics-title="pagination-next"]')
                driver.execute_script("arguments[0].click();", next_button)
            except NoSuchElementException:
                # print(f"Process for {country_code}: Next button not found. Pagination complete.")
                break
            except Exception as e:
                print(f"Process for {country_code}: An error during pagination: {e}")
                break # Exit loop on other errors

        # Parse page source
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Collect links based on the base pattern
        print(f"Process for {country_code}: Collecting links...")
        for a in soup.find_all('a', href=True):
            href = a['href']
            if not href or not isinstance(href, str):
                 continue

            checked_href = None
            if href.startswith('/'):
                 checked_href = href
            elif href.startswith(domain):
                 checked_href = href[len(domain):]
            else:
                 continue # Skip external

            if checked_href and base_pattern.match(checked_href):
                 # Ensure the full URL is correctly formed
                if checked_href.startswith('/'):
                    full_url = domain + checked_href
                else:
                    full_url = urljoin(url, checked_href)

                country_potential_links.append(full_url)

        print(f"Process for {country_code}: Found {len(country_potential_links)} potential links.")

    except WebDriverException as e:
        print(f"Process for {country_code}: WebDriver error accessing {url}: {e}")
    except Exception as e:
        print(f"Process for {country_code}: An unexpected error accessing {url}: {e}")
    finally:
        if driver:
            driver.quit()
        if temp_profile:
            try:
                shutil.rmtree(temp_profile, ignore_errors=True)
            except Exception as e:
                 print(f"Process for {country_code}: Error cleaning up temp profile {temp_profile}: {e}")

    return country_potential_links # Return the list of links found by this process

# --- Main Execution ---
if __name__ == "__main__": # Essential for multiprocessing on some platforms
    # Define a list of potential country codes
    country_codes = [
        "in",  "ae", "qa", "us", "gb", "de", "dp", "ar", "au", "at", "be", "br", "ca",
        "cn", "co", "cr", "dk", "fi", "hk", "hu", "ie", "il", "it", "jp", "kz", "lu", "my",
        "mx", "nl", "nz", "no", "ph", "ru", "sg", "za", "kr", "es", "se", "ch", "tw",
        "th", "ua", "uk",
        "pk", "pl", "sa", "cz", "cl"
        # Add more codes here if needed
    ]

    all_potential_links = set()

    print(f"Starting parallel scraping for {len(country_codes)} countries...")

    # Use ProcessPoolExecutor to run scraping tasks in parallel
    # max_workers controls how many processes run simultaneously
    # Adjust max_workers based on your system's capabilities (CPU cores, RAM)
    # Start with a moderate number, e.g., 4 or 8.
    with ProcessPoolExecutor(max_workers=8) as executor:
        # Submit tasks to the executor and store future objects
        future_to_country = {executor.submit(scrape_country_insights, code): code for code in country_codes}

        # Process results as they complete
        for future in as_completed(future_to_country):
            country_code = future_to_country[future]
            try:
                potential_links_from_country = future.result()
                print(f"Finished process for {country_code}. Received {len(potential_links_from_country)} links.")
                all_potential_links.update(potential_links_from_country) # Add to the global set
            except Exception as exc:
                print(f"Process for {country_code} generated an exception: {exc}")

    print(f"\n--- Finished collecting potential links from all countries ---")
    print(f"Found a total of {len(all_potential_links)} unique potential links matching the base pattern.")

    # --- Apply the specific date filtering logic ---
    final_article_links = []
    print("\n--- Filtering links based on date condition ---")
    for url in all_potential_links:
        parsed_url = urlparse(url)
        path = parsed_url.path

        if "/202" in path:
            # If it contains "/202", check if it matches the acceptable date pattern
            if date_pattern_acceptable.search(path):
                final_article_links.append(url)
            # Else, it contains /202 but not the right date, so it's excluded
        else:
            # If it does NOT contain "/202" at all, it's acceptable
            final_article_links.append(url)

    print(f"✅ Found a total of {len(final_article_links)} unique article links after applying date filter.\n")

Starting parallel scraping for 49 countries...
Process for qa: Visiting https://kpmg.com/qa/en/insights.htmlProcess for us: Visiting https://kpmg.com/us/en/insights.htmlProcess for gb: Visiting https://kpmg.com/gb/en/insights.htmlProcess for de: Visiting https://kpmg.com/de/en/insights.htmlProcess for ae: Visiting https://kpmg.com/ae/en/insights.html
Process for in: Visiting https://kpmg.com/in/en/insights.html



Process for ar: Visiting https://kpmg.com/ar/en/insights.html
Process for dp: Visiting https://kpmg.com/dp/en/insights.html

Process for in: Starting pagination...
Process for in: Collecting links...
Process for in: Found 45 potential links.
Process for ar: Starting pagination...
Process for gb: Starting pagination...
Process for au: Visiting https://kpmg.com/au/en/insights.html
Finished process for in. Received 45 links.
Process for dp: Starting pagination...
Process for qa: Starting pagination...
Process for ar: Collecting links...
Process for ar: Found 9 potential links.
P

In [3]:
final_article_links

['https://kpmg.com/cn/en/home/insights.html',
 'https://kpmg.com/ae/en/home/insights/2025/04/you-can-with-ai.html',
 'https://kpmg.com/in/en/insights/artificial-intelligence.html',
 'https://kpmg.com/za/en/home/insights/2025/03/german-southern-african-business-outlook-2025.html',
 'https://kpmg.com/in/en/insights/energy-and-renewables.html',
 'https://kpmg.com/ch/en/insights/esg-sustainability/eudr-deforestation.html',
 'https://kpmg.com/uk/en/insights/data.html',
 'https://kpmg.com/th/en/home/insights/2025/04/venture-pulse.html',
 'https://kpmg.com/in/en/insights/workforce.html',
 'https://kpmg.com/us/en/insights-by-topic/advisory.html',
 'https://kpmg.com/pt/pt/home/insights/2025/03/principais-riscos-geopoliticos-2025.html',
 'https://kpmg.com/be/en/home/insights.html',
 'https://kpmg.com/in/en/insights/transformation.html',
 'https://kpmg.com/pt/pt/home/insights/2025/03/transformacao-dos-seguros.html',
 'https://kpmg.com/th/en/home/insights/2016/03/th-gjp-newsletter.html',
 'https:/

# PWC urls

In [4]:
import re
import requests
from urllib.parse import urljoin # Import urljoin for handling relative URLs

def extract_urls_from_page(url: str) -> list[str]:
    """
    Fetches content from a single URL (via Jina) and extracts specific HTML URLs.

    Args:
        url (str): The URL of the webpage to fetch (pre-pended with Jina reader URL).

    Returns:
        list[str]: A list of extracted unique URLs.
    """
    print(f"Attempting to extract from: {url}")
    extracted_urls = []
    try:
        # Fetch the content of the page using requests
        response = requests.get(url)
        response.raise_for_status()  # Raise an exception for bad status codes
        text_string = response.text

        # --- Regex Pattern ---
        # This pattern looks for URLs ending in .html that are likely article/content pages.
        # It's based on the pattern observed on the gx/en site.
        # You might need to adjust this regex if URL patterns differ significantly
        # on other country sites that *do* resolve correctly.
        pattern = re.compile(r"https://[^\\s]*/(?:issues|services|trends)/[^\\s]*?\.html")

        # Find all occurrences of the pattern in the text
        found_urls = pattern.findall(text_string)

        # Add found URLs to the list
        extracted_urls.extend(found_urls)

        return extracted_urls

    except requests.exceptions.RequestException as e:
        print(f"Error fetching the URL {url}: {e}")
        return []
    except Exception as e:
        print(f"An error occurred during processing {url}: {e}")
        return []

# --- List of country codes to use ---
country_codes = [
    "gx", "dk", "ie", "nz", "au", "jp"
    # Add more codes here if needed
]

# Base URL structure to modify
# We'll replace '{country_code}' with each code from the list
base_pwc_url_template = "https://www.pwc.com/{country_code}/en/research-insights/insights-library.html"
jina_reader_prefix = "https://r.jina.ai/"

# --- Construct the list of URLs to scrape ---
pwc_insights_urls = []
for code in country_codes:
    # Construct the PwC URL for the specific country code (keeping '/en/' and the rest of the path)
    country_url = base_pwc_url_template.format(country_code=code)
    # Prepend the Jina reader URL
    full_url_to_fetch = jina_reader_prefix + country_url
    pwc_insights_urls.append(full_url_to_fetch)

# --- Important Caveat ---
# Most of the URLs generated this way are likely to result in errors (e.g., 404 Not Found)
# because PwC's actual website structure for different countries varies greatly
# and the path '/en/research-insights/insights-library.html' is specific to the
# global or a few regional sites, not universal.
# The script will attempt to fetch each, and the 'Error fetching' message will appear
# for those that don't resolve correctly.

# Collect URLs from all specified pages
all_html_urls = []
for url in pwc_insights_urls:
    urls_from_page = extract_urls_from_page(url)
    all_html_urls.extend(urls_from_page)

# Use a set to store unique URLs and automatically remove duplicates across all pages
unique_urls_set = set(all_html_urls)

# Convert the set back into a list
unique_urls_list = list(unique_urls_set)

print("\nExtracted Unique HTML URLs from all specified pages:")
if unique_urls_list:
    for url in unique_urls_list:
        print(url)
    print(f"\nTotal unique URLs found: {len(unique_urls_list)}")
else:
    print("No URLs extracted from the specified pages using the constructed URLs.")

# Note: This script only extracts the URLs ending in .html based on the regex.
# It does NOT extract the publication dates associated with these URLs,
# as the regex method only finds URL patterns in the text content provided by Jina,
# not the specific HTML structure linking URLs and dates.
# To get dates, you would need a different approach (like BeautifulSoup) for each working URL,
# and that would require identifying date elements on each country's page structure,
# which we found challenging previously due to varying HTML.

Attempting to extract from: https://r.jina.ai/https://www.pwc.com/gx/en/research-insights/insights-library.html
Attempting to extract from: https://r.jina.ai/https://www.pwc.com/dk/en/research-insights/insights-library.html
Attempting to extract from: https://r.jina.ai/https://www.pwc.com/ie/en/research-insights/insights-library.html
Attempting to extract from: https://r.jina.ai/https://www.pwc.com/nz/en/research-insights/insights-library.html
Attempting to extract from: https://r.jina.ai/https://www.pwc.com/au/en/research-insights/insights-library.html
Attempting to extract from: https://r.jina.ai/https://www.pwc.com/jp/en/research-insights/insights-library.html

Extracted Unique HTML URLs from all specified pages:
https://www.pwc.dk/da/services/legal.html
https://www.pwc.ie/services/tax.html
https://www.pwc.com.au/services/artificial-intelligence.html
https://www.pwc.ie/services/workforce/gender-pay-gap-reporting.html
https://www.pwc.com/gx/en/issues/reinventing-the-future/take-on-to

In [None]:
final_article_links.extend(unique_urls_list)

In [8]:
!pip install langchain_google_genai

Collecting langchain_google_genai
  Downloading langchain_google_genai-2.1.3-py3-none-any.whl.metadata (4.7 kB)
Collecting filetype<2.0.0,>=1.2.0 (from langchain_google_genai)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting google-ai-generativelanguage<0.7.0,>=0.6.16 (from langchain_google_genai)
  Downloading google_ai_generativelanguage-0.6.17-py3-none-any.whl.metadata (9.8 kB)
Downloading langchain_google_genai-2.1.3-py3-none-any.whl (43 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.7/43.7 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading filetype-1.2.0-py2.py3-none-any.whl (19 kB)
Downloading google_ai_generativelanguage-0.6.17-py3-none-any.whl (1.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: filetype, google-ai-generativelanguage, langchain_google_genai
  Attempting uninstall: google-ai-generativelangu

In [None]:
import requests
import json
import os

from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_google_genai import ChatGoogleGenerativeAI
from google.colab import userdata  # Keep this if you are in Colab

os.environ['GOOGLE_API_KEY'] = userdata.get('GOOGLE_API_KEY') if 'google.colab' in str(__import__('sys').modules) else os.environ.get('GOOGLE_API_KEY')

def scrape_url(url):
    """Fetches the content of a single URL."""
    try:
        response = requests.get(url)
        response.raise_for_status()  # Raise an exception for bad status codes
        return response.text
    except requests.exceptions.RequestException as e:
        print(f"Error fetching {url}: {e}")
        return None

def extract_info_with_llm(formatted_text: str, llm_model, custom_prompt: str = None):
    if not llm_model:
        print("LLM model not initialized. Cannot perform extraction.")
        return None

    output_parser = JsonOutputParser()

    if custom_prompt:
        prompt_template_string = custom_prompt
    else:
        prompt_template_string = """
        You are an expert article extractor. Your task is to extract key information from text data formatted by Jina AI Reader.

        The text has some information removed and formatted by Jina AI Reader.You need to extract the following information from the text:
        1. check if the text is an article and is in english. If it is not an article or is not in english, return an empty JSON object.
        2. check the date published. If the date published is given and is before March 2025, return an empty JSON object.
        3. If date is not given, proceed with extraction.
        4. If date is given and is March 2025 or later, proceed with extraction.

        If it is an article, extract the following information:
        Extract:
        1.  **Article URL:** The main web address for this specific article.
        2.  **Date Published:** The publication date of the article.
        3.  **Published Content:** The full main body text of the article. Exclude any navigation menus, links to other articles, any link that opens in new tab, email address,Disclaimers, headers, footers, sidebars, comments sections, unrelated promotional content or anything unaffiliated from the main content. Focus only on the core article content. You can concatinate 2 or more lines if they seem to be connected to each other
        "article_url": string
        "date_published": string (Use YYYY-MM-DD format if clearly available, otherwise use the format seen)
        "published_content": string (The clean, full text of the article)

        If any piece of information is not found in the text, use the value "N/A" for that key.
        You can remove \n and other special characters if they are too much and don't affect the meaning of the text.
        ---
        Formatted Text:
        {formatted_text}
        ---

        {format_instructions}
        """

    prompt = PromptTemplate(
        template=prompt_template_string,
        input_variables=["formatted_text"],
        partial_variables={"format_instructions": output_parser.get_format_instructions()},
    )

    chain = prompt | llm_model | output_parser

    try:
        print("Calling LLM for extraction...")
        output = chain.invoke({"formatted_text": formatted_text})
        return output
    except Exception as e:
        print(f"Error during LLM extraction: {e}")
        print("This could be due to API issues, incorrect model name, or prompt-related problems after successful initialization.")
        return None

def save_to_json(data_list, filename="extracted_articles.json"):
    """Saves a list of data dictionaries to a JSON file."""
    try:
        with open(filename, "w", encoding="utf-8") as f:
            json.dump(data_list, f, ensure_ascii=False, indent=4)
        print(f"\nData successfully saved to {filename}")
    except IOError as e:
        print(f"\nError saving data to JSON file: {e}")

# --- Main Execution ---
if __name__ == "__main__":
    # --- Replace with your list of URLs ---
    list_of_urls = final_article_links
    # --- End of list of URLs ---

    llm = None
    google_api_key = os.getenv("GOOGLE_API_KEY")

    if google_api_key:
        print("GOOGLE_API_KEY environment variable found.")
        try:m your 1-month free trial of Premium Career.

Cancel anytime before May 23, 2025. We’ll send you a reminder 7 days before your free trial ends.
            llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=google_api_key, temperature=0)
            print("Google Gemini 2.0 Flash LLM initialized successfully.")
        except Exception as e:
            print(f"Error initializing Google Gemini LLM with the found API key: {e}")
            print("Please double-check your API key and that it has access to the 'gemini-2.0-flash' model.")
    else:
        print("Warning: GOOGLE_API_KEY environment variable not set.")
        print("Please set your Google API key in your environment variables.")
        print("If you are in Google Colab, use the Secrets feature on the left sidebar.")

    extracted_data_list = []
    if llm:
        for url in list_of_urls:
            print(f"\nProcessing URL: {url}")
            scraped_text = scrape_url(url)
            if scraped_text:
                extracted_info = extract_info_with_llm(scraped_text, llm)
                if extracted_info:
                    extracted_info['article_url'] = url  # Ensure URL is included in the output
                    extracted_data_list.append(extracted_info)
            else:
                print(f"Skipping LLM extraction for {url} due to failed scraping.")

        if extracted_data_list:
            save_to_json(extracted_data_list)
        else:
            print("No data was extracted.")

GOOGLE_API_KEY environment variable found.
Google Gemini 2.0 Flash LLM initialized successfully.

Processing URL: https://kpmg.com/cn/en/home/insights.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/ae/en/home/insights/2025/04/you-can-with-ai.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/in/en/insights/artificial-intelligence.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/za/en/home/insights/2025/03/german-southern-african-business-outlook-2025.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/in/en/insights/energy-and-renewables.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/ch/en/insights/esg-sustainability/eudr-deforestation.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/uk/en/insights/data.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/th/en/home/insights/2025/04/venture-pulse.html
Calling LLM for extraction...

Processing URL: https




Processing URL: https://kpmg.com/us/en/insights-by-topic/advisory.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/pt/pt/home/insights/2025/03/principais-riscos-geopoliticos-2025.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/be/en/home/insights.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/in/en/insights/transformation.html
Calling LLM for extraction...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 1000000
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 4
}
].


Error during LLM extraction: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 1000000
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 1
}
]
This could be due to API issues, incorrect model name, or prompt-related problems after successful initialization.

Processing URL: https://kpmg.com/pt/pt/home/insights/2025/03/transformacao-dos-seguros.html
Calling LLM for extraction...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 1000000
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 58
}
].


Error during LLM extraction: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 1000000
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 56
}
]
This could be due to API issues, incorrect model name, or prompt-related problems after successful initialization.

Processing URL: https://kpmg.com/th/en/home/insights/2016/03/th-gjp-newsletter.html
Calling LLM for extraction...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 1000000
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 55
}
].



Processing URL: https://kpmg.com/za/en/home/insights/2025/03/namibia-budget-highlights2025.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/my/en/home/insights/2025/03/tax-developments.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/ua/en/home/insights/2025/03/your-business-in-ukraine.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/pt/pt/home/insights/2025/03/pulse-of-fintech.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/sg/en/home/insights/2025/03/fpi-report.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/se/en/insights/ai/intelligent-banking.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/th/en/home/insights.html
Calling LLM for extraction...





Processing URL: https://kpmg.com/za/en/home/insights/2025/04/you-can-with-ai.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/uk/en/insights/finance/frs-102-amendments.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/in/en/insights/2025/04/global-mobility-new-world-be-brave.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/us/en/insights-by-topic/tax.html
Calling LLM for extraction...





Processing URL: https://kpmg.com/pt/pt/home/insights/2025/03/kpmg-global-tech-report-healthcare-insights.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/de/en/home/insights/overview/esg.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/se/en/insights/business-transformation/navigating-the-forces-of-change-in-tax.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/th/en/home/insights/2025/04/th-tax-news-flash-issue-153.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/hu/en/home/insights.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/uk/en/insights/operations.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/cz/en/home/insights.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/in/en/insights/2025/04/you-can-with-ai.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/in/en/insights/global-capability-centres.html
Calling LLM for extracti

  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 1000000
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 1
}
].


Error during LLM extraction: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 1000000
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 59
}
]
This could be due to API issues, incorrect model name, or prompt-related problems after successful initialization.

Processing URL: https://kpmg.com/ch/en/insights/transformation.html
Calling LLM for extraction...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 1000000
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 57
}
].



Processing URL: https://kpmg.com/us/en/insights-by-industry/insights-alternative-investments.html
Calling LLM for extraction...





Processing URL: https://kpmg.com/nz/en/home/insights/2025/03/fips-banks-2024.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/cn/en/home/insights/2025/04/china-hk-ipo-2025-q1-review.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/za/en/home/insights/2025/04/global-tech-innovator-2025.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/uk/en/insights/technology/accelerating-with-intent.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/nl/en/home/insights.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/in/en/insights/2025/04/venture-pulse.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/ch/en/insights/executives-board-members.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/uk/en/insights/sustainability/perspectives-on-the-energy-transition.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/uk/en/insights/ai.html
Calling LLM for extr

  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
}
].


Error during LLM extraction: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 58
}
]
This could be due to API issues, incorrect model name, or prompt-related problems after successful initialization.

Processing URL: https://kpmg.com/ch/en/insights/transformation/financial-consolidation-modernization-solutions.html
Calling LLM for extraction...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 56
}
].



Processing URL: https://kpmg.com/in/en/insights/trade-and-tariffs.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/de/en/home/insights/overview/digital-transformation.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/qa/en/home/insights/2025/03/gcc-listed-banks-results-report-fy24.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/nz/en/home/insights.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/cn/en/home/insights/2018/02/china-economic-monitor.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/in/en/insights/2025/04/protecting-your-sap-landscape.html
Calling LLM for extraction...





Processing URL: https://kpmg.com/za/en/home/insights/2025/03/family-office-and-private-client-brochure.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/za/en/home/insights/2025/03/from-smart-to-smarter-cities.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/au/en/home/insights/2025/04/us-trade-tariffs.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/jp/en/home/insights/2016/05/tax.html
Calling LLM for extraction...





Processing URL: https://kpmg.com/jp/en/home/insights.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/uk/en/insights/tax/it-is-time-for-tax-teams-to-get-prompting.html
Calling LLM for extraction...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 1000000
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 10
}
].


Error during LLM extraction: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 1000000
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 7
}
]
This could be due to API issues, incorrect model name, or prompt-related problems after successful initialization.

Processing URL: https://kpmg.com/kz/en/home/insights/2025/04/navigating-the-forces-of-change-in-tax.html
Calling LLM for extraction...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 1000000
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 6
}
].


Error during LLM extraction: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 1000000
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 4
}
]
This could be due to API issues, incorrect model name, or prompt-related problems after successful initialization.

Processing URL: https://kpmg.com/qa/en/home/insights.html
Calling LLM for extraction...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 1000000
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 1
}
].


Error during LLM extraction: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 1000000
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 59
}
]
This could be due to API issues, incorrect model name, or prompt-related problems after successful initialization.

Processing URL: https://kpmg.com/se/en/insights/ai/global-tech-report-financial-services.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/au/en/home/insights/202




Processing URL: https://kpmg.com/pt/pt/home/insights.html
Calling LLM for extraction...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 1000000
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 13
}
].


Error during LLM extraction: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 1000000
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 11
}
]
This could be due to API issues, incorrect model name, or prompt-related problems after successful initialization.

Processing URL: https://kpmg.com/ch/en/insights.html
Calling LLM for extraction...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 10
}
].


Error during LLM extraction: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 8
}
]
This could be due to API issues, incorrect model name, or prompt-related problems after successful initialization.

Processing URL: https://kpmg.com/my/en/home/insights/2025/04/reciprocal-tariffs-announced-by-us-on-2-april-2025.html
Calling LLM for extraction...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 7
}
].


Error during LLM extraction: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 5
}
]
This could be due to API issues, incorrect model name, or prompt-related problems after successful initialization.

Processing URL: https://kpmg.com/th/en/home/insights/2025/03/top-risks-forecast-2025.html
Calling LLM for extraction...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 4
}
].


Error during LLM extraction: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 1
}
]
This could be due to API issues, incorrect model name, or prompt-related problems after successful initialization.

Processing URL: https://kpmg.com/in/en/insights/2025/04/management-capability-development-index-mcdi-india-2024-report.html
Calling LLM for extraction...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 59
}
].


Error during LLM extraction: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 56
}
]
This could be due to API issues, incorrect model name, or prompt-related problems after successful initialization.

Processing URL: https://kpmg.com/xx/en/home/insights.html
Calling LLM for extraction...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 56
}
].



Processing URL: https://kpmg.com/us/en/insights-by-topic/audit.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/my/en/home/insights/2025/03/cybersecurity-considerations-2025.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/uk/en/insights/culture.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/in/en/insights/cyber-security.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/ch/en/insights/law.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/se/en/insights.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/us/en/insights-by-topic/technology.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/uk/en/insights/fintech.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/pt/pt/home/insights/2025/03/modernizar-os-pagamentos.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/ch/en/insights/cybersecurity-risk.html
Calling LLM for 

  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 1000000
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
}
].


Error during LLM extraction: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 1000000
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 58
}
]
This could be due to API issues, incorrect model name, or prompt-related problems after successful initialization.

Processing URL: https://kpmg.com/us/en/insights-by-industry/insights-asset-management.html
Calling LLM for extraction...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 1000000
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 57
}
].


Error during LLM extraction: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 1000000
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 55
}
]
This could be due to API issues, incorrect model name, or prompt-related problems after successful initialization.

Processing URL: https://kpmg.com/ch/en/insights/deals/mergers-acquisitions-market-trends.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/us/en/insights-by-topic




Processing URL: https://kpmg.com/th/en/home/insights/2025/04/intelligent-tech-enterprise.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/cn/en/home/insights/2019/11/risk-management-and-regulation-newsletter.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/xx/en/home/insights/2018/05/ceo-outlook.html
Calling LLM for extraction...





Processing URL: https://kpmg.com/nz/en/home/insights/2025/04/top-geopolitical-risks-2025.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/se/en/insights/regulations.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/us/en/insights-by-industry/insights-energy-chemicals.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/uk/en/insights/regulatory/embracing-a-culture-of-controls-for-a-resilient-future.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/ae/en/home/insights/2025/04/dubai-hospitality-report.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/br/en/home/insights.html
Calling LLM for extraction...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 5
}
].


Error during LLM extraction: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 2
}
]
This could be due to API issues, incorrect model name, or prompt-related problems after successful initialization.

Processing URL: https://kpmg.com/cn/en/home/insights/2025/03/intelligent-retail.html
Calling LLM for extraction...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 2
}
].


Error during LLM extraction: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 15
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 58
}
]
This could be due to API issues, incorrect model name, or prompt-related problems after successful initialization.

Processing URL: https://kpmg.com/ca/en/home/insights/2025/03/future-of-corporate-services.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/ch/en/insights/esg-sustainability.ht

  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 1000000
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 9
}
].


Error during LLM extraction: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 1000000
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 6
}
]
This could be due to API issues, incorrect model name, or prompt-related problems after successful initialization.

Processing URL: https://kpmg.com/ch/en/insights/technology.html
Calling LLM for extraction...
Error during LLM extraction: Invalid json output: 
For troubleshooting, visit: https://p

  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 1000000
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
}
].


Error during LLM extraction: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 1000000
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 58
}
]
This could be due to API issues, incorrect model name, or prompt-related problems after successful initialization.

Processing URL: https://kpmg.com/de/en/home/insights/overview/artificial-intelligence.html
Calling LLM for extraction...


  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.0-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 1000000
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 57
}
].



Processing URL: https://kpmg.com/ph/en/home/insights.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/tw/en/home/insights/2016/07/social-enterprise-service.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/dk/en/home/insights.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/ph/en/home/insights/tax-calendar-app.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/uk/en/insights/legal.html
Calling LLM for extraction...

Processing URL: https://kpmg.com/ie/en/home/insights/2025/04/us-tariffs-eu-trade-tax.html
Calling LLM for extraction...
