Tikr market 

In [3]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
import time
import sys
import warnings
import os
import xlwings as xw

# Set up Excel workbook and worksheet
book = xw.Book('Richspread.xlsx')
sht = book.sheets['MarketSnapshot']

# Suppress warnings and logging
warnings.filterwarnings("ignore")
os.environ['WDM_LOG_LEVEL'] = '0'

# Define where each table should go in the Excel sheet
table_locations = {
    "Most Active": "B2",
    "Gainers": "G2",
    "Losers": "L2",
    "Sectors": "N16",
    "Regions": "B16",
    "Assets": "F16",
    "Factors": "J16"
}

def scrape_tikr_data():
    # Configure Chrome to run in headless mode with various optimizations
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--window-size=1920,1080")
    chrome_options.add_argument("--disable-blink-features=AutomationControlled")
    chrome_options.add_argument("--disable-logging")
    chrome_options.add_argument("--log-level=3")
    chrome_options.add_argument("--silent")
    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
    chrome_options.add_experimental_option('useAutomationExtension', False)
    
    # Redirect stderr to suppress selenium noise
    original_stderr = sys.stderr
    sys.stderr = open(os.devnull, 'w')
    
    driver = None
    try:
        print("Initializing Chrome driver...")
        driver = webdriver.Chrome(options=chrome_options)
        driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
        
        print("Navigating to TIKR login page...")
        driver.get('https://app.tikr.com/markets?fid=1&ref=1p8x1t')
        wait = WebDriverWait(driver, 15)
        
        # Login process - replace with your credentials
        print("Entering login credentials...")
        email_field = wait.until(EC.element_to_be_clickable((By.XPATH, "//input[@placeholder='Enter your email']")))
        driver.execute_script("arguments[0].value = 'YOUR_EMAIL_HERE';", email_field)
        driver.execute_script("arguments[0].dispatchEvent(new Event('input', { bubbles: true }));", email_field)
        
        password_field = wait.until(EC.element_to_be_clickable((By.XPATH, "//input[@type='password']")))
        driver.execute_script("arguments[0].value = 'YOUR_PASSWORD_HERE';", password_field)
        driver.execute_script("arguments[0].dispatchEvent(new Event('input', { bubbles: true }));", password_field)
        
        time.sleep(1)
        
        print("Submitting login form...")
        try:
            password_field.send_keys(Keys.ENTER)
            time.sleep(3)
        except Exception as e:
            print(f"Login submission failed: {e}")
        
        # Give the page time to fully load after login
        print("Waiting for markets page to load...")
        time.sleep(5)
        
        # Dictionary to store all scraped data
        all_results = {}
        
        # Process the top navigation tabs (Most Active, Gainers, Losers)
        print("\n" + "="*30)
        print("Processing Most Active tab...")
        print("="*30)
        
        # Click on the Most Active tab
        tab_clicked = driver.execute_script("""
            var topTabs = document.querySelectorAll('.tab, [role="tab"], li');
            for (var i = 0; i < topTabs.length; i++) {
                if (topTabs[i].textContent.trim() === "Most Active") {
                    topTabs[i].click();
                    return true;
                }
            }
            return false;
        """)
        
        if not tab_clicked:
            print("Could not find or click Most Active tab")
        
        time.sleep(2)  # Wait for content to load
        
        # Extract stock data from the table
        stocks_data = driver.execute_script("""
            var data = [];
            var table = document.querySelector('table');
            if (table) {
                var rows = table.querySelectorAll('tr');
                for (var i = 1; i < rows.length; i++) {  // Skip header row
                    var cells = rows[i].querySelectorAll('td');
                    if (cells.length >= 4) {
                        data.push({
                            ticker: cells[0].textContent.trim(),
                            lastPrice: cells[1].textContent.trim(),
                            change: cells[2].textContent.trim(),
                            percentChange: cells[3].textContent.trim()
                        });
                    }
                }
            }
            return data;
        """)
        
        if stocks_data:
            print("\nMost Active Table Output:\n")
            print(f"{'Ticker':<8} | {'Last Price':<12} | {'Change':<10} | {'% Change':<10}")
            print("-" * 50)
            for stock in stocks_data:
                print(f"{stock['ticker']:<8} | {stock['lastPrice']:<12} | {stock['change']:<10} | {stock['percentChange']:<10}")
            all_results["Most Active"] = stocks_data
        else:
            print("No data found for Most Active")
            all_results["Most Active"] = []
        
        # Process Gainers tab
        print("\n" + "="*30)
        print("Processing Gainers tab...")
        print("="*30)
        
        tab_clicked = driver.execute_script("""
            var topTabs = document.querySelectorAll('.tab, [role="tab"], li');
            for (var i = 0; i < topTabs.length; i++) {
                if (topTabs[i].textContent.trim() === "Gainers") {
                    topTabs[i].click();
                    return true;
                }
            }
            return false;
        """)
        
        if not tab_clicked:
            print("Could not find or click Gainers tab")
        
        time.sleep(2)
        
        stocks_data = driver.execute_script("""
            var data = [];
            var table = document.querySelector('table');
            if (table) {
                var rows = table.querySelectorAll('tr');
                for (var i = 1; i < rows.length; i++) {
                    var cells = rows[i].querySelectorAll('td');
                    if (cells.length >= 4) {
                        data.push({
                            ticker: cells[0].textContent.trim(),
                            lastPrice: cells[1].textContent.trim(),
                            change: cells[2].textContent.trim(),
                            percentChange: cells[3].textContent.trim()
                        });
                    }
                }
            }
            return data;
        """)
        
        if stocks_data:
            print("\nGainers Table Output:\n")
            print(f"{'Ticker':<8} | {'Last Price':<12} | {'Change':<10} | {'% Change':<10}")
            print("-" * 50)
            for stock in stocks_data:
                print(f"{stock['ticker']:<8} | {stock['lastPrice']:<12} | {stock['change']:<10} | {stock['percentChange']:<10}")
            all_results["Gainers"] = stocks_data
        else:
            print("No data found for Gainers")
            all_results["Gainers"] = []
        
        # Process Losers tab
        print("\n" + "="*30)
        print("Processing Losers tab...")
        print("="*30)
        
        tab_clicked = driver.execute_script("""
            var topTabs = document.querySelectorAll('.tab, [role="tab"], li');
            for (var i = 0; i < topTabs.length; i++) {
                if (topTabs[i].textContent.trim() === "Losers") {
                    topTabs[i].click();
                    return true;
                }
            }
            return false;
        """)
        
        if not tab_clicked:
            print("Could not find or click Losers tab")
        
        time.sleep(2)
        
        stocks_data = driver.execute_script("""
            var data = [];
            var table = document.querySelector('table');
            if (table) {
                var rows = table.querySelectorAll('tr');
                for (var i = 1; i < rows.length; i++) {
                    var cells = rows[i].querySelectorAll('td');
                    if (cells.length >= 4) {
                        data.push({
                            ticker: cells[0].textContent.trim(),
                            lastPrice: cells[1].textContent.trim(),
                            change: cells[2].textContent.trim(),
                            percentChange: cells[3].textContent.trim()
                        });
                    }
                }
            }
            return data;
        """)
        
        if stocks_data:
            print("\nLosers Table Output:\n")
            print(f"{'Ticker':<8} | {'Last Price':<12} | {'Change':<10} | {'% Change':<10}")
            print("-" * 50)
            for stock in stocks_data:
                print(f"{stock['ticker']:<8} | {stock['lastPrice']:<12} | {stock['change']:<10} | {stock['percentChange']:<10}")
            all_results["Losers"] = stocks_data
        else:
            print("No data found for Losers")
            all_results["Losers"] = []
        
        # Process the bottom navigation tabs (Sectors, Regions, Assets, Factors)
        # Define expected content for each tab to help with validation
        bottom_tabs = [
            {
                "name": "Sectors", 
                "expected_items": ["Technology", "Health Care", "Financials", "Energy", "Real Estate", 
                                 "Consumer Discretionary", "Consumer Staples", "Industrials", "Materials", 
                                 "Utilities", "Communication Services"]
            },
            {
                "name": "Regions", 
                "expected_items": ["Developed ex-US", "Emerging Markets", "Canada", "United Kingdom", 
                                 "China", "Japan", "Germany", "Brazil", "India", "Australia"]
            },
            {
                "name": "Assets", 
                "expected_items": ["Commodities", "Gold", "Silver", "Oil", "Natural Gas", 
                                 "U.S. Treasuries", "Municipals", "TIPS", "EM Govt Bonds"]
            },
            {
                "name": "Factors", 
                "expected_items": ["Value", "Growth", "Momentum", "Quality", "Size", "Dividend", 
                                 "ESG", "High Dividend", "International Value"]
            }
        ]
        
        # Process each bottom tab
        for tab_info in bottom_tabs:
            tab_name = tab_info["name"]
            expected_items = tab_info["expected_items"]
            
            print(f"\n{'='*30}")
            print(f"Processing {tab_name} tab...")
            print(f"{'='*30}")
            
            # Click on the tab
            driver.execute_script("""
                var tabContainers = document.querySelectorAll('[role="tablist"], .tabs, .tab-container, .nav');
                var bottomTabContainer = null;
                for (var i = 0; i < tabContainers.length; i++) {
                    var text = tabContainers[i].textContent;
                    if (text.includes('Sectors') && text.includes('Regions') && text.includes('Assets')) {
                        bottomTabContainer = tabContainers[i];
                        break;
                    }
                }
                if (bottomTabContainer) {
                    var tabs = bottomTabContainer.querySelectorAll('[role="tab"], .tab, li');
                    for (var i = 0; i < tabs.length; i++) {
                        if (tabs[i].textContent.trim() === arguments[0]) {
                            tabs[i].click();
                            return true;
                        }
                    }
                }
                return false;
            """, tab_name)
            
            print(f"Clicked on {tab_name} tab")
            time.sleep(3)
            
            # Extract data from the table and validate it
            data = driver.execute_script("""
                var data = [];
                var expectedItems = arguments[1];
                var tables = document.querySelectorAll('table');
                var targetTable = null;
                
                // Find the correct table by checking for expected content
                for (var t = 0; t < tables.length; t++) {
                    var table = tables[t];
                    var tableText = table.textContent;
                    var matchCount = 0;
                    
                    for (var e = 0; e < expectedItems.length; e++) {
                        if (tableText.includes(expectedItems[e])) {
                            matchCount++;
                        }
                    }
                    
                    if (matchCount >= 2) {
                        targetTable = table;
                        break;
                    }
                }
                
                if (targetTable) {
                    var rows = targetTable.querySelectorAll('tr');
                    for (var i = 1; i < rows.length; i++) {
                        var cells = rows[i].querySelectorAll('td');
                        if (cells.length >= 3) {
                            var name = cells[0].textContent.trim();
                            if (name && name !== 'ETF' && name !== 'Name' && !name.includes('Last Price')) {
                                data.push({
                                    name: name,
                                    lastPrice: cells[1].textContent.trim(),
                                    change: cells[2].textContent.trim()
                                });
                            }
                        }
                    }
                }
                return data;
            """, tab_name, expected_items)
            
            if data and len(data) > 0:
                print(f"\n{tab_name} Table Output:\n")
                print(f"{'Name':<30} | {'Last Price':<12} | {'% Change':<10}")
                print("-" * 55)
                for item in data[:10]:  # Show first 10 items
                    print(f"{item['name']:<30} | {item['lastPrice']:<12} | {item['change']:<10}")
                all_results[tab_name] = data
            else:
                print(f"No data found for {tab_name}")
                all_results[tab_name] = []
        
        # Export all data to Excel
        print("\n" + "="*50)
        print("Summary of extracted data:")
        print("="*50)
        for tab_name, data in all_results.items():
            if data:
                print(f"{tab_name}: {len(data)} items")
            else:
                print(f"{tab_name}: No data")
        
        # Write each table to its designated location in Excel
        for tab_name, data in all_results.items():
            if not data or tab_name not in table_locations:
                continue
            
            start_cell = table_locations[tab_name]
            
            # Format data based on table type
            if tab_name in ["Most Active", "Gainers", "Losers"]:
                headers = ["ticker", "lastPrice", "change", "percentChange"]
                rows = [[item.get("ticker", ""), item.get("lastPrice", ""), 
                        item.get("change", ""), item.get("percentChange", "")] for item in data]
            else:
                headers = ["name", "lastPrice", "change"]
                rows = [[item.get("name", ""), item.get("lastPrice", ""), 
                        item.get("change", "")] for item in data]
            
            # Write to Excel
            sht.range(start_cell).value = [headers] + rows
        
        print("\nData successfully exported to Excel.")
        book.save()
    
    except Exception as e:
        print(f"ERROR: {str(e)}")
        import traceback
        traceback.print_exc()
    
    finally:
        # Restore stderr and cleanup
        sys.stderr = original_stderr
        if driver is not None:
            driver.quit()

if __name__ == "__main__":
    scrape_tikr_data()

Initializing Chrome driver...
Navigating to TIKR login page...
Entering login credentials...
Submitting login form...
Waiting for markets page to load...

Processing Most Active tab...
Could not find or click Most Active tab
No data found for Most Active

Processing Gainers tab...
Could not find or click Gainers tab
No data found for Gainers

Processing Losers tab...
Could not find or click Losers tab
No data found for Losers

Processing Sectors tab...
Clicked on Sectors tab
No data found for Sectors

Processing Regions tab...
Clicked on Regions tab
No data found for Regions

Processing Assets tab...
Clicked on Assets tab
No data found for Assets

Processing Factors tab...
Clicked on Factors tab
No data found for Factors

Summary of extracted data:
Most Active: No data
Gainers: No data
Losers: No data
Sectors: No data
Regions: No data
Assets: No data
Factors: No data

Data successfully exported to Excel.


Money Market 

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
import xlwings as xw
import re

def scrape_mmf_data():
    """
    Scrapes money market fund assets data from ICI.org and exports to Excel.
    The data includes Government, Retail, and Institutional fund categories.
    """
    url = "https://www.ici.org/research/stats/mmf"
    print(f"Fetching data from {url}...")

    # Set headers to mimic a real browser request
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
    }

    try:
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        # Try to find the money market funds table
        table = None
        
        # First, look for a heading that mentions "Assets of Money Market Funds"
        heading = soup.find(lambda tag: tag.name in ['h1', 'h2', 'h3', 'h4', 'h5'] and 
                            "Assets of Money Market Funds" in tag.text)
        if heading:
            table = heading.find_next('table')
        
        # If that doesn't work, search all tables for one containing our expected data
        if not table:
            tables = soup.find_all('table')
            for t in tables:
                if "Government" in t.text and "Retail" in t.text and "Institutional" in t.text:
                    table = t
                    break
        
        if not table:
            print("Could not find the Money Market Funds table.")
            return None

        print("Table found! Extracting data...")

        # Extract the header row
        header_row = table.find('tr')
        headers_row = [th.text.strip() for th in header_row.find_all(['th', 'td'])] if header_row else []
        data_rows = table.find_all('tr')[1:] if headers_row else table.find_all('tr')

        # Extract all data rows
        rows = []
        for row in data_rows:
            cells = row.find_all(['td', 'th'])
            if cells:
                row_data = [cell.text.strip() for cell in cells]
                rows.append(row_data)

        # Create DataFrame from the scraped data
        df = pd.DataFrame(rows)
        if headers_row and len(headers_row) == df.shape[1]:
            df.columns = headers_row
        else:
            # If headers don't match, create generic column names
            df.columns = ['Column_' + str(i) for i in range(df.shape[1])]

        # Convert numeric columns (skip the first column which is usually text)
        for col in df.columns[1:]:
            try:
                df[col] = df[col].str.replace(',', '').astype(float)
            except Exception:
                # If conversion fails, leave as string
                pass

        # Only keep the first 5 rows of data
        if len(df) > 5:
            df = df.iloc[:5]

        # Clean up date column headers to MM/DD/YYYY format
        date_cols = [col for col in df.columns if re.match(r'\d{1,2}/\d{1,2}/\d{4}', str(col))]
        for col in date_cols:
            parts = str(col).split('/')
            if len(parts) == 3:
                month = parts[0].zfill(2)
                day = parts[1].zfill(2)
                year = parts[2]
                new_col = f"{month}/{day}/{year}"
                if col != new_col:
                    df.rename(columns={col: new_col}, inplace=True)

        # Find all date columns again after cleaning
        date_cols = [col for col in df.columns if re.match(r'\d{2}/\d{2}/\d{4}', str(col))]
        date_cols_sorted = sorted(date_cols, key=lambda x: datetime.strptime(x, '%m/%d/%Y'))

        # Convert date format from US (MM/DD/YYYY) to Irish (DD/MM/YYYY)
        for col in date_cols_sorted:
            dt = datetime.strptime(col, '%m/%d/%Y')
            new_col = dt.strftime('%d/%m/%Y')
            if col != new_col:
                df.rename(columns={col: new_col}, inplace=True)

        # Reorder columns: non-date columns first, then dates in chronological order, then change column
        date_cols_irish = [col for col in df.columns if re.match(r'\d{2}/\d{2}/\d{4}', str(col))]
        date_cols_irish_sorted = sorted(date_cols_irish, key=lambda x: datetime.strptime(x, '%d/%m/%Y'))

        non_date_cols = [col for col in df.columns if col not in date_cols_irish and col != '$ Change*']
        change_col = ['$ Change*'] if '$ Change*' in df.columns else []
        new_order = non_date_cols + date_cols_irish_sorted + change_col

        df = df[new_order]

        # Add timestamp for tracking when data was scraped
        current_time = datetime.now().strftime('%d/%m/%Y %H:%M:%S')
        print(f"Data extracted successfully at {current_time}")

        # Show a preview of what we got
        print("\nData Preview:")
        print(df)

        # Export to Excel file
        try:
            excel_path = "Richspread.xlsx"
            sheet_name = "MarketSnapshot"
            try:
                book = xw.Book(excel_path)
            except Exception:
                print(f"Excel file '{excel_path}' not found or cannot be opened.")
                return df
            
            sht = book.sheets[sheet_name]
            sht.range("B40").options(index=False).value = df
            print(f"Data exported to {excel_path} in sheet '{sheet_name}' at cell B40")

        except Exception as e:
            print(f"Error exporting to Excel: {e}")

        return df

    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
    except Exception as e:
        print(f"Error processing data: {e}")

    return None

if __name__ == "__main__":
    print("Money Market Fund Assets Scraper")
    print("=" * 40)
    scrape_mmf_data()

treasury and sofr

In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import pandas as pd
import time
import xlwings as xw

def scrape_chatham_market_rates():
    """
    Scrapes U.S. Treasury and SOFR rates from Chatham Financial's market rates page.
    Exports the data to an Excel file for analysis.
    """
    url = "https://www.chathamfinancial.com/technology/us-market-rates"
    
    # Configure Chrome to run in headless mode (no visible browser window)
    options = Options()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    
    print("Starting browser and navigating to Chatham Financial...")
    driver = webdriver.Chrome(options=options)
    
    try:
        driver.get(url)
        print("Page loaded, waiting for dynamic content...")
        time.sleep(5)  # Give the page time to load all dynamic content
        
        def scrape_table(table_xpath):
            """
            Helper function to extract table data using XPath.
            Returns a list of lists containing the table data.
            """
            try:
                table = driver.find_element(By.XPATH, table_xpath)
                rows = table.find_elements(By.TAG_NAME, "tr")
                data = []
                
                for row in rows:
                    # Get both header cells (th) and data cells (td)
                    cells = row.find_elements(By.TAG_NAME, "th") + row.find_elements(By.TAG_NAME, "td")
                    data.append([cell.text for cell in cells])
                
                return data
            except Exception as e:
                print(f"Error scraping table: {e}")
                return []
        
        # XPaths to find the specific tables we want
        us_treasuries_xpath = "//h2[contains(text(), 'U.S. Treasuries')]/following::table[1]"
        sofr_xpath = "//h2[contains(text(), 'Secured Overnight Financing Rate')]/following::table[1]"
        
        print("Extracting U.S. Treasuries data...")
        us_treasuries_data = scrape_table(us_treasuries_xpath)
        
        print("Extracting SOFR data...")
        sofr_data = scrape_table(sofr_xpath)
        
        # Convert the scraped data to pandas DataFrames
        if us_treasuries_data and len(us_treasuries_data) > 1:
            us_treasuries_df = pd.DataFrame(us_treasuries_data[1:], columns=us_treasuries_data[0])
        else:
            print("No U.S. Treasuries data found")
            us_treasuries_df = pd.DataFrame()
        
        if sofr_data and len(sofr_data) > 1:
            sofr_df = pd.DataFrame(sofr_data[1:], columns=sofr_data[0])
        else:
            print("No SOFR data found")
            sofr_df = pd.DataFrame()
        
        # Display the scraped data
        if not us_treasuries_df.empty:
            print("\nU.S. Treasuries Rates:")
            print(us_treasuries_df)
        
        if not sofr_df.empty:
            print("\nSOFR Rates:")
            print(sofr_df)
        
        # Export to Excel
        excel_path = "Richspread.xlsx"
        sheet_name = "MarketSnapshot"
        
        try:
            book = xw.Book(excel_path)
            sht = book.sheets[sheet_name]
            
            # Add a header for the section
            sht.range("B49").value = "Treasury and SOFR Rates"
            
            # Export U.S. Treasuries data
            if not us_treasuries_df.empty:
                sht.range("B50").options(index=False).value = us_treasuries_df
                print("U.S. Treasuries data exported to Excel")
            
            # Export SOFR data below the Treasuries table with a gap
            if not sofr_df.empty:
                next_row = "B{}".format(50 + len(us_treasuries_df) + 2)
                sht.range(next_row).options(index=False).value = sofr_df
                print("SOFR data exported to Excel")
            
            print(f"All data exported to {excel_path} in sheet '{sheet_name}' starting at cell B50")
            
        except Exception as e:
            print(f"Excel file '{excel_path}' not found or cannot be opened: {e}")
            return us_treasuries_df, sofr_df
        
        return us_treasuries_df, sofr_df
    
    except Exception as e:
        print(f"Error during scraping: {e}")
        return None, None
    
    finally:
        # Always close the browser, even if there's an error
        driver.quit()
        print("Browser closed")

if __name__ == "__main__":
    print("Chatham Financial Market Rates Scraper")
    print("=" * 45)
    scrape_chatham_market_rates()

Chatham Financial Market Rates Scraper
Starting browser and navigating to Chatham Financial...
Page loaded, waiting for dynamic content...
Extracting U.S. Treasuries data...
Extracting SOFR data...

U.S. Treasuries Rates:
           17 Jul 2025 18 Jun 2025 18 Jul 2024
0   1 Year      4.102%      4.086%      4.865%
1   2 Year      3.914%      3.932%      4.469%
2   3 Year      3.885%      3.884%      4.236%
3   5 Year      3.998%      3.978%      4.115%
4   7 Year      4.217%      4.165%      4.134%
5  10 Year      4.456%      4.382%      4.195%
6  30 Year      5.006%      4.881%      4.411%

SOFR Rates:
                       16 Jul 2025 17 Jun 2025 17 Jul 2024
0                 SOFR    4.34000%    4.31000%    5.35000%
1  30-Day Average SOFR    4.34732%    4.30185%    5.34343%
2  90-Day Average SOFR    4.34107%    4.34210%    5.35700%
3    1-month Term SOFR    4.34979%    4.31758%    5.34119%
4    3-month Term SOFR    4.32900%    4.31890%    5.27947%
U.S. Treasuries data exported to Ex

Eco Calendar

In [2]:
import pandas as pd
import xlwings as xw
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import time
import traceback
import re

def scrape_economic_calendar():
    """
    Scrapes high-importance economic events from Trading Economics calendar.
    Extracts dates, times, countries, events, and economic indicators.
    Exports formatted data to Excel with proper styling.
    """
    
    # Configure Chrome for headless operation with anti-detection measures
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("--window-size=1920,1080")
    chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
    chrome_options.add_experimental_option('useAutomationExtension', False)
    
    driver = None
    try:
        print("Starting Chrome browser...")
        driver = webdriver.Chrome(options=chrome_options)
        # Hide the fact that we're using automation
        driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
        
        wait = WebDriverWait(driver, 20)
        print("Navigating to Trading Economics Calendar...")
        driver.get('https://tradingeconomics.com/calendar?importance=3')
        print("Waiting for calendar data to load...")
        wait.until(EC.presence_of_element_located((By.ID, "calendar")))
        time.sleep(5)
        print("Extracting economic events...")
        
        # Take a screenshot to verify the page loaded correctly
        driver.save_screenshot('page_loaded.png')
        
        # Get the main calendar table
        calendar_table = driver.find_element(By.ID, "calendar")
        
        def convert_to_number(text_value):
            """
            Converts text values to appropriate numeric types.
            Handles percentages, K/M/B suffixes, and plain numbers.
            """
            if not text_value or text_value == "-" or text_value == "…":
                return None
                
            text_value = text_value.strip()
            
            # Handle percentage values (e.g., "2.5%" -> 0.025)
            if "%" in text_value:
                try:
                    return float(text_value.replace("%", "")) / 100
                except ValueError:
                    return text_value
            
            # Handle values with K, M, B suffixes (thousands, millions, billions)
            if any(suffix in text_value for suffix in ["K", "M", "B"]):
                try:
                    match = re.match(r"(-?\d+\.?\d*)([KMB])", text_value)
                    if match:
                        num, suffix = match.groups()
                        num = float(num)
                        if suffix == "K":
                            return num * 1000
                        elif suffix == "M":
                            return num * 1000000
                        elif suffix == "B":
                            return num * 1000000000
                    return text_value
                except:
                    return text_value
                    
            # Try to convert plain numbers
            try:
                if text_value.isdigit():
                    return int(text_value)
                return float(text_value)
            except ValueError:
                return text_value
        
        all_data = []
        current_date = None
        
        # Extract all rows from the calendar table
        rows = calendar_table.find_elements(By.TAG_NAME, "tr")
        print(f"Found {len(rows)} rows in calendar table")
        
        for row in rows:
            # Check if this row contains a date header
            date_cells = row.find_elements(By.XPATH, "./th[contains(@style, 'text-align: left')]")
            if date_cells and date_cells[0].text.strip():
                current_date = date_cells[0].text.strip()
                print(f"Found date header: {current_date}")
                continue
            
            # Process event data rows
            cells = row.find_elements(By.TAG_NAME, "td")
            if len(cells) < 5:  # Skip rows without enough data
                continue
            
            # Check if this is an actual event row (has time in first column)
            time_cell = cells[0].text.strip() if cells else ""
            if not time_cell or not any(x in time_cell for x in ["AM", "PM"]):
                continue
            
            # Extract event information
            event_data = {
                'Date': current_date,
                'Time': time_cell,
                'Country': cells[1].text.strip() if len(cells) > 1 else "",
                'Event': cells[4].text.strip() if len(cells) > 4 else ""
            }
            
            # Extract economic indicator values
            actual_value = cells[5].text.strip() if len(cells) > 5 else ""
            previous_value = cells[6].text.strip() if len(cells) > 6 else ""
            consensus_value = cells[7].text.strip() if len(cells) > 7 else ""
            forecast_value = cells[8].text.strip() if len(cells) > 8 else ""
            
            # Convert text values to appropriate numeric types
            event_data['Actual'] = convert_to_number(actual_value)
            event_data['Previous'] = convert_to_number(previous_value)
            event_data['Consensus'] = convert_to_number(consensus_value)
            event_data['Forecast'] = convert_to_number(forecast_value)
            
            # Debug output for data validation
            print(f"Event: {event_data['Event']}")
            print(f"Raw values - Actual: {actual_value}, Previous: {previous_value}")
            print(f"Converted - Actual: {event_data['Actual']}, Previous: {event_data['Previous']}")
            
            # Only add events with valid data
            if event_data['Event'] and event_data['Date']:
                all_data.append(event_data)
        
        if not all_data:
            print("No data was extracted. Taking screenshot for debugging...")
            driver.save_screenshot('no_data_extracted.png')
            return

        print(f"Successfully extracted {len(all_data)} economic events.")
        
        # Create and format the DataFrame
        print("Creating pandas DataFrame...")
        df = pd.DataFrame(all_data)
        
        # Convert numeric columns properly
        numeric_cols = ['Actual', 'Previous', 'Consensus', 'Forecast']
        for col in numeric_cols:
            if col in df.columns:
                df[col] = pd.to_numeric(df[col], errors='coerce')
        
        # Set the desired column order
        column_order = ["Date", "Time", "Country", "Event", "Actual", "Previous", "Consensus", "Forecast"]
        df = df[column_order]
        
        # Display DataFrame information for verification
        print("\nDataFrame Info:")
        print(df.info())
        print("\nFirst 10 rows:")
        print(df.head(10))
        print("\nDataFrame shape:", df.shape)
        
        # Export to Excel with professional formatting
        print("\nExporting to Excel with formatting...")
        
        # Open the Excel workbook
        book = xw.Book('Richspread.xlsx')
        sht = book.sheets['MarketSnapshot']
        
        # Clear the target area before writing new data
        end_row = 70 + len(df) + 5
        sht.range(f'B70:J{end_row}').clear()
        
        # Write column headers
        headers = list(df.columns)
        sht.range('B70').value = headers
        
        # Write the actual data
        data_values = df.values.tolist()
        if data_values:
            sht.range('B71').value = data_values
        
        # Apply professional table formatting
        table_range = sht.range(f'B70:I{70 + len(df)}')
        
        # Add borders to the entire table
        for border_id in range(7, 13):
            table_range.api.Borders(border_id).LineStyle = 1
            table_range.api.Borders(border_id).Weight = 2
        
        # Format the header row
        header_range = sht.range('B70:I70')
        header_range.api.Font.Bold = True
        header_range.api.Interior.Color = 0xF0F0F0  # Light gray background
        header_range.api.HorizontalAlignment = -4108  # Center alignment
        
        # Set column-specific formatting
        sht.range(f'B71:B{70 + len(df)}').api.HorizontalAlignment = -4131  # Date - Left align
        sht.range(f'C71:C{70 + len(df)}').api.HorizontalAlignment = -4108  # Time - Center align
        sht.range(f'D71:D{70 + len(df)}').api.HorizontalAlignment = -4108  # Country - Center align
        sht.range(f'E71:E{70 + len(df)}').api.HorizontalAlignment = -4131  # Event - Left align
        sht.range(f'F71:I{70 + len(df)}').api.HorizontalAlignment = -4152  # Numeric - Right align
        
        # Format numeric values with appropriate number formats
        for idx, row in df.iterrows():
            excel_row = 71 + idx
            for col_idx, col in enumerate(['Actual', 'Previous', 'Consensus', 'Forecast']):
                col_letter = chr(ord('F') + col_idx)  # F, G, H, I
                cell_value = row[col]
                
                if pd.notna(cell_value) and isinstance(cell_value, (int, float)):
                    # Format small decimals as percentages, others as numbers
                    if abs(cell_value) < 1 and cell_value != 0:
                        sht.range(f'{col_letter}{excel_row}').number_format = '0.00%'
                    else:
                        sht.range(f'{col_letter}{excel_row}').number_format = '#,##0.00'
        
        # Auto-fit all columns and set minimum widths
        sht.range('B:I').columns.autofit()
        sht.range('B:B').api.ColumnWidth = 20  # Date column
        sht.range('C:C').api.ColumnWidth = 10  # Time column
        sht.range('D:D').api.ColumnWidth = 8   # Country column
        sht.range('E:E').api.ColumnWidth = 40  # Event column
        sht.range('F:I').api.ColumnWidth = 12  # Numeric columns
        
        # Apply conditional formatting to time cells
        for idx, row in df.iterrows():
            time_cell = sht.range(f'C{71 + idx}')
            time_value = row['Time']
            
            # Highlight early morning times (market opening hours)
            if pd.notna(time_value) and "AM" in str(time_value):
                try:
                    hour = int(str(time_value).split(':')[0])
                    if hour < 3 or (7 <= hour <= 9):
                        time_cell.api.Interior.Color = 0x5C1F1F  # Dark red background
                        time_cell.api.Font.Color = 0xFFFFFF  # White text
                except:
                    pass
        
        # Save the workbook
        book.save()
        print(f"\nData successfully exported to Richspread.xlsx at cell B70")
        print(f"Total rows exported: {len(df)}")
        
        return df

    except Exception as e:
        print(f"An error occurred: {e}")
        traceback.print_exc()
        if driver:
            driver.save_screenshot('error_screenshot.png')
        return None
    finally:
        if driver:
            driver.quit()
            print("Browser closed.")

if __name__ == "__main__":
    print("Economic Calendar Scraper")
    print("=" * 30)
    
    # Run the scraper and get the DataFrame
    df = scrape_economic_calendar()
    
    # Provide feedback on the results
    if df is not None:
        print("\n" + "="*50)
        print("Economic calendar data successfully scraped!")
        print(f"DataFrame contains {len(df)} events")
        print("="*50)
        
        # Display basic statistics if running interactively
        try:
            # These will only work in Jupyter/IPython environments
            display(df.head())
            display(df.describe())
        except NameError:
            # Fallback for regular Python execution
            print("\nFirst 5 rows:")
            print(df.head())
            print("\nNumeric summary:")
            print(df.describe())
    else:
        print("No data was returned. Check the error messages above.")

Economic Calendar Scraper
Starting Chrome browser...
Navigating to Trading Economics Calendar...
Waiting for calendar data to load...
Extracting economic events...
Found 82 rows in calendar table
Found date header: Tuesday July 22 2025
Event: RBA Meeting Minutes
Raw values - Actual: , Previous: 
Converted - Actual: None, Previous: None
Event: Fed Chair Powell Speech
Raw values - Actual: , Previous: 
Converted - Actual: None, Previous: None
Found date header: Wednesday July 23 2025
Event: Existing Home Sales JUN
Raw values - Actual: , Previous: 4.03M
Converted - Actual: None, Previous: 4030000.0000000005
Found date header: Thursday July 24 2025
Event: GfK Consumer Confidence AUG
Raw values - Actual: , Previous: -20.3
Converted - Actual: None, Previous: -20.3
Event: HCOB Manufacturing PMI Flash JUL
Raw values - Actual: , Previous: 49.0
Converted - Actual: None, Previous: 49.0
Event: S&P Global Manufacturing PMI Flash JUL
Raw values - Actual: , Previous: 47.7
Converted - Actual: None, Pre

Traceback (most recent call last):
  File "/opt/miniconda3/lib/python3.13/site-packages/aeosa/appscript/reference.py", line 596, in __getattr__
    selectortype, code = self.AS_appdata.referencebyname()[name]
                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^
KeyError: 'Borders'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/var/folders/rx/whpr72693f3cgk151m8h7fh80000gn/T/ipykernel_93503/1837625719.py", line 199, in scrape_economic_calendar
    table_range.api.Borders(border_id).LineStyle = 1
    ^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/miniconda3/lib/python3.13/site-packages/aeosa/appscript/reference.py", line 598, in __getattr__
    raise AttributeError("Unknown property, element or command: {!r}".format(name)) from e
AttributeError: Unknown property, element or command: 'Borders'


Simfa Bonds

In [4]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
import re
import json
import xlwings as xw

def scrape_sifma_corporate_bonds():
    """
    Scrapes SIFMA corporate bond statistics including year-to-date issuance,
    trading volumes, and outstanding amounts. Uses both exact XPaths and
    fallback methods for robust data extraction.
    """
    url = "https://www.sifma.org/resources/research/statistics/us-corporate-bonds-statistics/"
    
    # Configure Chrome for headless operation with anti-detection
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("--window-size=1920,1080")
    chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
    chrome_options.add_experimental_option('useAutomationExtension', False)

    driver = webdriver.Chrome(options=chrome_options)

    try:
        print(f"Loading SIFMA page: {url}")
        driver.get(url)
        print("Waiting for page content to load...")
        time.sleep(5)
        
        # Extract the main statistics using precise XPath targeting
        print("\nExtracting corporate bond statistics...")
        summary_stats = extract_summary_statistics_exact(driver)
        
        # Export the data to Excel with formatting
        print("\nExporting data to Excel...")
        write_to_excel(summary_stats)
        
        return {
            'summary_statistics': summary_stats
        }
    
    except Exception as e:
        print(f"An error occurred during scraping: {e}")
        import traceback
        traceback.print_exc()
    
    finally:
        driver.quit()
        print("Browser session closed")
        return None

def extract_summary_statistics_exact(driver):
    """
    Extracts year-to-date summary statistics using precise XPath selectors.
    Falls back to alternative methods if the primary approach fails.
    """
    summary_stats = []
    
    try:
        # Define exact XPaths for the statistics we want
        ytd_header_xpath = "/html/body/div[2]/section/div[1]/article/section/div/p[2]"
        issuance_xpath = "/html/body/div[2]/section/div[1]/article/section/div/ul[1]/li[1]"
        trading_xpath = "/html/body/div[2]/section/div[1]/article/section/div/ul[1]/li[2]"
        outstanding_xpath = "/html/body/div[2]/section/div[1]/article/section/div/ul[1]/li[3]"
        
        # Wait for the content to be available
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, ytd_header_xpath)))
        
        # Get the year-to-date header information
        ytd_header = driver.find_element(By.XPATH, ytd_header_xpath)
        ytd_text = ytd_header.text.strip()
        print(f"YTD Header: {ytd_text}")
        
        # Extract each type of statistic
        xpaths = [issuance_xpath, trading_xpath, outstanding_xpath]
        types = ["Issuance", "Trading", "Outstanding"]
        
        for i, xpath in enumerate(xpaths):
            try:
                element = driver.find_element(By.XPATH, xpath)
                stat_text = element.text.strip()
                print(f"Found {types[i]}: {stat_text}")
                
                # Parse the raw text into structured data
                stat_dict = parse_statistic(stat_text)
                stat_dict['type'] = types[i]
                summary_stats.append(stat_dict)
            except Exception as e:
                print(f"Error extracting {types[i]}: {e}")
                # Add a placeholder entry so we don't lose track of missing data
                summary_stats.append({
                    'type': types[i],
                    'raw_text': f"Could not extract {types[i]} data",
                    'error': str(e)
                })
        
        return summary_stats
    
    except Exception as e:
        print(f"Primary extraction method failed: {e}")
        
        # Try a more flexible approach using content-based searching
        try:
            print("Attempting alternative extraction method...")
            
            # Search for list items containing our target keywords
            issuance_items = driver.find_elements(By.XPATH, "//li[contains(text(), 'Issuance')]")
            trading_items = driver.find_elements(By.XPATH, "//li[contains(text(), 'Trading')]")
            outstanding_items = driver.find_elements(By.XPATH, "//li[contains(text(), 'Outstanding')]")
            
            items = [
                (issuance_items, "Issuance"),
                (trading_items, "Trading"),
                (outstanding_items, "Outstanding")
            ]
            
            for element_list, type_name in items:
                if element_list:
                    stat_text = element_list[0].text.strip()
                    print(f"Found {type_name} via alternative method: {stat_text}")
                    
                    stat_dict = parse_statistic(stat_text)
                    stat_dict['type'] = type_name
                    summary_stats.append(stat_dict)
                else:
                    print(f"Could not find {type_name} data")
                    summary_stats.append({
                        'type': type_name,
                        'raw_text': f"Could not extract {type_name} data"
                    })
            
            return summary_stats
        except Exception as e:
            print(f"Alternative extraction method also failed: {e}")
            return []

def parse_statistic(stat_text):
    """
    Parses a raw statistic text string into structured components.
    Extracts dollar values, time periods, and percentage changes.
    """
    stat_dict = {'raw_text': stat_text}
    
    # Extract dollar amounts (e.g., "$1.23 trillion")
    dollar_match = re.search(r'\$([0-9,.]+)\s+(billion|trillion)', stat_text)
    if dollar_match:
        value = float(dollar_match.group(1).replace(',', ''))
        unit = dollar_match.group(2)
        stat_dict['value'] = value
        stat_dict['unit'] = unit
    
    # Extract time periods (e.g., "as of December 2024")
    period_match = re.search(r'\(as of ([^)]+)\)', stat_text)
    if period_match:
        stat_dict['period'] = period_match.group(1)
    
    # Extract percentage changes (e.g., "+5.2%" or "-3.1%")
    change_match = re.search(r'([+-][0-9.]+%)', stat_text)
    if change_match:
        stat_dict['change'] = change_match.group(1)
    
    return stat_dict

def write_to_excel(summary_stats):
    """
    Writes the extracted SIFMA data to Excel with professional formatting.
    Creates a clean, readable layout starting at cell B100.
    """
    try:
        # Connect to the active Excel workbook or create a new one
        try:
            wb = xw.books.active
        except:
            wb = xw.Book()
        
        # Try to use the MarketSnapshot sheet, fallback to active sheet
        try:
            sheet = wb.sheets['MarketSnapshot']
        except:
            sheet = wb.sheets.active
            print(f"Using active sheet: {sheet.name}")
        
        # Start writing data at cell B100
        start_cell = sheet.range('B100')
        current_row = start_cell.row
        current_col = start_cell.column
        
        # Create a main header for the section
        header_cell = sheet.cells(current_row, current_col)
        header_cell.value = "SIFMA U.S. Corporate Bond Statistics"
        header_cell.font.bold = True
        header_cell.font.size = 14
        current_row += 2
        
        # Write the summary statistics section
        if summary_stats:
            stats_header = sheet.cells(current_row, current_col)
            stats_header.value = "YTD Statistics Summary:"
            stats_header.font.bold = True
            current_row += 1
            
            # Write each statistic on its own row
            for stat in summary_stats:
                # Use the raw text if available, otherwise build from components
                if 'raw_text' in stat:
                    sheet.cells(current_row, current_col).value = stat['raw_text']
                elif 'type' in stat:
                    # Construct a readable string from the parsed components
                    text_parts = []
                    text_parts.append(stat['type'])
                    
                    if 'period' in stat:
                        text_parts.append(f"(as of {stat['period']})")
                    
                    if 'value' in stat and 'unit' in stat:
                        text_parts.append(f"${stat['value']:,.1f} {stat['unit']}")
                    
                    if 'change' in stat:
                        text_parts.append(f"{stat['change']} Y/Y")
                    
                    sheet.cells(current_row, current_col).value = " ".join(text_parts)
                
                current_row += 1
        
        print(f"Successfully exported SIFMA data to Excel starting at {start_cell.address}")
        
    except Exception as e:
        print(f"Error writing to Excel: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    print("SIFMA Corporate Bonds Statistics Scraper")
    print("=" * 45)
    scrape_sifma_corporate_bonds()

SIFMA Corporate Bonds Statistics Scraper
Loading SIFMA page: https://www.sifma.org/resources/research/statistics/us-corporate-bonds-statistics/
Waiting for page content to load...

Extracting corporate bond statistics...
Primary extraction method failed: Message: 
Stacktrace:
0   chromedriver                        0x000000010056755c cxxbridge1$str$ptr + 2731064
1   chromedriver                        0x000000010055f454 cxxbridge1$str$ptr + 2698032
2   chromedriver                        0x00000001000ae3f8 cxxbridge1$string$len + 90664
3   chromedriver                        0x00000001000f571c cxxbridge1$string$len + 382284
4   chromedriver                        0x0000000100136b1c cxxbridge1$string$len + 649548
5   chromedriver                        0x00000001000e9a0c cxxbridge1$string$len + 333884
6   chromedriver                        0x000000010052a5e0 cxxbridge1$str$ptr + 2481340
7   chromedriver                        0x000000010052d848 cxxbridge1$str$ptr + 2494244
8   chromedr

Sim Market Metrics

In [7]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
import re
import xlwings as xw

def scrape_market_metrics(url, excel_start_cell="B110"):
    """
    Scrapes market metrics and musings from SIFMA's insights page.
    Extracts key financial indicators like VIX, S&P 500, trading volumes,
    and market commentary. Formats the data for clean Excel output.
    
    Parameters:
    url (str): The URL to scrape market data from
    excel_start_cell (str): Excel cell to start writing data (default: B110)
    """
    
    # Configure Chrome for headless operation with realistic browser behavior
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("--window-size=1920,1080")
    chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
    chrome_options.add_experimental_option('useAutomationExtension', False)

    driver = webdriver.Chrome(options=chrome_options)

    try:
        print(f"Loading SIFMA insights page: {url}")
        driver.get(url)
        print("Waiting for page content to load...")
        
        # Wait for the page body to be present
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.TAG_NAME, "body"))
        )
        time.sleep(3)  # Give dynamic content time to load
        
        # Get all page text for parsing
        page_text = driver.find_element(By.TAG_NAME, "body").text
        
        # Extract the two main sections we're interested in
        print("\nExtracting Market Metrics data...")
        metrics_data = extract_market_metrics_from_text(page_text)
        
        print("\nExtracting Market Musings data...")
        musings_data = extract_market_musings_from_text(page_text)
        
        # Export everything to Excel with proper formatting
        print("\nExporting data to Excel...")
        write_to_excel_improved(metrics_data, musings_data, excel_start_cell)
        
        return {
            'market_metrics': metrics_data,
            'market_musings': musings_data
        }
        
    except Exception as e:
        print(f"An error occurred during scraping: {e}")
        import traceback
        traceback.print_exc()
        return None
        
    finally:
        driver.quit()
        print("Browser session closed")

def extract_market_metrics_from_text(page_text):
    """
    Extracts structured market metrics data from the page text.
    Looks for key indicators like VIX, S&P 500, trading volumes, and performance data.
    """
    metrics_data = []
    
    # Find where the Market Metrics section starts and ends
    metrics_start = page_text.find("Market Metrics")
    metrics_end = page_text.find("Market Musings")
    
    if metrics_start == -1:
        print("Could not locate Market Metrics section")
        return metrics_data
    
    # Extract just the metrics section text
    if metrics_end > metrics_start:
        metrics_text = page_text[metrics_start:metrics_end]
    else:
        metrics_text = page_text[metrics_start:metrics_start+2000]  # Fallback: take next 2000 chars
    
    print(f"Found Market Metrics section ({len(metrics_text)} characters)")
    
    # Use regex patterns to extract specific metrics
    
    # 1. Volatility Index (VIX) data
    vix_pattern = r"Volatility\s*\(VIX\)[:\s]*Monthly\s+average\s+([0-9.]+);\s*([+-]?[0-9.]+%)\s*M/M,\s*([+-]?[0-9.]+%)\s*Y/Y"
    vix_match = re.search(vix_pattern, metrics_text, re.IGNORECASE)
    if vix_match:
        metrics_data.append({
            "text": f"Volatility (VIX): Monthly average {vix_match.group(1)}; {vix_match.group(2)} M/M, {vix_match.group(3)} Y/Y"
        })
        print("- Found VIX data")
    
    # 2. S&P 500 price data
    sp500_pattern = r"S&P\s*500\s*\(Price\)[:\s]*Monthly\s+average\s+([0-9,]+\.?[0-9]*);?\s*([+-]?[0-9.]+%)\s*M/M,\s*([+-]?[0-9.]+%)\s*Y/Y"
    sp500_match = re.search(sp500_pattern, metrics_text, re.IGNORECASE)
    if sp500_match:
        metrics_data.append({
            "text": f"S&P 500 (Price): Monthly average {sp500_match.group(1)}; {sp500_match.group(2)} M/M, {sp500_match.group(3)} Y/Y"
        })
        print("- Found S&P 500 data")
    
    # 3. Performance metrics (best/worst performers)
    perf_pattern = r"Performance\s*\(month[/]?year\)[:\s]*Best\s*=\s*([^;,]+?);\s*worst\s*=\s*([^;,]+)"
    perf_match = re.search(perf_pattern, metrics_text, re.IGNORECASE)
    if perf_match:
        metrics_data.append({
            "text": f"Performance (month/year): Best = {perf_match.group(1).strip()}; worst = {perf_match.group(2).strip()}"
        })
        print("- Found Performance data")
    
    # 4. Equity average daily volume
    equity_pattern = r"Equity\s*ADV[:\s]*Monthly\s+average\s+([0-9.]+)\s*billion\s+shares;\s*([+-]?[0-9.]+%)\s*M/M,\s*([+-]?[0-9.]+%)\s*Y/Y"
    equity_match = re.search(equity_pattern, metrics_text, re.IGNORECASE)
    if equity_match:
        metrics_data.append({
            "text": f"Equity ADV: Monthly average {equity_match.group(1)} billion shares; {equity_match.group(2)} M/M, {equity_match.group(3)} Y/Y"
        })
        print("- Found Equity ADV data")
    
    # 5. Options average daily volume
    options_pattern = r"Options\s*ADV[:\s]*Monthly\s+average\s+([0-9.]+)\s*million\s+contracts;\s*([+-]?[0-9.]+%)\s*[M/M,]*\s*([+-]?[0-9.]+%)\s*Y/Y"
    options_match = re.search(options_pattern, metrics_text, re.IGNORECASE)
    if options_match:
        metrics_data.append({
            "text": f"Options ADV: Monthly average {options_match.group(1)} million contracts; {options_match.group(2)} M/M, {options_match.group(3)} Y/Y"
        })
        print("- Found Options ADV data")
    
    # 6. Market resilience commentary
    if "Markets continue to show resilience" in metrics_text:
        resilience_pattern = r"Markets continue to show resilience[^.]+\."
        resilience_match = re.search(resilience_pattern, metrics_text)
        if resilience_match:
            metrics_data.append({
                "text": resilience_match.group(0).strip()
            })
            print("- Found market resilience statement")
    
    # 7. Volatility commentary
    muted_pattern = r"Volatility was muted[^.]+\."
    muted_match = re.search(muted_pattern, metrics_text)
    if muted_match:
        metrics_data.append({
            "text": muted_match.group(0).strip()
        })
        print("- Found volatility commentary")
    
    print(f"Total Market Metrics items extracted: {len(metrics_data)}")
    return metrics_data

def extract_market_musings_from_text(page_text):
    """
    Extracts market musings commentary from the page text.
    Breaks down the content into readable sentences for better Excel formatting.
    """
    musings_data = []
    
    # Find the Market Musings section boundaries
    musings_start = page_text.find("Market Musings")
    
    if musings_start == -1:
        print("Could not locate Market Musings section")
        return musings_data
    
    # Look for common section endings to determine where musings end
    possible_end_markers = ["Download Full Report", "Featured Charts", "Additional Resources", "Contact", "Footer"]
    musings_end = len(page_text)
    
    for marker in possible_end_markers:
        marker_pos = page_text.find(marker, musings_start)
        if marker_pos > musings_start and marker_pos < musings_end:
            musings_end = marker_pos
    
    # Extract the musings text
    musings_text = page_text[musings_start:musings_end]
    
    print(f"Found Market Musings section ({len(musings_text)} characters)")
    
    # Clean up by removing the section header
    musings_text = musings_text.replace("Market Musings", "", 1).strip()
    
    # Split into sentences for better readability in Excel
    sentences = re.split(r'(?<=[.!?])\s+(?=[A-Z])', musings_text)
    
    # Process each sentence
    for sentence in sentences:
        sentence = sentence.strip()
        
        # Skip very short or empty sentences
        if len(sentence) < 10:
            continue
            
        # Skip navigation or non-content elements
        if any(skip_word in sentence for skip_word in ["Download", "Chart", "Report", "Contact", "Footer", "Navigation"]):
            continue
        
        # Add as a separate item for clean Excel formatting
        musings_data.append({
            "text": sentence
        })
    
    # If no sentences were found, try extracting key phrases
    if not musings_data:
        print("Trying alternative extraction method for Market Musings...")
        
        key_phrases = [
            "continue to show resilience",
            "posting a record",
            "Volatility was muted",
            "Trade policy concerns",
            "Price level indicators",
            "inflationary worries",
            "CPI and Core CPI",
            "USD/EUR"
        ]
        
        for phrase in key_phrases:
            if phrase in musings_text:
                # Extract the sentence containing this phrase
                start = musings_text.find(phrase)
                sentence_start = max(0, musings_text.rfind('.', 0, start) + 1)
                sentence_end = musings_text.find('.', start)
                if sentence_end == -1:
                    sentence_end = start + 100  # Take next 100 chars if no period found
                
                sentence = musings_text[sentence_start:sentence_end + 1].strip()
                if sentence and len(sentence) > 10:
                    musings_data.append({
                        "text": sentence
                    })
    
    print(f"Total Market Musings items extracted: {len(musings_data)}")
    return musings_data

def write_to_excel_improved(metrics_data, musings_data, start_cell):
    """
    Writes the extracted market data to Excel with professional formatting.
    Creates separate sections for metrics and musings with proper styling.
    """
    try:
        # Connect to the active Excel workbook or create new one
        try:
            wb = xw.books.active
        except:
            wb = xw.Book()
        
        # Try to use MarketSnapshot sheet, fallback to active sheet
        try:
            sheet = wb.sheets['MarketSnapshot']
        except:
            sheet = wb.sheets.active
            print(f"Using active sheet: {sheet.name}")
        
        # Set starting position
        start_range = sheet.range(start_cell)
        current_row = start_range.row
        current_col = start_range.column
        
        # Write Market Metrics section header
        header_cell = sheet.cells(current_row, current_col)
        header_cell.value = "Market Metrics"
        header_cell.font.bold = True
        header_cell.font.size = 14
        current_row += 2
        
        # Write Market Metrics data
        if metrics_data:
            for metric in metrics_data:
                cell = sheet.cells(current_row, current_col)
                cell.value = metric.get("text", "")
                cell.api.WrapText = True  # Enable text wrapping for long content
                current_row += 1
        else:
            sheet.cells(current_row, current_col).value = "No Market Metrics data found"
            current_row += 1
        
        current_row += 2
        
        # Write Market Musings section header
        header_cell = sheet.cells(current_row, current_col)
        header_cell.value = "Market Musings"
        header_cell.font.bold = True
        header_cell.font.size = 14
        current_row += 2
        
        # Write Market Musings data with bullet points
        if musings_data:
            for musing in musings_data:
                cell = sheet.cells(current_row, current_col)
                text = musing.get("text", "")
                
                # Add bullet point for visual organization
                if text and not text.startswith("•"):
                    text = f"• {text}"
                
                cell.value = text
                cell.api.WrapText = True
                
                # Auto-fit row height for content
                sheet.range(f'{current_row}:{current_row}').row_height = None
                
                current_row += 1
        else:
            sheet.cells(current_row, current_col).value = "No Market Musings data found"
        
        # Set column width for better readability
        column_letter = xlwings.utils.col_name(current_col)
        sheet.range(f'{column_letter}:{column_letter}').column_width = 80
        
        print(f"Successfully exported market data to Excel starting at {start_cell}")
        
    except Exception as e:
        print(f"Error writing to Excel: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    print("SIFMA Market Metrics and Musings Scraper")
    print("=" * 45)
    
    # Target URL for SIFMA market insights
    url = "https://www.sifma.org/resources/research/insights/insights-market-metrics-and-trends/"
    scrape_market_metrics(url)

SIFMA Market Metrics and Musings Scraper
Loading SIFMA insights page: https://www.sifma.org/resources/research/insights/insights-market-metrics-and-trends/
Waiting for page content to load...

Extracting Market Metrics data...
Found Market Metrics section (562 characters)
- Found VIX data
- Found S&P 500 data
- Found Performance data
- Found Equity ADV data
- Found Options ADV data
Total Market Metrics items extracted: 5

Extracting Market Musings data...
Found Market Musings section (706 characters)
Total Market Musings items extracted: 7

Exporting data to Excel...
Error writing to Excel: name 'xlwings' is not defined
Browser session closed


Traceback (most recent call last):
  File "/var/folders/rx/whpr72693f3cgk151m8h7fh80000gn/T/ipykernel_93503/2198042961.py", line 322, in write_to_excel_improved
    column_letter = xlwings.utils.col_name(current_col)
                    ^^^^^^^
NameError: name 'xlwings' is not defined


Sim Survey

In [8]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
import re
import xlwings as xw

def scrape_economic_survey(excel_start_cell="B135"):
    """
    Scrapes SIFMA's US Economic Survey Key Takeaways using multiple extraction methods.
    Extracts survey dates and key economic insights including monetary policy,
    inflation forecasts, and economic growth projections.
    
    Parameters:
    excel_start_cell (str): Excel cell to start writing data (default: B135)
    """
    url = "https://www.sifma.org/resources/research/economics/us-economic-survey/"
    
    # Configure Chrome for headless operation with anti-detection
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("--window-size=1920,1080")
    chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
    chrome_options.add_experimental_option('useAutomationExtension', False)

    driver = webdriver.Chrome(options=chrome_options)

    try:
        print(f"Loading SIFMA Economic Survey page: {url}")
        driver.get(url)
        print("Waiting for page content to load...")
        time.sleep(5)
        
        # Extract when the survey was conducted
        survey_date = extract_survey_date(driver)
        
        # Extract the main insights from the Key Takeaways section
        print("\nExtracting Key Takeaways...")
        takeaways = extract_key_takeaways(driver)
        
        # Export everything to Excel with proper formatting
        print("\nExporting data to Excel...")
        write_to_excel(survey_date, takeaways, excel_start_cell)
        
        return {
            'survey_date': survey_date,
            'key_takeaways': takeaways
        }
        
    except Exception as e:
        print(f"An error occurred during scraping: {e}")
        import traceback
        traceback.print_exc()
        
    finally:
        driver.quit()
        print("Browser session closed")
        return None

def extract_survey_date(driver):
    """
    Extracts the survey date information using multiple approaches.
    Looks for text about when the survey was populated.
    """
    try:
        # Approach 1: Look for paragraph containing "survey was populated"
        survey_date_elements = driver.find_elements(By.XPATH, "//p[contains(text(), 'survey was populated')]")
        
        # Approach 2: Broader search for any element containing the phrase
        if not survey_date_elements:
            survey_date_elements = driver.find_elements(By.XPATH, "//*[contains(text(), 'survey was populated')]")
        
        # Approach 3: Look for date info near the main title
        if not survey_date_elements:
            title_elements = driver.find_elements(By.XPATH, "//h1[contains(text(), 'US Economic Survey')] | //h2[contains(text(), 'US Economic Survey')]")
            if title_elements:
                title = title_elements[0]
                survey_date_elements = driver.find_elements(By.XPATH, f"following-sibling::*[1]")
        
        if survey_date_elements:
            survey_date_text = survey_date_elements[0].text.strip()
            print(f"Found survey date: {survey_date_text}")
            return survey_date_text
        
        # Fallback: Extract from entire page content using regex
        page_text = driver.find_element(By.TAG_NAME, "body").text
        date_match = re.search(r"survey was populated between ([A-Za-z]+ \d+) and ([A-Za-z]+ \d+, \d{4})", page_text)
        if date_match:
            survey_date_text = f"This survey was populated between {date_match.group(1)} and {date_match.group(2)}."
            print(f"Found survey date from page text: {survey_date_text}")
            return survey_date_text
            
        return "Survey date not found"
        
    except Exception as e:
        print(f"Error extracting survey date: {e}")
        return "Error extracting survey date"

def extract_key_takeaways(driver):
    """
    Extracts Key Takeaways section using multiple extraction strategies.
    Tries heading-based extraction first, then falls back to keyword searches.
    """
    takeaways = []
    
    try:
        # Primary method: Find the Key Takeaways heading and extract list items
        heading_found = False
        heading_elements = driver.find_elements(By.XPATH, 
            "//h1[contains(text(), 'Key Takeaways')] | "
            "//h2[contains(text(), 'Key Takeaways')] | "
            "//h3[contains(text(), 'Key Takeaways')]"
        )
        
        if heading_elements:
            heading = heading_elements[0]
            print("Found 'Key Takeaways' heading")
            heading_found = True
            
            # Look for list items that follow the heading
            try:
                list_items = driver.find_elements(By.XPATH, 
                    f"//h1[contains(text(), 'Key Takeaways')]/following::li | "
                    f"//h2[contains(text(), 'Key Takeaways')]/following::li | "
                    f"//h3[contains(text(), 'Key Takeaways')]/following::li"
                )
                
                # Alternative: Look for bullet points in content area
                if not list_items:
                    list_items = driver.find_elements(By.CSS_SELECTOR, ".entry-content li")
                
                print(f"Found {len(list_items)} key takeaway items")
                
                for item in list_items:
                    text = item.text.strip()
                    if text:
                        print(f"- {text}")
                        parsed_takeaway = parse_takeaway(text)
                        takeaways.append(parsed_takeaway)
                        
                        # Stop if we reach content about "forecast tables" (typically the end)
                        if "forecast tables" in text.lower():
                            break
            except Exception as e:
                print(f"Error finding list items: {e}")
        
        # Fallback method: Search by economic keywords if primary method fails
        if not heading_found or not takeaways:
            print("Trying keyword-based extraction for Key Takeaways...")
            
            # Search for common economic survey categories
            keywords = ["Monetary Policy:", "Inflation:", "Economy:", "Growth:", "Interest Rates:"]
            
            for keyword in keywords:
                elements = driver.find_elements(By.XPATH, f"//*[contains(text(), '{keyword}')]")
                for element in elements:
                    # Navigate up the DOM to find the containing list item
                    parent = element
                    for _ in range(3):  # Check up to 3 parent levels
                        try:
                            if parent.tag_name == "li":
                                text = parent.text.strip()
                                print(f"Found with keyword '{keyword}': {text}")
                                parsed_takeaway = parse_takeaway(text)
                                
                                # Avoid duplicates by checking category
                                is_duplicate = False
                                for existing in takeaways:
                                    if existing.get('category') == parsed_takeaway.get('category'):
                                        is_duplicate = True
                                        break
                                        
                                if not is_duplicate:
                                    takeaways.append(parsed_takeaway)
                                break
                            parent = parent.find_element(By.XPATH, "..")
                        except:
                            break
        
        # Last resort: Extract from page text using pattern matching
        if not takeaways:
            print("Trying pattern-based extraction from page text...")
            page_text = driver.find_element(By.TAG_NAME, "body").text
            
            # Look for content related to key economic categories
            categories = ["Monetary Policy", "Inflation", "Economy", "Growth", "Interest Rates"]
            
            for category in categories:
                # Use regex to find content sections for each category
                pattern = f"{category}[:\s]+(.*?)(?=(?:{"|".join(categories)})[:\s]+|\Z)"
                matches = re.findall(pattern, page_text, re.DOTALL | re.IGNORECASE)
                
                for match in matches:
                    text = match.strip()
                    if text:
                        print(f"Found {category} content from page text")
                        takeaways.append({
                            'category': category,
                            'content': text,
                            'raw_text': f"{category}: {text}"
                        })
    
    except Exception as e:
        print(f"Error extracting Key Takeaways: {e}")
    
    return takeaways

def parse_takeaway(text):
    """
    Parses a key takeaway text into structured components.
    Extracts categories and specific data points like percentages and forecasts.
    """
    takeaway = {'raw_text': text}
    
    # Try to split into category and content using colon separator
    category_match = re.match(r'^([^:]+):\s*(.*)', text, re.DOTALL)
    
    if category_match:
        category = category_match.group(1).strip()
        content = category_match.group(2).strip()
        takeaway['category'] = category
        takeaway['content'] = content
        
        # Extract specific data for Monetary Policy content
        if "Monetary Policy" in category:
            # Extract percentage of economists expecting rate cuts
            rate_cut_match = re.search(r'(\d+)%\s+of our economists expect', content)
            if rate_cut_match:
                takeaway['rate_cut_percentage'] = rate_cut_match.group(1)
            
            # Extract target rate forecasts for 2025
            target_2025_match = re.search(r'end 2025 at (\d+\.\d+)%', content)
            if target_2025_match:
                takeaway['target_rate_2025'] = target_2025_match.group(1)
                
            # Extract target rate forecasts for 2026
            target_2026_match = re.search(r'end 2026 at (\d+\.\d+)%', content)
            if target_2026_match:
                takeaway['target_rate_2026'] = target_2026_match.group(1)
                
            # Extract neutral rate range estimates
            neutral_rate_match = re.search(r'neutral nominal fed funds rate to be (\d+\.\d+%-\d+\.\d+%)', content)
            if neutral_rate_match:
                takeaway['neutral_rate_range'] = neutral_rate_match.group(1)
        
        # Extract specific data for Inflation content
        elif "Inflation" in category:
            # Extract core PCE forecast
            inflation_match = re.search(r'end 2025 at (\d+\.\d+)%', content)
            if inflation_match:
                takeaway['core_pce_2025'] = inflation_match.group(1)
            
            # Extract change from previous surveys
            change_match = re.search(r'up (\d+\.\d+) pps from the last full survey', content)
            if change_match:
                takeaway['change_from_previous'] = change_match.group(1)
                
            # Extract top factors affecting inflation
            factors_match = re.search(r'top factors .* are (.*?)\.', content, re.DOTALL)
            if factors_match:
                factors = factors_match.group(1).strip()
                takeaway['top_factors'] = factors
        
        # Extract specific data for Economy content
        elif "Economy" in category:
            # Extract GDP growth forecast
            gdp_match = re.search(r'grow (\d+\.\d+)% in 2025', content)
            if gdp_match:
                takeaway['gdp_growth_2025'] = gdp_match.group(1)
            
            # Extract recession probability data
            recession_match = re.search(r'(\d+)% of our economists put the probability', content)
            if recession_match:
                takeaway['economists_percentage'] = recession_match.group(1)
                
            # Extract probability range for recession
            prob_range_match = re.search(r'probability of recession from (\d+)% to (\d+)%', content)
            if prob_range_match:
                takeaway['recession_probability_low'] = prob_range_match.group(1)
                takeaway['recession_probability_high'] = prob_range_match.group(2)
    else:
        # If no clear category found, infer from keywords in the text
        if "rate cuts" in text.lower() or "monetary policy" in text.lower():
            takeaway['category'] = "Monetary Policy"
            takeaway['content'] = text
        elif "inflation" in text.lower() or "pce" in text.lower():
            takeaway['category'] = "Inflation"
            takeaway['content'] = text
        elif "gdp" in text.lower() or "economy" in text.lower() or "recession" in text.lower():
            takeaway['category'] = "Economy"
            takeaway['content'] = text
        else:
            takeaway['category'] = "Other"
            takeaway['content'] = text
    
    return takeaway

def write_to_excel(survey_date, takeaways, start_cell):
    """
    Writes the extracted economic survey data to Excel with professional formatting.
    Creates organized sections for survey info and key takeaways.
    """
    try:
        # Connect to the active Excel workbook or create new one
        try:
            wb = xw.books.active
        except:
            wb = xw.Book()
        
        # Try to use MarketSnapshot sheet, fallback to active sheet
        try:
            sheet = wb.sheets['MarketSnapshot']
        except:
            sheet = wb.sheets.active
            print(f"Using active sheet: {sheet.name}")
        
        # Set starting position
        start_range = sheet.range(start_cell)
        current_row = start_range.row
        current_col = start_range.column
        
        # Write main header for the economic survey section
        header_cell = sheet.cells(current_row, current_col)
        header_cell.value = "US Economic Survey"
        header_cell.font.bold = True
        header_cell.font.size = 14
        current_row += 1
        
        # Write Key Takeaways section header
        takeaways_header = sheet.cells(current_row, current_col)
        takeaways_header.value = "Key Takeaways"
        takeaways_header.font.bold = True
        takeaways_header.font.size = 12
        current_row += 2
        
        # Write each key takeaway with proper formatting
        if takeaways:
            for takeaway in takeaways:
                # Write category as bold header
                if 'category' in takeaway:
                    category_cell = sheet.cells(current_row, current_col)
                    category_cell.value = f"{takeaway['category']}:"
                    category_cell.font.bold = True
                    
                    # Write content in the adjacent cell
                    if 'content' in takeaway:
                        content_cell = sheet.cells(current_row, current_col + 1)
                        content_cell.value = takeaway['content']
                        content_cell.api.WrapText = True  # Enable text wrapping
                    
                # Fallback: write raw text if structured data isn't available
                elif 'raw_text' in takeaway:
                    sheet.cells(current_row, current_col).value = takeaway['raw_text']
                
                current_row += 1
                current_row += 1  # Add spacing between categories
        else:
            sheet.cells(current_row, current_col).value = "No Key Takeaways found"
        
        print(f"Successfully exported US Economic Survey data to Excel starting at {start_cell}")
        
    except Exception as e:
        print(f"Error writing to Excel: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    print("SIFMA US Economic Survey Scraper")
    print("=" * 35)
    scrape_economic_survey()

SIFMA US Economic Survey Scraper
Loading SIFMA Economic Survey page: https://www.sifma.org/resources/research/economics/us-economic-survey/
Waiting for page content to load...
Found survey date: This survey was populated between

Extracting Key Takeaways...
Trying keyword-based extraction for Key Takeaways...
Found with keyword 'Monetary Policy:': Monetary Policy: 75% of our economists expect one or more rate cuts by the end of 2025 for a total decrease of roughly 50bps. The median forecaster looks for the midpoint of the target range to end 2025 at 3.926% (roughly 50 bps in cuts from current rate) and to end 2026 at 3.625% (a total of 75 bps in cuts from the current rate). Nearly 60% of our economists estimate the neutral nominal fed funds rate to be 3.0%-3.5%.
Found with keyword 'Inflation:': Inflation: The median forecaster looks for core PCE inflation to end 2025 at 3.1% (year-over-year), up 0.7 pps from the last full survey in November 2024 and 0.3 pps higher than the March 2025 f

2y 10y

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from fredapi import Fred
import xlwings as xw
from datetime import datetime
import os
import tempfile
import time

def analyze_treasury_yield_curve():
    """
    Retrieves and analyzes the 10-Year minus 2-Year Treasury yield spread from FRED.
    Creates summary statistics and charts, then exports everything to Excel.
    This indicator is commonly used to assess recession probability and yield curve shape.
    """
    
    # Initialize the FRED API - replace with your own API key
    api_key = 'YOUR_FRED_API_KEY_HERE'
    fred = Fred(api_key=api_key)
    
    # Retrieve the T10Y2Y data (10-Year Treasury minus 2-Year Treasury spread)
    print("Retrieving T10Y2Y data from FRED...")
    t10y2y = fred.get_series('T10Y2Y')
    
    # Display basic information about the data
    print("\nT10Y2Y - 10-Year Treasury Constant Maturity Minus 2-Year Treasury Constant Maturity")
    print(f"Latest value: {t10y2y.iloc[-1]:.2f}%")
    print(f"Data range: {t10y2y.index.min()} to {t10y2y.index.max()}")
    print(f"Total data points: {len(t10y2y)}")
    
    try:
        # Open the Excel workbook for data export
        print("\nOpening Excel workbook...")
        book = xw.Book('Richspread.xlsx')
        sheet = book.sheets['MarketSnapshot']
        
        # Export summary statistics to Excel
        export_summary_stats(t10y2y, book, sheet, "B150")
        
        # Create and insert chart into Excel
        chart_file = create_and_insert_chart(t10y2y, book, sheet, "G150")
        
        # Save the workbook with our new data
        print("\nSaving workbook...")
        book.save()
        
        print("\n✓ Export completed successfully!")
        print("- Summary statistics at cell B150")
        print("- Chart at cell G150")
        
        # Clean up temporary chart file
        cleanup_temp_file(chart_file)
        
    except Exception as e:
        print(f"\nError in main execution: {e}")
        import traceback
        traceback.print_exc()
    
    # Display the chart in Python for immediate viewing
    display_chart_in_python(t10y2y)

def export_summary_stats(series_data, book, sheet, stats_cell="B150"):
    """
    Exports summary statistics for the yield curve data to Excel.
    Creates a formatted table with key metrics and dates.
    """
    print("\nExporting summary statistics...")
    start_row = sheet.range(stats_cell).row
    start_col = sheet.range(stats_cell).column
    
    # Write section header
    current_row = start_row + 2
    stats_header = sheet.cells(current_row, start_col)
    stats_header.value = "Summary Statistics"
    stats_header.font.bold = True
    
    # Create summary data with key metrics
    summary_data = [
        ["Latest Value:", f"{series_data.iloc[-1]:.2f}%"],
        ["Date Range:", f"{series_data.index.min().strftime('%Y-%m-%d')} to {series_data.index.max().strftime('%Y-%m-%d')}"],
        ["Average:", f"{series_data.mean():.2f}%"],
        ["Min Value:", f"{series_data.min():.2f}% on {series_data.idxmin().strftime('%Y-%m-%d')}"],
        ["Max Value:", f"{series_data.max():.2f}% on {series_data.idxmax().strftime('%Y-%m-%d')}"]
    ]
    
    # Write each statistic to Excel
    for i, (label, value) in enumerate(summary_data):
        current_row += 1
        sheet.cells(current_row, start_col).value = label
        sheet.cells(current_row, start_col + 1).value = value
    
    # Auto-fit columns for better readability
    sheet.range((start_row, start_col), (current_row, start_col + 1)).columns.autofit()
    
    print("Summary statistics exported successfully!")

def create_and_insert_chart(series_data, book, sheet, chart_cell="G150"):
    """
    Creates a professional chart of the yield curve data and inserts it into Excel.
    Handles temporary file creation and cleanup automatically.
    """
    print("\nCreating chart...")
    
    # Create unique filename for temporary chart file
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    chart_filename = f"t10y2y_chart_{timestamp}.png"
    chart_path = os.path.join(tempfile.gettempdir(), chart_filename)
    
    try:
        # Create the matplotlib figure
        fig, ax = plt.subplots(figsize=(12, 6))
        
        # Plot the yield curve data
        ax.plot(series_data.index, series_data.values, color='#1f77b4', linewidth=1.5)
        
        # Add title and labels
        ax.set_title('10-Year Treasury Constant Maturity Minus 2-Year Treasury Constant Maturity',
                    fontsize=14, pad=20)
        ax.set_ylabel('Percentage Points', fontsize=12)
        
        # Add horizontal line at zero (important for yield curve analysis)
        ax.axhline(y=0, color='red', linestyle='-', alpha=0.3, linewidth=1)
        
        # Add grid for better readability
        ax.grid(True, alpha=0.3)
        
        # Set reasonable y-axis limits
        ax.set_ylim(-2.5, 3.5)
        
        # Format x-axis with 5-year intervals
        years = pd.date_range(start='1976', end='2025', freq='5Y')
        ax.set_xticks(years)
        ax.set_xticklabels([str(year.year) for year in years])
        
        # Adjust layout and save
        plt.tight_layout()
        plt.savefig(chart_path, dpi=150, bbox_inches='tight', facecolor='white')
        plt.close(fig)
        
        print(f"Chart saved to: {chart_path}")
        
        # Brief pause to ensure file is written
        time.sleep(0.5)
        
        # Verify the file was created successfully
        if not os.path.exists(chart_path):
            raise FileNotFoundError(f"Chart file was not created at {chart_path}")
        
        # Clear any existing charts in the target area
        clear_existing_pictures(sheet, chart_cell)
        
        # Insert the new chart into Excel
        insert_chart_into_excel(sheet, chart_path, chart_cell)
        
        return chart_path
        
    except Exception as e:
        print(f"Error creating/inserting chart: {e}")
        import traceback
        traceback.print_exc()
        return None

def clear_existing_pictures(sheet, chart_cell):
    """
    Removes any existing pictures in the target chart area to avoid overlap.
    """
    print("Clearing existing pictures in target area...")
    target_left = sheet.range(chart_cell).left
    target_top = sheet.range(chart_cell).top
    
    # Create a list copy to avoid modification during iteration
    for pic in list(sheet.pictures):
        try:
            # Check if picture is in our target area
            if (pic.left >= target_left - 50 and
                pic.left <= target_left + 650 and
                pic.top >= target_top - 50 and
                pic.top <= target_top + 350):
                pic.delete()
        except:
            pass  # Skip if picture can't be accessed

def insert_chart_into_excel(sheet, chart_path, chart_cell):
    """
    Inserts the chart image into Excel using multiple methods for compatibility.
    """
    print(f"Inserting chart at cell {chart_cell}...")
    target_left = sheet.range(chart_cell).left
    target_top = sheet.range(chart_cell).top
    
    # Method 1: Try standard xlwings approach
    try:
        pic = sheet.pictures.add(os.path.abspath(chart_path),
                               left=target_left,
                               top=target_top,
                               width=600,
                               height=300)
        print("Chart inserted successfully using standard method!")
        return
    except Exception as e:
        print(f"Standard method failed: {e}")
    
    # Method 2: Try alternative API approach
    try:
        pic = sheet.api.Pictures.Insert(os.path.abspath(chart_path))
        pic.Left = target_left
        pic.Top = target_top
        pic.Width = 600
        pic.Height = 300
        print("Chart inserted successfully using API method!")
        return
    except Exception as e:
        print(f"API method failed: {e}")
        raise Exception("Failed to insert chart with both methods")

def cleanup_temp_file(chart_file):
    """
    Safely removes the temporary chart file after Excel processing.
    """
    if chart_file and os.path.exists(chart_file):
        try:
            # Wait to ensure Excel has finished with the file
            time.sleep(2)
            os.remove(chart_file)
            print(f"Temporary chart file {chart_file} deleted")
        except:
            print(f"Note: Could not delete {chart_file}. You can delete it manually.")

def display_chart_in_python(series_data):
    """
    Displays the yield curve chart in Python for immediate viewing.
    """
    print("\nDisplaying chart in Python...")
    plt.figure(figsize=(12, 6))
    plt.plot(series_data.index, series_data.values, color="#164c72", linewidth=1.5)
    plt.title('10-Year Treasury Constant Maturity Minus 2-Year Treasury Constant Maturity')
    plt.ylabel('Percentage Points')
    plt.axhline(y=0, color='r', linestyle='-', alpha=0.3)
    plt.grid(True)
    plt.tight_layout()
    plt.show()

if __name__ == "__main__":
    print("FRED Treasury Yield Curve Analyzer")
    print("=" * 40)
    print("This script analyzes the 10Y-2Y Treasury spread,")
    print("an important indicator for recession forecasting.")
    print("\nNote: You'll need to replace 'YOUR_FRED_API_KEY_HERE' with your actual FRED API key.")
    print("Get one free at: https://fred.stlouisfed.org/docs/api/api_key.html")
    print("=" * 40)
    
    analyze_treasury_yield_curve()

Treasury rates

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from fredapi import Fred
from datetime import datetime, timedelta
import xlwings as xw

def analyze_treasury_yield_curve():
    """
    Retrieves complete US Treasury yield curve data from FRED across all maturities.
    Exports the most recent 5 days of data to Excel and creates a yield curve visualization.
    This provides a comprehensive view of interest rates across different time periods.
    """
    
    # Initialize the FRED API - replace with your own API key
    api_key = 'YOUR_FRED_API_KEY_HERE'
    fred = Fred(api_key=api_key)
    
    # Connect to Excel workbook
    book = xw.Book('Richspread.xlsx')
    sheet = book.sheets['MarketSnapshot']
    start_cell = "B180"
    
    # Define all Treasury maturities and their FRED series identifiers
    maturity_series = {
        '1M': 'DGS1MO',   # 1-Month Treasury
        '3M': 'DGS3MO',   # 3-Month Treasury
        '6M': 'DGS6MO',   # 6-Month Treasury
        '1Y': 'DGS1',     # 1-Year Treasury
        '2Y': 'DGS2',     # 2-Year Treasury
        '3Y': 'DGS3',     # 3-Year Treasury
        '5Y': 'DGS5',     # 5-Year Treasury
        '7Y': 'DGS7',     # 7-Year Treasury
        '10Y': 'DGS10',   # 10-Year Treasury
        '20Y': 'DGS20',   # 20-Year Treasury
        '30Y': 'DGS30'    # 30-Year Treasury
    }
    
    # Retrieve yield data for each maturity
    treasury_rates = {}
    print("Retrieving Treasury yield data from FRED...")
    
    for label, series_id in maturity_series.items():
        try:
            treasury_rates[label] = fred.get_series(series_id)
            print(f"Retrieved {label} Treasury rate (Series: {series_id})")
        except Exception as e:
            print(f"Could not retrieve {label} ({series_id}): {str(e)}")
    
    # Convert to DataFrame for easy manipulation
    treasury_rates_df = pd.DataFrame(treasury_rates)
    
    # Get the most recent 5 days of data (excluding weekends/holidays when markets are closed)
    treasury_rates_last5 = treasury_rates_df.tail(5)
    
    # Display recent data for verification
    print("\nRecent Treasury Yield Curve Rates (last 5 days):")
    print(treasury_rates_last5)
    
    # Export to Excel with proper formatting
    sheet.range(start_cell).options(index=True, header=True).value = treasury_rates_last5
    print(f"Treasury yield curve DataFrame (last 5 days) exported to {book.name} at {start_cell}")
    
    # Create yield curve visualization
    create_yield_curve_chart(treasury_rates_last5, maturity_series)

def create_yield_curve_chart(data, maturity_series):
    """
    Creates a visual representation of the current Treasury yield curve.
    Shows how interest rates vary across different maturity periods.
    """
    if data.empty:
        print("No data available for plotting")
        return
    
    # Get the most recent day's data
    most_recent = data.iloc[-1]
    current_date = data.index[-1].strftime('%Y-%m-%d')
    
    # Convert maturity labels to numeric values for proper plotting
    # This allows us to show the curve shape accurately
    maturities_numeric = []
    for mat in maturity_series.keys():
        if 'MO' in mat or 'M' in mat:
            # Convert months to years (e.g., 3M = 0.25 years)
            num = int(''.join(filter(str.isdigit, mat)))
            maturities_numeric.append(num/12)
        else:
            # Convert years to numeric (e.g., 10Y = 10)
            maturities_numeric.append(int(''.join(filter(str.isdigit, mat))))
    
    # Create the yield curve plot
    plt.figure(figsize=(10, 5))
    plt.plot(maturities_numeric, most_recent.values, marker='o', linewidth=2, markersize=6)
    
    # Format the chart
    plt.xticks(maturities_numeric, list(maturity_series.keys()))
    plt.title(f"US Treasury Yield Curve on {current_date}")
    plt.xlabel("Maturity")
    plt.ylabel("Yield (%)")
    plt.grid(True)
    plt.tight_layout()
    plt.show()

if __name__ == "__main__":
    print("Treasury Yield Curve Analyzer")
    print("=" * 35)
    print("This script retrieves current Treasury rates across all maturities")
    print("and creates a yield curve visualization for market analysis.")
    print("\nNote: You'll need to replace 'YOUR_FRED_API_KEY_HERE' with your actual FRED API key.")
    print("Get one free at: https://fred.stlouisfed.org/docs/api/api_key.html")
    print("=" * 35)
    
    analyze_treasury_yield_curve()

Asw ERP

In [None]:
import pandas as pd
import re
import xlwings as xw
import traceback
import requests
from bs4 import BeautifulSoup

def extract_erp_data_to_excel():
    """
    Scrapes Equity Risk Premium (ERP) data from Aswath Damodaran's NYU homepage.
    Extracts both current and previous month implied ERP values, which are important
    indicators for equity market valuation and investment decision-making.
    """
    
    # Excel setup
    book = xw.Book('Richspread.xlsx')
    sheet = book.sheets['MarketSnapshot']
    start_cell = "B195"
    
    # Clear the target area before writing new data
    sheet.range("B194:B210").clear()
    
    # URL for Professor Damodaran's finance data
    url = "https://pages.stern.nyu.edu/~adamodar/New_Home_Page/home.htm"
    erp_tables = {}
    
    try:
        # Set up headers to mimic a real browser request
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36'
        }
        
        print(f"Fetching ERP data from: {url}")
        response = requests.get(url, headers=headers, timeout=20)
        response.raise_for_status()
        print("Page content retrieved successfully.")

        # Parse the HTML content
        soup = BeautifulSoup(response.content, 'html.parser')
        
        # Extract current month ERP data
        current_text = extract_current_erp(soup)
        
        # Extract previous month ERP data
        prev_text = extract_previous_erp(soup)

        # Store the extracted data
        if current_text:
            df = pd.DataFrame([[current_text, ""]], columns=["Raw Text", ""])
            erp_tables["Implied ERP (Current)"] = df
            print("Successfully stored current ERP data.")
        else:
            print("Warning: Could not find current ERP section on the page.")

        if prev_text:
            df = pd.DataFrame([[prev_text, ""]], columns=["Raw Text", ""])
            erp_tables["Implied ERP (Previous Month)"] = df
            print("Successfully stored previous month ERP data.")
        else:
            print("Warning: Could not find previous month ERP section on the page.")

        # Export to Excel if we found any data
        if erp_tables:
            write_erp_tables_to_excel(erp_tables, sheet, start_cell)
        else:
            print("\nNo ERP data was extracted. Nothing to write to Excel.")
        
        return erp_tables

    except requests.exceptions.RequestException as e:
        print(f"Network error occurred during web request: {e}")
        traceback.print_exc()
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        traceback.print_exc()

def extract_current_erp(soup):
    """
    Extracts the current month implied ERP text from the webpage.
    Looks for text blocks starting with "Implied ERP on..."
    """
    current_text = None
    
    # Search through all strong/bold tags for the current ERP section
    strong_tags = soup.find_all('strong')
    for tag in strong_tags:
        tag_text = tag.get_text(strip=True)
        if tag_text.startswith('Implied ERP on'):
            # Get the full text from the parent element
            current_text = tag.parent.get_text(separator=" ", strip=True)
            print("Found current ERP text block.")
            break
    
    return current_text

def extract_previous_erp(soup):
    """
    Extracts the previous month implied ERP text from the webpage.
    Looks for text blocks starting with "Implied ERP in previous month..."
    """
    prev_text = None
    
    # Search through all paragraph tags for the previous month ERP section
    p_tags = soup.find_all('p')
    for p in p_tags:
        p_text = p.get_text(separator=" ", strip=True)
        if p_text.startswith('Implied ERP in previous month'):
            prev_text = p_text
            print("Found previous month ERP text block.")
            break
    
    return prev_text

def cell_to_rowcol(cell_ref):
    """
    Converts Excel cell reference (e.g., 'B195') to (row, col) tuple.
    This helps with programmatic cell manipulation in Excel.
    """
    match = re.match(r"([A-Za-z]+)([0-9]+)", cell_ref)
    if not match:
        raise ValueError(f"Invalid cell reference: {cell_ref}")
    
    col_letters, row = match.groups()
    col = 0
    
    # Convert column letters to numeric value (A=1, B=2, etc.)
    for char in col_letters.upper():
        col = col * 26 + (ord(char) - ord('A') + 1)
    
    return int(row), col

def get_next_empty_row(sheet, start_cell):
    """
    Finds the next empty row in the Excel sheet starting from the given cell.
    This prevents overwriting existing data.
    """
    start_row, start_col = cell_to_rowcol(start_cell)
    current_row = start_row
    
    # Keep checking rows until we find an empty one
    while True:
        if not sheet.cells(current_row, start_col).value:
            return current_row
        current_row += 1

def write_erp_tables_to_excel(erp_tables, sheet, start_cell):
    """
    Writes the extracted ERP data to Excel with proper formatting.
    Creates organized sections for current and previous month data.
    """
    print("\nWriting ERP data to Excel...")
    
    # Find the next empty row to avoid overwriting data
    row = get_next_empty_row(sheet, start_cell)
    _, col = cell_to_rowcol(start_cell)
    
    # Write each ERP table section
    for key, df in erp_tables.items():
        # Write section header
        sheet.cells(row, col).value = key
        sheet.cells(row, col).font.bold = True  # Make headers bold
        row += 1
        
        # Write the data rows
        for i in range(df.shape[0]):
            for j in range(df.shape[1]):
                cell_value = df.iloc[i, j]
                sheet.cells(row, col + j).value = cell_value
                # Enable text wrapping for long content
                sheet.cells(row, col + j).api.WrapText = True
            row += 1
        
        # Add spacing between sections
        row += 1
    
    print("ERP data successfully written to Excel.")

if __name__ == "__main__":
    print("Equity Risk Premium (ERP) Data Scraper")
    print("=" * 45)
    print("This script extracts current equity risk premium data")
    print("from Professor Aswath Damodaran's NYU homepage.")
    print("ERP is a key metric for equity valuation and market analysis.")
    print("=" * 45)
    
    extract_erp_data_to_excel()

Fed Rate Probability Table

In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import pandas as pd
import time
import xlwings as xw

def extract_fed_rate_tables_to_excel():
    """Extract Fed Rate Monitor probability tables and send to Excel in separate tables"""
    url = "https://www.investing.com/central-banks/fed-rate-monitor"
    
    # Setup Chrome options
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--window-size=1920,1080")
    chrome_options.add_argument("--disable-blink-features=AutomationControlled")
    chrome_options.add_argument("--disable-logging")
    chrome_options.add_argument("--log-level=3")
    chrome_options.add_argument("--silent")
    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
    chrome_options.add_experimental_option('useAutomationExtension', False)

    driver = webdriver.Chrome(options=chrome_options)

    try:
        print(f"Loading page: {url}")
        driver.get(url)
        print("Waiting for page to load...")
        time.sleep(5)
        
        # Handle cookie consent popup if it appears
        try:
            cookie_button = driver.find_element(By.XPATH, "//button[contains(text(), 'Accept') or contains(text(), 'accept') or contains(@id, 'cookie')]")
            print("Clicking cookie consent button...")
            cookie_button.click()
            time.sleep(1)
        except:
            print("No cookie consent popup found or it couldn't be clicked")
        
        # Use the exact XPaths provided by the user
        xpath_templates = [
            "/html/body/div[7]/section/div[4]/div[2]/label/div",  # First meeting
            "/html/body/div[7]/section/div[4]/div[3]/label/div",  # Second meeting
            "/html/body/div[7]/section/div[4]/div[4]/label/div",  # Third meeting
            "/html/body/div[7]/section/div[4]/div[5]/label/div"   # Fourth meeting 
        ]
        
        # Dictionary to store results for each meeting date
        meeting_data = {}
        
        # Process each XPath
        for i, xpath in enumerate(xpath_templates):
            try:
                # Try to find the element
                try:
                    label_div = driver.find_element(By.XPATH, xpath)
                    
                    # Get the meeting date from the div
                    meeting_date = label_div.text.strip()
                    if not meeting_date:
                        meeting_date = f"Meeting {i+1}"
                    
                    print(f"\nProcessing meeting: {meeting_date}")
                    
                    # Get the parent label element (for clicking)
                    parent_xpath = xpath.rsplit('/', 1)[0]  # Remove the '/div' part to get the label
                    parent_label = driver.find_element(By.XPATH, parent_xpath)
                    
                    # Scroll to the element
                    driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});", parent_label)
                    time.sleep(1)
                    
                    # Click to expand if this is not the first label (which is already expanded)
                    if i > 0:
                        try:
                            print(f"Clicking to expand: {meeting_date}")
                            driver.execute_script("arguments[0].click();", parent_label)
                            time.sleep(2)  # Wait for expansion animation
                        except Exception as e:
                            print(f"Could not click label: {e}")
                    else:
                        print(f"First meeting is already expanded, skipping click")
                    
                    # Now find the table - it should be in the next sibling of the label
                    try:
                        # Use JavaScript to find the table
                        table = driver.execute_script("""
                            var label = arguments[0];
                            var container = label.nextElementSibling;
                            if (container) {
                                var tables = container.querySelectorAll('table');
                                if (tables.length > 0) {
                                    return tables[tables.length - 1]; // Get the last table (probability table)
                                }
                            }
                            return null;
                        """, parent_label)
                        
                        if not table:
                            print(f"No table found for {meeting_date} using JavaScript approach")
                            
                            # Try to find it using XPath - look for the Target Rate table
                            probability_table_xpath = f"{parent_xpath}/following-sibling::div[1]//table[.//th[contains(text(), 'Target Rate')]]"
                            table = driver.find_element(By.XPATH, probability_table_xpath)
                        
                        if table:
                            # Extract table data
                            rows = table.find_elements(By.TAG_NAME, "tr")
                            print(f"Found table with {len(rows)} rows")
                            
                            # Extract headers and data
                            headers = []
                            data = []
                            
                            for j, row in enumerate(rows):
                                cells = row.find_elements(By.TAG_NAME, "th") + row.find_elements(By.TAG_NAME, "td")
                                row_data = [cell.text.strip() for cell in cells]
                                
                                if j == 0:  # Header row
                                    headers = row_data
                                else:  # Data rows
                                    data.append(row_data)
                            
                            # Create DataFrame for this meeting
                            meeting_df = pd.DataFrame(data, columns=headers)
                            
                            # Store in our results
                            meeting_data[meeting_date] = meeting_df
                            
                            # Display results in console (keeping the console output format)
                            print(f"\nFed Rate Probability Table for {meeting_date}:")
                            print(meeting_df)
                        else:
                            print(f"No table found for {meeting_date}")
                    
                    except Exception as e:
                        print(f"Error extracting table for {meeting_date}: {e}")
                
                except Exception as e:
                    print(f"Could not find element with XPath {xpath}: {e}")
            
            except Exception as e:
                print(f"Error processing XPath {i+1}: {e}")
        
        # Write data to Excel in the console-like format
        write_to_excel_separate_tables(meeting_data)
        
        return meeting_data
    
    except Exception as e:
        print(f"An error occurred: {e}")
        import traceback
        traceback.print_exc()
    
    finally:
        driver.quit()
        print("Browser closed")
        return None

def write_to_excel_separate_tables(meeting_data):
    """Write the meeting data to Excel at cell M70, with each meeting as a separate table"""
    if not meeting_data:
        print("No data to write to Excel")
        return
    
    try:
        print("\nWriting data to Excel...")
        
        # Connect to Excel - use active workbook or create a new one
        try:
            wb = xw.books.active
        except:
            wb = xw.Book()
        
        # Try to select the MarketSnapshot sheet or use the active sheet
        try:
            sheet = wb.sheets['MarketSnapshot']
        except:
            sheet = wb.sheets.active
            print(f"Using active sheet: {sheet.name}")
        # Start at cell M70
        start_cell = sheet.range('M70')
        current_row = start_cell.row
        current_col = start_cell.column
        
        # Write each meeting's data as a separate table, one after another
        for meeting_date, df in meeting_data.items():
            # Write the meeting date as a title
            title_cell = sheet.cells(current_row, current_col)
            title_cell.value = f"Fed Rate Probability Table for {meeting_date}:"
            title_cell.font.bold = True
            current_row += 1
            
            # Write the headers
            for col_idx, header in enumerate(df.columns):
                header_cell = sheet.cells(current_row, current_col + col_idx)
                header_cell.value = header
                header_cell.font.bold = True
            current_row += 1
            
            # Write the data rows
            for _, row in df.iterrows():
                for col_idx, value in enumerate(row):
                    data_cell = sheet.cells(current_row, current_col + col_idx)
                    data_cell.value = value
                current_row += 1
            
            # Add an empty row between tables
            current_row += 2
        
        print(f"Successfully wrote Fed Rate data to Excel starting at cell M70")
        
    except Exception as e:
        print(f"Error writing to Excel: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    extract_fed_rate_tables_to_excel()

Loading page: https://www.investing.com/central-banks/fed-rate-monitor
Waiting for page to load...
Clicking cookie consent button...

Processing meeting: Jul 30, 2025
First meeting is already expanded, skipping click
Found table with 3 rows

Fed Rate Probability Table for Jul 30, 2025:
   Target Rate Current Probability% Previous Day Probability%  \
0  4.00 - 4.25                 4.3%                      6.4%   
1  4.25 - 4.50                95.7%                     93.6%   

  Previous Week Probability%  
0                       6.4%  
1                      93.6%  

Processing meeting: Sep 17, 2025
Clicking to expand: Sep 17, 2025
Found table with 4 rows

Fed Rate Probability Table for Sep 17, 2025:
   Target Rate Current Probability% Previous Day Probability%  \
0  3.75 - 4.00                 2.6%                      3.5%   
1  4.00 - 4.25                59.9%                     54.2%   
2  4.25 - 4.50                37.5%                     42.2%   

  Previous Week Probabilit