In [1]:
import os
import requests
import zipfile
import shutil
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import subprocess
import time

def get_chrome_version():
    """Get the installed Chrome browser version."""
    try:
        result = subprocess.run(
            ['reg', 'query', 'HKEY_CURRENT_USER\\Software\\Google\\Chrome\\BLBeacon', '/v', 'version'],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )
        if result.returncode == 0:
            version_line = result.stdout.strip().split("\n")[-1]
            return version_line.split()[-1]
        else:
            raise Exception("Chrome version not found.")
    except Exception as e:
        print(f"Error getting Chrome version: {e}")
        return None

def fetch_latest_chromedriver_version():
    """Fetch the latest ChromeDriver version from the website."""
    try:
        options = Options()
        options.add_argument("--headless")  # Run Chrome in headless mode
        options.add_argument("--disable-gpu")
        options.add_argument("--no-sandbox")

        with webdriver.Chrome(options=options) as driver:
            driver.get("https://googlechromelabs.github.io/chrome-for-testing/#stable")
            # Wait for the element to load and fetch the version number
            version_element = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.XPATH, "/html/body/div/table/tbody/tr[1]/td[1]/code"))
            )
            version_number = version_element.text.strip()
            return version_number
    except Exception as e:
        print(f"Error fetching ChromeDriver version: {e}")
        return None

def download_chromedriver(version_number, download_path):
    """Download the appropriate version of ChromeDriver."""
    try:
        download_url = f"https://storage.googleapis.com/chrome-for-testing-public/{version_number}/win64/chromedriver-win64.zip"
        print(f"Downloading ChromeDriver version {version_number}...")
        zip_path = os.path.join(download_path, "chromedriver.zip")
        with requests.get(download_url, stream=True) as r:
            with open(zip_path, 'wb') as f:
                shutil.copyfileobj(r.raw, f)
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(download_path)
        os.remove(zip_path)
        print("ChromeDriver downloaded and extracted successfully.")
    except Exception as e:
        print(f"Error downloading ChromeDriver: {e}")

def check_and_download_chromedriver():
    """Check if ChromeDriver exists and matches the Chrome version."""
    notebook_dir = os.getcwd()  # Use current working directory in Jupyter Notebook
    chromedriver_dir = os.path.join(notebook_dir, "chromedriver-win64")  # Subfolder for ChromeDriver
    chromedriver_path = os.path.join(chromedriver_dir, "chromedriver.exe")
    chrome_version = get_chrome_version()

    if not chrome_version:
        print("Unable to determine Chrome version. Please ensure Chrome is installed.")
        return

    if not os.path.exists(chromedriver_path):
        print("ChromeDriver not found in 'chromedriver-win64'. Fetching the latest version...")
        version_number = fetch_latest_chromedriver_version()
        if version_number:
            # Ensure the subfolder exists
            os.makedirs(chromedriver_dir, exist_ok=True)
            download_chromedriver(version_number, chromedriver_dir)
    else:
        # Check ChromeDriver version
        try:
            result = subprocess.run(
                [chromedriver_path, '--version'],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True
            )
            if result.returncode == 0:
                chromedriver_version = result.stdout.split()[1]
                if not chromedriver_version.startswith(chrome_version.split('.')[0]):
                    print("ChromeDriver version mismatch. Fetching the latest version...")
                    version_number = fetch_latest_chromedriver_version()
                    if version_number:
                        download_chromedriver(version_number, chromedriver_dir)
                else:
                    print("ChromeDriver is up to date.")
            else:
                print("Error checking ChromeDriver version. Fetching the latest version...")
                version_number = fetch_latest_chromedriver_version()
                if version_number:
                    download_chromedriver(version_number, chromedriver_dir)
        except Exception as e:
            print(f"Error checking ChromeDriver version: {e}")
            version_number = fetch_latest_chromedriver_version()
            if version_number:
                download_chromedriver(version_number, chromedriver_dir)

# Run the check
check_and_download_chromedriver()

ChromeDriver is up to date.


In [None]:
url = "https://tradingeconomics.com/australia/labour-costs"

svg_path_element = "error"
svg_height = "error"
value_td2 = "error"
value_td3 = "error"

webdriver.Chrome(service=service, options=chrome_options).get(url)
    
# //*[@id="highcharts-msjuy5t-0"]/svg/g[7]/g[1]/path[1]

In [None]:

# Set up Chrome options for headless mode
chrome_options = Options()
# chrome_options.add_argument("--headless")
# chrome_options.add_argument("--disable-gpu")
# chrome_options.add_argument("--no-sandbox")

chromedriver_path = os.path.join(os.getcwd(), "chromedriver-win64", "chromedriver.exe")
service = Service(chromedriver_path)

url = "https://tradingeconomics.com/australia/labour-costs"

svg_path_element = "error"
svg_height = "error"
value_td2 = "error"
value_td3 = "error"

with webdriver.Chrome(service=service, options=chrome_options) as driver:
    driver.get(url)
    driver.maximize_window()
    wait = WebDriverWait(driver, 20)
    

    # Click the necessary buttons to show the correct chart
    try:
        wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="dateSpansDiv"]/a[3]'))).click()
    except Exception as e:
        print("Failed to click dateSpansDiv/a[3]:", e)

    time.sleep(2)

    try:
        wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="chart"]/div/div/div[1]/div/div[3]/div/button'))).click()
    except Exception as e:
        print("Failed to click chart selection button:", e)

    # time.sleep(2)

    try:
        wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="chart"]/div/div/div[1]/div/div[3]/div/div/div[5]/button'))).click()
    except Exception as e:
        print("Failed to click line chart submenu:", e)

    # 1. Get SVG path
    try:
        path_element = wait.until(
            EC.presence_of_element_located((By.XPATH, '//*[@class="highcharts-series highcharts-series-0 highcharts-line-series"]/path[2]'))
        )
        svg_path_element = path_element.get_attribute("d")
    except Exception as e:
        print("Failed to get SVG path:", e)
        svg_path_element = "error"

    # 2. Get SVG height
    try:
        rect_element = wait.until(
            EC.presence_of_element_located((By.XPATH, '//*[@id="highcharts-1kvr1fp-0"]/svg/rect[2]'))
        )
        svg_height = rect_element.get_attribute("height")
    except Exception as e:
        print("Failed to get SVG height:", e)
        svg_height = "error"

    # 3. Get value_td2
    try:
        value_td2_element = wait.until(
            EC.presence_of_element_located((By.XPATH, '//*[@id="ctl00_ContentPlaceHolder1_ctl00_ctl00_PanelPeers"]/div/div[1]/table/tbody/tr[7]/td[2]'))
        )
        value_td2 = value_td2_element.text
    except Exception as e:
        print("Failed to get value_td2:", e)
        value_td2 = "error"

    # 4. Get value_td3
    try:
        value_td3_element = wait.until(
            EC.presence_of_element_located((By.XPATH, '//*[@id="ctl00_ContentPlaceHolder1_ctl00_ctl00_PanelPeers"]/div/div[1]/table/tbody/tr[7]/td[3]'))
        )
        value_td3 = value_td3_element.text
    except Exception as e:
        print("Failed to get value_td3:", e)
        value_td3 = "error"

print("SVG Path:", svg_path_element)
print("SVG Height:", svg_height)
print("Value TD2:", value_td2)
print("Value TD3:", value_td3)


Failed to get SVG path: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF75BE1CF65+75717]
	GetHandleVerifier [0x00007FF75BE1CFC0+75808]
	(No symbol) [0x00007FF75BBE8F9A]
	(No symbol) [0x00007FF75BC3F4C6]
	(No symbol) [0x00007FF75BC3F77C]
	(No symbol) [0x00007FF75BC92577]
	(No symbol) [0x00007FF75BC673BF]
	(No symbol) [0x00007FF75BC8F39C]
	(No symbol) [0x00007FF75BC67153]
	(No symbol) [0x00007FF75BC30421]
	(No symbol) [0x00007FF75BC311B3]
	GetHandleVerifier [0x00007FF75C11D74D+3223469]
	GetHandleVerifier [0x00007FF75C117CF2+3200338]
	GetHandleVerifier [0x00007FF75C135B23+3322755]
	GetHandleVerifier [0x00007FF75BE36A3A+180890]
	GetHandleVerifier [0x00007FF75BE3E13F+211359]
	GetHandleVerifier [0x00007FF75BE252B4+109332]
	GetHandleVerifier [0x00007FF75BE25462+109762]
	GetHandleVerifier [0x00007FF75BE0BA79+4825]
	BaseThreadInitThunk [0x00007FFEAEFEE8D7+23]
	RtlUserThreadStart [0x00007FFEAFF9C5DC+44]



# Calculate Data Values from SVG Line Chart Path

This notebook calculates data values for each point in an SVG line chart path using a linear model with offset. The model is based on two known points:
- Last point: y-coordinate = 19.38571428570998, value = 110.7
- Second-to-last point: y-coordinate = 34.55714285713998, value = 108.9

The SVG chart has a height of 295, assumed as the baseline (y = 295). The model is:
\[ V = k \cdot (295 - H) + b \]
where \( V \) is the data value, \( H \) is the SVG y-coordinate, \( k \) is the coefficient, and \( b \) is the offset.

Steps:
1. Parse the SVG path to extract x, y coordinates for all points.
2. Calculate the coefficient \( k \) and offset \( b \) using the known points.
3. Compute the data value for each point and store in a pandas DataFrame with columns: index, x, y, value.

In [5]:
import re
import pandas as pd

# SVG path from the line chart
svg_path = "M 0 257.914285714286 L 14.149873701937 249.485714285714 L 28.299747403873 247.8 L 42.295818130789 245.271428571429 L 56.291888857704 250.328571428571 L 70.441762559641 252.014285714286 L 84.591636261577 246.957142857143 L 98.587706988493 257.071428571429 L 112.42997474039 255.38571428571402 L 126.57984844232 247.8 L 140.72972214426 243.585714285714 L 154.72579287118 236 L 168.56806062307 237.685714285714 L 182.71793432501 236.84285714285699 L 196.86780802694 230.1 L 210.86387875386 220.828571428571 L 224.70614650575 218.3 L 238.85602020769 207.34285714285699 L 253.00589390963 203.971428571429 L 267.00196463654 198.914285714286 L 280.99803536346 198.914285714286 L 295.14790906539 265.5 L 309.29778276733 270.557142857143 L 323.29385349425 214.928571428571 L 337.13612124614 198.914285714286 L 351.28599494808 173.62857142857 L 365.43586865001 163.51428571429 L 379.43193937693 166.04285714286 L 393.27420712882 158.45714285714 L 407.42408083076 143.28571428571 L 421.5739545327 126.42857142857 L 435.57002525961 116.31428571429001 L 449.41229301151 105.35714285713999 L 463.56216671344 89.34285714286 L 477.71204041538 74.17142857143 L 491.7081111423 61.52857142856999 L 505.70418186921 56.47142857143001 L 519.85405557115 42.14285714286001 L 534.00392927308 34.55714285713998 L 548 19.38571428570998"

# Known points
known_points = [
    {'y': 34.55714285713998, 'value': 108.9},  # Second-to-last point
    {'y': 19.38571428570998, 'value': 110.7}   # Last point
]
baseline = 295  # SVG chart height

# Step 1: Parse SVG path to extract x, y coordinates
def parse_svg_path(path):
    # Extract all numbers after M and L commands
    coordinates = re.findall(r'[ML]\s*([\d.+-]+)\s*([\d.+-]+)', path)
    # Convert to list of dictionaries with x, y
    return [{'x': float(x), 'y': float(y)} for x, y in coordinates]

points = parse_svg_path(svg_path)
print(f"Extracted {len(points)} points from the SVG path.")

# Step 2: Calculate coefficient k and offset b
# Model: V = k * (295 - H) + b
H1 = known_points[0]['y']
V1 = known_points[0]['value']
H2 = known_points[1]['y']
V2 = known_points[1]['value']

effective_height1 = baseline - H1
effective_height2 = baseline - H2

# Solve: V1 = k * effective_height1 + b
#        V2 = k * effective_height2 + b
k = (V2 - V1) / (effective_height2 - effective_height1)
b = V1 - k * effective_height1

print(f"Coefficient k: {k}")
print(f"Offset b: {b}")

# Step 3: Calculate values for all points and store in DataFrame
data = []
for i, point in enumerate(points):
    x = point['x']
    y = point['y']
    effective_height = baseline - y
    value = k * effective_height + b
    data.append({'index': i, 'x': x, 'y': y, 'value': round(value, 4)})

# Create DataFrame
df = pd.DataFrame(data, columns=['index', 'x', 'y', 'value'])

# Display results
print("\nDataFrame with calculated values (first 5 rows):")
print(df.head())
print("\nLast two points for verification:")
print(df.tail(2))

# Save DataFrame to CSV (optional)
# df.to_csv('line_chart_values.csv', index=False)
print("\nDataFrame created with values for all points. Uncomment the last line to save to CSV if needed.")

Extracted 40 points from the SVG path.
Coefficient k: 0.11864406779659882
Offset b: 78.00000000000261

DataFrame with calculated values (first 5 rows):
   index          x           y  value
0      0   0.000000  257.914286   82.4
1      1  14.149874  249.485714   83.4
2      2  28.299747  247.800000   83.6
3      3  42.295818  245.271429   83.9
4      4  56.291889  250.328571   83.3

Last two points for verification:
    index           x          y  value
38     38  534.003929  34.557143  108.9
39     39  548.000000  19.385714  110.7

DataFrame created with values for all points. Uncomment the last line to save to CSV if needed.
