In [1]:
#CODE Final V4
##https://www.casey.vic.gov.au/view-planning-applications

import os
import time
import shutil
import pandas as pd
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup

# Function to create a sanitized folder name
def create_folder_name(app_number):
    """Create (if missing) and return the folder for one application.

    The folder lives directly under the module-level ``download_folder`` and
    is named after ``app_number`` with every hyphen removed.

    Args:
        app_number: Application number string, e.g. ``"PA23-0736"``.

    Returns:
        The path of the (now existing) folder.
    """
    hyphen_free = "".join(app_number.split('-'))
    destination = os.path.join(download_folder, hyphen_free)
    # exist_ok=True makes repeated calls for the same application harmless.
    os.makedirs(destination, exist_ok=True)
    return destination

# Function to move the most recently downloaded file to the specified folder
def move_latest_download_to_folder(download_folder, target_folder, max_attempts=5, wait=2, download_timeout=300):
    """Move the newest finished download from ``download_folder`` into ``target_folder``.

    Only regular files directly inside ``download_folder`` are candidates, so
    sub-folders stored in the same directory are never mistaken for a
    download. If a file with the same name already exists in
    ``target_folder`` a ``_1``, ``_2``, ... suffix is added instead of failing.

    Args:
        download_folder: Directory the browser downloads into.
        target_folder: Directory the finished file is moved to (created if
            it does not exist).
        max_attempts: How many times to retry when no file is present yet or
            the file cannot be moved (locked / vanished).
        wait: Seconds to sleep between checks.
        download_timeout: Upper bound, in seconds, on how long an unfinished
            download (``.tmp`` / ``.crdownload``) is waited for. Waiting for
            an unfinished download does not use up ``max_attempts``.

    Raises:
        Exception: If the download does not finish within
            ``download_timeout`` or nothing could be moved after
            ``max_attempts`` attempts.
    """
    partial_suffixes = ('.tmp', '.crdownload')
    deadline = time.monotonic() + download_timeout
    os.makedirs(target_folder, exist_ok=True)

    attempt = 0
    while attempt < max_attempts:
        try:
            candidates = [
                path
                for path in (os.path.join(download_folder, name) for name in os.listdir(download_folder))
                if os.path.isfile(path)
            ]
            if not candidates:
                # Nothing has been written yet; count it so we cannot wait forever.
                attempt += 1
            else:
                latest_file = max(candidates, key=os.path.getctime)
                if latest_file.endswith(partial_suffixes):
                    # Download still in progress: keep polling, bounded by the deadline.
                    if time.monotonic() >= deadline:
                        raise Exception(f"Download did not finish within {download_timeout} seconds.")
                else:
                    base_name = os.path.basename(latest_file)
                    stem, extension = os.path.splitext(base_name)
                    destination = os.path.join(target_folder, base_name)
                    count = 1
                    while os.path.exists(destination):
                        destination = os.path.join(target_folder, f"{stem}_{count}{extension}")
                        count += 1
                    shutil.move(latest_file, destination)
                    return
        except (PermissionError, FileNotFoundError):
            # The file is still locked, or was renamed between listing and use
            # (for example when a temporary download file is finalised).
            attempt += 1
        time.sleep(wait)

    raise Exception(f"Failed to move file after {max_attempts} attempts.")

# Downloads go into a dedicated folder; Chrome is configured below to save
# there without showing a prompt.
download_folder = "downloaded_pdfs"
os.makedirs(download_folder, exist_ok=True)

chrome_prefs = {}
chrome_prefs["download.default_directory"] = os.path.abspath(download_folder)
chrome_prefs["download.prompt_for_download"] = False
chrome_prefs["download.directory_upgrade"] = True
chrome_prefs["safebrowsing.enabled"] = True

chrome_options = Options()
chrome_options.add_experimental_option("prefs", chrome_prefs)

# Start the browser and open the council landing page
driver = webdriver.Chrome(options=chrome_options)
driver.get("https://www.casey.vic.gov.au/view-planning-applications")
print("Opened main page.")

# The link is located positionally (the anchor in the sixth paragraph of the
# article), so wait until it is clickable before clicking it.
button_xpath = '//*[@id="block-content"]/article/div/div/div/p[6]/a'
view_applications_link = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, button_xpath))
)
view_applications_link.click()
print("Clicked to view planning applications.")

# One independent list per output column, filled row by row during scraping.
# Columns read from the results table:
(
    application_numbers_main,
    proposals_main,
    property_addresses_main,
    closing_dates_main,
    suburbs_main,
) = ([] for _ in range(5))

# Columns read from the first three grids of the detail page:
(
    detailed_app_numbers,
    detailed_proposals,
    detailed_app_types,
    detailed_categories,
    detailed_estimated_costs,
    detailed_closing_dates,
    detailed_property_addresses,
    detailed_land_descriptions,
    detailed_wards,
) = ([] for _ in range(9))

# Columns read from the "additional" and "more" grids of the detail page:
(
    additional_app_numbers,
    additional_proposals,
    additional_permit_types,
    additional_relationships,
) = ([] for _ in range(4))
(
    more_app_numbers,
    more_proposals,
    more_permit_types,
    more_relationships,
) = ([] for _ in range(4))
eTrack_app_details = []

# Function to get detailed data
def get_detailed_data(driver):
    """Read the field values from the detail page currently loaded in ``driver``.

    The page source is parsed with BeautifulSoup and each field is looked up
    with its own CSS selector (second cell of a specific row in a specific
    grid).

    Args:
        driver: Object exposing the current page HTML as ``page_source``.

    Returns:
        dict mapping each field label to the stripped cell text, or to
        ``"Data Not Available"`` when the selector matches nothing.
    """
    page = BeautifulSoup(driver.page_source, 'html.parser')

    # All selectors share one shape; only the page component, the grid inside
    # that component and the row matcher differ from field to field.
    selector_template = (
        "#ctl00_Content_cusPageComponents_repPageComponents_{component}"
        "_cusPageComponentGrid_repWebGrid_{grid}_dtvWebGridListView"
        " > tbody > {row} > td:nth-child(2)"
    )

    # (label, page component, grid, row matcher)
    # NOTE(review): "Additional Proposal" and "eTrack Application Details Page"
    # resolve to the same selector, so they always carry the same value --
    # confirm whether that is intended.
    field_locations = [
        ("Application Number", "ctl00", "ctl00", "tr:nth-child(1)"),
        ("Proposal Description", "ctl00", "ctl00", "tr:nth-child(2)"),
        ("Application Type", "ctl00", "ctl00", "tr:nth-child(3)"),
        ("Categories", "ctl00", "ctl00", "tr:nth-child(4)"),
        ("Estimated Cost", "ctl00", "ctl00", "tr:nth-child(5)"),
        ("Closing Date", "ctl00", "ctl00", "tr:nth-child(6)"),
        ("Property Address", "ctl02", "ctl00", "tr.normalRow"),
        ("Land Description", "ctl02", "ctl00", "tr.alternateRow"),
        ("Ward", "ctl03", "ctl00", "tr"),
        ("Additional Application Number", "ctl05", "ctl00", "tr:nth-child(1)"),
        ("Additional Proposal", "ctl05", "ctl00", "tr:nth-child(2)"),
        ("Additional Permit Type", "ctl05", "ctl00", "tr.alternateRow"),
        ("Additional Relationship", "ctl05", "ctl00", "tr:nth-child(4)"),
        ("More Application Number", "ctl05", "ctl01", "tr:nth-child(1)"),
        ("More Proposal", "ctl05", "ctl01", "tr:nth-child(2)"),
        ("More Permit Type", "ctl05", "ctl01", "tr.alternateRow"),
        ("More Relationship", "ctl05", "ctl01", "tr:nth-child(4)"),
        ("eTrack Application Details Page", "ctl05", "ctl00", "tr:nth-child(2)"),
    ]

    details = {}
    for label, component, grid, row in field_locations:
        cell = page.select_one(selector_template.format(component=component, grid=grid, row=row))
        details[label] = cell.text.strip() if cell else "Data Not Available"
    return details

# Main extraction loop
table_rows_xpath = '//*[@id="ctl00_Content_cusApplicationResultsGrid_repWebGrid_ctl00_grdWebGridTabularView"]/tbody/tr'

# Everything that drives the browser runs inside try/finally so Chrome is shut
# down even when a wait times out or a file operation fails part-way through.
try:
    table_rows = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, table_rows_xpath)))
    # Index 0 is skipped by the loop below (presumably the header row), hence "- 1".
    print(f"Found {len(table_rows)-1} rows in the main table.")

    for row_index in range(1, len(table_rows)):
        print(f"Processing row {row_index}...")

        row = table_rows[row_index]
        detail_link = row.find_element(By.XPATH, './/td[1]/a')

        # Summary columns from the results table
        application_numbers_main.append(detail_link.text)
        proposals_main.append(row.find_element(By.XPATH, './/td[2]').text)
        property_addresses_main.append(row.find_element(By.XPATH, './/td[3]').text)
        closing_dates_main.append(row.find_element(By.XPATH, './/td[4]').text)
        suburbs_main.append(row.find_element(By.XPATH, './/td[5]').text)

        # Click (via JavaScript) to go to the detailed page
        driver.execute_script("arguments[0].click();", detail_link)
        print(f"Accessed detail page for row {row_index}.")

        # Wait for the detail page to load and extract data
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "#ctl00_Content_cusPageComponents_repPageComponents_ctl00_cusPageComponentGrid_repWebGrid_ctl00_dtvWebGridListView")))
        detailed_info = get_detailed_data(driver)

        # Append each scraped field to the list backing its output column.
        for column_values, field_label in (
            (detailed_app_numbers, "Application Number"),
            (detailed_proposals, "Proposal Description"),
            (detailed_app_types, "Application Type"),
            (detailed_categories, "Categories"),
            (detailed_estimated_costs, "Estimated Cost"),
            (detailed_closing_dates, "Closing Date"),
            (detailed_property_addresses, "Property Address"),
            (detailed_land_descriptions, "Land Description"),
            (detailed_wards, "Ward"),
            (additional_app_numbers, "Additional Application Number"),
            (additional_proposals, "Additional Proposal"),
            (additional_permit_types, "Additional Permit Type"),
            (additional_relationships, "Additional Relationship"),
            (more_app_numbers, "More Application Number"),
            (more_proposals, "More Proposal"),
            (more_permit_types, "More Permit Type"),
            (more_relationships, "More Relationship"),
            (eTrack_app_details, "eTrack Application Details Page"),
        ):
            column_values.append(detailed_info[field_label])
        print(f"Extracted details from detail page for row {row_index}.")

        # Download PDF
        application_number = detailed_app_numbers[-1]
        # The folder is created before the download is requested so that the
        # downloaded file, not the folder, is the newest entry in download_folder.
        folder_path = create_folder_name(application_number)

        pdf_requested = False
        try:
            pdf_link_selector = "#ctl00_Content_cusPageComponents_repPageComponents_ctl04_cusPageComponentGrid_repWebGrid_ctl00_grdWebGridTabularView > tbody > tr.normalRow > td:nth-child(1) > a"
            pdf_link = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, pdf_link_selector)))
            pdf_url = pdf_link.get_attribute('href')
            driver.get(pdf_url)
            pdf_requested = True
            print(f"Downloaded PDF for {application_number}.")
        except TimeoutException:
            print(f"TimeoutException occurred for {application_number}, PDF download link not found.")

        # Only move a file when a download was actually requested; otherwise
        # there is nothing new in download_folder that belongs to this row.
        if pdf_requested:
            move_latest_download_to_folder(download_folder, folder_path)
            print(f"Moved PDF for {application_number} to its folder.")

        # Go back to the main page and refresh the rows list, because the
        # previously located row elements belong to the page we navigated away from.
        driver.back()
        time.sleep(2)  # Wait for page to reload
        table_rows = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, table_rows_xpath)))

    # Create DataFrame
    data = {
        "Application Number Main": application_numbers_main,
        "Proposal Main": proposals_main,
        "Property Address Main": property_addresses_main,
        "Closing Date Main": closing_dates_main,
        "Suburb Main": suburbs_main,
        "Detailed Application Number": detailed_app_numbers,
        "Detailed Proposal": detailed_proposals,
        "Detailed Application Type": detailed_app_types,
        "Detailed Categories": detailed_categories,
        "Detailed Estimated Cost": detailed_estimated_costs,
        "Detailed Closing Date": detailed_closing_dates,
        "Detailed Property Address": detailed_property_addresses,
        "Detailed Land Description": detailed_land_descriptions,
        "Detailed Ward": detailed_wards,
        "Additional Application Number": additional_app_numbers,
        "Additional Proposal": additional_proposals,
        "Additional Permit Type": additional_permit_types,
        "Additional Relationship": additional_relationships,
        "More Application Number": more_app_numbers,
        "More Proposal": more_proposals,
        "More Permit Type": more_permit_types,
        "More Relationship": more_relationships,
        "eTrack Application Details": eTrack_app_details
    }
    df = pd.DataFrame(data)

    # Save the DataFrame to an Excel file with a timestamped name
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_file = f"planning_advertised_applications_{timestamp}.xlsx"
    df.to_excel(output_file, index=False)
    print(f"Data saved to Excel file: {output_file}")
finally:
    driver.quit()
    print("Browser closed.")

Opened main page.
Clicked to view planning applications.
Found 10 rows in the main table.
Processing row 1...
Accessed detail page for row 1.
Extracted details from detail page for row 1.
Downloaded PDF for PA23-0736.
Moved PDF for PA23-0736 to its folder.
Processing row 2...
Accessed detail page for row 2.
Extracted details from detail page for row 2.
Downloaded PDF for PA23-0676.
Moved PDF for PA23-0676 to its folder.
Processing row 3...
Accessed detail page for row 3.
Extracted details from detail page for row 3.
Downloaded PDF for PA23-0672.
Moved PDF for PA23-0672 to its folder.
Processing row 4...
Accessed detail page for row 4.
Extracted details from detail page for row 4.
Downloaded PDF for PA23-0647.
Moved PDF for PA23-0647 to its folder.
Processing row 5...
Accessed detail page for row 5.
Extracted details from detail page for row 5.
Downloaded PDF for PA23-0545.
Moved PDF for PA23-0545 to its folder.
Processing row 6...
Accessed detail page for row 6.
Extracted details from 

In [4]:
#v5
#updated detailed 2nd part
#download pdf folder updated
#https://www.casey.vic.gov.au/view-planning-applications
#https://casey-web.t1cloud.com/T1PRDefault/WebApps/eProperty/P1/PublicNotices/AllPublicNotices.aspx?r=P1.WEBGUEST&f=P1.CSY.PUBNOTAL.ENQ

import os
import time
import shutil
import pandas as pd
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup

# Function to create a sanitized folder name
def create_folder_name(app_number):
    """Return the per-application folder path, creating the folder if needed.

    The folder name is ``app_number`` with all ``-`` characters dropped, and
    it is placed inside the module-level ``download_folder``.

    Args:
        app_number: Application number string, e.g. ``"PA23-0736"``.

    Returns:
        Path of the folder, which exists when the function returns.
    """
    application_dir = os.path.join(download_folder, app_number.translate({ord('-'): None}))
    # Calling this again for the same application is a no-op thanks to exist_ok.
    os.makedirs(application_dir, exist_ok=True)
    return application_dir

# Function to move the most recently downloaded file to the specified folder

def move_latest_download_to_folder(download_folder, target_folder, max_attempts=5, wait=2, download_timeout=300):
    """Move the newest finished download from ``download_folder`` into ``target_folder``.

    Only regular files directly inside ``download_folder`` are candidates, so
    sub-folders stored in the same directory are never mistaken for a
    download. When ``target_folder`` already holds a file with the same name,
    the moved file gets a ``_1``, ``_2``, ... suffix before its extension.

    Args:
        download_folder: Directory the browser downloads into.
        target_folder: Directory the finished file is moved to (created if
            it does not exist).
        max_attempts: How many times to retry when no file is present yet or
            the file cannot be moved (locked / vanished).
        wait: Seconds to sleep between checks.
        download_timeout: Upper bound, in seconds, on how long an unfinished
            download (``.tmp`` / ``.crdownload``) is waited for. Waiting for
            an unfinished download does not use up ``max_attempts``.

    Raises:
        Exception: If the download does not finish within
            ``download_timeout`` or nothing could be moved after
            ``max_attempts`` attempts.
    """
    partial_suffixes = ('.tmp', '.crdownload')
    deadline = time.monotonic() + download_timeout
    os.makedirs(target_folder, exist_ok=True)

    attempt = 0
    while attempt < max_attempts:
        try:
            full_paths = [os.path.join(download_folder, name) for name in os.listdir(download_folder)]
            downloaded_files = [path for path in full_paths if os.path.isfile(path)]

            if not downloaded_files:
                # Nothing has been written yet; count it so we cannot wait forever.
                attempt += 1
            else:
                latest_file = max(downloaded_files, key=os.path.getctime)

                if latest_file.endswith(partial_suffixes):
                    # Download still in progress: keep polling, bounded by the deadline.
                    if time.monotonic() >= deadline:
                        raise Exception(f"Download did not finish within {download_timeout} seconds.")
                else:
                    # Generate a new file name if file already exists
                    base_name = os.path.basename(latest_file)
                    name, extension = os.path.splitext(base_name)
                    new_file_path = os.path.join(target_folder, base_name)
                    count = 1
                    while os.path.exists(new_file_path):
                        new_file_path = os.path.join(target_folder, f"{name}_{count}{extension}")
                        count += 1

                    shutil.move(latest_file, new_file_path)
                    return
        except (PermissionError, FileNotFoundError):
            # The file is still locked, or was renamed between listing and use
            # (for example when a temporary download file is finalised).
            attempt += 1
        time.sleep(wait)

    raise Exception(f"Failed to move file after {max_attempts} attempts.")

# Folder that Chrome saves into; created up front so it exists before the
# browser starts.
download_folder = "downloaded_pdfs"
os.makedirs(download_folder, exist_ok=True)

# Preferences that make Chrome save into download_folder without prompting.
download_preferences = dict([
    ("download.default_directory", os.path.abspath(download_folder)),
    ("download.prompt_for_download", False),
    ("download.directory_upgrade", True),
    ("safebrowsing.enabled", True),
])
chrome_options = Options()
chrome_options.add_experimental_option("prefs", download_preferences)

# Launch Chrome and load the council landing page
driver = webdriver.Chrome(options=chrome_options)
driver.get("https://www.casey.vic.gov.au/view-planning-applications")
print("Opened main page.")

# Follow the link to the application list once it becomes clickable. The XPath
# is positional (anchor inside the sixth paragraph of the article).
button_xpath = '//*[@id="block-content"]/article/div/div/div/p[6]/a'
button_locator = (By.XPATH, button_xpath)
WebDriverWait(driver, 10).until(EC.element_to_be_clickable(button_locator)).click()
print("Clicked to view planning applications.")

# Accumulators, one list per spreadsheet column.
# -- values taken from the results table
application_numbers_main, proposals_main, property_addresses_main = [], [], []
closing_dates_main, suburbs_main = [], []
# -- values taken from the detail page
detailed_app_numbers, detailed_proposals, detailed_app_types = [], [], []
detailed_categories, detailed_estimated_costs, detailed_closing_dates = [], [], []
detailed_property_addresses, detailed_land_descriptions, detailed_wards = [], [], []
# -- values taken from the "additional" / "more" grids of the detail page
additional_app_numbers, additional_proposals = [], []
additional_permit_types, additional_relationships = [], []
more_app_numbers, more_proposals = [], []
more_permit_types, more_relationships = [], []
eTrack_app_details = []

# Function to get detailed data
def get_detailed_data(driver):
    """Collect the field values from the detail page currently shown by ``driver``.

    ``driver.page_source`` is parsed with BeautifulSoup; every field is the
    text of the second cell of one row in one of the page's grids.

    Args:
        driver: Object exposing the current page HTML as ``page_source``.

    Returns:
        dict mapping each field label to the stripped cell text, or to
        ``"Data Not Available"`` when nothing matches the field's selector.
    """
    parsed_page = BeautifulSoup(driver.page_source, 'html.parser')

    def cell_text(component, grid, row):
        # Build the selector for one value cell and return its text, falling
        # back to a placeholder when the page has no such cell.
        selector = (
            f"#ctl00_Content_cusPageComponents_repPageComponents_{component}"
            f"_cusPageComponentGrid_repWebGrid_{grid}_dtvWebGridListView"
            f" > tbody > {row} > td:nth-child(2)"
        )
        match = parsed_page.select_one(selector)
        if not match:
            return "Data Not Available"
        return match.text.strip()

    def nth(position):
        # Row matcher for the row at the given 1-based position.
        return f"tr:nth-child({position})"

    return {
        # Main application grid
        "Application Number": cell_text("ctl00", "ctl00", nth(1)),
        "Proposal Description": cell_text("ctl00", "ctl00", nth(2)),
        "Application Type": cell_text("ctl00", "ctl00", nth(3)),
        "Categories": cell_text("ctl00", "ctl00", nth(4)),
        "Estimated Cost": cell_text("ctl00", "ctl00", nth(5)),
        "Closing Date": cell_text("ctl00", "ctl00", nth(6)),
        # Property and ward grids
        "Property Address": cell_text("ctl02", "ctl00", "tr.normalRow"),
        "Land Description": cell_text("ctl02", "ctl00", "tr.alternateRow"),
        "Ward": cell_text("ctl03", "ctl00", "tr"),
        # First grid of component ctl05
        "Additional Application Number": cell_text("ctl05", "ctl00", nth(1)),
        "eTrack Application Details Page": cell_text("ctl05", "ctl00", nth(2)),
        "Additional Proposal": cell_text("ctl05", "ctl00", nth(3)),
        "Additional Permit Type": cell_text("ctl05", "ctl00", nth(4)),
        "Additional Relationship": cell_text("ctl05", "ctl00", nth(5)),
        # Second grid of component ctl05
        "More Application Number": cell_text("ctl05", "ctl01", nth(1)),
        "More Proposal": cell_text("ctl05", "ctl01", nth(2)),
        "More Permit Type": cell_text("ctl05", "ctl01", "tr.alternateRow"),
        "More Relationship": cell_text("ctl05", "ctl01", nth(4)),
    }

# Main extraction loop
table_rows_xpath = '//*[@id="ctl00_Content_cusApplicationResultsGrid_repWebGrid_ctl00_grdWebGridTabularView"]/tbody/tr'

# Everything that drives the browser runs inside try/finally so Chrome is shut
# down even when a wait times out or a file operation fails part-way through.
try:
    table_rows = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, table_rows_xpath)))
    # Index 0 is skipped by the loop below (presumably the header row), hence "- 1".
    print(f"Found {len(table_rows)-1} rows in the main table.")

    for row_index in range(1, len(table_rows)):
        print(f"Processing row {row_index}...")

        row = table_rows[row_index]
        detail_link = row.find_element(By.XPATH, './/td[1]/a')

        # Summary columns from the results table
        application_numbers_main.append(detail_link.text)
        proposals_main.append(row.find_element(By.XPATH, './/td[2]').text)
        property_addresses_main.append(row.find_element(By.XPATH, './/td[3]').text)
        closing_dates_main.append(row.find_element(By.XPATH, './/td[4]').text)
        suburbs_main.append(row.find_element(By.XPATH, './/td[5]').text)

        # Click (via JavaScript) to go to the detailed page
        driver.execute_script("arguments[0].click();", detail_link)
        print(f"Accessed detail page for row {row_index}.")

        # Wait for the detail page to load and extract data
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "#ctl00_Content_cusPageComponents_repPageComponents_ctl00_cusPageComponentGrid_repWebGrid_ctl00_dtvWebGridListView")))
        detailed_info = get_detailed_data(driver)

        # Append each scraped field to the list backing its output column.
        for column_values, field_label in (
            (detailed_app_numbers, "Application Number"),
            (detailed_proposals, "Proposal Description"),
            (detailed_app_types, "Application Type"),
            (detailed_categories, "Categories"),
            (detailed_estimated_costs, "Estimated Cost"),
            (detailed_closing_dates, "Closing Date"),
            (detailed_property_addresses, "Property Address"),
            (detailed_land_descriptions, "Land Description"),
            (detailed_wards, "Ward"),
            (additional_app_numbers, "Additional Application Number"),
            (eTrack_app_details, "eTrack Application Details Page"),
            (additional_proposals, "Additional Proposal"),
            (additional_permit_types, "Additional Permit Type"),
            (additional_relationships, "Additional Relationship"),
            (more_app_numbers, "More Application Number"),
            (more_proposals, "More Proposal"),
            (more_permit_types, "More Permit Type"),
            (more_relationships, "More Relationship"),
        ):
            column_values.append(detailed_info[field_label])
        print(f"Extracted details from detail page for row {row_index}.")

        # Download PDF
        application_number = detailed_app_numbers[-1]
        # The folder is created before the download is requested so that the
        # downloaded file, not the folder, is the newest entry in download_folder.
        folder_path = create_folder_name(application_number)

        pdf_requested = False
        try:
            pdf_link_selector = "#ctl00_Content_cusPageComponents_repPageComponents_ctl04_cusPageComponentGrid_repWebGrid_ctl00_grdWebGridTabularView > tbody > tr.normalRow > td:nth-child(1) > a"
            pdf_link = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, pdf_link_selector)))
            pdf_url = pdf_link.get_attribute('href')
            driver.get(pdf_url)
            pdf_requested = True
            print(f"Downloaded PDF for {application_number}.")
        except TimeoutException:
            print(f"TimeoutException occurred for {application_number}, PDF download link not found.")

        # Only move a file when a download was actually requested; otherwise
        # there is nothing new in download_folder that belongs to this row.
        if pdf_requested:
            move_latest_download_to_folder(download_folder, folder_path)
            print(f"Moved PDF for {application_number} to its folder.")

        # Go back to the main page and refresh the rows list, because the
        # previously located row elements belong to the page we navigated away from.
        driver.back()
        time.sleep(2)  # Wait for page to reload
        table_rows = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, table_rows_xpath)))

    # Create DataFrame
    data = {
        "Application Number Main": application_numbers_main,
        "Proposal Main": proposals_main,
        "Property Address Main": property_addresses_main,
        "Closing Date Main": closing_dates_main,
        "Suburb Main": suburbs_main,
        "Detailed Application Number": detailed_app_numbers,
        "Detailed Proposal": detailed_proposals,
        "Detailed Application Type": detailed_app_types,
        "Detailed Categories": detailed_categories,
        "Detailed Estimated Cost": detailed_estimated_costs,
        "Detailed Closing Date": detailed_closing_dates,
        "Detailed Property Address": detailed_property_addresses,
        "Detailed Land Description": detailed_land_descriptions,
        "Detailed Ward": detailed_wards,
        "Additional Application Number": additional_app_numbers,
        "eTrack Application Details": eTrack_app_details,
        "Additional Proposal": additional_proposals,
        "Additional Permit Type": additional_permit_types,
        "Additional Relationship": additional_relationships,
        "More Application Number": more_app_numbers,
        "More Proposal": more_proposals,
        "More Permit Type": more_permit_types,
        "More Relationship": more_relationships
    }
    df = pd.DataFrame(data)

    # Save the DataFrame to an Excel file with a timestamped name
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_file = f"planning_advertised_applications_{timestamp}.xlsx"
    df.to_excel(output_file, index=False)
    print(f"Data saved to Excel file: {output_file}")
finally:
    driver.quit()
    print("Browser closed.")

Opened main page.
Clicked to view planning applications.
Found 10 rows in the main table.
Processing row 1...
Accessed detail page for row 1.
Extracted details from detail page for row 1.
Downloaded PDF for PA23-0736.
Moved PDF for PA23-0736 to its folder.
Processing row 2...
Accessed detail page for row 2.
Extracted details from detail page for row 2.
Downloaded PDF for PA23-0676.
Moved PDF for PA23-0676 to its folder.
Processing row 3...
Accessed detail page for row 3.
Extracted details from detail page for row 3.
Downloaded PDF for PA23-0672.
Moved PDF for PA23-0672 to its folder.
Processing row 4...
Accessed detail page for row 4.
Extracted details from detail page for row 4.
Downloaded PDF for PA23-0647.
Moved PDF for PA23-0647 to its folder.
Processing row 5...
Accessed detail page for row 5.
Extracted details from detail page for row 5.
Downloaded PDF for PA23-0545.
Moved PDF for PA23-0545 to its folder.
Processing row 6...
Accessed detail page for row 6.
Extracted details from 