# extracting just the route type and their percentage 

In [8]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import WebDriverException, TimeoutException
import requests
import xml.etree.ElementTree as ET
from collections import defaultdict
import re
import csv
from datetime import datetime

def setup_driver():
    """Set up and return a Chrome webdriver with configured options"""
    chrome_options = Options()
    chrome_options.add_experimental_option("detach", True)
    driver = webdriver.Chrome(options=chrome_options)
    return driver

def get_tour_ids():
    """Fetch tour IDs from the API"""
    api_url = "https://www.outdooractive.com/api/project/api-dev-oa/tours?key=yourtest-outdoora-ctiveapi"
    
    try:
        response = requests.get(api_url)
        response.raise_for_status()
        root = ET.fromstring(response.content)
        tour_ids = [elem.attrib['id'] for elem in root.findall('.//{http://www.outdooractive.com/api/}data')]
        return tour_ids
    except (requests.RequestException, ET.ParseError) as e:
        print(f"Error fetching tour IDs: {e}")
        return []

def handle_initial_consent(driver):
    """Handle the initial cookie consent popups"""
    try:
        # Wait for and click the first "Accept all" button
        accept_all = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "oax-cookie-consent-select-all"))
        )
        accept_all.click()

        # Wait for and click the second consent button
        consent_btn = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "fc-button-label"))
        )
        consent_btn.click()
        
        return True
    except TimeoutException:
        print("Consent buttons not found or already accepted")
        return False
    except Exception as e:
        print(f"Error handling consent: {e}")
        return False

def extract_route_type(url):
    """Extract the route type from the URL"""
    match = re.search(r'/route/([^/]+)/', url)
    if match:
        return match.group(1)
    return "unknown"

def save_to_csv(route_types, total_routes):
    """Save route types and their IDs to a CSV file"""
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"route_types_{timestamp}.csv"
    
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        # Write header
        writer.writerow(['Route Type', 'Percentage', 'Count', 'IDs'])
        
        # Write data for each route type
        for route_type, ids in route_types.items():
            percentage = (len(ids) / total_routes) * 100
            writer.writerow([
                route_type,
                f"{percentage:.2f}%",
                len(ids),
                ','.join(ids)  # All IDs in a single column
            ])
    
    print(f"\nData saved to {filename}")

def process_tours():
    """Main function to process tours and track different route types"""
    driver = setup_driver()
    tour_ids = get_tour_ids()
    route_types = defaultdict(list)
    consent_handled = False
    
    try:
        for index, tour_id in enumerate(tour_ids):
            hiking_url = f"https://www.outdooractive.com/en/route/hiking-trail/{tour_id}"
            
            if index == 0:
                # Load first URL in main window
                driver.get(hiking_url)
                if not consent_handled:
                    handle_initial_consent(driver)
                    consent_handled = True
            else:
                # Open new tab
                driver.execute_script(f"window.open('{hiking_url}', '_blank')")
                driver.switch_to.window(driver.window_handles[-1])
            
            try:
                # Wait briefly for URL to update
                WebDriverWait(driver, 5).until(
                    lambda d: d.current_url != hiking_url
                )
            except TimeoutException:
                pass
            
            # Get the current URL and extract route type
            current_url = driver.current_url
            route_type = extract_route_type(current_url)
            route_types[route_type].append(tour_id)
            
            # Progress update
            if (index + 1) % 10 == 0:
                print(f"Processed {index + 1} routes...")
            
            # Close current tab (except for first iteration)
            if index > 0:
                driver.close()
                driver.switch_to.window(driver.window_handles[0])
            
    except Exception as e:
        print(f"Unexpected error: {e}")
    
    finally:
        # Calculate total routes
        total_routes = sum(len(ids) for ids in route_types.values())
        
        # Print summary and save to CSV
        print("\nRoute Type Summary:")
        print("-" * 50)
        for route_type, ids in route_types.items():
            percentage = (len(ids) / total_routes) * 100
            print(f"{route_type}: {len(ids)} routes ({percentage:.2f}%)")
        
        # Save data to CSV
        save_to_csv(route_types, total_routes)
        
        driver.quit()

if __name__ == "__main__":
    process_tours()

Unexpected error: Message: no such window: target window already closed
from unknown error: web view not found
  (Session info: chrome=131.0.6778.205)
Stacktrace:
	GetHandleVerifier [0x00007FF7A2D8FB05+28789]
	(No symbol) [0x00007FF7A2CF86E0]
	(No symbol) [0x00007FF7A2B9592A]
	(No symbol) [0x00007FF7A2B6F505]
	(No symbol) [0x00007FF7A2C16477]
	(No symbol) [0x00007FF7A2C1D918]
	(No symbol) [0x00007FF7A2C0F400]
	(No symbol) [0x00007FF7A2BDA938]
	(No symbol) [0x00007FF7A2BDBAA1]
	GetHandleVerifier [0x00007FF7A30C933D+3410093]
	GetHandleVerifier [0x00007FF7A30DE7DD+3497293]
	GetHandleVerifier [0x00007FF7A30D2A73+3448803]
	GetHandleVerifier [0x00007FF7A2E57BBB+848171]
	(No symbol) [0x00007FF7A2D03C3F]
	(No symbol) [0x00007FF7A2CFF6E4]
	(No symbol) [0x00007FF7A2CFF87D]
	(No symbol) [0x00007FF7A2CEED49]
	BaseThreadInitThunk [0x00007FF936BD259D+29]
	RtlUserThreadStart [0x00007FF937D6AF38+40]


Route Type Summary:
--------------------------------------------------
hiking-trail: 2 routes (66.67%

# script for just the textual discription

In [16]:
routes_to_include=[]

In [89]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import WebDriverException, TimeoutException
import requests
import xml.etree.ElementTree as ET
from collections import defaultdict
import re
import csv
import json
from datetime import datetime

def setup_driver():
    """Set up and return a Chrome webdriver with configured options"""
    chrome_options = Options()
    chrome_options.add_experimental_option("detach", True)
    driver = webdriver.Chrome(options=chrome_options)
    return driver

def get_tour_ids():
    """Fetch tour IDs from the API"""
    api_url = "https://www.outdooractive.com/api/project/api-dev-oa/tours?key=yourtest-outdoora-ctiveapi"
    
    try:
        response = requests.get(api_url)
        response.raise_for_status()
        root = ET.fromstring(response.content)
        tour_ids = [elem.attrib['id'] for elem in root.findall('.//{http://www.outdooractive.com/api/}data')]
        return tour_ids
    except (requests.RequestException, ET.ParseError) as e:
        print(f"Error fetching tour IDs: {e}")
        return []

def handle_initial_consent(driver):
    """Handle the initial cookie consent popups"""
    try:
        # Wait for and click the first "Accept all" button
        accept_all = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "oax-cookie-consent-select-all"))
        )
        accept_all.click()

        # Wait for and click the second consent button
        consent_btn = WebDriverWait(driver, 100).until(
            EC.presence_of_element_located((By.CLASS_NAME, "fc-button-label"))
        )
        consent_btn.click()
        
        return True
    except TimeoutException:
        print("Consent buttons not found or already accepted")
        return False
    except Exception as e:
        print(f"Error handling consent: {e}")
        return False

def extract_route_type(url):
    """Extract the route type from the URL"""
    match = re.search(r'/route/([^/]+)/', url)
    if match:
        return match.group(1)
    return "unknown"
 
def save_to_json(hiking_data):
    """Save hiking data to a JSON file"""
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"hiking_data_{timestamp}.json"
    
    try:
        with open(filename, 'w', encoding='utf-8') as jsonfile:
            json.dump(hiking_data, jsonfile, indent=4, ensure_ascii=False)
        print(f"\nData saved to {filename}")
    except Exception as e:
        print(f"Error saving JSON file: {e}")

def extract_description(driver,tour_id):
    """Extract both the short and long hiking descriptions from the webpage"""
    try:
        # CSS selectors for both descriptions
        short_desc_selector = "#mainContent > div > div.oax-part-singleviewpage.oax-part-singleviewpage-tour.oax_singlePage_flex.oax.oax-dpvis-true > div.oax_container_12.oax_singlePage > div.oax_detail_main > div.oax_detail_tabs.oax_detail_tabs_content.oax_grid_8.oax_alpha.oax_pad_bottom_20.oax_marg_bottom_30 > div:nth-child(2) > div.oax_bold"
        long_desc_selector = "#mainContent > div > div.oax-part-singleviewpage.oax-part-singleviewpage-tour.oax_singlePage_flex.oax.oax-dpvis-true > div.oax_container_12.oax_singlePage > div.oax_detail_main > div.oax_detail_tabs.oax_detail_tabs_content.oax_grid_8.oax_alpha.oax_pad_bottom_20.oax_marg_bottom_30 > div:nth-child(2) > div:nth-child(6)"

        # Wait for and extract short description
        short_desc_element = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, short_desc_selector))
        )
        short_description = short_desc_element.text.strip()
        
        # Wait for and extract long description
        long_desc_element = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, long_desc_selector))
        )
        long_description = long_desc_element.text.strip()
        
        # Return both descriptions in a dictionary
        return {
            "short_description": short_description if short_description else None,
            "long_description": long_description if long_description else None
        }
    except TimeoutException:
        print(f"One or both description elements not found in id:{tour_id}")
        return {
            "short_description": None,
            "long_description": None
        }
    except Exception as e:
        print(f"Error extracting descriptions: {e}")
        return {
            "short_description": None,
            "long_description": None
        }

def process_tours():
    """Main function to process tours and collect hiking data"""
    driver = setup_driver()
    tour_ids = get_tour_ids()
    hiking_data = []
    consent_handled = False
    
    try:
        for index, tour_id in enumerate(tour_ids):
            hiking_url = f"https://www.outdooractive.com/en/route/hiking-trail/{tour_id}"
            
            if index == 0:
                driver.get(hiking_url)
                if not consent_handled:
                    handle_initial_consent(driver)
                    consent_handled = True
            else:
                driver.execute_script(f"window.open('{hiking_url}', '_blank')")
                driver.switch_to.window(driver.window_handles[-1])
            
            try:
                WebDriverWait(driver, 10).until(
                    lambda d: d.current_url != hiking_url
                )
            except TimeoutException:
                pass
            
            current_url = driver.current_url
            route_type = extract_route_type(current_url)
            
            if route_type in routes_to_include:
                # Extract both descriptions
                descriptions = extract_description(driver,tour_id)
                
                # Store data with both descriptions
                hike_info = {
                    "tour_id": tour_id,
                    "route_type": route_type,
                    "short_description": descriptions["short_description"],
                    "long_description": descriptions["long_description"],
                    "url": current_url
                }
                hiking_data.append(hike_info)
                
                if (index + 1) % 10 == 0:
                    print(f"Processed {index + 1} routes...")
                    save_to_json(hiking_data)
                
                if index > 0:
                    driver.close()
                    driver.switch_to.window(driver.window_handles[0])
            
    except Exception as e:
        print(f"Unexpected error: {e}")
    
    finally:
        save_to_json(hiking_data)
        driver.quit()

if __name__ == "__main__":
    process_tours()


Data saved to hiking_data_20250119_014309.json


KeyboardInterrupt: 

# loop hole in the outdooractive website they store the image url in the javascript tag so we can extrat that

In [None]:



def extract_ld_json(url):
    try:
        # Fetch the webpage
        response = requests.get(url)
        response.raise_for_status()
        
        # Parse HTML
        soup = BeautifulSoup(response.text, 'html.parser')
        
        # Find the ld+json script tag
        json_script = soup.find('script', {'type': 'application/ld+json'})
        
        if not json_script:
            return None
            
        # Parse JSON content
        json_data = json.loads(json_script.string)
        return json_data
        
    except requests.RequestException as e:
        print(f"Error fetching URL: {e}")
        return None
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON: {e}")
        return None

# Usage
url = "https://www.outdooractive.com/en/route/hiking-trail/allgaeu-alps/from-the-rappensee-hut-to-the-hermann-von-barth-hut/17940759/"
data = extract_ld_json(url)

if data:
    print(json.dumps(data, indent=2))

{
  "@context": "http://schema.org",
  "@type": "SportsActivityLocation",
  "url": "https://www.outdooractive.com/en/route/hiking-trail/allgaeu-alps/from-the-rappensee-hut-to-the-hermann-von-barth-hut/17940759/",
  "name": "From the Rappensee Hut to the Hermann von Barth Hut",
  "alternateName": "From the Rappensee Hut to the Hermann von Barth Hut",
  "description": "Alpine hike from the Rappenseeh\u00fctte via the Heilbronner Weg and the Gro\u00dfer Krottenkopf (2656 m) and on to the Hermann-von-Barth H\u00fctte.",
  "hasMap": "https://www.outdooractive.com/api/staticmap?i=17940759&size=xlarge",
  "aggregateRating": {
    "@type": "AggregateRating",
    "reviewCount": 5,
    "ratingValue": 5.0,
    "worstRating": 0,
    "bestRating": 5
  },
  "Image": [
    {
      "@type": "ImageObject",
      "@context": "http://schema.org",
      "author": {
        "@type": "Person",
        "@context": "http://schema.org",
        "givenName": "Matthias",
        "familyName": "Gruse",
        "i

# extracting image details as well as difficulty along with that , text as usual included

In [96]:
routes_to_include=['hiking-trail','scenic-route','trail-running','long-distance-hiking','running','mountain-hike'
                   ,'via-ferrata','winter-hiking','nature-trail','bikepacking','snowshoeing','city-walk','pilgrim-walk']

In [97]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import WebDriverException, TimeoutException
import requests
import xml.etree.ElementTree as ET
from collections import defaultdict
import re
import csv
import requests
from bs4 import BeautifulSoup
import json
import json
from datetime import datetime
import time

def extract_ld_json(url):
    try:
        # Fetch the webpage
        response = requests.get(url)
        response.raise_for_status()
        
        # Parse HTML
        soup = BeautifulSoup(response.text, 'html.parser')
        
        # Find the ld+json script tag
        json_script = soup.find('script', {'type': 'application/ld+json'})
        
        if not json_script:
            return None
            
        # Parse JSON content
        json_data = json.loads(json_script.string)
        return json_data
        
    except requests.RequestException as e:
        print(f"Error fetching URL: {e}")
        return None
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON: {e}")
        return None
    
def setup_driver():
    """Set up and return a Chrome webdriver with configured options"""
    chrome_options = Options()
    chrome_options.add_experimental_option("detach", True)
    driver = webdriver.Chrome(options=chrome_options)
    return driver

def get_tour_ids():
    """Fetch tour IDs from the API"""
    api_url = "https://www.outdooractive.com/api/project/api-dev-oa/tours?key=yourtest-outdoora-ctiveapi"
    
    try:
        response = requests.get(api_url)
        response.raise_for_status()
        root = ET.fromstring(response.content)
        tour_ids = [elem.attrib['id'] for elem in root.findall('.//{http://www.outdooractive.com/api/}data')]
        return tour_ids
    except (requests.RequestException, ET.ParseError) as e:
        print(f"Error fetching tour IDs: {e}")
        return []

def handle_initial_consent(driver,tour_id):
    """Handle the initial cookie consent popups"""
    try:
        # Wait for and click the first "Accept all" button
        accept_all = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "oax-cookie-consent-select-all"))
        )
        accept_all.click()

        # Wait for and click the second consent button
        consent_btn = WebDriverWait(driver, 100).until(
            EC.presence_of_element_located((By.CLASS_NAME, "fc-button-label"))
        )
        consent_btn.click()
        
        return True
    except TimeoutException:
        print("Consent buttons not found or already accepted")
        return False
    except Exception as e:
        print(f"Error handling consent: {e} in id:{tour_id}")
        return False

def extract_route_type(url):
    """Extract the route type from the URL"""
    match = re.search(r'/route/([^/]+)/', url)
    if match:
        return match.group(1)
    return "unknown"
 
def save_to_json(hiking_data,tour_id):
    """Save hiking data to a JSON file"""
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"hiking_data_{timestamp}.json"
    
    try:
        with open(filename, 'w', encoding='utf-8') as jsonfile:
            json.dump(hiking_data, jsonfile, indent=4, ensure_ascii=False)
        print(f"\nData saved to {filename}")
    except Exception as e:
        print(f"Error saving JSON file: {e} in id:{tour_id}")


In [98]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import (
    TimeoutException, 
    NoSuchElementException,
    StaleElementReferenceException,
    WebDriverException,
    ElementNotVisibleException,
    ElementNotInteractableException
)
import re
import requests
import xml.etree.ElementTree as ET
from requests.exceptions import RequestException

def find_text_in_page(driver, search_text):
    try:
        page_source = driver.page_source
        matches = []
        search_text_lower = search_text.lower()
        page_source_lower = page_source.lower()
        
        for match in re.finditer(re.escape(search_text_lower), page_source_lower):
            try:
                start_pos = match.start() + 11
                end_pos = min(len(page_source), match.end() + 5)
                context = page_source[start_pos:end_pos]
                matches.append((search_text, context))
            except IndexError as e:
                print(f"Error processing match position: {e}")
                continue
                
        return matches
    except Exception as e:
        print(f"Error in find_text_in_page: {e}")
        return []

def extract_gpx_data(tour_id):
    try:
        gpx_url = f"https://api-oa.com/download.tour.gpx?i={tour_id}&project=api-dev-oa&key=yourtest-outdoora-ctiveapi"
        
        try:
            response = requests.get(gpx_url, timeout=30)
            response.raise_for_status()
        except RequestException as e:
            print(f"Network error fetching GPX data: {e} for tour_id: {tour_id}")
            return None
            
        try:
            root = ET.fromstring(response.content)
            ns = {'gpx': 'http://www.topografix.com/GPX/1/1'}
            title_elem = root.find('.//gpx:metadata/gpx:name', ns)
            title = title_elem.text if title_elem is not None else None
            
            return {'title': title}
        except ET.ParseError as e:
            print(f"XML parsing error: {e} for tour_id: {tour_id}")
            return None
            
    except Exception as e:
        print(f"Unexpected error in extract_gpx_data: {e} for tour_id: {tour_id}")
        return None

def process_ld_json_data(json_data):
    try:
        if not json_data:
            return None
        
        map_url = json_data.get("hasMap", "")
        images = []
        
        try:
            if "Image" in json_data:
                for img in json_data["Image"]:
                    try:
                        image_url = img.get("url", "")
                        if image_url == map_url:
                            continue
                        image_info = {
                            "url": image_url,
                            "caption": img.get("caption", ""),
                            "date_created": img.get("dateCreated", ""),
                            "author": {
                                "given_name": img.get("author", {}).get("givenName", ""),
                                "family_name": img.get("author", {}).get("familyName", ""),
                                "image": img.get("author", {}).get("image", "")
                            } if "author" in img else None
                        }
                        images.append(image_info)
                    except Exception as e:
                        print(f"Error processing individual image: {e}")
                        continue
        except Exception as e:
            print(f"Error processing images: {e}")
        
        amenities = {}
        difficulty = None
        tags = []
        
        try:
            if "amenityFeature" in json_data:
                for amenity in json_data["amenityFeature"]:
                    try:
                        name = amenity.get("name", "")
                        value = amenity.get("value")
                        unit = amenity.get("unitCode", "")
                        
                        if unit:
                            amenities[name] = {"value": value, "unit": unit}
                        elif name == "difficulty":
                            difficulty = value
                        elif isinstance(value, bool) and value is True:
                            tags.append(name)
                    except Exception as e:
                        print(f"Error processing individual amenity: {e}")
                        continue
        except Exception as e:
            print(f"Error processing amenities: {e}")
        
        return {
            "images": images,
            "map_url": map_url,
            "amenities": amenities,
            "difficulty": difficulty,
            "tags": tags
        }
    except Exception as e:
        print(f"Error in process_ld_json_data: {e}")
        return None

def extract_best_months(driver, tour_id):
    try:
        months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
        best_months = []
        
        try:
            results = find_text_in_page(driver, "month tip")
        except Exception as e:
            print(f"Error finding text in page: {e} for tour_id: {tour_id}")
            return None
            
        if results:
            for i, (text, context) in enumerate(results, 1):
                try:
                    if context in months:
                        best_months.append(context)
                except Exception as e:
                    print(f"Error processing month context: {e}")
                    continue
        else:
            print(f"Text not found in page source of tour_id: {tour_id}")
            
        return best_months if best_months else None
        
    except Exception as e:
        print(f"Error in extract_best_months: {e} for tour_id: {tour_id}")
        return None

def extract_description(driver, tour_id):
    try:
        short_desc_selector = "#mainContent > div > div.oax-part-singleviewpage.oax-part-singleviewpage-tour.oax_singlePage_flex.oax.oax-dpvis-true > div.oax_container_12.oax_singlePage > div.oax_detail_main > div.oax_detail_tabs.oax_detail_tabs_content.oax_grid_8.oax_alpha.oax_pad_bottom_20.oax_marg_bottom_30 > div:nth-child(2) > div.oax_bold"
        long_desc_selector = "#mainContent > div > div.oax-part-singleviewpage.oax-part-singleviewpage-tour.oax_singlePage_flex.oax.oax-dpvis-true > div.oax_container_12.oax_singlePage > div.oax_detail_main > div.oax_detail_tabs.oax_detail_tabs_content.oax_grid_8.oax_alpha.oax_pad_bottom_20.oax_marg_bottom_30 > div:nth-child(2) > div:nth-child(6)"
        
        short_description = ""
        long_description = ""
        
        try:
            short_desc_element = WebDriverWait(driver, 10).until(
                EC.visibility_of_element_located((By.CSS_SELECTOR, short_desc_selector))
            )
            short_description = short_desc_element.text.strip()
        except (TimeoutException, NoSuchElementException, StaleElementReferenceException) as e:
            print(f"Error getting short description: {e} for tour_id: {tour_id}")
        except Exception as e:
            print(f"Unexpected error getting short description: {e} for tour_id: {tour_id}")
            
        try:
            long_desc_element = WebDriverWait(driver, 10).until(
                EC.visibility_of_element_located((By.CSS_SELECTOR, long_desc_selector))
            )
            long_description = long_desc_element.text.strip()
        except (TimeoutException, NoSuchElementException, StaleElementReferenceException) as e:
            print(f"Error getting long description: {e} for tour_id: {tour_id}")
        except Exception as e:
            print(f"Unexpected error getting long description: {e} for tour_id: {tour_id}")
            
        try:
            current_url = driver.current_url
            ld_json_data = extract_ld_json(current_url)
            extra_data = process_ld_json_data(ld_json_data)
        except Exception as e:
            print(f"Error processing JSON-LD data: {e} for tour_id: {tour_id}")
            extra_data = None
            
        try:
            best_months = extract_best_months(driver, tour_id)
        except Exception as e:
            print(f"Error extracting best months: {e} for tour_id: {tour_id}")
            best_months = None
            
        return {
            "short_description": short_description,
            "long_description": long_description,
            "images": extra_data["images"] if extra_data else [],
            "map_url": extra_data["map_url"] if extra_data else None,
            "amenities": extra_data["amenities"] if extra_data else {},
            "difficulty": extra_data["difficulty"] if extra_data else None,
            "tags": extra_data["tags"] if extra_data else [],
            "best_months": best_months
        }
        
    except Exception as e:
        print(f"Error in extract_description: {e} for tour_id: {tour_id}")
        return None

def process_tours():
    driver = None
    try:
        driver = setup_driver()
        tour_ids = get_tour_ids()
        hiking_data = []
        consent_handled = False
        
        for index, tour_id in enumerate(tour_ids):
            try:
                hiking_url = f"https://www.outdooractive.com/en/route/hiking-trail/{tour_id}"
                
                try:
                    if index == 0:
                        driver.get(hiking_url)
                        if not consent_handled:
                            handle_initial_consent(driver, tour_id=tour_id)
                            consent_handled = True
                    else:
                        driver.execute_script(f"window.open('{hiking_url}', '_blank')")
                        driver.switch_to.window(driver.window_handles[-1])
                except WebDriverException as e:
                    print(f"Error navigating to URL: {e} for tour_id: {tour_id}")
                    continue
                
                try:
                    WebDriverWait(driver, 10).until(
                        lambda d: d.current_url != hiking_url
                    )
                except TimeoutException:
                    pass
                
                try:
                    current_url = driver.current_url
                    route_type = extract_route_type(current_url)
                except Exception as e:
                    print(f"Error getting route type: {e} for tour_id: {tour_id}")
                    continue
                
                if route_type in routes_to_include:
                    try:
                        page_data = extract_description(driver, tour_id)
                        gpx_data = extract_gpx_data(tour_id)
                        
                        hike_info = {
                            "tour_id": tour_id,
                            "route_type": route_type,
                            "title": gpx_data['title'] if gpx_data else None,
                            "short_description": page_data["short_description"] if page_data else "",
                            "long_description": page_data["long_description"] if page_data else "",
                            "images": page_data["images"] if page_data else [],
                            "map_url": page_data["map_url"] if page_data else None,
                            "amenities": page_data["amenities"] if page_data else {},
                            "difficulty": page_data["difficulty"] if page_data else None,
                            "tags": page_data["tags"] if page_data else [],
                            "best_months": page_data["best_months"] if page_data else None,
                            "url": current_url,
                        }
                        hiking_data.append(hike_info)
                    except Exception as e:
                        print(f"Error processing tour data: {e} for tour_id: {tour_id}")
                        hiking_data.append({
                            "tour_id": tour_id,
                            "error": str(e),
                            "url": current_url if 'current_url' in locals() else None
                        })
                
                if (index + 1) % 1000 == 0:
                    try:
                        print(f"Processed {index + 1} routes...")
                        save_to_json(hiking_data, tour_id=tour_id)
                    except Exception as e:
                        print(f"Error saving checkpoint data: {e}")
                
                if index > 0:
                    try:
                        driver.close()
                        driver.switch_to.window(driver.window_handles[0])
                    except WebDriverException as e:
                        print(f"Error switching windows: {e}")
                        
            except Exception as e:
                print(f"Error processing tour: {e} for tour_id: {tour_id}")
                continue
                
    except Exception as e:
        print(f"Unexpected error in process_tours: {e}")
        
    finally:
        try:
            if hiking_data:
                save_to_json(hiking_data, tour_id=tour_id if 'tour_id' in locals() else 'final')
        except Exception as e:
            print(f"Error saving final data: {e}")
            
        try:
            if driver:
                driver.quit()
        except Exception as e:
            print(f"Error closing driver: {e}")

In [100]:
if __name__ == "__main__":
    process_tours()

Text not found in page source of tour_id: 129107930
Text not found in page source of tour_id: 129107928
Text not found in page source of tour_id: 129107929
Error getting short description: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF6385C80D5+2992373]
	(No symbol) [0x00007FF63825BFD0]
	(No symbol) [0x00007FF6380F590A]
	(No symbol) [0x00007FF63814926E]
	(No symbol) [0x00007FF63814955C]
	(No symbol) [0x00007FF6381927D7]
	(No symbol) [0x00007FF63816F3AF]
	(No symbol) [0x00007FF63818F584]
	(No symbol) [0x00007FF63816F113]
	(No symbol) [0x00007FF63813A918]
	(No symbol) [0x00007FF63813BA81]
	GetHandleVerifier [0x00007FF638626A2D+3379789]
	GetHandleVerifier [0x00007FF63863C32D+3468109]
	GetHandleVerifier [0x00007FF638630043+3418211]
	GetHandleVerifier [0x00007FF6383BC78B+847787]
	(No symbol) [0x00007FF63826757F]
	(No symbol) [0x00007FF638262FC4]
	(No symbol) [0x00007FF63826315D]
	(No symbol) [0x00007FF638252979]
	BaseThreadInitThunk [0x00007FFECCE3259D+29]
	RtlUserThreadStart [0x00007F