In [21]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
!pip install googlemaps
import googlemaps
from datetime import datetime
from google.colab import userdata
!pip install beautifulsoup4
from bs4 import BeautifulSoup
!pip install itables



In [22]:
# Trekking routes organized by region
trekking_destinations =  [
        "Everest Base Camp Trek",
        "Gokyo Valley Trek",
        "Everest Three Passes Trek",
        "Everest Panorama Trek",
        "Renjo La Pass Trek",
        "Everest Base Camp Trek via Cho La Pass and Gokyo Lakes",
        "Luxury Everest Base Camp Trek",
        "Jiri to Everest Base Camp Trekking",
        "Pikey Peak Trek",
        "Everest Family Adventure with Chitwan National Park",
        "Everest Base Camp Trek Without Lukla Flight",
        "Annapurna Base Camp Trek in Comfort",
        "Annapurna Dhaulagiri Trek",
        "Annapurna Circuit Trek",
        "Annapurna Excursion",
        "Annapurna Sanctuary Trek",
        "Ghorepani Poonhill Trekking",
        "Mardi Himal Trek",
        "Dhampus Sarangkot Trek with Chitwan Jungle Safari",
        "Annapurna Base Camp Trekking",
        "Annapurna Family Adventure With Chitwan National Park",
        "Annapurna Machhapuchhre Trek",
        "Annapurna Base Camp Trekking from Pokhara",
        "Mohare Danda Trek with Khopra Ridge and Khayar Lake",
        "Nar Phu Valley Trekking",
        "Langtang Gosaikunda Trek",
        "Tamang Heritage Trek",
        "Gosaikunda Lake Trek",
        "Langtang Family Adventure with Chitwan National Park",
        "Langtang Valley Trek",
        "Manaslu Circuit Trek",
        "Upper Mustang Trek",
        "Nar Phu Valley Trek with Annapurna Circuit",
        "Tsum Valley Trek",
        "Kanchenjunga Circuit Trek",
        "Makalu Base Camp Trekking"
    ]


In [23]:
print(f"The trekking_destinations list contains {len(trekking_destinations)} items.")
print("First 5 trekking destinations:")
for i, destination in enumerate(trekking_destinations[:5]):
    print(f"- {destination}")

The trekking_destinations list contains 36 items.
First 5 trekking destinations:
- Everest Base Camp Trek
- Gokyo Valley Trek
- Everest Three Passes Trek
- Everest Panorama Trek
- Renjo La Pass Trek


In [24]:
import re

def slugify(text):
    """
    Converts a string into a URL-friendly slug.
    """
    # 1. Convert to lowercase
    text = text.lower()

    # 2. Replace spaces with hyphens
    text = text.replace(' ', '-')

    # 3. Remove all characters that are not alphanumeric or hyphens
    text = re.sub(r'[^a-z0-9-]', '', text)

    # 4. Remove any multiple consecutive hyphens, replacing them with a single hyphen
    text = re.sub(r'-+', '-', text)

    # 5. Remove leading or trailing hyphens
    text = text.strip('-')

    return text

# Test the function with a few examples from the trekking_destinations list
print("Testing slugify function:")
for i, destination in enumerate(trekking_destinations[:5]):
    slug = slugify(destination)
    print(f"Original: '{destination}' -> Slug: '{slug}'")


Testing slugify function:
Original: 'Everest Base Camp Trek' -> Slug: 'everest-base-camp-trek'
Original: 'Gokyo Valley Trek' -> Slug: 'gokyo-valley-trek'
Original: 'Everest Three Passes Trek' -> Slug: 'everest-three-passes-trek'
Original: 'Everest Panorama Trek' -> Slug: 'everest-panorama-trek'
Original: 'Renjo La Pass Trek' -> Slug: 'renjo-la-pass-trek'


In [25]:
BASE_URL = 'https://www.adventuretreksnepal.com/'
print(f"Base URL defined as: {BASE_URL}")

Base URL defined as: https://www.adventuretreksnepal.com/


In [26]:
def construct_full_url(base_url, destination_name):
    """
    Constructs a full URL using the base URL and a slugified destination name.
    """
    slug = slugify(destination_name)
    return f"{base_url}{slug}"

# Test the function with a few examples
print("Testing construct_full_url function:")
for i, destination in enumerate(trekking_destinations[:5]):
    full_url = construct_full_url(BASE_URL, destination)
    print(f"Original: '{destination}' -> URL: '{full_url}'")

Testing construct_full_url function:
Original: 'Everest Base Camp Trek' -> URL: 'https://www.adventuretreksnepal.com/everest-base-camp-trek'
Original: 'Gokyo Valley Trek' -> URL: 'https://www.adventuretreksnepal.com/gokyo-valley-trek'
Original: 'Everest Three Passes Trek' -> URL: 'https://www.adventuretreksnepal.com/everest-three-passes-trek'
Original: 'Everest Panorama Trek' -> URL: 'https://www.adventuretreksnepal.com/everest-panorama-trek'
Original: 'Renjo La Pass Trek' -> URL: 'https://www.adventuretreksnepal.com/renjo-la-pass-trek'


In [27]:
all_destination_urls = []
for destination in trekking_destinations:
    full_url = construct_full_url(BASE_URL, destination)
    all_destination_urls.append(full_url)

print(f"Generated {len(all_destination_urls)} URLs.")
print("First 5 generated URLs:")
for url in all_destination_urls[:5]:
    print(f"- {url}")

Generated 36 URLs.
First 5 generated URLs:
- https://www.adventuretreksnepal.com/everest-base-camp-trek
- https://www.adventuretreksnepal.com/gokyo-valley-trek
- https://www.adventuretreksnepal.com/everest-three-passes-trek
- https://www.adventuretreksnepal.com/everest-panorama-trek
- https://www.adventuretreksnepal.com/renjo-la-pass-trek


**Reasoning**:
Now that all destination URLs are prepared, the next step is to fetch the HTML content from each URL using the `requests` library, incorporating error handling for robust scraping.



In [28]:
import requests

def fetch_html_content(url):
    """
    Fetches HTML content from a given URL with error handling.
    """
    try:
        response = requests.get(url, timeout=10) # Set a timeout for the request
        response.raise_for_status()  # Raise an HTTPError for bad responses (4xx or 5xx)
        print(f"Successfully fetched HTML from: {url}")
        return response.text
    except requests.exceptions.HTTPError as errh:
        print(f"HTTP Error fetching {url}: {errh}")
    except requests.exceptions.ConnectionError as errc:
        print(f"Error Connecting to {url}: {errc}")
    except requests.exceptions.Timeout as errt:
        print(f"Timeout Error fetching {url}: {errt}")
    except requests.exceptions.RequestException as err:
        print(f"An error occurred fetching {url}: {err}")
    return None

print("Defined function to fetch HTML content with error handling.")

Defined function to fetch HTML content with error handling.


**Reasoning**:
Now, I will iterate through the list of all destination URLs, fetch the HTML content for each using the `fetch_html_content` function, and display the first 500 characters of the HTML for each successful request.



In [29]:


print("Fetching HTML content for each destination URL...")

# FIX 1: Initialize as a LIST to hold multiple trip dictionaries
all_extracted_data_list = []

for i, url in enumerate(all_destination_urls):
    print(f"\n--- Processing URL {i+1}/{len(all_destination_urls)}: {url} ---")
    html_content = fetch_html_content(url)

    if html_content:
        soup = BeautifulSoup(html_content, 'html.parser')

        # FIX 2: Create a temporary dictionary for THIS specific trip
        current_trip = {}
        current_trip["url"] = url # Good practice to keep the source URL

        # --- Extract Title ---
        if soup.h1:
            title_text = soup.h1.get_text()
            current_trip["title"] = title_text.split('-')[0].strip()

        # --- Extract Trip Facts (Details) ---
        details = soup.find('ul', class_='trip-fact')
        if details:
            details_items = details.find_all('li')
            for item in details_items:
                # Use recursive=False to get only the label text
                label = item.find(text=True, recursive=False).strip()
                value_span = item.find('span', class_='block')

                if value_span:
                    # Get value without tooltip text
                    value = value_span.find(text=True, recursive=False).strip()
                    current_trip[label] = value

        # --- Review Count  ---
        rating_div = soup.find('div', class_='pack__rating')
        if rating_div and rating_div.find('b'):
            # Extracts "140" from "140 reviews"
            current_trip["review_count"] = rating_div.find('b').get_text(strip=True).split()[0]


        # --- Extract Images ---
        images = soup.find_all('div', class_='hero-image')

        if images:
            current_trip["images"] = [
                img['src']
                for div in images
                for img in div.find_all('img')
                if img.get('src')
            ]

        # --- Extract Itinerary ---
        days = {}
        itenary_container = soup.find('div', id='itinerary_accordion')
        if itenary_container:
            itenary_items = itenary_container.find_all('div', class_='accordion-item')
            for item in itenary_items:
                span = item.find('span')
                if span and span.find('b'):
                    day_key = span.find('b').get_text(strip=True).replace(':', '')
                    day_description = span.find('b').next_sibling
                    if day_description:
                        days[day_key] = day_description.strip()

            current_trip["itinerary"] = days



        # --- Extract Highlights ---
        highlights_heading = soup.find('h2', string='Highlights')
        if highlights_heading:
            highlights_ul = highlights_heading.find_next_sibling('ul')
            if highlights_ul:
                current_trip["highlights"] = [li.get_text(strip=True) for li in highlights_ul.find_all('li')]



        # FIX 3: Append the current_trip dictionary to the list
        all_extracted_data_list.append(current_trip)

    else:
        print(f"No HTML content retrieved for: {url}")

# FIX 5: Move DataFrame creation OUTSIDE the loop
df_extracted = pd.DataFrame(all_extracted_data_list)


Fetching HTML content for each destination URL...

--- Processing URL 1/36: https://www.adventuretreksnepal.com/everest-base-camp-trek ---
Successfully fetched HTML from: https://www.adventuretreksnepal.com/everest-base-camp-trek

--- Processing URL 2/36: https://www.adventuretreksnepal.com/gokyo-valley-trek ---


  label = item.find(text=True, recursive=False).strip()
  value = value_span.find(text=True, recursive=False).strip()


Successfully fetched HTML from: https://www.adventuretreksnepal.com/gokyo-valley-trek

--- Processing URL 3/36: https://www.adventuretreksnepal.com/everest-three-passes-trek ---
Successfully fetched HTML from: https://www.adventuretreksnepal.com/everest-three-passes-trek

--- Processing URL 4/36: https://www.adventuretreksnepal.com/everest-panorama-trek ---
Successfully fetched HTML from: https://www.adventuretreksnepal.com/everest-panorama-trek

--- Processing URL 5/36: https://www.adventuretreksnepal.com/renjo-la-pass-trek ---
Successfully fetched HTML from: https://www.adventuretreksnepal.com/renjo-la-pass-trek

--- Processing URL 6/36: https://www.adventuretreksnepal.com/everest-base-camp-trek-via-cho-la-pass-and-gokyo-lakes ---
Successfully fetched HTML from: https://www.adventuretreksnepal.com/everest-base-camp-trek-via-cho-la-pass-and-gokyo-lakes

--- Processing URL 7/36: https://www.adventuretreksnepal.com/luxury-everest-base-camp-trek ---
Successfully fetched HTML from: https:

In [30]:
df_extracted.to_csv("AdventursNepal.csv")

In [31]:
from itables import init_notebook_mode
init_notebook_mode(all_interactive=False)

In [32]:
df_extracted.head()

Unnamed: 0,url,title,Duration,Trip Grade,Country,Maximum Altitude,Group Size,Starts,Ends,Activities,Best Time,review_count,images,itinerary,highlights
0,https://www.adventuretreksnepal.com/everest-ba...,Everest Base Camp Trek,16 Days,Strenuous,Nepal,"5,364 m.",01-16,Kathmandu,Kathmandu,Tour / Trekking,Sep-May,140,[https://media.adventuretreksnepal.com/uploads...,"{'Day 01': 'Arrival in Kathmandu [1,338m] Tran...",[Embark on a bucket-list adventure and a journ...
1,https://www.adventuretreksnepal.com/gokyo-vall...,Gokyo Valley Trek,14 Days,Strenuous,Nepal,5360,,Kathmandu,Kathmandu,Trekking,September to May,21,[https://media.adventuretreksnepal.com/uploads...,{'Day 01': 'Welcome to Kathmandu and transfer ...,[Witness spectacular views of Mount Everest an...
2,https://www.adventuretreksnepal.com/everest-th...,Everest Three Passes Trek,21 Days,Strenuous,Nepal,5545,,Kathmandu,Kathmandu,Tour / Trekking,September to May,14,[https://media.adventuretreksnepal.com/uploads...,{'Day 01': 'Welcome to Kathmandu and transfer ...,[Cross the three high mountain passes(Kongma L...
3,https://www.adventuretreksnepal.com/everest-pa...,Everest Panorama Trek,12 Days,Moderate,Nepal,3860,,Kathmandu,Kathmandu,Tour / Trekking,September to May,22,[https://media.adventuretreksnepal.com/uploads...,{'Day 01': 'Welcome to Kathmandu and transfer ...,[A perfect short and easy adventure in the Khu...
4,https://www.adventuretreksnepal.com/renjo-la-p...,Renjo La Pass Trek,14 Days,Strenuous,Nepal,5360,,Kathmandu,Kathmandu,Trekking,September to May,5,[https://media.adventuretreksnepal.com/uploads...,{'Day 01': 'Welcome to Kathmandu and transfer ...,[A fresh high pass trekking program in the Eve...
