In [1]:
# Cell 1: Import Libraries
import requests
from bs4 import BeautifulSoup
import logging
import csv
import time

In [2]:
# Cell 2: Set up URL and Headers
url = 'https://www.tripadvisor.com/Attractions-g293961-Activities-c61-t243-Sri_Lanka.html'

headers = {
    "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    # Note: Removed Accept-Encoding to fix garbled text issue
    "DNT": "1",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
}

print("Headers configured successfully")

Headers configured successfully


In [3]:
# Cell 3: Create Session and Make Request
session = requests.Session()
session.headers.update(headers)

try:
    response = session.get(url, timeout=10)
    response.raise_for_status()
    
    print(f"✅ Status Code: {response.status_code}")
    print(f"📦 Content Encoding: {response.headers.get('Content-Encoding', 'None')}")
    print(f"📄 Content Type: {response.headers.get('Content-Type', 'None')}")
    print(f"📏 Content Length: {len(response.content)} bytes")
    
except requests.exceptions.RequestException as e:
    print(f"❌ Request failed: {e}")

✅ Status Code: 200
📦 Content Encoding: gzip
📄 Content Type: text/html; charset=utf-8
📏 Content Length: 1979930 bytes


In [4]:
# Cell 4: Handle Text Encoding
# Ensure proper encoding to avoid garbled text
response.encoding = response.apparent_encoding or 'utf-8'

print(f"🔤 Detected encoding: {response.encoding}")
print(f"📝 Response text length: {len(response.text)} characters")

🔤 Detected encoding: utf-8
📝 Response text length: 1979799 characters


In [5]:
# Cell 5: Parse with BeautifulSoup
soup = BeautifulSoup(response.text, 'html.parser')

print("🍲 BeautifulSoup parsing completed")
print(f"📊 Found {len(soup.find_all())} HTML elements")

🍲 BeautifulSoup parsing completed
📊 Found 7121 HTML elements


In [6]:
# Cell 6: Test the Parsed Content
# Display first 500 characters to verify it's readable
print("🔍 First 500 characters of parsed content:")
print("=" * 50)
print(soup.prettify()[:500])
print("=" * 50)

🔍 First 500 characters of parsed content:
<!DOCTYPE html>
<html lang="en-US">
 <head>
  <link href="https://static.tacdn.com/img2/brand_refresh_2025/application_icons/favicon_2025.ico" id="favicon" rel="icon" type="image/x-icon"/>
  <link href="https://static.tacdn.com/img2/brand_refresh_2025/application_icons/icon.svg" rel="icon" type="image/svg+xml"/>
  <link href="https://static.tacdn.com/img2/brand_refresh_2025/application_icons/apple_touch_icon.png" rel="apple-touch-icon" sizes="180x180"/>
  <link color="#00210c" href="https://stat


In [7]:
with open("scraped_page.txt", "w", encoding="utf-8") as file:
    file.write(soup.prettify())

In [8]:
# Cell 7: Check for Common TripAdvisor Elements
# Test if we can find typical TripAdvisor elements
card_name = soup.find('h3')
if card_name:
    print(f"🏪 found: {card_name.get_text().strip()}")
else:
    print("⚠️ not found - might be blocked or structure changed")

🏪 found: 1. Private Jeep Safari at Minneriya National Park to Visit Elephants


In [9]:
general_infos = soup.find('div', class_='biGQs _P pZUbB hmDzD').text.strip()
print(general_infos)

AttributeError: 'NoneType' object has no attribute 'text'

In [None]:
sections = soup.find_all('section', class_='mowmC')

In [None]:
sections = soup.find_all('section', class_='mowmC')

for section in sections:
    title = section.find('h3')
    rating = section.find('div', class_='biGQs _P pZUbB hmDzD')
    recomendation = section.find_all('span', class_='biGQs _P pZUbB egaXP hmDzD')
    
    others = section.find_all('div', class_='biGQs _P pZUbB hmDzD')

    # Initialize variables as None (or some default value)
    total_reviews = types = duration = caution = None

    if len(others) > 1:
        total_reviews = others[1]
    if len(others) > 2:
        types = others[2]
    if len(others) > 3:
        duration = others[3]
    if len(others) > 4:
        caution = others[4]

    print(f"Title: {title.text}")
    print(f"Rating: {rating.text}") 
    print(f"Recommendation: {recomendation[1].text}")
    print(f"Total Reviews: {total_reviews.text.strip()}")
    print(f"Types: {types.text.strip()}")
    print(f"Duration: {duration.text.strip()}")
    print(f"Caution: {caution.text.strip()}")
    print("")


Title: 1. Private Jeep Safari at Minneriya National Park to Visit Elephants
Rating: 4.9
Recommendation: Recommended by 98% of travelers
Total Reviews: 210
Types: 4WD Tours
Duration: 3–4 hours
Caution: Attention: According to the weather conditions, Minneriya National Park is the best these days. You can see a huge number…

Title: 2. Special leopards safari Yala National park - 04.30 am to 11.30 am
Rating: 4.9
Recommendation: Recommended by 97% of travelers
Total Reviews: 154
Types: Private and Luxury
Duration: 6+ hours
Caution: People who like to see more wild life this is the perfect tour with janaka safari,

we do our best for the leopard. start…

Title: 3. Wilpattu National Park Jeep Safari from Negombo / Waikkal (All Inclusive)
Rating: 4.9
Recommendation: Recommended by 98% of travelers
Total Reviews: 83
Types: 4WD Tours
Duration: 10–16 hours
Caution: Today you will enjoy an adventurous full-day safari at Wilpattu National Park including breakfast and lunch at the Park. …



AttributeError: 'NoneType' object has no attribute 'text'

In [None]:
headers = soup.find_all('h3')

for header in headers:
    print(header.text)

1. Private Jeep Safari at Minneriya National Park to Visit Elephants
2. Special leopards safari Yala National park - 04.30 am to 11.30 am
3. Wilpattu National Park Jeep Safari from Negombo / Waikkal (All Inclusive)
4. Yala National Park Wildlife Safari/ Game Drive - by Ajith Safari
5. Full day / Half Day WildlifeAdventure in Yala-National-Park 
6. 2h Bentota River Safari: Crocodiles, Monkeys, Mangrove & more
7. Udawalawe National Park Half Day Jeep Safari (Private Tour)
8. Minneriya National Park halfday Safari
9. Udawalawe Safari DayTrip frm Bentota/Kalutara/Ahungalla/Hikkaduwa
10. Negombo Lagoon Boat Safari From Negombo
11. Yala National Park Special Leopard Safari Tours By Dinuka Safari
12. Private Jeep Safari at Minneriya National Park - Half Day Tour
13. Full day Safari - Yala National Park - 04.30 am to 06.00 pm with - Janaka safari
14. Ella: Transfer to Tangalle/Mirissa/Galle & Yala/Udawalawe Safari
15. Udawalawe National Park | Private Safari Tour 
16. Wilpattu National Park Sa

In [None]:
# Cell 8: Clean up
session.close()
print("🔒 Session closed successfully")