WEB SCRAPING-ASSIGNMENT3

1. Write a Python program that searches www.amazon.in for all products matching a given product name. The
product to be searched is taken as input from the user. For example, if the user input is ‘guitar’, then search for
guitars.

In [12]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
import time

# Start one Chrome session at module level; search_amazon() below reads this
# `driver` global, and the script quits it once the search has been printed.
driver = webdriver.Chrome()

def search_amazon(product):
    """Search amazon.in for *product* and print up to ten result titles.

    Uses the module-level ``driver``. Prints how many title elements matched,
    followed by a numbered list of the first ten titles.
    """
    title_xpath = "//span[@class='a-size-medium a-color-base a-text-normal']"

    # Open the home page, type the query into the search bar and press Enter.
    driver.get("https://www.amazon.in")
    query_field = driver.find_element(By.ID, "twotabsearchtextbox")
    query_field.send_keys(product)
    query_field.send_keys(Keys.RETURN)

    # Fixed pause so the results page can render before it is queried.
    time.sleep(3)

    matches = driver.find_elements(By.XPATH, title_xpath)
    print(f"Found {len(matches)} products for '{product}':\n")

    # Only the first ten titles are listed, numbered from 1.
    for position, element in enumerate(matches[:10], start=1):
        print(f"{position}. {element.text}")

# Prompt for the product and run the search. Everything sits inside
# try/finally so the Chrome session is closed even when the prompt is
# interrupted or the search raises (page not loaded, element not found, ...).
try:
    product_to_search = input("Enter the product to search on Amazon: ")
    search_amazon(product_to_search)
finally:
    # Close the driver after scraping
    driver.quit()


Enter the product to search on Amazon:  guitar


Found 2 products for 'guitar':

1. Naayaab Craft Guitar Xylophone with 5 Tones, Musical Toy for Kids with Child Safe Mallets, Best Educational Development Musical Kid Toy as Best Holiday/Birthday Gift for Your Mini Musicians, 5 Knocks
2. East top Harmonica C, Diatonic Harmonica Key of C 10 Holes 20 Tones Mouth Organ Blues Harp Harmonica For Adults, Kids, Beginners, Professionals and Students


2. In the above question, now scrape the following details of each product listed in the first 3 pages of your search
results and save them in a data frame and a CSV file. If a product has fewer than 3 pages of search results, then
scrape all the products available under that product name. Details to be scraped are: "Brand
Name", "Name of the Product", "Price", "Return/Exchange", "Expected Delivery", "Availability" and
"Product URL". If any of the details are missing for a product, replace them with "-".

In [17]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException

# Shared Chrome session for this cell: extract_product_details() and
# scrape_amazon() read this `driver` global, and main() quits it at the end.
driver = webdriver.Chrome()

# Function to extract product details from a single page
def extract_product_details():
    """Collect one record per search result on the page currently loaded.

    Reads from the module-level ``driver``. Each record is a dict with the
    keys "Brand Name", "Name of the Product", "Price", "Return/Exchange",
    "Expected Delivery", "Availability" and "Product URL"; a field whose
    element cannot be found inside the result card is stored as "-".

    Returns:
        list[dict]: the records, in the order the result cards were found.
    """
    # Column name -> XPath (relative to a result card) whose text fills it.
    # The order here is the order in which the lookups are performed.
    text_xpaths = {
        "Brand Name": ".//span[@class='a-size-base-plus a-color-base']",
        "Name of the Product": ".//span[@class='a-size-medium a-color-base a-text-normal']",
        "Price": ".//span[@class='a-price-whole']",
        "Return/Exchange": ".//span[contains(text(),'Free Return')]",
        "Expected Delivery": ".//span[contains(text(),'Get it by')]",
        "Availability": ".//span[contains(text(),'In stock')]",
    }
    link_xpath = ".//a[@class='a-link-normal s-no-outline']"

    records = []
    cards = driver.find_elements(By.XPATH, "//div[@data-component-type='s-search-result']")

    for card in cards:
        record = {}

        for column, xpath in text_xpaths.items():
            try:
                record[column] = card.find_element(By.XPATH, xpath).text
            except NoSuchElementException:
                record[column] = "-"

        # The URL comes from the anchor's href attribute rather than its text.
        try:
            record["Product URL"] = card.find_element(By.XPATH, link_xpath).get_attribute("href")
        except NoSuchElementException:
            record["Product URL"] = "-"

        records.append(record)

    return records

# Function to scrape multiple pages of products
def scrape_amazon(product, num_pages=3):
    """Search amazon.in for *product* and scrape up to *num_pages* result pages.

    Args:
        product: Search text typed into Amazon's search box.
        num_pages: Maximum number of result pages to scrape (default 3).

    Returns:
        list[dict]: the records returned by ``extract_product_details()`` for
        every page visited, concatenated in page order.
    """
    all_products = []

    # Open Amazon and search for the product
    driver.get("https://www.amazon.in")
    search_box = driver.find_element(By.ID, "twotabsearchtextbox")
    search_box.send_keys(product)
    search_box.submit()

    for page in range(1, num_pages + 1):
        time.sleep(3)  # Wait for the page to load

        all_products.extend(extract_product_details())

        # The last requested page has just been scraped; do not navigate on
        # to a page whose contents would never be read.
        if page == num_pages:
            break

        try:
            next_page = driver.find_element(By.XPATH, "//a[contains(@class, 's-pagination-next')]")
        except NoSuchElementException:
            # No clickable "next" link: the search has fewer pages than asked.
            print(f"Less than {num_pages} pages found. Scraping completed.")
            break
        next_page.click()

    return all_products

# Main function to start scraping and save data to a CSV
def main():
    """Prompt for a product, scrape three Amazon result pages and save a CSV.

    The CSV is written to ``<product>_amazon_products.csv`` in the current
    working directory. Surrounding whitespace is stripped from the input so
    it does not end up in the search query or the file name.
    """
    try:
        # Take input from the user
        product_to_search = input("Enter the product to search on Amazon: ").strip()

        # Scrape product details for the first 3 pages (or less if fewer pages exist)
        scraped_data = scrape_amazon(product_to_search, num_pages=3)
    finally:
        # Always release the browser, including when scraping raises.
        driver.quit()

    # Convert to a DataFrame
    df = pd.DataFrame(scraped_data)

    # Save the data to a CSV file
    csv_filename = f"{product_to_search}_amazon_products.csv"
    df.to_csv(csv_filename, index=False)
    print(f"Scraped data saved to {csv_filename}")

# Run the script
if __name__ == "__main__":
    main()


Enter the product to search on Amazon:  guitar


Scraped data saved to guitar_amazon_products.csv


3. Write a Python program to access the search bar and search button on images.google.com and scrape 10
images for each of the keywords ‘fruits’, ‘cars’, ‘Machine Learning’, ‘Guitar’ and ‘Cakes’.

In [22]:
import os
import time
import requests
from PIL import Image
from io import BytesIO
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

# One Chrome session for the whole cell: scrape_google_images() drives this
# `driver` global for every search term and main() quits it afterwards.
driver = webdriver.Chrome()

# Function to download and save images
def download_images(image_urls, folder_name):
    """Download every URL in *image_urls* and save it as a JPEG in *folder_name*.

    Files are named ``<folder>_<n>.jpg`` with ``n`` starting at 1, where
    ``<folder>`` is the last path component of *folder_name*. The folder is
    created if needed. A failure on one image is reported on stdout and does
    not stop the remaining downloads.

    Args:
        image_urls: Iterable of HTTP(S) image URLs.
        folder_name: Destination directory.
    """
    os.makedirs(folder_name, exist_ok=True)

    # Use only the final path component as the file prefix so a nested
    # folder_name does not inject path separators into the file name.
    prefix = os.path.basename(os.path.normpath(folder_name))

    for number, url in enumerate(image_urls, start=1):
        try:
            # A timeout stops one unresponsive host from hanging the run;
            # raise_for_status() rejects HTML error pages before decoding.
            response = requests.get(url, timeout=15)
            response.raise_for_status()

            image = Image.open(BytesIO(response.content))
            # JPEG cannot store alpha or palette data, so normalise to RGB
            # before saving instead of failing on such images.
            if image.mode != "RGB":
                image = image.convert("RGB")

            image_filename = os.path.join(folder_name, f"{prefix}_{number}.jpg")
            image.save(image_filename, "JPEG")
            print(f"Downloaded {image_filename}")
        except (requests.RequestException, OSError, ValueError) as e:
            print(f"Could not download image {number}: {e}")

# Function to scrape images from Google Images for a given search term
def scrape_google_images(search_term, num_images=10):
    """Search Google Images for *search_term* and download up to *num_images*.

    Each thumbnail is clicked to reveal the full-size image, whose ``src`` is
    collected; the URLs are then passed to ``download_images`` with
    *search_term* as the folder name.

    Args:
        search_term: Keyword(s) to search for.
        num_images: Maximum number of image URLs to collect (default 10).

    NOTE(review): the recorded run in this notebook printed no
    "Downloaded ..." lines, so the ``img.rg_i`` / ``img.n3VNCb`` selectors may
    no longer match Google's markup -- verify against the live page.
    """
    from urllib.parse import quote_plus

    from selenium.common.exceptions import WebDriverException

    # Encode the term so spaces ("Machine Learning") stay inside the query.
    driver.get(f"https://images.google.com/?q={quote_plus(search_term)}")

    # Accept Google consent pop-up if it appears
    time.sleep(2)
    try:
        driver.find_element(By.XPATH, "//button[contains(text(),'I agree')]").click()
    except WebDriverException:
        pass  # No pop-up (or it is not clickable): carry on with the search.

    # Find the search bar and input the search term
    search_box = driver.find_element(By.NAME, "q")
    search_box.clear()
    search_box.send_keys(search_term)
    search_box.submit()

    time.sleep(2)  # Wait for the page to load

    # Scroll down so more thumbnails are loaded.
    for _ in range(2):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)

    # Walk the thumbnails until enough distinct URLs are collected, so one
    # failed thumbnail does not reduce the final count.
    image_urls = []
    for thumbnail in driver.find_elements(By.CSS_SELECTOR, "img.rg_i"):
        if len(image_urls) >= num_images:
            break
        try:
            thumbnail.click()
            time.sleep(1)  # Allow time for the larger image to load
            image_url = driver.find_element(By.CSS_SELECTOR, "img.n3VNCb").get_attribute("src")
        except WebDriverException as e:
            print(f"Error while fetching image: {e}")
            continue

        # Keep real HTTP(S) links only (inline data: URIs are skipped) and
        # drop repeats caused by the preview not having refreshed yet.
        if image_url and image_url.startswith("http") and image_url not in image_urls:
            image_urls.append(image_url)

    # Download and save the images
    download_images(image_urls, search_term)

# Main function to scrape images for different search terms
def main():
    """Scrape ten Google Images results for each of the assignment keywords."""
    search_terms = ["fruits", "cars", "Machine Learning", "Guitar", "Cakes"]

    try:
        for term in search_terms:
            print(f"Scraping images for: {term}")
            scrape_google_images(term, num_images=10)
    finally:
        # Close the browser even when one of the searches raises.
        driver.quit()

# Run the script
if __name__ == "__main__":
    main()


Scraping images for: fruits
Scraping images for: cars
Scraping images for: Machine Learning
Scraping images for: Guitar
Scraping images for: Cakes


4. Write a Python program to search for a smartphone (e.g. OnePlus Nord, Pixel 4a, etc.) on www.flipkart.com
and scrape the following details for all the search results displayed on the 1st page. Details to be scraped: “Brand
Name”, “Smartphone name”, “Colour”, “RAM”, “Storage(ROM)”, “Primary Camera”,
“Secondary Camera”, “Display Size”, “Battery Capacity”, “Price”, “Product URL”. In case any of the
details is missing, replace it with “-”. Save your results in a dataframe and a CSV file.

In [33]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException

# Module-level Chrome session used by extract_smartphone_details() and
# scrape_flipkart() through the `driver` global; main() quits it when done.
driver = webdriver.Chrome()

# Function to extract smartphone details
def extract_smartphone_details():
    """Build one record per result container on the Flipkart page now loaded.

    Reads from the module-level ``driver``. Name, price and URL come from
    dedicated elements; colour, RAM, storage, cameras, display and battery are
    taken positionally from the lines of the container's specification list.
    Any value that is absent or blank is stored as "-".

    Returns:
        list[dict]: records with the keys "Brand Name", "Smartphone Name",
        "Colour", "RAM", "Storage(ROM)", "Primary Camera", "Secondary Camera",
        "Display Size", "Battery Capacity", "Price" and "Product URL".
    """

    def item(parts, index):
        """Return the stripped entry at *index*, or "-" if absent or blank."""
        if index < len(parts):
            return parts[index].strip() or "-"
        return "-"

    smartphones = []

    for container in driver.find_elements(By.XPATH, "//div[@class='_1AtVbE']"):
        try:
            smartphone_name = container.find_element(By.XPATH, ".//a[@class='IRpwTa']").text
        except NoSuchElementException:
            smartphone_name = "-"

        # First word is often the brand; a blank name has no words at all.
        name_words = smartphone_name.split()
        brand_name = name_words[0] if name_words else "-"

        # Reset the spec lines for every container. Previously a container
        # without a spec list reused the lines of the one before it, so one
        # phone's cameras/display/battery were attributed to the next row.
        try:
            desc = container.find_element(By.XPATH, ".//ul[@class='_1xgFaf']").text
            details = desc.split("\n")
        except NoSuchElementException:
            details = []

        # Positional parsing with bounds checks: a short spec list now gives
        # "-" for the missing fields instead of raising IndexError.
        color = (details[0].split(", ")[-1] if details else "") or "-"
        memory = details[1].split("|") if len(details) > 1 else []
        cameras = details[2].split("|") if len(details) > 2 else []

        try:
            price = container.find_element(By.XPATH, ".//div[@class='_30jeq3']").text.replace("₹", "")
        except NoSuchElementException:
            price = "-"

        try:
            product_url = container.find_element(By.XPATH, ".//a[@class='_1fQZEK']").get_attribute("href")
        except NoSuchElementException:
            product_url = "-"

        smartphones.append({
            "Brand Name": brand_name,
            "Smartphone Name": smartphone_name,
            "Colour": color,
            "RAM": item(memory, 0),
            "Storage(ROM)": item(memory, 1),
            "Primary Camera": item(cameras, 0),
            "Secondary Camera": item(cameras, 1),
            "Display Size": item(details, 3),
            "Battery Capacity": item(details, 4),
            "Price": price,
            "Product URL": product_url,
        })

    return smartphones

# Function to scrape smartphone details from Flipkart
def scrape_flipkart(smartphone, num_pages=1):
    """Search flipkart.com for *smartphone* and scrape the first result page.

    Args:
        smartphone: Search text typed into Flipkart's search box.
        num_pages: Accepted but not used by this function; only the first
            result page is scraped.

    Returns:
        The list produced by ``extract_smartphone_details()``.
    """
    driver.get("https://www.flipkart.com")
    time.sleep(2)  # give the landing page time to render

    # Dismiss the login overlay when one is shown; otherwise carry on.
    try:
        driver.find_element(By.XPATH, "//button[contains(text(), '✕')]").click()
    except NoSuchElementException:
        pass

    # Type the query into the search field and submit it.
    query_input = driver.find_element(By.NAME, "q")
    query_input.send_keys(smartphone)
    query_input.submit()
    time.sleep(2)  # give the result list time to render

    return extract_smartphone_details()

# Main function to scrape and save data into a CSV
def main():
    """Prompt for a smartphone, scrape Flipkart's first page and save a CSV.

    The CSV is written to ``<query>_flipkart_smartphones.csv`` in the current
    working directory. Surrounding whitespace is stripped from the input so a
    trailing space does not end up in the file name.
    """
    try:
        # Take input from the user
        smartphone_to_search = input("Enter the smartphone to search on Flipkart: ").strip()

        # Scrape smartphone details from the first page
        scraped_data = scrape_flipkart(smartphone_to_search)
    finally:
        # Always release the browser, including when scraping raises.
        driver.quit()

    # Convert to a DataFrame
    df = pd.DataFrame(scraped_data)

    # Save the data to a CSV file
    csv_filename = f"{smartphone_to_search}_flipkart_smartphones.csv"
    df.to_csv(csv_filename, index=False)
    print(f"Scraped data saved to {csv_filename}")

# Run the script
if __name__ == "__main__":
    main()


Enter the smartphone to search on Flipkart:  samsung mobile 


Scraped data saved to samsung mobile _flipkart_smartphones.csv


5. Write a program to scrape the geospatial coordinates (latitude, longitude) of a city searched on Google Maps.


In [36]:
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

# Chrome session shared through the `driver` global: it is driven by
# get_geospatial_coordinates() and quit by main() below.
driver = webdriver.Chrome()

# Function to get latitude and longitude from Google Maps
def get_geospatial_coordinates(city_name):
    """Search Google Maps for *city_name* and report its latitude/longitude.

    The coordinates are parsed from the ``@<lat>,<lng>`` part of the browser
    URL after the search has been given time to complete. They are printed
    and also returned so callers can use them.

    Args:
        city_name: Place name typed into the Maps search box.

    Returns:
        ``(latitude, longitude)`` as strings exactly as they appear in the
        URL, or ``None`` when the URL holds no numeric coordinate pair.
    """
    import re

    # Open Google Maps
    driver.get("https://www.google.com/maps")
    time.sleep(2)  # Allow page to load

    # Search for the city
    search_box = driver.find_element(By.ID, "searchboxinput")
    search_box.send_keys(city_name)
    search_box.submit()

    # Do all of the waiting *before* reading the URL. Previously the URL was
    # captured after 3 s and the remaining 2 s pause ran afterwards, where it
    # could no longer affect the value that was read.
    time.sleep(5)
    current_url = driver.current_url

    # Require two numeric values after "@" (e.g. "@12.9715987,77.5945627") so
    # an unrelated "@" in the URL is not reported as coordinates.
    match = re.search(r"@(-?\d+(?:\.\d+)?),(-?\d+(?:\.\d+)?)", current_url)
    if match is None:
        print(f"Could not extract coordinates for {city_name}.")
        return None

    latitude, longitude = match.groups()
    print(f"Coordinates of {city_name}:")
    print(f"Latitude: {latitude}")
    print(f"Longitude: {longitude}")
    return latitude, longitude
    
# Main function to run the script
def main():
    """Prompt for a city name and print its Google Maps coordinates."""
    try:
        city = input("Enter the city name: ")
        get_geospatial_coordinates(city)
    finally:
        # Close the browser even when the lookup raises.
        driver.quit()

# Run the script
if __name__ == "__main__":
    main()


Enter the city name:  chh sambhaji nagar


Coordinates of chh sambhaji nagar:
Latitude: 19.8836224
Longitude: 75.2975872


6. Write a program to scrape all the available details of the best gaming laptops from digit.in.

In [41]:
import time
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

# Chrome session shared through the `driver` global: extract_laptop_details()
# loads the page with it and main() quits it after the CSV is written.
driver = webdriver.Chrome()

# Function to extract gaming laptop details
def extract_laptop_details():
    """Scrape name, specifications, price and link of digit.in's gaming laptops.

    Loads the "best gaming laptops" page with the module-level ``driver`` and
    parses the rendered HTML with BeautifulSoup. A field whose element is
    missing from a laptop container is stored as "-".

    Returns:
        list[dict]: records with the keys "Laptop Name", "Specifications",
        "Price" and "Product URL".
    """
    from urllib.parse import urljoin

    page_url = "https://www.digit.in/top-products/best-gaming-laptops-40.html"

    def text_or_dash(tag):
        """Return the stripped text of *tag*, or "-" when the tag is missing."""
        return tag.text.strip() if tag is not None else "-"

    driver.get(page_url)
    time.sleep(3)  # Allow the page to load

    soup = BeautifulSoup(driver.page_source, 'html.parser')

    laptops = []
    for container in soup.find_all('div', class_='TopNumbeHeading sticky-footer'):
        link = container.find('a', href=True)
        # urljoin copes with absolute, root-relative and page-relative hrefs;
        # plain string concatenation produced broken URLs for anything other
        # than a root-relative path.
        product_url = urljoin(page_url, link['href']) if link is not None else "-"

        laptops.append({
            "Laptop Name": text_or_dash(container.find('h3')),
            "Specifications": text_or_dash(container.find('div', class_='Specs')),
            "Price": text_or_dash(container.find('div', class_='Price')).replace('₹', ''),
            "Product URL": product_url,
        })

    return laptops

# Main function to scrape and save data
def main():
    """Scrape digit.in's best gaming laptops into best_gaming_laptops_digit.csv."""
    try:
        # Scrape laptop details
        laptop_data = extract_laptop_details()
    finally:
        # Always release the browser, including when scraping raises.
        driver.quit()

    # Convert to a DataFrame
    df = pd.DataFrame(laptop_data)

    # Save the data to a CSV file
    csv_filename = "best_gaming_laptops_digit.csv"
    df.to_csv(csv_filename, index=False)
    print(f"Scraped data saved to {csv_filename}")

# Run the script
if __name__ == "__main__":
    main()


Scraped data saved to best_gaming_laptops_digit.csv


7. Write a Python program to scrape the details of all billionaires from www.forbes.com. Details to be scraped:
“Rank”, “Name”, “Net worth”, “Age”, “Citizenship”, “Source”, “Industry”.

In [44]:
import time
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

# Chrome session shared through the `driver` global:
# extract_billionaire_details() loads and scrolls the page with it, and
# main() quits it at the end.
driver = webdriver.Chrome()


# Function to scrape billionaire details
def extract_billionaire_details():
    """Scrape rank, name, net worth, age, citizenship, source and industry.

    Loads the Forbes billionaires page with the module-level ``driver``,
    scrolls until the page height stops growing, then parses the rendered
    HTML with BeautifulSoup. One record is produced per ``div.personName``
    element; a field that cannot be located is stored as "-".

    Returns:
        list[dict]: records with the keys "Rank", "Name", "Net Worth", "Age",
        "Citizenship", "Source" and "Industry".
    """
    # Column name -> class of the cell expected after the name cell.
    following_columns = (
        ("Net Worth", "netWorth"),
        ("Age", "age"),
        ("Citizenship", "countryOfCitizenship"),
        ("Source", "source"),
        ("Industry", "category"),
    )

    def cell_text(cell):
        """Return the stripped text of *cell*, or "-" when it is missing."""
        return cell.text.strip() if cell is not None else "-"

    # Load the Forbes Billionaires page
    driver.get("https://www.forbes.com/billionaires/")
    time.sleep(5)  # Allow the page to load

    # Keep scrolling to the bottom until the document stops getting taller.
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3)  # Wait for new content to load

        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

    soup = BeautifulSoup(driver.page_source, 'html.parser')

    billionaires = []
    for name_cell in soup.find_all('div', class_='personName'):
        # The matched element *is* the name cell, so its own text is the
        # name. Previously every field, including a second "personName", was
        # searched for inside this cell, so the name lookup could never
        # match and the other fields are not expected to live there either.
        #
        # NOTE(review): assumes the other cells are siblings of the name
        # cell, rank before it and the rest after -- confirm against the
        # live markup. If they are not siblings the fields fall back to "-".
        record = {
            "Rank": cell_text(name_cell.find_previous_sibling('div', class_='rank')),
            "Name": cell_text(name_cell),
        }
        for column, css_class in following_columns:
            record[column] = cell_text(name_cell.find_next_sibling('div', class_=css_class))

        billionaires.append(record)

    return billionaires

# Main function to scrape and save data
def main():
    """Scrape the Forbes billionaires list into forbes_billionaires.csv."""
    try:
        # Scrape billionaire details
        billionaire_data = extract_billionaire_details()
    finally:
        # Always release the browser, including when scraping raises.
        driver.quit()

    # Convert to a DataFrame
    df = pd.DataFrame(billionaire_data)

    # Save the data to a CSV file
    csv_filename = "forbes_billionaires.csv"
    df.to_csv(csv_filename, index=False)
    print(f"Scraped data saved to {csv_filename}")

# Run the script
if __name__ == "__main__":
    main()


Scraped data saved to forbes_billionaires.csv


8. Write a program to extract at least 500 comments, together with each comment's upvote count and the time it
was posted, from any YouTube video.

In [49]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

# Chrome session shared through the `driver` global by the scrolling and
# extraction helpers below; scrape_youtube_comments() quits it when finished.
driver = webdriver.Chrome()

# Function to scroll and load comments
def scroll_and_load_comments(target_count=500, max_idle_scrolls=5):
    """Scroll the loaded video page until enough comments are present.

    Scrolling stops as soon as *target_count* comment elements exist, or
    after *max_idle_scrolls* consecutive scrolls that loaded nothing new.
    The second condition prevents an endless loop on videos that have fewer
    comments than the target.

    Args:
        target_count: Number of comments to aim for (default 500).
        max_idle_scrolls: Consecutive scrolls without new comments that are
            tolerated before giving up (default 5).
    """
    # Nudge the page down first so the comment section starts loading.
    driver.execute_script("window.scrollTo(0, 600);")
    time.sleep(3)

    comment_count = 0
    idle_scrolls = 0
    while comment_count < target_count and idle_scrolls < max_idle_scrolls:
        driver.execute_script("window.scrollTo(0, document.documentElement.scrollHeight);")
        time.sleep(2)

        loaded = len(driver.find_elements(By.XPATH, '//*[@id="content-text"]'))
        # Count scrolls that made no progress; any progress resets the count.
        idle_scrolls = idle_scrolls + 1 if loaded <= comment_count else 0
        comment_count = loaded

# Function to extract comments and details
def extract_comments():
    """Return up to 500 comment records from the page currently loaded.

    Comment texts, vote counts and timestamps are read as three parallel
    element lists and paired by position. A value whose element is missing,
    or can no longer be read, is stored as "-"; an empty vote count is stored
    as "0".

    Returns:
        list[dict]: records with the keys "Comment", "Upvotes" and
        "Time Posted".
    """
    from selenium.common.exceptions import WebDriverException

    def text_at(elements, index):
        """Return ``elements[index].text``, or "-" if absent or unreadable."""
        if index >= len(elements):
            return "-"
        try:
            return elements[index].text
        except WebDriverException:
            # e.g. the element went stale after the page re-rendered.
            return "-"

    comment_elements = driver.find_elements(By.XPATH, '//*[@id="content-text"]')
    upvote_elements = driver.find_elements(By.XPATH, '//*[@id="vote-count-middle"]')
    time_elements = driver.find_elements(By.XPATH, '//*[@id="header-author"]/yt-formatted-string/a')

    comments = []
    for index in range(min(500, len(comment_elements))):
        # Read the vote text once; each .text access is a browser round trip.
        upvotes = text_at(upvote_elements, index)

        comments.append({
            "Comment": text_at(comment_elements, index),
            "Upvotes": upvotes if upvotes else "0",
            "Time Posted": text_at(time_elements, index),
        })

    return comments

# Main function to scrape YouTube comments
def scrape_youtube_comments(video_url):
    """Scrape comments from *video_url* and save them to youtube_comments.csv.

    Args:
        video_url: Address of the YouTube video page to open.
    """
    try:
        # Load the YouTube video page
        driver.get(video_url)
        time.sleep(5)  # Allow the page to load

        # Scroll and load more comments
        scroll_and_load_comments()

        # Extract comments and details
        comments_data = extract_comments()
    finally:
        # Always release the browser, including when scraping raises.
        driver.quit()

    # Convert to DataFrame
    df = pd.DataFrame(comments_data)

    # Save the data to a CSV file
    csv_filename = "youtube_comments.csv"
    df.to_csv(csv_filename, index=False)
    print(f"Scraped data saved to {csv_filename}")

# Run the script
if __name__ == "__main__":
    video_url = input("Enter the YouTube video URL: ")
    scrape_youtube_comments(video_url)


Enter the YouTube video URL:  https://www.youtube.com/watch?v=gPpQNzQP6gE


Scraped data saved to youtube_comments.csv


9. Write a Python program to scrape data for all available hostels in the “London” location from
https://www.hostelworld.com/. You have to scrape the hostel name, distance from city centre, ratings, total reviews,
overall reviews, privates from price, dorms from price, facilities and property description.

In [53]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

# Chrome session shared through the `driver` global: extract_hostel_details()
# loads and scrolls the page with it and main() quits it at the end.
driver = webdriver.Chrome()

# Function to extract hostel details
def extract_hostel_details():
    """Scrape the London hostel cards from hostelworld.com.

    Loads the London listing with the module-level ``driver``, scrolls until
    the page height stops growing, then reads each ``property-card`` element.
    A field whose element cannot be found or read is stored as "-".

    Returns:
        list[dict]: records with the keys "Hostel Name",
        "Distance from City Centre", "Ratings", "Total Reviews",
        "Overall Reviews", "Privates from Price", "Dorms from Price",
        "Facilities" and "Property Description".
    """
    from selenium.common.exceptions import WebDriverException

    # (column name, locator strategy, locator), in output-column order.
    field_locators = (
        ("Hostel Name", By.CLASS_NAME, 'property-card-title'),
        ("Distance from City Centre", By.CLASS_NAME, 'distance'),
        ("Ratings", By.CLASS_NAME, 'score'),
        ("Total Reviews", By.CLASS_NAME, 'reviews'),
        ("Overall Reviews", By.CLASS_NAME, 'overall-reviews'),
        ("Privates from Price", By.XPATH, './/span[contains(@class, "private-price")]'),
        ("Dorms from Price", By.XPATH, './/span[contains(@class, "dorm-price")]'),
        ("Facilities", By.CLASS_NAME, 'facilities'),
        ("Property Description", By.CLASS_NAME, 'property-card-description'),
    )

    # Load the Hostelworld page for London
    driver.get("https://www.hostelworld.com/hostels/london")
    time.sleep(5)  # Allow the page to load

    # Keep scrolling to the bottom until the document stops getting taller.
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

    hostels = []
    for card in driver.find_elements(By.CLASS_NAME, 'property-card'):
        record = {}
        for column, by, locator in field_locators:
            try:
                record[column] = card.find_element(by, locator).text.strip()
            except WebDriverException:
                # Missing or stale element: keep the row, mark the gap.
                record[column] = "-"
        hostels.append(record)

    return hostels

# Main function to scrape and save data
def main():
    """Scrape London hostels from Hostelworld into hostels_in_london.csv."""
    try:
        # Scrape hostel details
        hostel_data = extract_hostel_details()
    finally:
        # Always release the browser, including when scraping raises.
        driver.quit()

    # Convert to a DataFrame
    df = pd.DataFrame(hostel_data)

    # Save the data to a CSV file
    csv_filename = "hostels_in_london.csv"
    df.to_csv(csv_filename, index=False)
    print(f"Scraped data saved to {csv_filename}")

# Run the script
if __name__ == "__main__":
    main()


Scraped data saved to hostels_in_london.csv
