# Mini Project 1: Scraping Data from a Dynamic Webpage
- Install necessary Python libraries: selenium, beautifulsoup4 (bs4), and chromedriver-autoinstaller.
- Choose a dynamic webpage for scraping. For this project, we will scrape dynamic product data from a demo e-commerce site, like : [inmotionhosting](https://www.inmotionhosting.com/).


## Task

- Initialize Selenium WebDriver
- Load the Web Page
- Identify the elements that contain hosting plan details.
- Extract necessary data such as plan names, features, and pricing.
- Store and Save the Data
- Close Selenium WebDriver


In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup

In [41]:
# Setup Selenium WebDriver
options = webdriver.ChromeOptions()
options.add_argument("--start-maximized")
driver = webdriver.Chrome(options=options)

# InMotion Hosting page
url = "https://www.inmotionhosting.com/"
driver.get(url)

# yay nested content im so happy
def process_nested_content(driver):
    '''
    here we click the button and try to get a soup from the content \n
    if it cant, then it produces an empty soup
    '''
    term_soups = []
    
    # Wait for the specific section to be present
    wait = WebDriverWait(driver, 10)
    section = wait.until(EC.presence_of_element_located((By.ID, "shared-hosting-rostrum")))
    
    # Scroll into view if necessary
    driver.execute_script("arguments[0].scrollIntoView(true);", section)

    # Find all term selector buttons within this specific section
    term_buttons = section.find_elements(By.CSS_SELECTOR, "button.imh-term-selector")

    # # Print the list of buttons found
    # print(f"Found {len(term_buttons)} term buttons:")
    # for button in term_buttons:
    #     print(f"- {button.text}")

    # Print the h2 element from the container class
    try:
        container = section.find_element(By.CSS_SELECTOR, "div.container")
        h2_element = container.find_element(By.TAG_NAME, "h2")
        print(f"H2 found: {h2_element.text.strip()}")
    except Exception as e:
        print(f"Error finding h2 element: {str(e)}")
    
    for button in term_buttons:
        try:
            # Scroll the button into view
            driver.execute_script("arguments[0].scrollIntoView(true);", button)
            
            # Click the term button
            driver.execute_script("arguments[0].click();", button)
            print(f"Clicked term button: {button.text}")
            
            # Wait for the content to load
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.imh-rostrum-container")))
            
            # Get the HTML content of the imh-rostrum-container
            container_html = driver.find_element(By.CSS_SELECTOR, "div.imh-rostrum-container").get_attribute('outerHTML')
            
            # Create a soup from the container HTML
            soup = BeautifulSoup(container_html, 'html.parser')
            
            # Limit to 3 elements from the soup
            limited_soup = soup.find_all(limit=3)
            term_soups.append(limited_soup)

            # # Print all h2 elements from the soup
            # h2_elements = soup.find_all('h2')
            # for h2 in h2_elements:
            #     print(f"H2 found: {h2.text.strip()}")
            
            print(f"Captured soup for term: {button.text}")
            
        except Exception as e:
            print(f"Error processing term button {button.text}: {str(e)}")
            term_soups.append([])  # Append an empty list if there's an error
    
    return term_soups

soups = []



for i in range(5):  # Limit to 5 buttons
    try:
        # Wait for the container with buttons to load
        wait = WebDriverWait(driver, 30)
        container = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.imh-rostrum-container")))
        
        # Find all buttons within the container
        buttons = container.find_elements(By.CSS_SELECTOR, "a.cta-link.btn-secondary-alt")
        
        # Click the button
        driver.execute_script("arguments[0].click();", buttons[i])
        print(f"\nClicked button {i+1}") # Visual confirmation
        
        # Wait for the page to load after clicking
        WebDriverWait(driver, 10).until(EC.staleness_of(container))
        
        # Process nested content
        nested_soups = process_nested_content(driver)
        print(f"Captured {len(nested_soups)} term soups for main button {i+1}")
        
        # Get the page source and create a BeautifulSoup object
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        soups.append(soup)
        
        print(f"Captured soup for button {i+1}") # Visual confirmation
        print('=' * 50)
        
        # Navigate back to the initial page
        driver.back()
        
    except Exception as e:
        print(f"An error occurred while processing button {i+1}: {str(e)}")

driver.quit()

# confirmation of all soups got
print(f'\n## there is a total of {len(soups)} soups')


Clicked button 1
H2 found: Buy Shared Web Hosting Plans
Clicked term button: 3 Year
Captured soup for term: 3 Year
Clicked term button: 1 Year
Captured soup for term: 1 Year
Clicked term button: 1 Month
Captured soup for term: 1 Month
Captured 3 term soups for main button 1
Captured soup for button 1

Clicked button 2
H2 found: Buy Shared Web Hosting Plans
Clicked term button: 3 Year
Captured soup for term: 3 Year
Clicked term button: 1 Year
Captured soup for term: 1 Year
Clicked term button: 1 Month
Captured soup for term: 1 Month
Captured 3 term soups for main button 2
Captured soup for button 2

Clicked button 3
An error occurred while processing button 3: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF7194B6CC5+28821]
	(No symbol) [0x00007FF719423850]
	(No symbol) [0x00007FF7192C578A]
	(No symbol) [0x00007FF7193191BE]
	(No symbol) [0x00007FF7193194AC]
	(No symbol) [0x00007FF719362647]
	(No symbol) [0x00007FF71933F33F]
	(No symbol) [0x00007FF71935F412]
	(No symbol) [0x00007FF71

It clicks on the first button twice, then jumps to the third where all the structure is different