## Scraping University Data
Here we scrape university data from `USNews.com`. The data that can be collected includes:
- University Name
- Rank
- Tuition fees (some universities list separate out-of-state and in-state figures)
- Undergraduate enrollment

In [1]:
### Loading the Libraries
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException, ElementClickInterceptedException
import pandas as pd
import time

# Chrome options for the scraping session. The three switches below turn off
# Chrome's automation-related flags/extension (presumably so the site is less
# likely to treat the browser as a bot -- confirm against the target site).
chrome_options = Options()
chrome_options.add_experimental_option(name='useAutomationExtension', value=False)
chrome_options.add_experimental_option(name="excludeSwitches", value=["enable-automation"])
chrome_options.add_argument("--disable-blink-features=AutomationControlled")

# Machine-specific location of the ChromeDriver executable
chrome_driver_path = "D:/Program Setup/Selenium WebDriver/chromedriver.exe"

In [2]:
# Create a Service object pointing at the ChromeDriver executable
service = Service(chrome_driver_path)

# Start the Chrome session that the scraping cell passes to
# Scrape_University_data(); no other visible cell defines `driver`, so without
# this the notebook fails with NameError when run top-to-bottom.
driver = webdriver.Chrome(service=service, options=chrome_options)

In [6]:


def _load_all_rows(driver, load_wait, max_click_retries):
    '''
    Click the 'Load More' button until it is no longer present.

    Stops early (leaving whatever rows are already loaded) when the spinner
    does not disappear in time or when the click keeps being intercepted.
    '''
    load_more_selector = ".button__ButtonStyled-sc-1vhaw8r-1.bGXiGV.pager__ButtonStyled-sc-1i8e93j-1.dypUdv.type-secondary.size-large"
    spinner_locator = (By.CSS_SELECTOR, ".pager__Spinner-sc-1i8e93j-2")
    intercepted_in_a_row = 0

    while True:
        # find_elements returns an empty list (it never raises
        # NoSuchElementException), so an empty result means we are done.
        load_more_buttons = driver.find_elements(By.CSS_SELECTOR, load_more_selector)
        if not load_more_buttons:
            return

        load_more_button = load_more_buttons[0]
        # Centre the button in the viewport rather than aligning it to the
        # top edge, where fixed page elements are more likely to cover it.
        driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", load_more_button)
        print("Scrolling to Load More Button")

        try:
            load_more_button.click()
        except ElementClickInterceptedException:
            intercepted_in_a_row += 1
            if intercepted_in_a_row > max_click_retries:
                # Bounded retry: a permanently covered button must not hang the scrape.
                print("Load More button is still blocked; continuing with the rows loaded so far.")
                return
            print("Element Click Intercepted, retrying...")
            time.sleep(5)
            continue

        intercepted_in_a_row = 0
        print("Load More Clicked")
        time.sleep(load_wait)
        try:
            WebDriverWait(driver, 10).until(EC.invisibility_of_element_located(spinner_locator))
        except TimeoutException:
            print("Timed out waiting for Load More button or results to load.")
            return


def _extract_universities(driver):
    '''
    Read every ranking-table row into a list of dicts.

    Raises TimeoutException if the table body does not appear within 10 seconds.
    Rows missing any expected cell or element are skipped and counted.
    '''
    table_locator = (By.CSS_SELECTOR, 'tbody[class="search-table__TableBody-sc-8xxgib-4 jWtyuQ"]')
    table_body = WebDriverWait(driver, 10).until(EC.presence_of_element_located(table_locator))
    rows = table_body.find_elements(By.CSS_SELECTOR, 'tr[class="search-table__TableRow-sc-8xxgib-5 jSNVDf"]')

    universities = []
    skipped_rows = 0
    for row in rows:
        # Query the cells once per row instead of once per field.
        cells = row.find_elements(By.TAG_NAME, "td")
        try:
            summary_cell = cells[0]
            universities.append({
                "Ranking": summary_cell.find_element(By.TAG_NAME, "strong").text,
                "University": summary_cell.find_element(By.TAG_NAME, "h3").text,
                "Location": summary_cell.find_element(By.TAG_NAME, "p").text,
                "Tuition": cells[1].text,
                "Enrollment": cells[2].find_element(By.TAG_NAME, "span").text,
            })
        except (NoSuchElementException, IndexError):
            # Incomplete row (missing cell or inner element): leave it out,
            # but keep a count so the loss is visible.
            skipped_rows += 1

    if skipped_rows:
        print(f"Skipped {skipped_rows} row(s) with missing fields")
    return universities


def Scrape_University_data(driver, *, load_wait=10, max_click_retries=5):
    '''
    Scrape the Web Data of US Universities

    Opens the US News national-universities ranking table, expands it with the
    'Load More' button, extracts Ranking, University, Location, Tuition and
    Enrollment for each row, writes them to "Universities_ranking.csv" and
    prints each record.

    Parameters:
        driver: a started Selenium WebDriver session.
        load_wait: seconds to pause after each 'Load More' click before
            waiting for the spinner to disappear.
        max_click_retries: how many consecutive intercepted clicks to retry
            before giving up on loading further rows.

    Returns:
        The pandas DataFrame that was written to CSV, or None when the page
        could not be opened, the table was not found, or no rows were scraped.
    '''
    try:
        driver.get("https://www.usnews.com/best-colleges/rankings/national-universities?_mode=table")
    except TimeoutException:
        print("link Didn't work")
        return None

    _load_all_rows(driver, load_wait, max_click_retries)

    try:
        universities = _extract_universities(driver)
    except (TimeoutException, NoSuchElementException):
        print("Unable to scrape the table")
        return None

    if not universities:
        # Avoid overwriting the output with an empty file.
        print("No university rows were scraped; CSV not written")
        return None

    # Convert scraped data into a DataFrame
    df = pd.DataFrame(universities)

    # Save DataFrame to a CSV file
    df.to_csv("Universities_ranking.csv", index=False)
    print("Data Generated on CSV")
    print("Scraped universities:")
    for university in universities:
        print(university)

    return df

# Run the scraper; `driver` must already be a started Selenium WebDriver session.
Scrape_University_data(driver)

Scrolling to Load More Button
Load More Clicked
Scrolling to Load More Button
Load More Clicked
Scrolling to Load More Button
Load More Clicked
Scrolling to Load More Button
Load More Clicked
Scrolling to Load More Button
Load More Clicked
Scrolling to Load More Button
Load More Clicked
Scrolling to Load More Button
Load More Clicked
Scrolling to Load More Button
Load More Clicked
Scrolling to Load More Button
Load More Clicked
Scrolling to Load More Button
Load More Clicked
Scrolling to Load More Button
Load More Clicked
Scrolling to Load More Button
Load More Clicked
Scrolling to Load More Button
Load More Clicked
Scrolling to Load More Button
Load More Clicked
Scrolling to Load More Button
Load More Clicked
Scrolling to Load More Button
Load More Clicked
Scrolling to Load More Button
Load More Clicked
Scrolling to Load More Button
Load More Clicked
Scrolling to Load More Button
Element Click Intercepted, retrying...
Scrolling to Load More Button
Load More Clicked
Scrolling to Load M