In [None]:
import re
import time

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Start a Chrome session through Selenium. `driver` is the single browser handle
# that the later cells of this notebook reuse for every page load and DOM query.
# (Ensure that chromedriver is in the system path or provide the full path)
driver = webdriver.Chrome()

# Open the Kaggle competitions listing; the query string sets hostSegmentIdFilter=1.
driver.get("https://www.kaggle.com/competitions?hostSegmentIdFilter=1")

In [None]:
# Scrape the competitions listing into `df`: one row per <li> inside the listing
# container, with the columns "name", "description" and "link".

# Upper bound, in seconds, on how long to wait for the listing container to appear.
LISTING_WAIT_SECONDS = 10

competitions = []

# On a fresh Run All this cell executes right after driver.get(), so the container
# may not be in the DOM yet. Block until it is present instead of querying once;
# WebDriverWait raises TimeoutException if it never shows up within the limit.
parent_div = WebDriverWait(driver, LISTING_WAIT_SECONDS).until(
    EC.presence_of_element_located(
        (By.CSS_SELECTOR, "div[data-testid='competition-landing-render-tid']")
    )
)

# Extract all competition rows (li elements)
competition_rows = parent_div.find_elements(By.CSS_SELECTOR, "li.MuiListItem-root")

for competition in competition_rows:
    try:
        # Look the anchor up once; it supplies both the name (aria-label) and the URL (href).
        anchor = competition.find_element(By.TAG_NAME, "a")
        name = anchor.get_attribute("aria-label")
        # The first <span> in the row is used as the description text.
        description = competition.find_elements(By.TAG_NAME, "span")[0].text
        link = anchor.get_attribute("href")

        competitions.append({
            "name": name,
            "description": description,
            "link": link
        })
    except Exception as e:
        # Best effort: a row that lacks the expected anchor/span is reported and skipped.
        print(f"Error extracting competition: {e}")

df = pd.DataFrame(competitions)
df

In [None]:
def _lookup_competition_id(link):
    """Load `link` in the shared driver and pull a numeric id out of its og:image URL.

    Returns the first all-digit path segment (as a string) found in the content of
    the page's <meta property="og:image"> tag, or None when no such segment exists.
    Any exception raised along the way is printed and also yields None.
    """
    try:
        driver.get(link)
        time.sleep(2)  # Allow time for the page to load

        og_image = driver.find_element(By.CSS_SELECTOR, "meta[property='og:image']")
        found = re.search(r"/(\d+)/", og_image.get_attribute("content"))
        if not found:
            return None
        return found.group(1)
    except Exception as e:
        print(f"Error fetching competition ID for {link}: {e}")
        return None


# Visit every competition page in row order, collecting one id (or None) per row.
competition_ids = []
for _, row in df.iterrows():
    competition_ids.append(_lookup_competition_id(row["link"]))

# Store the ids next to the rows they were fetched for, then display the frame.
df["competition_id"] = competition_ids
df

In [None]:
def _read_code_item(item):
    """Build one record dict from a list-item element.

    The lookups run top to bottom in the order written, so the first one that
    fails is the one whose exception reaches the caller.
    """
    return {
        "Title": item.get_attribute("aria-label").replace(" List Item", ""),
        "Updated Time": item.find_element(By.CSS_SELECTOR, "span[aria-label]").get_attribute("aria-label"),
        # The score text is split on ": " and the part after it is kept.
        "Score": item.find_element(By.CSS_SELECTOR, "span.sc-gdqXCh").text.split(": ")[1],
        # Only the first space-separated token of the comments link text is kept.
        "Comments": item.find_element(By.CSS_SELECTOR, "a.sc-uYFMi").text.split(" ")[0],
        "Competition": item.find_element(By.CSS_SELECTOR, "span.sc-buUSia").text,
        "Votes": item.find_element(By.CSS_SELECTOR, "span[aria-label*='votes']").text,
    }


# Every MUI list item on the page the driver currently has open.
list_items = driver.find_elements(By.CSS_SELECTOR, "li.MuiListItem-root")

# Keep the items that parse; report and skip the ones that do not.
code_data = []
for item in list_items:
    try:
        code_data.append(_read_code_item(item))
    except Exception as e:
        print(f"Error extracting data for item: {e}")

code_df = pd.DataFrame(code_data)
code_df

In [None]:
# Extract competition data from every MUI list item on the page the driver
# currently has open, into a frame with the columns
# Name / Prize / Description / Teams / Time Remaining / Link.
# NOTE(review): this rebinds `competitions` and `df`, which an earlier cell fills
# with lowercase keys ("name", "link", ...) — confirm the overwrite is intended.
competitions = []
competition_elements = driver.find_elements(By.CSS_SELECTOR, "li.MuiListItem-root")

for competition in competition_elements:
    try:
        name = competition.get_attribute("aria-label").replace(" List Item", "")
        prize = competition.find_element(By.CSS_SELECTOR, "div.sc-eauhAA.hnTMYu").text
        description = competition.find_element(By.CSS_SELECTOR, "span.sc-geXuza.sc-NOKRk").text
        teams_time_info = competition.find_elements(By.CSS_SELECTOR, "span.sc-cPoruP.jksDWk")

        # Defaults used when the info span is missing or too short to parse.
        teams = "N/A"
        time_remaining = "N/A"
        if teams_time_info:
            teams_info = teams_time_info[0].text
            # Split once; the last segment is the time remaining and the one
            # before it is the team count.
            parts = teams_info.split(" · ")
            # Guard the [-2] access: a text that mentions "Teams" but has a single
            # segment used to raise IndexError and discard the whole row.
            if "Teams" in teams_info and len(parts) >= 2:
                teams = parts[-2]
            time_remaining = parts[-1]

        link = competition.find_element(By.TAG_NAME, "a").get_attribute("href")

        competitions.append({
            "Name": name,
            "Prize": prize,
            "Description": description,
            "Teams": teams,
            "Time Remaining": time_remaining,
            "Link": link
        })
    except Exception as e:
        # Best effort: a row missing one of the expected elements is reported and skipped.
        print(f"Error extracting competition: {e}")

# Convert to Pandas DataFrame
df = pd.DataFrame(competitions)

In [None]:
# Run a text search on the page the driver currently has open and list the
# headings that come back.
# NOTE(review): no navigation happens in this cell, so it acts on whatever page an
# earlier cell left loaded — confirm that page really has an input named "q".

# Fixed 2-second pause to give the current page time to load
time.sleep(2)

# Locate the search box using its name attribute
# (find_element raises if no element with name="q" exists)
search_box = driver.find_element(By.NAME, "q")

# Type a query into the search box
search_box.send_keys("Selenium Python")

# Press ENTER to search
search_box.send_keys(Keys.RETURN)

# Fixed 3-second pause for the results to load
time.sleep(3)

# Retrieve search results: every <h3> element on the page is treated as one result
search_results = driver.find_elements(By.CSS_SELECTOR, "h3")

# Print the search results as a 1-based numbered list of the heading texts
for index, result in enumerate(search_results):
    print(f"{index + 1}. {result.text}")


In [None]:
# Close the browser: quit the WebDriver session started in the first cell.
# After this call `driver` can no longer be used by any other cell.
driver.quit()