1. Go to the Election Commission of India's results page for Bihar:
https://results.eci.gov.in/PcResultGenJune2024/partywiseresult-S04.htm 

2. Select each constituency from the dropdown:
https://results.eci.gov.in/PcResultGenJune2024/candidateswise-S049.htm 

3. Get the list of all the candidates, party name and the number of votes each candidate secured 

4. Store that in a table (CSV file) that has the following columns:
   a. Candidate name
   b. Constituency 
   c. Party name 
   d. Number of votes 

In [56]:
import os
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.select import Select
from webdriver_manager.chrome import ChromeDriverManager

In [None]:
def setup_driver():
    """Start a maximized Chrome session configured to download silently.

    A ``downloads`` folder is created under the current working directory
    (if it does not exist yet) and registered as Chrome's default download
    location.

    Returns:
        tuple: ``(driver, download_dir)`` -- the Chrome WebDriver instance
        and the absolute path of the download folder.
    """
    # Downloads land in "<cwd>/downloads"; create the folder up front.
    download_dir = os.path.join(os.getcwd(), "downloads")
    os.makedirs(download_dir, exist_ok=True)

    chrome_options = Options()
    chrome_options.add_experimental_option(
        "prefs",
        {
            "download.default_directory": download_dir,
            "download.prompt_for_download": False,
            "download.directory_upgrade": True,
            "safebrowsing.enabled": False,
        },
    )
    chrome_options.add_argument("--start-maximized")  # Start with maximized window

    # webdriver_manager fetches a chromedriver binary and returns its path.
    chromedriver_service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=chromedriver_service, options=chrome_options)

    return driver, download_dir


In [55]:
def get_constituencies(driver, url, limit=5):
    """Load the party-wise results page and list the constituencies in its dropdown.

    Args:
        driver: Selenium WebDriver used to load the page.
        url (str): URL of the page that contains the constituency dropdown.
        limit (int | None): Maximum number of constituencies to return, taken
            from the top of the dropdown. Must be non-negative. The default of
            5 keeps the original "first five, for testing" behaviour; pass
            ``None`` to return every constituency.

    Returns:
        list[dict]: One ``{'text': ..., 'value': ...}`` dict per dropdown
        option, excluding the first (placeholder) option.

    Raises:
        Exception: Whatever Selenium raised while locating or reading the
            dropdown; a screenshot is saved to ``constituency_error.png``
            before the exception is re-raised.
    """
    # Open the main page
    driver.get(url)
    print("Main page loaded")

    # Give the page a moment to settle before probing for the dropdown
    time.sleep(3)

    dropdown_id = "ctl00_ContentPlaceHolder1_Result1_ddlState"
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, dropdown_id))
        )

        dropdown = driver.find_element(By.ID, dropdown_id)
        print("Found constituency dropdown")

        # The first option is the "Select Constituency" placeholder, so skip it
        constituencies = [
            {'text': option.text, 'value': option.get_attribute('value')}
            for option in Select(dropdown).options[1:]
        ]

        print(f"Found {len(constituencies)} constituencies")
        # Slicing with None keeps the whole list, so limit=None means "all"
        return constituencies[:limit]

    except Exception as e:
        print(f"Error getting constituencies: {str(e)}")
        driver.save_screenshot("constituency_error.png")
        raise


In [None]:
def _safe_name(text):
    """Return *text* with spaces and path separators replaced by underscores."""
    return text.replace(' ', '_').replace('/', '_').replace('\\', '_')


def _cell_texts(cells):
    """Return the stripped visible text of each element in *cells*."""
    return [cell.text.strip() for cell in cells]


def _rows_from(row_elements):
    """Return the <td> texts of every row, skipping rows that have no <td> cells."""
    rows = []
    for row in row_elements:
        values = _cell_texts(row.find_elements(By.TAG_NAME, "td"))
        if values:
            rows.append(values)
    return rows


def _write_table_csv(headers, rows, constituency_name, download_dir):
    """Pad ragged headers/rows to a common width, write them as CSV, return the path."""
    width = max(len(headers), max(len(row) for row in rows))
    # Missing header names become "Column<n>" (1-based position in the table)
    padded_headers = headers + [f"Column{i}" for i in range(len(headers) + 1, width + 1)]
    padded_rows = [row + [""] * (width - len(row)) for row in rows]

    csv_path = os.path.join(download_dir, f"{_safe_name(constituency_name)}.csv")
    # to_csv opens the file for writing, so an existing file is overwritten
    pd.DataFrame(padded_rows, columns=padded_headers).to_csv(csv_path, index=False)
    return csv_path


def _open_results_page(driver, constituency_name):
    """Click through to the constituency-wise results page; return True on success."""
    constituency_links = driver.find_elements(By.XPATH, "//a[contains(@href, 'Constituencywise')]")
    if constituency_links:
        print(f"Found constituency wise link: {constituency_links[0].get_attribute('href')}")
        constituency_links[0].click()
        print("Clicked constituency wise link")
        time.sleep(3)  # Wait for page to load
        return True

    print("Could not find constituency wise link, taking screenshot...")
    driver.save_screenshot(f"no_const_link_{_safe_name(constituency_name)}.png")

    # Dump the first few links to help diagnose what the page actually contains
    all_links = driver.find_elements(By.TAG_NAME, "a")
    print(f"Found {len(all_links)} links on the page")
    for i, link in enumerate(all_links[:10]):
        href = link.get_attribute('href')
        text = link.text
        print(f"Link {i}: href={href}, text={text}")

    # Fallback: first link inside the sibling of a card header
    print("Looking for constituency elements by XPath...")
    try:
        constituency_elements = driver.find_elements(
            By.XPATH, "//div[contains(@class, 'card-header')]/following-sibling::div//a"
        )
        if not constituency_elements:
            print("No constituency elements found")
            return False
        print(f"Found {len(constituency_elements)} potential constituency elements")
        constituency_elements[0].click()
        print("Clicked first potential constituency element")
        time.sleep(3)
        return True
    except Exception as e:
        print(f"Error finding constituency elements: {str(e)}")
        return False


def _scrape_results_table(driver, constituency_name, download_dir):
    """Extract the styled results table (thead/tbody) and save it; return the CSV path or None."""
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, "/html/body/main/div/div[3]/div"))
    )
    print("Found the table container")

    table = driver.find_element(By.CSS_SELECTOR, "table.table.table-striped.table-bordered")
    print("Found the table")

    headers = _cell_texts(table.find_elements(By.XPATH, ".//thead//th"))
    print(f"Found {len(headers)} headers: {headers}")

    data_rows = table.find_elements(By.XPATH, ".//tbody//tr")
    print(f"Found {len(data_rows)} rows")

    rows = _rows_from(data_rows)
    print(f"Processed {len(rows)} data rows")

    if not (headers and rows):
        print("No data found in table")
        driver.save_screenshot(f"no_data_{_safe_name(constituency_name)}.png")
        return None

    csv_path = _write_table_csv(headers, rows, constituency_name, download_dir)
    print(f"Extracted and saved table data to: {csv_path}")
    return csv_path


def _scrape_first_table(driver, constituency_name, download_dir):
    """Fallback: treat the first <table> on the page as the results table."""
    tables = driver.find_elements(By.TAG_NAME, "table")
    if not tables:
        print("No tables found on the page")
        return None

    print(f"Found {len(tables)} tables, trying first one")
    table_rows = tables[0].find_elements(By.TAG_NAME, "tr")

    # The first row supplies the headers: prefer <th>, fall back to <td>
    headers = []
    if table_rows:
        header_cells = table_rows[0].find_elements(By.TAG_NAME, "th")
        if not header_cells:
            header_cells = table_rows[0].find_elements(By.TAG_NAME, "td")
        headers = _cell_texts(header_cells)

    rows = _rows_from(table_rows[1:])  # Skip header row
    if not (headers and rows):
        return None

    csv_path = _write_table_csv(headers, rows, constituency_name, download_dir)
    print(f"Extracted and saved table data using alternative method to: {csv_path}")
    return csv_path


def download_constituency_csv(driver, constituency, download_dir):
    """Scrape one constituency's results table and save it as a CSV file.

    The function selects the constituency in the dropdown of the page the
    driver currently shows, follows the constituency-wise results link, reads
    the results table and writes it to ``<download_dir>/<name>.csv`` where
    ``<name>`` is the constituency text with spaces and path separators
    replaced by underscores. Failures are reported with ``print`` and a
    screenshot in the current working directory; they do not raise.

    Args:
        driver: Selenium WebDriver already showing the page with the dropdown.
        constituency (dict): ``{'text': ..., 'value': ...}`` as produced by
            ``get_constituencies``.
        download_dir (str): Directory the CSV file is written to.

    Returns:
        str | None: Path of the written CSV file, or ``None`` when the
        constituency could not be selected, navigated to, or scraped.
    """
    name = constituency['text']
    shot_name = _safe_name(name)

    try:
        # Find the constituency dropdown
        dropdown = driver.find_element(By.ID, "ctl00_ContentPlaceHolder1_Result1_ddlState")
        select = Select(dropdown)

        print(f"Selecting constituency: {name}")
        try:
            select.select_by_visible_text(name)
        except Exception:
            # Visible text did not match; retry using the option's value
            try:
                select.select_by_value(constituency['value'])
            except Exception as e:
                print(f"Could not select constituency: {str(e)}")
                driver.save_screenshot(f"select_error_{shot_name}.png")
                return None

        # Wait for the page to update
        time.sleep(3)

        try:
            if not _open_results_page(driver, name):
                return None

            try:
                return _scrape_results_table(driver, name, download_dir)
            except Exception as e:
                print(f"Error extracting table for {name}: {str(e)}")
                driver.save_screenshot(f"table_error_{shot_name}.png")

                # Try an alternative approach - look for any table
                try:
                    return _scrape_first_table(driver, name, download_dir)
                except Exception as alt_e:
                    print(f"Alternative table extraction failed: {str(alt_e)}")
                    return None

        except Exception as e:
            print(f"Error with navigation for {name}: {str(e)}")
            driver.save_screenshot(f"navigation_error_{shot_name}.png")
            return None

    except Exception as e:
        print(f"Error downloading CSV for {name}: {str(e)}")
        driver.save_screenshot(f"download_error_{shot_name}.png")
        return None


In [None]:
def combine_csv_files(csv_files):
    """Merge per-constituency CSV files into one CSV in the working directory.

    Each readable file is loaded and tagged with a ``Constituency`` column
    whose value is rebuilt from the file name (``.csv`` removed, underscores
    turned back into spaces). Entries that are falsy or do not exist on disk
    are skipped; files that fail to parse are reported and skipped.

    Args:
        csv_files (list): Paths of CSV files; may contain ``None`` entries.

    Returns:
        str | None: Path of ``combined_constituency_data.csv`` in the current
        working directory, or ``None`` if no file contributed any data.
    """
    frames = []

    for path in csv_files:
        # Ignore failed downloads (None / empty) and files that are not there
        if not path or not os.path.exists(path):
            continue

        try:
            frame = pd.read_csv(path, encoding='utf-8')

            # The file name is the only place the constituency name survives
            label = os.path.basename(path).replace('.csv', '').replace('_', ' ')
            frame['Constituency'] = label

            frames.append(frame)
            print(f"Added data from {path}")
        except Exception as e:
            print(f"Error reading {path}: {str(e)}")

    if not frames:
        print("No data to combine")
        return None

    # Stack every constituency's rows under a fresh 0..n-1 index
    merged = pd.concat(frames, ignore_index=True)

    output_file = os.path.join(os.getcwd(), "combined_constituency_data.csv")
    merged.to_csv(output_file, index=False)
    print(f"Combined data saved to: {output_file}")
    return output_file


Main page loaded
Found constituency dropdown
Found 40 constituencies
Processing first 5 constituencies: ['Araria - 9', 'Arrah - 32', 'Aurangabad - 37', 'Banka - 27', 'Begusarai - 24']

Processing: Araria - 9
Selecting constituency: Araria - 9
Found constituency wise link: https://results.eci.gov.in/PcResultGenJune2024/ConstituencywiseS049.htm
Clicked constituency wise link
Found the table container
Found the table
Found 7 headers: ['S.N.', 'Candidate', 'Party', 'EVM Votes', 'Postal Votes', 'Total Votes', '% of Votes']
Found 10 rows
Processed 10 data rows
Extracted and saved table data to: /Users/sauravjha/Desktop/Hertie School/Semester 2/hertie-pdc-scraping/colabs/downloads/Araria_-_9.csv

Processing: Arrah - 32
Selecting constituency: Arrah - 32
Found constituency wise link: https://results.eci.gov.in/PcResultGenJune2024/ConstituencywiseS0432.htm
Clicked constituency wise link
Found the table container
Found the table
Found 7 headers: ['S.N.', 'Candidate', 'Party', 'EVM Votes', 'Posta

In [58]:
def main():
    """Scrape the Bihar constituency results and merge them into one CSV.

    Starts a Chrome session, collects the constituencies returned by
    ``get_constituencies``, saves one CSV per constituency, and finally
    combines the saved files. Any error is printed rather than raised, and
    the browser is always closed.
    """
    url = "https://results.eci.gov.in/PcResultGenJune2024/partywiseresult-S04.htm"

    driver, download_dir = setup_driver()

    try:
        constituencies = get_constituencies(driver, url)
        names = [c['text'] for c in constituencies]
        print(f"Processing first 5 constituencies: {names}")

        csv_files = []
        banner = '=' * 50
        for constituency in constituencies:
            print(f"\n{banner}\nProcessing: {constituency['text']}\n{banner}")

            saved_path = download_constituency_csv(driver, constituency, download_dir)
            if saved_path:
                csv_files.append(saved_path)

            # Reload the main page so the dropdown is available for the next constituency
            driver.get(url)
            time.sleep(3)

        if not csv_files:
            print("No CSV files were downloaded successfully")
        else:
            combined_file = combine_csv_files(csv_files)
            if combined_file:
                print(f"Successfully combined data from {len(csv_files)} constituencies")
                print(f"Combined data saved to: {combined_file}")
    except Exception as e:
        print(f"An error occurred: {str(e)}")
    finally:
        # Close the browser whether or not scraping succeeded
        driver.quit()


if __name__ == "__main__":
    main()

Main page loaded
Found constituency dropdown
Found 40 constituencies
Processing first 5 constituencies: ['Araria - 9', 'Arrah - 32', 'Aurangabad - 37', 'Banka - 27', 'Begusarai - 24']

Processing: Araria - 9
Selecting constituency: Araria - 9
Found constituency wise link: https://results.eci.gov.in/PcResultGenJune2024/ConstituencywiseS049.htm
Clicked constituency wise link
Found the table container
Found the table
Found 7 headers: ['S.N.', 'Candidate', 'Party', 'EVM Votes', 'Postal Votes', 'Total Votes', '% of Votes']
Found 10 rows
Processed 10 data rows
Extracted and saved table data to: /Users/sauravjha/Desktop/Hertie School/Semester 2/hertie-pdc-scraping/colabs/downloads/Araria_-_9.csv

Processing: Arrah - 32
Selecting constituency: Arrah - 32
Found constituency wise link: https://results.eci.gov.in/PcResultGenJune2024/ConstituencywiseS0432.htm
Clicked constituency wise link
Found the table container
Found the table
Found 7 headers: ['S.N.', 'Candidate', 'Party', 'EVM Votes', 'Posta

In [59]:
# Load the combined results file; the path is relative to the current working
# directory, which is where combine_csv_files() writes it.
df = pd.read_csv("combined_constituency_data.csv")

In [61]:
# Preview the first 25 rows of the combined results.
df.head(25)

Unnamed: 0,S.N.,Candidate,Party,EVM Votes,Postal Votes,Total Votes,% of Votes,Constituency
0,1,PRADEEP KUMAR SINGH,Bharatiya Janata Party,599118,1028,600146,47.91,Araria - 9
1,2,SHAHNAWAZ,Rashtriya Janata Dal,578904,1148,580052,46.31,Araria - 9
2,3,SHATRUGHAN PRASAD SUMAN,Independent,13697,49,13746,1.1,Araria - 9
3,4,MD GHOUSUL AZAM,Bahujan Samaj Party,12672,18,12690,1.01,Araria - 9
4,5,MD. MOBINUL HAQUE,Independent,12005,3,12008,0.96,Araria - 9
5,6,MD. ISMAIL,Bharatiya Momin Front,7353,1,7354,0.59,Araria - 9
6,7,AKHILESH KUMAR,Independent,5086,34,5120,0.41,Araria - 9
7,8,MUSHTAK ALAM,Independent,4897,2,4899,0.39,Araria - 9
8,9,ZAWED AKHTAR,The National Road Map Party of India,3037,4,3041,0.24,Araria - 9
9,10,NOTA,None of the Above,13438,66,13504,1.08,Araria - 9


In [62]:
import requests
from bs4 import BeautifulSoup
import re

In [None]:
import requests
from bs4 import BeautifulSoup
from bs4.element import Comment

def is_valid_flag(flag):
    """
    Tell whether a value has the form ``flag-<digits>``

    Args:
        flag: Candidate value; anything that is not a string is rejected

    Returns:
        bool: True when *flag* is a string that starts with ``flag-`` and
        everything after that prefix consists of digits, False otherwise
    """
    prefix = 'flag-'

    if not isinstance(flag, str):
        return False
    if not flag.startswith(prefix):
        return False

    # An empty suffix ("flag-") is rejected because ''.isdigit() is False
    return flag[len(prefix):].isdigit()

def safe_finder(func):
    """
    Decorator to handle exceptions in flag finding methods

    Any exception raised by the wrapped finder is printed together with the
    finder's name and replaced by an empty set, so one failing strategy does
    not abort the others. The wrapper keeps the wrapped function's name and
    docstring.

    Args:
        func (callable): Flag finding function taking a single ``soup`` argument

    Returns:
        callable: Wrapped function with error handling
    """
    # Local import so this block does not depend on the file's import cell
    import functools

    @functools.wraps(func)  # keep __name__/__doc__ of the real finder
    def wrapper(soup):
        try:
            return func(soup)
        except Exception as e:
            print(f"Error in {func.__name__}: {e}")
            return set()
    return wrapper

# Flag Finding Strategies
@safe_finder
def find_flags_in_elements_text(soup):
    """
    Find flags in the text content of elements

    Every text node in the document is split on whitespace and each word
    that starts with ``flag-`` is collected. Words are not validated here.

    Args:
        soup (BeautifulSoup): Parsed HTML

    Returns:
        set: Flag-like words found in element text
    """
    flags = set()

    # string=True matches every text node; it replaces the deprecated
    # ``text=`` spelling that triggers a DeprecationWarning in bs4
    for text in soup.find_all(string=True):
        flags.update(word for word in str(text).split() if word.startswith('flag-'))

    return flags

@safe_finder
def find_flags_in_attributes(soup):
    """
    Find flags in the attribute values of every element

    All tags are visited. BeautifulSoup treats a non-dict ``attrs=`` filter
    as a CSS-class filter, so the previous ``attrs=lambda attrs: attrs``
    search only visited elements that had a class.

    Args:
        soup (BeautifulSoup): Parsed HTML

    Returns:
        set: Flag-like tokens found in attribute values
    """
    flags = set()

    # find_all(True) yields every tag in the document
    for elem in soup.find_all(True):
        for attr_value in elem.attrs.values():
            if isinstance(attr_value, list):
                # Multi-valued attributes (e.g. class) arrive already split
                candidates = attr_value
            elif isinstance(attr_value, str):
                candidates = attr_value.split()
            else:
                continue

            flags.update(
                token for token in candidates
                if isinstance(token, str) and token.startswith('flag-')
            )

    return flags

@safe_finder
def find_flags_in_special_attributes(soup):
    """
    Find flags on elements whose data-* or title attribute mentions a flag

    An element qualifies when a ``data-*`` attribute with a string value, or
    its ``title`` attribute, contains ``flag-``. For qualifying elements
    every string-valued attribute is split on whitespace and the words
    starting with ``flag-`` are collected.

    The attribute dict of each tag is inspected directly: a callable passed
    as ``attrs=`` to ``find_all`` is applied by BeautifulSoup to the class
    attribute only, so the previous ``data-*`` filter could never match.

    Args:
        soup (BeautifulSoup): Parsed HTML

    Returns:
        set: Flag-like words found on the qualifying elements
    """
    flags = set()

    for elem in soup.find_all(True):
        has_flagged_data_attr = any(
            key.startswith('data-') and isinstance(value, str) and 'flag-' in value
            for key, value in elem.attrs.items()
        )
        title = elem.attrs.get('title')
        has_flagged_title = bool(title) and 'flag-' in str(title)

        if not (has_flagged_data_attr or has_flagged_title):
            continue

        # Extract flags from all string attributes of the qualifying element
        for attr_value in elem.attrs.values():
            if isinstance(attr_value, str):
                flags.update(
                    word for word in attr_value.split() if word.startswith('flag-')
                )

    return flags

@safe_finder
def find_flags_in_comments(soup):
    """
    Find flags in HTML comments

    Args:
        soup (BeautifulSoup): Parsed HTML

    Returns:
        set: Flag-like words found in comments
    """
    flags = set()

    # ``string=`` replaces the deprecated ``text=`` keyword; the callable
    # keeps only text nodes that are Comment instances
    comments = soup.find_all(string=lambda node: isinstance(node, Comment))

    for comment in comments:
        flags.update(word for word in str(comment).split() if word.startswith('flag-'))

    return flags

@safe_finder
def find_flags_in_hidden_elements(soup):
    """
    Find flags inside elements carrying a "hiding" CSS class

    Only the element's ``.string`` is examined, so elements for which
    BeautifulSoup reports no single string are skipped.

    Args:
        soup (BeautifulSoup): Parsed HTML

    Returns:
        set: Flag-like words found in those elements
    """
    # CSS classes treated as hiding content: transparent text, hidden,
    # screen-reader only
    hiding_classes = ('text-transparent', 'hidden', 'sr-only')

    flags = set()
    for css_class in hiding_classes:
        for elem in soup.find_all(class_=css_class):
            content = elem.string
            if not content:
                continue
            flags.update(
                token for token in str(content).split() if token.startswith('flag-')
            )

    return flags

@safe_finder
def find_flags_in_ids(soup):
    """
    Find flags used as element IDs

    Args:
        soup (BeautifulSoup): Parsed HTML

    Returns:
        set: IDs that start with ``flag-``
    """
    flags = set()

    # Pre-filter to elements whose id mentions "flag-" anywhere ...
    candidates = soup.find_all(id=lambda value: value and 'flag-' in str(value))

    # ... then keep only the ids that actually begin with it
    for elem in candidates:
        elem_id = elem.get('id')
        if elem_id and elem_id.startswith('flag-'):
            flags.add(elem_id)

    return flags

def comprehensive_flag_extractor(url, timeout=10):
    """
    Comprehensive flag extraction with multiple strategies

    Downloads the page, runs every flag-finding strategy on the parsed HTML
    and keeps only the candidates accepted by ``is_valid_flag``.

    Args:
        url (str): URL to scrape
        timeout (float): Seconds to wait for the server before giving up.
            Without a timeout ``requests.get`` can block indefinitely.

    Returns:
        list: Sorted list of unique, valid flags; empty if the request or
        the extraction failed (the error is printed)
    """
    try:
        # Fetch the page
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        # Parse HTML
        soup = BeautifulSoup(response.text, 'html.parser')

        # Combine all flag-finding strategies
        flag_finders = [
            find_flags_in_elements_text,
            find_flags_in_attributes,
            find_flags_in_special_attributes,
            find_flags_in_comments,
            find_flags_in_hidden_elements,
            find_flags_in_ids
        ]

        # Collect flags
        all_flags = set()
        for finder in flag_finders:
            all_flags.update(finder(soup))

        # Clean and validate flags
        cleaned_flags = {flag for flag in all_flags if is_valid_flag(flag)}

        # Sorting is lexicographic, so 'flag-10' sorts before 'flag-2'
        return sorted(cleaned_flags)

    except requests.RequestException as e:
        # Covers connection errors, timeouts and HTTP error statuses
        print(f"Network error: {e}")
        return []
    except Exception as e:
        print(f"Unexpected error: {e}")
        return []

# Main execution
def main():
    """
    Extract the flags from the level 1 page and print them with their count
    """
    target_url = "https://hertie-scraping-website.vercel.app/"

    flags = comprehensive_flag_extractor(target_url)

    # Report what was found
    print("Found Flags:", flags)
    print(f"Total Flags: {len(flags)}")


# Run only when executed as a script / notebook cell, not on import
if __name__ == "__main__":
    main()

Found Flags: ['flag-1', 'flag-10', 'flag-11', 'flag-12', 'flag-13', 'flag-14', 'flag-15', 'flag-16', 'flag-17', 'flag-18', 'flag-19', 'flag-2', 'flag-20', 'flag-21', 'flag-22', 'flag-23', 'flag-24', 'flag-25', 'flag-26', 'flag-27', 'flag-28', 'flag-29', 'flag-3', 'flag-30', 'flag-31', 'flag-32', 'flag-33', 'flag-34', 'flag-35', 'flag-36', 'flag-37', 'flag-38', 'flag-39', 'flag-4', 'flag-40', 'flag-5', 'flag-6', 'flag-7', 'flag-8', 'flag-9']
Total Flags: 40


  text_elements = soup.find_all(text=True)
  comments = soup.find_all(text=lambda text: isinstance(text, Comment))


In [89]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import time
import re

In [93]:
class Level2FlagHunter:
    """
    Selenium-based hunter for ``flag-<number>`` markers on the level 2 page

    The page is loaded in Chrome and searched with three JavaScript
    snippets (whole-document walk, hidden elements, specially styled
    elements). Flags listed in ``existing_flags`` are excluded from the
    result.
    """

    # One flag token. ASCII digits only, mirroring the JavaScript pattern
    # used inside the page scripts.
    _FLAG_PATTERN = re.compile(r'flag-[0-9]+')

    def __init__(self, url, existing_flags=None):
        """
        Initialize the Level 2 Flag Hunter and start a Chrome session

        Args:
            url (str): URL of the level 2 page
            existing_flags (iterable | None): Flags to exclude from the
                results. Defaults to ``flag-1`` .. ``flag-40``.
        """
        # Setup Chrome options
        chrome_options = Options()
        # Uncomment the next line if you want to run in headless mode
        # chrome_options.add_argument("--headless")

        # Setup the WebDriver
        self.driver = webdriver.Chrome(
            service=Service(ChromeDriverManager().install()),
            options=chrome_options
        )

        self.url = url
        # Existing flags to exclude
        if existing_flags is None:
            existing_flags = (f'flag-{i}' for i in range(1, 41))
        self.existing_flags = set(existing_flags)

    def load_page(self):
        """
        Load the target page and wait for it to stabilize

        Errors are printed, not raised.
        """
        try:
            self.driver.get(self.url)
            # Wait for page to load
            WebDriverWait(self.driver, 10).until(
                EC.presence_of_element_located((By.TAG_NAME, 'body'))
            )
            time.sleep(2)  # Additional stabilization time
        except Exception as e:
            print(f"Error loading page: {e}")

    def _run_script(self, script, failure_label):
        """
        Execute *script* in the page and return its result as a set

        Args:
            script (str): JavaScript source that returns an array
            failure_label (str): Prefix of the message printed on failure

        Returns:
            set: Values returned by the script, or an empty set on error
        """
        try:
            return set(self.driver.execute_script(script))
        except Exception as e:
            print(f"{failure_label}: {e}")
            return set()

    def extract_flags_comprehensively(self):
        """
        Walk every text node and element attribute under <body> for flags

        Returns:
            set: All flags found on the page
        """
        # Raw string: the JavaScript regex contains "\d", which is not a
        # valid escape sequence in a normal Python string literal.
        flag_script = r"""
        function extractFlags() {
            const flags = new Set();

            // Add every flag-<digits> occurrence inside `text`
            function collect(text) {
                const matches = String(text).match(/flag-\d+/g);
                if (matches) {
                    matches.forEach(match => flags.add(match));
                }
            }

            // Search through entire document
            const walker = document.createTreeWalker(
                document.body,
                NodeFilter.SHOW_TEXT | NodeFilter.SHOW_ELEMENT,
                null,
                false
            );

            let node;
            while (node = walker.nextNode()) {
                // Check text content of text nodes
                if (node.nodeType === Node.TEXT_NODE && node.textContent) {
                    collect(node.textContent);
                }

                // Check all attributes (class included) of element nodes
                if (node.nodeType === Node.ELEMENT_NODE) {
                    for (let attr of node.attributes) {
                        collect(attr.value);
                    }
                }
            }

            return Array.from(flags);
        }

        return extractFlags();
        """

        return self._run_script(flag_script, "Error extracting flags")

    def find_hidden_flags(self):
        """
        Find flags in hidden or hard-to-detect elements

        Note that the script also makes the matched elements visible in the
        live page.

        Returns:
            set: Hidden flags found
        """
        hidden_flag_script = r"""
        function findHiddenFlags() {
            const flags = new Set();

            // Find potentially hidden elements
            const hiddenElements = document.querySelectorAll(
                '[style*="display:none"],' +
                '[style*="visibility:hidden"],' +
                '[style*="opacity:0"],' +
                '.hidden,' +
                '.invisible,' +
                '.text-transparent'
            );

            // Reveal and search hidden elements
            hiddenElements.forEach(el => {
                // Make element visible
                el.style.display = 'block';
                el.style.visibility = 'visible';
                el.style.opacity = '1';

                // Search for flags
                const matches = el.textContent.match(/flag-\d+/g);
                if (matches) {
                    matches.forEach(match => flags.add(match));
                }
            });

            return Array.from(flags);
        }

        return findHiddenFlags();
        """

        return self._run_script(hidden_flag_script, "Error finding hidden flags")

    def search_rotated_and_styled_elements(self):
        """
        Search for flags in rotated, skewed, or specially styled elements

        Returns:
            set: Flags found in specially styled elements
        """
        styled_flag_script = r"""
        function findStyledFlags() {
            const flags = new Set();

            // Find elements with specific styling that might hide flags
            const styledElements = document.querySelectorAll(
                '.rotate-90, .rotate-45, .rotate-180, ' +
                '.skew-y-12, .text-xs, .opacity-25, .text-4xl'
            );

            styledElements.forEach(el => {
                // Search for flags in text content
                const matches = el.textContent.match(/flag-\d+/g);
                if (matches) {
                    matches.forEach(match => flags.add(match));
                }

                // Check markup of child elements (covers their attributes)
                const childMatches = el.innerHTML.match(/flag-\d+/g);
                if (childMatches) {
                    childMatches.forEach(match => flags.add(match));
                }
            });

            return Array.from(flags);
        }

        return findStyledFlags();
        """

        return self._run_script(styled_flag_script, "Error finding styled flags")

    def find_level2_flags(self):
        """
        Run every search strategy and return the new flags

        Each string returned by a strategy is scanned for ``flag-<digits>``
        tokens, so a value such as ``"card flag-55"`` still yields
        ``flag-55``. Flags in ``self.existing_flags`` are removed.

        Returns:
            set: All unique flags found that are not in ``existing_flags``
        """
        # Combine flag-finding methods
        flag_methods = [
            self.extract_flags_comprehensively,
            self.find_hidden_flags,
            self.search_rotated_and_styled_elements
        ]

        # Collect raw results
        all_flags = set()
        for method in flag_methods:
            try:
                all_flags.update(method())
            except Exception as e:
                print(f"Error in {method.__name__}: {e}")

        # Normalise to clean flag tokens, then drop the already-known ones
        unique_flags = set()
        for value in all_flags:
            if isinstance(value, str):
                unique_flags.update(self._FLAG_PATTERN.findall(value))

        return unique_flags.difference(self.existing_flags)

    def cleanup(self):
        """
        Close the browser and clean up resources
        """
        try:
            self.driver.quit()
        except Exception as e:
            print(f"Error during cleanup: {e}")

def main():
    """
    Drive the Level 2 flag hunt from start to finish.

    Opens the level 2 page, collects the new flags, prints them in sorted
    order followed by a count, and closes the browser whether or not an
    error occurred along the way.
    """
    hunter = Level2FlagHunter("https://hertie-scraping-website.vercel.app/level2")

    try:
        hunter.load_page()
        level2_flags = hunter.find_level2_flags()

        # Report one flag per line, then the total
        print("Flags Found on Level 2:")
        for flag_name in sorted(level2_flags):
            print(flag_name)
        print(f"\nTotal Flags Found: {len(level2_flags)}")

    except Exception as run_error:
        print(f"Unexpected error: {run_error}")

    finally:
        # The browser must be released on every path
        hunter.cleanup()

# Run the Level 2 hunt only when this code is executed directly
# (i.e. __name__ is "__main__"), not when it is imported.
if __name__ == "__main__":
    main()

  flag_script = """
  hidden_flag_script = """
  styled_flag_script = """


Flags Found on Level 2:
flag-41
flag-42
flag-43
flag-44
flag-45
flag-46
flag-47
flag-48
flag-49
flag-50
flag-51
flag-52
flag-53
flag-54

Total Flags Found: 14


In [101]:
class Level3FlagHunter:
    """
    Selenium-driven collector for the flags shown on the level 3 page.

    Flags are read from the text and id of every <button> on the page,
    both before and after clicking the buttons listed in CLICK_IDS.
    """

    # IDs of the buttons that are clicked to reveal further flags
    CLICK_IDS = ('flag-55', 'flag-57', 'flag-59')

    def __init__(self, url):
        """
        Initialize the Level 3 Flag Hunter

        Starts a Chrome session immediately; call cleanup() to release it.

        Args:
            url: Address of the level 3 page to load
        """
        # Setup Chrome options
        chrome_options = Options()

        # Command-line switches passed to Chrome
        chrome_options.add_argument("--no-sandbox")
        chrome_options.add_argument("--disable-dev-shm-usage")
        chrome_options.add_argument("--disable-extensions")
        chrome_options.add_argument("--disable-gpu")
        chrome_options.add_argument("--log-level=3")  # Minimize logging

        # Setup the WebDriver service
        service = Service(ChromeDriverManager().install())
        if os.name == "nt":
            # 0x08000000 is CREATE_NO_WINDOW: prevents a console window from
            # appearing on Windows. Process creation flags are a Windows-only
            # concept, so the value is not set on other platforms.
            service.creationflags = 0x08000000

        # Create WebDriver
        self.driver = webdriver.Chrome(
            service=service,
            options=chrome_options
        )

        # Set longer timeouts
        # NOTE(review): the Selenium documentation advises against combining
        # an implicit wait with the explicit WebDriverWait calls used below;
        # kept as in the original to leave timing behaviour unchanged.
        self.driver.set_page_load_timeout(30)
        self.driver.implicitly_wait(10)

        self.url = url
        # Existing flags to exclude (up to level 2)
        self.existing_flags = {f'flag-{i}' for i in range(1, 55)}

    def load_page(self):
        """
        Load the target page and wait for it to stabilize

        Raises:
            Exception: Any error raised while loading is printed and then
                       re-raised to the caller
        """
        # Imported locally so the handler below resolves even if the
        # notebook's import cell did not bring this name into scope
        from selenium.common.exceptions import TimeoutException

        try:
            # Navigate to the page
            self.driver.get(self.url)

            # Wait until the <body> element is present
            WebDriverWait(self.driver, 30).until(
                EC.presence_of_element_located((By.TAG_NAME, 'body'))
            )

            # Additional wait for potential dynamic content
            time.sleep(3)

        except TimeoutException:
            print("Timeout while loading the page")
            # Show the start of whatever was loaded to help debugging
            print("Partial page source:", self.driver.page_source[:1000])
            raise
        except Exception as e:
            print(f"Error loading page: {e}")
            raise

    def _collect_button_flags(self, buttons, flags, label):
        """
        Add every 'flag-' text or id found on the given buttons to flags.

        Args:
            buttons: Iterable of button elements to inspect
            flags: Set that is updated in place with the flags found
            label: Wording used in the message printed when a single
                   button cannot be read ("button" or "new button")
        """
        for button in buttons:
            try:
                # The visible text may itself be a flag
                button_text = button.text.strip()
                if button_text.startswith('flag-'):
                    flags.add(button_text)

                # The id attribute may be a flag as well
                button_id = button.get_attribute('id')
                if button_id and button_id.startswith('flag-'):
                    flags.add(button_id)

            except Exception as button_error:
                # One unreadable button must not stop the others
                print(f"Error processing {label}: {button_error}")

    def extract_and_click_flags(self):
        """
        Extract flags and click on specific buttons to reveal more

        Errors are printed rather than raised; whatever was collected up to
        that point is still returned.

        Returns:
            set: Unique flags found
        """
        # Imported locally so the handlers below resolve even if the
        # notebook's import cell did not bring these names into scope
        from selenium.common.exceptions import (
            NoSuchElementException,
            TimeoutException,
        )

        flags = set()

        try:
            # Find all buttons currently on the page
            try:
                buttons = WebDriverWait(self.driver, 10).until(
                    EC.presence_of_all_elements_located((By.TAG_NAME, 'button'))
                )
            except TimeoutException:
                print("No buttons found on the page")
                buttons = []

            # First, extract the flags visible before any click
            self._collect_button_flags(buttons, flags, "button")

            # Click specific buttons to reveal more flags
            for click_id in self.CLICK_IDS:
                try:
                    # Find the button and wait until it is clickable
                    click_button = WebDriverWait(self.driver, 10).until(
                        EC.element_to_be_clickable((By.ID, click_id))
                    )

                    # Scroll to the button to ensure it's in view
                    self.driver.execute_script("arguments[0].scrollIntoView(true);", click_button)

                    # Wait a moment
                    time.sleep(1)

                    # Click the button
                    click_button.click()

                    # Wait for potential dynamic content
                    time.sleep(2)

                    # Buttons are looked up again because the click may
                    # have added new ones
                    new_buttons = self.driver.find_elements(By.TAG_NAME, 'button')
                    self._collect_button_flags(new_buttons, flags, "new button")

                except (NoSuchElementException, TimeoutException) as click_error:
                    print(f"Error clicking button {click_id}: {click_error}")
                    # Additional debugging
                    print("Available button IDs:")
                    for btn in self.driver.find_elements(By.TAG_NAME, 'button'):
                        print(btn.get_attribute('id'))

        except Exception as e:
            print(f"Unexpected error extracting and clicking flags: {e}")

        return flags

    def find_level3_flags(self):
        """
        Find all unique flags on level 3 page

        Returns:
            set: Flags of the form 'flag-<digits>' that are not already in
                 self.existing_flags
        """
        # Extract flags by clicking buttons
        found_flags = self.extract_and_click_flags()

        # Remove existing flags and validate
        unique_flags = {
            flag for flag in found_flags
            if flag.startswith('flag-') and
               flag[5:].isdigit() and
               flag not in self.existing_flags
        }

        return unique_flags

    def cleanup(self):
        """
        Close the browser and clean up resources

        A failure while quitting is printed rather than raised.
        """
        try:
            self.driver.quit()
        except Exception as e:
            print(f"Error during cleanup: {e}")

def main():
    """
    Drive the Level 3 flag hunt from start to finish.

    Opens the level 3 page, collects the new flags, prints them in sorted
    order followed by a count, and closes the browser whether or not an
    error occurred along the way.
    """
    hunter = Level3FlagHunter("https://hertie-scraping-website.vercel.app/level3")

    try:
        hunter.load_page()
        level3_flags = hunter.find_level3_flags()

        # Report one flag per line, then the total
        print("\nFlags Found on Level 3:")
        for flag_name in sorted(level3_flags):
            print(flag_name)
        print(f"\nTotal Flags Found: {len(level3_flags)}")

    except Exception as run_error:
        print(f"Unexpected error: {run_error}")

    finally:
        # The browser must be released on every path
        hunter.cleanup()

# Run the Level 3 hunt only when this code is executed directly
# (i.e. __name__ is "__main__"), not when it is imported.
if __name__ == "__main__":
    main()


Flags Found on Level 3:
flag-55
flag-56
flag-57
flag-58
flag-59
flag-60

Total Flags Found: 6


In [109]:

class Level4FlagHunter:
    """
    Selenium-driven collector for the flag on the level 4 page.

    The flag is looked for after a fixed trigger text has been typed into
    the first <input> element on the page.
    """

    # Text typed into the input, and the flag looked for afterwards
    TRIGGER_TEXT = '!!flag-61!!'
    TARGET_FLAG = 'flag-61'

    def __init__(self, url):
        """
        Initialize the Level 4 Flag Hunter

        Starts a Chrome session immediately; call cleanup() to release it.

        Args:
            url: Address of the level 4 page to load
        """
        # Setup Chrome options
        chrome_options = Options()

        # Command-line switches passed to Chrome
        chrome_options.add_argument("--no-sandbox")
        chrome_options.add_argument("--disable-dev-shm-usage")

        # Setup the WebDriver
        self.driver = webdriver.Chrome(
            service=Service(ChromeDriverManager().install()),
            options=chrome_options
        )

        self.url = url
        # Existing flags to exclude (up to level 3)
        self.existing_flags = {f'flag-{i}' for i in range(1, 61)}

    def load_page(self):
        """
        Load the target page and wait for it to stabilize

        Raises:
            Exception: Any error raised while loading is printed and then
                       re-raised, so callers do not go on to search a page
                       that never loaded
        """
        try:
            # Navigate to the page
            self.driver.get(self.url)

            # Wait until the <body> element is present
            WebDriverWait(self.driver, 30).until(
                EC.presence_of_element_located((By.TAG_NAME, 'body'))
            )

            # Additional wait for potential dynamic content
            time.sleep(2)

        except Exception as e:
            print(f"Error loading page: {e}")
            raise

    def extract_flag(self):
        """
        Extract flag by triggering the specific condition

        Types TRIGGER_TEXT into the first <input> on the page and then
        looks for TARGET_FLAG in the page. Errors are printed rather than
        raised, in which case the flags found so far are returned.

        Returns:
            set: Unique flags found
        """
        flags = set()

        try:
            # Find input element
            input_element = self.driver.find_element(By.TAG_NAME, 'input')

            # Trigger the specific condition
            input_element.clear()
            input_element.send_keys(self.TRIGGER_TEXT)

            # Wait for potential dynamic changes
            time.sleep(1)

            # Check if flag is revealed
            # NOTE(review): this match may be satisfied by the text typed
            # above if the page mirrors the input value into its markup —
            # confirm against the page before relying on it.
            if self.TARGET_FLAG in self.driver.page_source:
                flags.add(self.TARGET_FLAG)

            # Additional verification against element text nodes
            flag_elements = self.driver.find_elements(
                By.XPATH, f"//*[contains(text(), '{self.TARGET_FLAG}')]"
            )
            if flag_elements:
                flags.add(self.TARGET_FLAG)

        except Exception as e:
            print(f"Flag extraction error: {e}")

        return flags

    def find_level4_flags(self):
        """
        Find all unique flags on level 4 page

        Returns:
            set: Flags of the form 'flag-<digits>' that are not already in
                 self.existing_flags
        """
        # Extract flags
        found_flags = self.extract_flag()

        # Remove existing flags and validate
        unique_flags = {
            flag for flag in found_flags
            if flag.startswith('flag-') and
               flag[5:].isdigit() and
               flag not in self.existing_flags
        }

        return unique_flags

    def cleanup(self):
        """
        Close the browser and clean up resources

        A failure while quitting is printed rather than raised.
        """
        try:
            self.driver.quit()
        except Exception as e:
            print(f"Error during cleanup: {e}")

def main():
    """
    Drive the Level 4 flag hunt from start to finish.

    Opens the level 4 page, collects the new flags, prints them in sorted
    order followed by a count, and closes the browser whether or not an
    error occurred along the way.
    """
    hunter = Level4FlagHunter("https://hertie-scraping-website.vercel.app/level4")

    try:
        hunter.load_page()
        level4_flags = hunter.find_level4_flags()

        # Report one flag per line, then the total
        print("\nFlags Found on Level 4:")
        for flag_name in sorted(level4_flags):
            print(flag_name)
        print(f"\nTotal Flags Found: {len(level4_flags)}")

    except Exception as run_error:
        print(f"Unexpected error: {run_error}")

    finally:
        # The browser must be released on every path
        hunter.cleanup()

# Run the Level 4 hunt only when this code is executed directly
# (i.e. __name__ is "__main__"), not when it is imported.
if __name__ == "__main__":
    main()


Flags Found on Level 4:
flag-61

Total Flags Found: 1
