In [None]:
!pip install selenium
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service

# Resolve the ChromeDriver executable through webdriver_manager and wrap the
# result in a Selenium Service object.
chromedriver_path = ChromeDriverManager().install()
service = Service(chromedriver_path)

# Start the Chrome session used by the rest of the script.
driver = webdriver.Chrome(service=service)
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException, TimeoutException
import pandas as pd
import time

# Location of the Excel workbook that lists the links to check.
# The value below is a blank placeholder: put the real file name here.
file_path = " "

# Load the workbook into a DataFrame.
df = pd.read_excel(io=file_path)

# Define a function to check the status of each link
def check_link(url, index, wait_seconds=5):
    """Open ``url`` in the module-level Selenium ``driver`` and classify it.

    The checks run in a fixed order and the first one that matches decides
    the status: redirect to a known "blocked" host, a known error text on
    the page, an extra browser window, an element with class
    ``popup-overlay``. If none match the link is considered accessible.

    Args:
        url: Link to open. Missing values (``None``/``NaN``) and empty or
            whitespace-only strings are reported as ``'No Link'`` without
            touching the browser.
        index: Row identifier; only used to prefix the progress messages.
        wait_seconds: Seconds to sleep after navigation so the page can load
            and redirect. Defaults to 5.

    Returns:
        A status string: ``'No Link'``, ``'Accessible'``, ``'Timeout Error'``
        or a string starting with ``'Error:'`` that describes the failure.
        WebDriver exceptions are converted into a status, not raised.
    """
    # A blank cell is not a link; passing it to driver.get() would only yield
    # an opaque WebDriver error message.
    if pd.isna(url) or (isinstance(url, str) and not url.strip()):
        status = 'No Link'
        print(f"{index}: No Link")
        return status

    # Substrings of the final URL that mean the link bounced to a landing or
    # captcha page instead of the shared content.
    blocked_redirects = (
        "google.com/sorry",
        "gemini.google.com",
        "aistudio.google.com",
        "copilot.microsoft.com",
    )
    # Double quotes inside the XPath because one of the texts contains an
    # apostrophe.
    error_text_xpath = (
        "//*[contains(text(), \"Link doesn't exist\")"
        " or contains(text(), \"Bing AI\")"
        " or contains(text(), \"404 Not Found\")"
        " or contains(text(), \"Allow Google AI Studio to access Drive to view shared prompts\")]"
    )

    try:
        print(f"{index}: Opening URL: {url}")  # Debug: show the link being opened

        # Windows left open by an earlier link would otherwise make every
        # following link look like it opened a pop-up.
        _close_extra_windows()

        driver.get(url)
        time.sleep(wait_seconds)  # Give the page time to load / redirect

        # Get the current URL after any potential redirection
        current_url = driver.current_url
        print(f"{index}: Current URL after potential redirect: {current_url}")

        if any(marker in current_url for marker in blocked_redirects):
            status = f'Error: Redirected to {current_url}'
        elif _is_present(By.XPATH, error_text_xpath):
            status = 'Error: Page Not Found or Unable to Load Content'
        elif len(driver.window_handles) > 1:
            status = 'Error: Pop-up Window Detected'
        elif _is_present(By.CLASS_NAME, "popup-overlay"):  # Example: replace with actual pop-up class or ID
            status = 'Error: Pop-up Overlay Detected'
        else:
            status = 'Accessible'
    except TimeoutException:
        status = 'Timeout Error'
    except Exception as e:
        # Deliberately broad: one bad link must not abort the whole run.
        status = f'Error: {str(e)}'

    outcome = 'Success' if status == 'Accessible' else 'Failed'
    print(f"{index}: {outcome} - {status}")
    return status


def _is_present(by, locator):
    """Return True if the current page has an element matching the locator."""
    try:
        driver.find_element(by, locator)
    except NoSuchElementException:
        return False
    return True


def _close_extra_windows():
    """Close every window except the focused one; return how many were closed."""
    main_window = driver.current_window_handle
    extra_handles = [h for h in driver.window_handles if h != main_window]
    for handle in extra_handles:
        driver.switch_to.window(handle)
        driver.close()
    if extra_handles:
        # close() leaves the driver without a focused window; go back to the
        # window that is used for navigation.
        driver.switch_to.window(main_window)
    return len(extra_handles)

# Optional cap on how many rows are checked (handy for a quick trial run).
# Leave as None to process every row; set to an int to check only the first N.
test_limit = None

# Columns read from every input row.
required_columns = ['Link', 'File Name', 'Saved File Name']
# Column order of the results workbook; given explicitly so the header row is
# written even when no link turns out to be accessible.
result_columns = ['Index', 'File Name', 'Link', 'Saved File Name', 'Status']
output_path = "valid_checked_links_selenium_test_results.xlsx"

valid_links = []

try:
    # Fail fast: without this, a missing 'File Name' / 'Saved File Name'
    # column would only surface at the first accessible link.
    missing_columns = [name for name in required_columns if name not in df.columns]
    if missing_columns:
        raise KeyError(f"Input sheet is missing required column(s): {missing_columns}")

    rows_to_check = df if test_limit is None else df.head(test_limit)

    for i, row in rows_to_check.iterrows():
        # Get the URL from the 'Link' column
        url = row['Link']

        # Apply the function to each link
        status = check_link(url, i)

        # Only accessible links are kept, together with their file names
        if status == 'Accessible':
            valid_links.append({
                'Index': i,
                'File Name': row['File Name'],
                'Link': url,
                'Saved File Name': row['Saved File Name'],
                'Status': status,
            })

    # Convert the valid links to a DataFrame and save them to a new Excel file
    valid_links_df = pd.DataFrame(valid_links, columns=result_columns)
    valid_links_df.to_excel(output_path, index=False)
finally:
    # Always close the browser session, even if the run was interrupted or
    # failed part-way through.
    driver.quit()

print(f"Link checking complete. Valid links saved to '{output_path}'.")
