In [None]:
import requests
import csv
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from itertools import cycle

In [None]:
def rotate_proxies(proxies):
    """
    Return the next proxy from ``proxies`` in round-robin order.

    Successive calls walk through the pool one entry at a time and wrap
    around to the start after the last entry. The very first call returns
    the first proxy.

    Args:
        proxies: Iterable of proxy entries (e.g. ``"host:port"`` strings).

    Returns:
        The proxy selected for this call.

    Raises:
        ValueError: If ``proxies`` is empty.
    """
    pool = list(proxies)
    if not pool:
        raise ValueError("proxies must contain at least one entry")

    # The position is stored on the function object so that it survives
    # between calls; building a fresh itertools.cycle on every call would
    # always hand back the first proxy. The counter is shared by every pool
    # passed in and is reduced modulo the current pool size.
    position = getattr(rotate_proxies, "_position", 0)
    rotate_proxies._position = position + 1
    return pool[position % len(pool)]

In [None]:
# Load the proxy list from a text file, one proxy per line
def read_proxies(file_path):
    """
    Read proxies from ``file_path``.

    Each line is stripped of surrounding whitespace; lines that are empty
    after stripping are skipped.

    Returns:
        A list of the remaining lines, in file order.
    """
    proxies = []
    with open(file_path, 'r') as handle:
        for raw_line in handle:
            entry = raw_line.strip()
            if entry:
                proxies.append(entry)
    return proxies

In [None]:
# Check whether a proxy is usable by sending a test request through it
def check_proxies(proxy, test_url='URL', timeout=5):
    """
    Report whether a GET request routed through ``proxy`` succeeds.

    Args:
        proxy: Proxy address used for both HTTP and HTTPS traffic.
        test_url: URL to request. The default is a placeholder; put the
            URL to be scraped here (or pass it in) for a meaningful check.
        timeout: Timeout, in seconds, handed to ``requests.get``.

    Returns:
        ``True`` when the response status code is 200. ``False`` for any
        other status code or when the request raises a
        ``requests.RequestException``.
    """
    try:
        res = requests.get(
            test_url,
            proxies={"http": proxy, "https": proxy},
            timeout=timeout,
        )
    except requests.RequestException:
        # Connection errors, timeouts and invalid URLs all mean "unusable".
        return False
    return res.status_code == 200

In [None]:
# Scrape item names and prices from a page and append them to a CSV file
def _normalise_price(price):
    """Replace the rupee sign with the ASCII 'Rs.' marker, wherever it sits."""
    return price.replace('₹', 'Rs.')


def _extract_items(html):
    """Parse page HTML into a list of {'Item Name', 'Item Price'} rows."""
    soup = BeautifulSoup(html, 'html.parser')
    rows = []
    for item_info in soup.find_all('article', class_='itemInfo'):
        name_tag = item_info.find('p', class_='itemName')
        price_tag = item_info.find('span', class_='itemPrice')
        link = name_tag.a if name_tag is not None else None
        # Skip incomplete entries instead of failing on a missing tag.
        if link is None or price_tag is None:
            continue
        rows.append({
            'Item Name': link.text.strip(),
            'Item Price': _normalise_price(price_tag.text.strip()),
        })
    return rows


def _append_rows(csv_file_path, rows):
    """Append rows to the CSV file, writing the header only if it is new/empty."""
    import os

    needs_header = (
        not os.path.exists(csv_file_path)
        or os.path.getsize(csv_file_path) == 0
    )
    with open(csv_file_path, 'a', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=['Item Name', 'Item Price'])
        if needs_header:
            writer.writeheader()
        writer.writerows(rows)


def r_data(sites, proxy):
    """
    Scrape one page through ``proxy`` and append the results to 'scraped.csv'.

    Opens the page in Edge, waits up to 10 seconds for the
    'p.listedItemDetails span' elements, clicks each of them, then parses
    the resulting page source for 'article.itemInfo' entries and appends
    their name and price to the CSV file.

    Args:
        sites: URL of the page to scrape (a single URL despite the name).
        proxy: Proxy address passed to Edge via ``--proxy-server``.

    Returns:
        None. A browser or request failure is reported on stdout and the
        CSV file is left untouched.
    """
    from selenium.common.exceptions import WebDriverException

    csv_file_path = 'scraped.csv'

    # The options must be built first and handed to the driver; otherwise
    # the proxy setting is never applied to the browser session.
    edge_options = webdriver.EdgeOptions()
    edge_options.add_argument("--proxy-server={}".format(proxy))

    driver = None
    try:
        driver = webdriver.Edge(options=edge_options)
        driver.get(sites)

        span_elements = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located(
                (By.CSS_SELECTOR, 'p.listedItemDetails span')
            )
        )
        for span_element in span_elements:
            span_element.click()

        # Parse the page as it looks after the clicks.
        data_list = _extract_items(driver.page_source)
    except (WebDriverException, requests.RequestException) as exc:
        # Selenium timeouts and browser errors derive from WebDriverException.
        print(f"Failed to scrape {sites}: {exc}")
        return
    finally:
        # Always release the browser, even when scraping fails.
        if driver is not None:
            driver.quit()

    _append_rows(csv_file_path, data_list)
    print(f'List saved to CSV file: {csv_file_path}')

In [None]:
def main():
    """
    Scrape every configured site through the first working proxy.

    Proxies are read from the 'proxy_list' file. For each site the proxies
    are tried in file order; the first one that passes ``check_proxies`` is
    used for ``r_data`` and the remaining proxies are not tried for that
    site. A message is printed when no proxy works for a site.
    """
    # Path to the file containing proxies
    proxy_file_path = "proxy_list"
    proxies_list = read_proxies(proxy_file_path)

    # Sites from where you want the data
    sites = [
        "URL1",
        "URL2",
        "URL3",
    ]

    for site in sites:
        for current_proxy in proxies_list:
            if check_proxies(current_proxy):
                r_data(site, current_proxy)
                break
            print(f"Proxy {current_proxy} Not Working. Moving to the next proxy.")
        else:
            # Reached only when no proxy passed the check (or none exist).
            print(f"No working proxy found for {site}. Skipping.")


if __name__ == "__main__":
    main()