In [9]:
# importing required libraries, module & class
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from amazoncaptcha import AmazonCaptcha
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, WebDriverException
from bs4 import BeautifulSoup
import os
import pandas as pd
import openpyxl

# Login credentials are read from the environment so they never live in the notebook.
username = os.getenv('SCRAPER_USERNAME')
password = os.getenv('SCRAPER_PASSWORD')


# Path to the chromedriver binary. Set the CHROMEDRIVER_PATH environment variable
# to override it; when unset or empty, the original hard-coded location is used.
chromedriver_path = (
    os.getenv('CHROMEDRIVER_PATH')
    or "C:/Users/Admin/Desktop/chromedriver-win64/chromedriver"
)
s = Service(chromedriver_path)

# Start Chrome through the service object and open the Amazon home page.
driver = webdriver.Chrome(service=s)
driver.get('https://www.amazon.com/')

# Captcha Handling
# If a captcha image shows up on the landing page, solve it with AmazonCaptcha and
# submit the form. Problems here are reported but do not stop the rest of the cell.
try:
    # Give the page up to 10 seconds to present the captcha image, then read its source URL.
    captcha_image = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located(
            (By.XPATH, """/html/body/div/div[1]/div[3]/div/div/form/div[1]/div/div/div[1]/img""")
        )
    )
    image_src = captcha_image.get_attribute('src')

    # Using AmazonCaptcha Module to retrieve captcha value
    captcha = AmazonCaptcha.fromlink(image_src)
    captcha_value = captcha.solve()

    # Type the solved value into the captcha input and press the continue button.
    # send_keys() and click() return None, so their results are not kept.
    input_field = driver.find_element(By.XPATH, """/html/body/div/div[1]/div[3]/div/div/form/div[1]/div/div/div[2]/input""")
    input_field.send_keys(captcha_value)
    driver.find_element(By.XPATH, """/html/body/div/div[1]/div[3]/div/div/form/div[2]/div/span/span/button""").click()
except TimeoutException:
    # The wait above expired without finding the captcha image. That is the normal
    # path when no captcha page is served, so it is not reported as a failure.
    print("No captcha image found within 10 seconds; continuing without solving one")
except Exception as e:
    print(f"Captcha handling failed: {e}")


# Log in, search for "laptops", and write name / price / rating of each listed
# product to Laptop_prices.xlsx, walking through the result pages one by one.
try:
    # Fail with a clear message when the credentials are missing, instead of
    # handing None to send_keys further down.
    if not username or not password:
        raise RuntimeError("SCRAPER_USERNAME and SCRAPER_PASSWORD must be set in the environment")

    # locating the accounts field and signing in
    # find_element(By.XPATH, ...) is used throughout: the find_element_by_xpath
    # shortcut is not available in Selenium 4.3 and later.
    driver.find_element(By.XPATH, "/html/body/div[1]/header/div/div[1]/div[3]/div/a[2]/div/span").click()

    # Locating username field and inserting username retrieved above from env file.
    user = driver.find_element(By.XPATH, """/html/body/div[2]/div[1]/div[2]/div/div[2]/div[2]/div[1]/form/div/div/div/div[1]/input[1]""")
    user.send_keys(username)
    driver.find_element(By.XPATH, '/html/body/div[2]/div[1]/div[2]/div/div[2]/div[2]/div[1]/form/div/div/div/div[2]/span/span/input').click()

    # Locating password field and inserting password retrieved above from env file.
    pwd = driver.find_element(By.XPATH, """/html/body/div[2]/div[1]/div[2]/div/div[2]/div/div[2]/div/form/div/div[1]/input""")
    pwd.send_keys(password)
    driver.find_element(By.XPATH, """/html/body/div[2]/div[1]/div[2]/div/div[2]/div/div[2]/div/form/div/div[2]/span/span/input""").click()
    # NOTE(review): this is printed after submitting the form; the code does not
    # check that the sign-in was actually accepted.
    print('login success')

    # Data Scraping
    search_bar = driver.find_element(By.XPATH, """/html/body/div[1]/header/div/div[1]/div[2]/div/form/div[2]/div[1]/input""")
    search_bar.send_keys("laptops")
    search_bar.send_keys(Keys.ENTER)

    # wait for the page to load
    WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, """/html/body/div[1]/div[1]/div[1]/div[1]/div/span[1]/div[1]""")))

    # Create the workbook in memory and write the header row.
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.append(['Name', 'Price', 'Review'])

    result_xpath = "//div[@data-component-type='s-search-result']"
    max_pages = 21  # same bound as the original `while page <= 20` loop (pages 1..21)

    try:
        for page in range(max_pages):
            print(f"Scraping page {page + 1}")

            # Keep a snapshot of the page on disk, and parse the same text directly
            # instead of reading the file back.
            html_page = driver.page_source
            with open('Amazon.html', 'w', encoding='utf-8') as file:
                file.write(html_page)
            print(f"Page {page + 1} saved successfully")

            soup = BeautifulSoup(html_page, 'html.parser')

            # NOTE(review): this class string contains what looks like a generated
            # token; it may stop matching if the site markup changes.
            products = soup.find_all('div', class_="puis-card-container s-card-container s-overflow-hidden aok-relative puis-include-content-margin puis puis-v1ptg7iq6f5f4u2smovb1ai55ug s-latency-cf-section puis-card-border")
            print(f'Number of products: {len(products)}')

            for product in products:
                try:
                    product_name = product.find("span", class_="a-size-medium a-color-base a-text-normal").text.strip()
                    price = product.find("span", class_='a-price-whole').text.strip()
                    price_val_dollars = ''.join(filter(str.isdigit, price))
                    rating = product.find("span", class_="a-icon-alt").text.strip()
                    rating_val = rating.split()[0]
                except (AttributeError, IndexError):
                    # A field is missing (find() returned None) or the rating text
                    # is empty: skip this product without writing a partial row.
                    continue

                # Written only after every field was extracted; append() puts the
                # row directly below the last one.
                ws.append([product_name, price_val_dollars, rating_val])

            # Check for next button to move to the next page
            try:
                next_button = driver.find_element(By.XPATH, "//a[contains(@class, 's-pagination-next')]")
                if next_button and next_button.is_enabled():
                    print("Next button found")
                    print("*******************************")

                    # Remember an element of the current page. A search result is
                    # already present before the click, so waiting only for presence
                    # would return at once and the next snapshot could be taken
                    # while the new page is still loading.
                    old_results = driver.find_elements(By.XPATH, result_xpath)
                    stale_marker = old_results[0] if old_results else next_button

                    next_button.click()
                    WebDriverWait(driver, 10).until(EC.staleness_of(stale_marker))
                    WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, result_xpath)))
                else:
                    print('No more pages available or next button not enabled')
                    break
            except NoSuchElementException:
                print("No more pages found")
                break
    finally:
        # Save whatever was collected, even when a later page failed, so an error
        # part-way through does not discard the rows already scraped.
        wb.save('Laptop_prices.xlsx')
        print("Data successfully saved to Laptop_prices.xlsx")

    # Exception Handling
except NoSuchElementException as e:
    print(f"No Such element found: {e}")
except TimeoutException as e:
    print(f"Time out. Waiting to load: {e}")
except WebDriverException as e:
    print(f"Web driver function error: {e}")
except Exception as e:
    print(f"An unexpected error occured: {e}")
finally:
    # Always close the browser, whether the run succeeded or not.
    driver.quit()



login success
Scraping page 1
Page 1 saved successfully
Number of products: 22
Next button found
*******************************
Scraping page 2
Page 2 saved successfully
Number of products: 22
Next button found
*******************************
Scraping page 3
Page 3 saved successfully
Number of products: 6
Next button found
*******************************
Scraping page 4
Page 4 saved successfully
Number of products: 1
Next button found
*******************************
Scraping page 5
Page 5 saved successfully
Number of products: 10
Next button found
*******************************
Scraping page 6
Page 6 saved successfully
Number of products: 22
Next button found
*******************************
Scraping page 7
Page 7 saved successfully
Number of products: 22
Next button found
*******************************
Scraping page 8
Page 8 saved successfully
Number of products: 6
Next button found
*******************************
Scraping page 9
Page 9 saved successfully
Number of products: 22
Next