In [None]:
## INSTALL REQUIRED LIBRARIES

pip install requests beautifulsoup4 pandas openpyxl schedule lxml fake_useragent

In [7]:
#API_KEY = "xxxxxxxxxxxxxxxxxxxxx"
#url = "https://www.amazon.com/s?k=laptops"

# Use ScraperAPI to bypass Amazon’s blocks
#proxy_url = f"http://api.scraperapi.com?api_key={API_KEY}&url={url}"

#response = requests.get(proxy_url)

#print(response.text)  # Check if the HTML is retrieved successfully

In [None]:
## SCRAPE LAPTOP NAMES AND PRICES

import requests   # To fetch the webpage
from bs4 import BeautifulSoup   # To parse HTML
import pandas as pd   # To handle data
from fake_useragent import UserAgent   # To avoid getting blocked by Amazon

# Amazon search results page for laptops.
# Module-level global: scrape_amazon() reads this name directly rather than
# receiving the address as an argument.
url = "https://www.amazon.com/s?k=laptops"

# Function to scrape product names and prices
def scrape_amazon(timeout=30):
    """Scrape product names and prices from the Amazon laptop search page.

    Requests the module-level ``url`` with a randomly generated User-Agent
    header and parses every ``s-search-result`` container in the response.

    Args:
        timeout: Seconds to wait for the server before giving up on the
            request. Defaults to 30.

    Returns:
        list: One ``[name, price]`` pair per product whose title element was
        found. ``price`` is the whole and fraction price texts joined
        together, or ``"N/A"`` when either part is missing. An empty list is
        returned when the request fails or the status code is not 200.
    """
    headers = {"User-Agent": UserAgent().random}  # Generate a random User-Agent

    try:
        # Without a timeout a stalled connection would block the caller forever
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.exceptions.RequestException as exc:
        # Network errors follow the same "report and return nothing" path
        # as an unsuccessful status code
        print(f"Failed to fetch Amazon page: {exc}")
        return []

    if response.status_code != 200:  # Anything but success yields no data
        print("Failed to fetch Amazon page")
        return []

    soup = BeautifulSoup(response.text, 'lxml')  # Parse HTML using lxml
    products = soup.find_all('div', {'data-component-type': 's-search-result'})  # Find product divs
    data = []  # List to store extracted [name, price] pairs

    for product in products:
        # Extract product name; skip the product when the title span is missing
        name_tag = product.find('span', class_='a-size-medium')
        if name_tag is None:
            continue
        name = name_tag.text.strip()

        # Extract price (some products may not have a price)
        price_whole = product.find('span', class_='a-price-whole')  # Whole part of price
        price_fraction = product.find('span', class_='a-price-fraction')  # Cents part

        if price_whole and price_fraction:  # Both parts are present
            price = f"{price_whole.text}{price_fraction.text}"  # Combine whole and fraction
        else:
            price = "N/A"  # No complete price found

        data.append([name, price])

    return data





In [None]:
## SAVE DATA TO AN EXCEL FILE

import openpyxl   # To handle Excel files

# Function to save extracted data into an Excel file
def save_to_excel(data, filename='amazon_laptop_prices.xlsx'):
    """Write extracted product data to an Excel workbook.

    Args:
        data: Rows to store, for example a list of ``[name, price]`` pairs.
            The rows are loaded into a DataFrame with the columns
            ``Product`` and ``Price``.
        filename: Path of the workbook to create. Defaults to
            ``'amazon_laptop_prices.xlsx'``.

    Returns:
        None. The workbook is written to disk and a confirmation is printed.
    """
    df = pd.DataFrame(data, columns=['Product', 'Price'])  # Convert data to DataFrame
    df.to_excel(filename, index=False)  # Save to Excel without the index column
    print(f"Data saved to {filename}")  # Report the path that was actually written



In [None]:
## AUTOMATE DAILY PRICE TRACKING

import schedule   # To automate task scheduling
import time    # To keep the script running

# Function to run the full process
#def run_price_tracker():
  #print("fetching latest laptop prices from Amazon...")   # Log message
  #data = scrape_amazon()  # Scrape Amazon page
  #if data:
    #save_to_excel(data)  # Save data if scraping was successful
  #else:
    #print("No data extracted. Skipping save step.")   # Handle errors

# Function to scrape Amazon
def scrape_amazon():
  """Return a fixed set of sample laptop listings.

  Mocked output standing in for real scraping logic: prints a progress
  message, then returns a new list of ``{"Product": ..., "Price": ...}``
  dictionaries on every call.
  """
  print("Fetching latest laptop prices from Amazon...")
  sample_listings = (
      ("Apple MacBook Air M1", "$899"),
      ("ASUS ROG Gaming Laptop", "$1,499"),
      ("HP Pavillion 15", "$699"),
      ("DELL XPS 13", "$1,199"),
  )
  # Build the records in listing order, with "Product" before "Price"
  return [{"Product": title, "Price": cost} for title, cost in sample_listings]

# Function to save data to Excel (need to define this function)
def save_to_excel(data):
  """Placeholder for the Excel writer: print each record instead of saving it.

  Args:
      data: Iterable of mappings that each provide a ``'Product'`` and a
          ``'Price'`` entry.
  """
  print("Saving data to Excel...")  # To be replaced with actual Excel writing logic
  for record in data:
    product, price = record['Product'], record['Price']
    print(f"{product} - {price}")  # One "<product> - <price>" line per record

# Function to run the full process **immediately**
def run_price_tracker():
    """Run one scrape-and-save cycle.

    Calls ``scrape_amazon()`` and hands the result to ``save_to_excel()``.
    When the scrape returns nothing, a message is printed and the save step
    is skipped.
    """
    scraped = scrape_amazon()  # Scrape Amazon page
    if not scraped:
        # Nothing came back, so there is nothing to save
        print("No data extracted. Skipping save step.")
        return
    save_to_excel(scraped)  # Save and print data immediately

# Run the full scrape-and-save cycle once, as soon as this cell executes
# (the daily scheduling loop below is commented out).
run_price_tracker()


# Schedule the script to run daily at 9:00am
#schedule.every().day.at("09:00").do(run_price_tracker)

# Keep the script running
#while True:
  #schedule.run_pending()  #check if it's time to run
  #time.sleep(60)  # wait 1 minute before checking again



Fetching latest laptop prices from Amazon...
Saving data to Excel...
Apple MacBook Air M1 - $899
ASUS ROG Gaming Laptop - $1,499
HP Pavillion 15 - $699
DELL XPS 13 - $1,199
