In [241]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import schedule
import time

In [242]:
def get_title(soup):
  try:
    title = soup.find("span",attrs={"id":"productTitle"})
    title_value = title.text # content inside span tag
    title_string = title_value.strip()

  except AttributeError:
    title_string = ""
  
  return title_string

In [243]:
def get_price(soup):
    try:
        price = soup.find("span", attrs={"class": "a-price-whole"}).text.strip()[:-1]
        # remove the comma from the price
        price = price.replace(",", "")
    except:
        price = ""
    return price


In [244]:
def get_discount(soup):
    try:
        discount = soup.find("span", attrs={"class": "a-size-large a-color-price savingPriceOverride aok-align-center reinventPriceSavingsPercentageMargin savingsPercentage"}).text.strip()[1:-1]
    except:
        discount = ""
    return discount


In [245]:
def get_rating(soup):
  try:
        rating = soup.find("span", attrs={"class": "a-icon-alt"}).text[:3]
  except:
        rating = ""
  return rating

In [246]:
def get_review(soup):
    try:
        review_div = soup.find("div", class_="a-expander-content reviewText review-text-content a-expander-partial-collapse-content")
        if review_div:
            review_span = review_div.find("span")
            if review_span:
                review = review_span.get_text(strip=True)
                return review
    except:
        return ""


In [247]:
HEADERS = ({'User-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36','Accept-Language': 'en-US, en;q=0.5'})


In [248]:
URLS = {
    "Apple Iphone 16 Plus" : "https://www.amazon.in/dp/B0DGJ65N7V",
    "Apple IPhone 15": "https://www.amazon.in/Apple-iPhone-15-128-GB/dp/B0CHX2F5QT?th=1",
    "Apple Iphone 15 Plus":"https://www.amazon.in/Apple-iPhone-15-Plus-128/dp/B0CHX6X2WW",
    "Apple Iphone 16": "https://www.amazon.in/iPhone-16-128-GB-Ultramarine/dp/B0DGJ7TGDR/ref=sr_1_1_sspa?adgrpid=60439252698&dib=eyJ2IjoiMSJ9.wa1Mduoi5z5GFkGrEynB8VggXnizsSnVkNuANSqWzf4I8suqY3-DogLYDbpQtZQfwN9ADxtbwlbRJ3jrAe0eEfIFwVEWrjECSu93OklaGovdEkUDVrXQqdz6vNAICfnK0cnJIvQnIlGWQmUiAiQf4oAsDSXAJsA3Fgi_uMgEKNNdbH9Xg8ISFZCuGy6-f60gLLn0YoZgFs6Gio-htkNQa6GcgUx9vDZhY3-goIz8vz8.qvZZZMB9sHyPjqKRT3nnvPSvxsj9rGBi27LRq4Sr124&dib_tag=se&ext_vrnc=hi&hvadid=590592770130&hvdev=c&hvlocphy=9148909&hvnetw=g&hvqmt=e&hvrand=15429866666308909833&hvtargid=kwd-298343829815&hydadcr=15396_2268336&keywords=iphone%2B16&mcid=a15f4ef45feb367fb8ee189bf63ff053&qid=1737907711&sr=8-1-spons&sp_csd=d2lkZ2V0TmFtZT1zcF9hdGY&th=1"
    }

In [249]:
# Scrape Price, Discount, Rating, Review for each product

In [250]:
# Dataframe to store the scraped data - Product_name, Price, Discount, Date 
competitor_data_today = pd.DataFrame(columns=["Product_name", "Price", "Date"])


In [251]:
# Create review dataframe
# reviews_today = product_name, review
reviews_today = pd.DataFrame(columns=["Product_name", "Review"])

In [252]:
for product, url in URLS.items():
    # create a dictionary to store the data
    competitor_data = {"Product_name": [], "Price": [], "Discount": [], "Date": time.strftime("%Y-%m-%d")}
    reviews_data = {"Product_name": [], "Review": []}
    # get the page content
    page = requests.get(url, headers=HEADERS)
    # create a BeautifulSoup object
    soup = BeautifulSoup(page.content, "html.parser")
    # get the title
    title = get_title(soup)
    # get the price
    price = get_price(soup)
    # get the discount
    discount = get_discount(soup)
    # get the review
    review = get_review(soup)
    # store the data in the dictionary
    # competitor_data["Product_name"].append(title)
    competitor_data["Product_name"].append(product)
    competitor_data["Price"].append(price)
    competitor_data["Discount"].append(discount)

    # Add the data to the dataframe
    competitor_data_today = pd.concat([competitor_data_today, pd.DataFrame(competitor_data)])

    # store the review data in the dictionary
    reviews_data["Product_name"].append(product)
    reviews_data["Review"].append(review)

    # Add the review data to the dataframe
    reviews_today = pd.concat([reviews_today, pd.DataFrame(reviews_data)])
    


In [253]:
competitor_data_today

Unnamed: 0,Product_name,Price,Date,Discount
0,Apple Iphone 16 Plus,,2025-01-26,
0,Apple IPhone 15,,2025-01-26,
0,Apple Iphone 15 Plus,,2025-01-26,
0,Apple Iphone 16,,2025-01-26,


In [254]:
reviews_today

Unnamed: 0,Product_name,Review
0,Apple Iphone 16 Plus,
0,Apple IPhone 15,
0,Apple Iphone 15 Plus,
0,Apple Iphone 16,Excellent product from Apple


In [255]:
# Save the data to a CSV file with today's date
today = time.strftime("%Y-%m-%d")
competitor_data_today.to_csv(f"competitor_data_{today}.csv", index=False)


In [256]:
# Save the review data to a CSV file with today's date
reviews_today.to_csv(f"reviews_data_{today}.csv", index=False)