In [5]:
import time
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup
from scipy import stats
from selenium import webdriver # used to control a web browser programmatically
from selenium.webdriver.chrome.service import Service  # specify the path to the chromedriver executable that controls browser
from selenium.webdriver.support.ui import WebDriverWait # this is useful for waiting elements to load an webpage
from selenium.webdriver.support import expected_conditions as EC # use with webdriverwait. for example, waiting until an element is visible or click
from selenium.webdriver.support.ui import Select # the select class is used to interact with dropdown menus in forms on web pages
from selenium.webdriver.common.by import By # used to locate elements
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# Search-results URL template used for scraping; replace {i} with the page number:
# https://www.amazon.in/s?i=watches&bbn=2563504031&rh=n%3A2563504031%2Cp_89%3ACasio%7CFastrack%7CFossil%7CSonata%7CTIMEX%7CTitan&dc&page={i}&qid=1728809727&rnid=3837712031&ref=sr_pg_{i}

In [7]:
# Start a Chrome session under Selenium control.
# ChromeDriverManager().install() returns the chromedriver path that is handed
# to Service; Options() is left at its defaults.
options = Options()
service = Service(executable_path=ChromeDriverManager().install())
driver = webdriver.Chrome(options=options, service=service)

def scrape_books(url, max_pages=2, scroll_pause=3):
    """Scrape watch listings from Amazon search-result pages into a DataFrame.

    Starting at ``url``, each page is opened with the module-level Selenium
    ``driver``, scrolled until the document height stops growing, parsed with
    BeautifulSoup, and every result card becomes one row. The "Next"
    pagination link is then followed until ``max_pages`` pages have been
    processed or there is no further page.

    Args:
        url: URL of the first search-results page. A falsy value (``None`` or
            ``''``) means nothing is fetched and an empty frame is returned.
        max_pages: Maximum number of result pages to visit.
        scroll_pause: Seconds to wait after each scroll so lazily loaded
            results can render.

    Returns:
        pandas.DataFrame with one row per result card and the columns Brand,
        Name, Price, Slashed_price, Review, Review_count and Discount. A field
        that is not present on a card is stored as the string ``'N/A'``.
    """
    # Output column -> (tag, class attribute) used to locate the value in a card.
    # NOTE(review): these class names mirror Amazon's markup at the time of
    # writing and need adjusting whenever the page layout changes.
    field_selectors = {
        'Brand': ('span', 'a-size-base-plus a-color-base'),
        'Name': ('span', 'a-size-base-plus a-color-base a-text-normal'),
        'Price': ('span', 'a-price-whole'),
        'Slashed_price': ('span', 'a-price a-text-price'),
        'Review': ('span', 'a-icon-alt'),
        'Review_count': ('span', 'a-size-base s-underline-text'),
        'Discount': ('div', 'a-row a-size-base a-color-base'),
    }

    book_data = []
    page_count = 0

    while url and page_count < max_pages:
        driver.get(url)

        # Keep scrolling to the bottom until the page height stops changing,
        # i.e. no further results are being lazy-loaded.
        last_height = driver.execute_script("return document.body.scrollHeight")
        while True:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(scroll_pause)
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        soup = BeautifulSoup(driver.page_source, 'html.parser')

        for card in soup.find_all('div', class_='a-section a-spacing-base a-text-center'):
            try:
                row = {}
                for column, (tag, css_class) in field_selectors.items():
                    # Look each element up once and reuse the result.
                    node = card.find(tag, class_=css_class)
                    row[column] = node.text.strip() if node is not None else 'N/A'
                book_data.append(row)
            except Exception as e:
                # Best effort: one malformed card must not abort the whole page.
                print(f"Error processing book: {e}")

        page_count += 1

        # Follow the explicit "Next" link. Taking the first <a> inside the
        # pagination container is wrong on every page after the first, because
        # the first anchor there is normally the "Previous" link.
        pager = soup.find('div', class_='a-section a-text-center s-pagination-container')
        next_link = pager.find('a', class_='s-pagination-next') if pager is not None else None
        href = next_link.get('href') if next_link is not None else None
        # urljoin resolves both relative and absolute hrefs against the current page.
        url = urljoin(url, href) if href else None

    return pd.DataFrame(book_data)

# Start scraping from the given results page. The browser is closed in a
# finally block so a failure inside scrape_books cannot leave a Chrome
# process running.
try:
    df_books = scrape_books('https://www.amazon.in/s?i=watches&bbn=2563504031&rh=n%3A2563504031%2Cp_89%3ACasio%7CFastrack%7CFossil%7CSonata%7CTIMEX%7CTitan&dc&page=3&qid=1728809727&rnid=3837712031&ref=sr_pg_3')
finally:
    driver.quit()  # Close the browser when done

In [9]:
# Show the DataFrame returned by scrape_books as this cell's output
df_books

Unnamed: 0,Brand,Name,Price,Slashed_price,Review,Review_count,Discount
0,Casio,Analog Black Dial Men's Watch - MTP-V300L-1AUD...,2884,"₹3,795₹3,795",4.2 out of 5 stars,902,"₹2,884₹2,884 M.R.P: ₹3,795₹3,795 (24% off)"
1,Casio,Enticer Analog Black Dial Men's Watch - MTP-VD...,3775,"₹4,195₹4,195",4.4 out of 5 stars,1027,"₹3,775₹3,775 M.R.P: ₹4,195₹4,195 (10% off)"
2,Casio,Vintage A168WEMB-1BDF Black Digital Dial Silve...,5295,,4.2 out of 5 stars,168,"₹5,295₹5,295"
3,Sonata,Sonata Poze Quartz Green Dial Silver Metal Str...,1295,"₹1,850₹1,850",3.9 out of 5 stars,12,"₹1,295₹1,295 M.R.P: ₹1,850₹1,850 (30% off)"
4,Sonata,Sonata Analog Champagne Dial Men's Watch-NN770...,719,₹899₹899,4.2 out of 5 stars,4219,₹719₹719 M.R.P: ₹899₹899 (20% off)
...,...,...,...,...,...,...,...
91,Fastrack,AnaDigi Black Dial Plastic Strap Unisex Analog...,1756,"₹2,195₹2,195",4.2 out of 5 stars,36,"₹1,756₹1,756 M.R.P: ₹2,195₹2,195 (20% off)"
92,Titan,Octane Quartz Chronograph White Dial Stainless...,8436,"₹10,545₹10,545",4.4 out of 5 stars,104,"₹8,436₹8,436 M.R.P: ₹10,545₹10,545 (20% off)"
93,Titan,Bandhan Quartz Analog with Date Silver Dial St...,7195,"₹7,995₹7,995",4.4 out of 5 stars,125,"₹7,195₹7,195 M.R.P: ₹7,995₹7,995 (10% off)"
94,Titan,Karishma Stainless Steel Analog Silver Dial Me...,2676,"₹3,395₹3,395",4.3 out of 5 stars,566,"₹2,676₹2,676 M.R.P: ₹3,395₹3,395 (21% off)"


In [15]:
# Persist the scraped listings. index=False keeps the auto-generated row
# numbers out of the file; otherwise they are written as a nameless first
# column that comes back as "Unnamed: 0" when the CSV is read again.
df_books.to_csv('Amazon_watch_finals.csv', index=False)

In [47]:
import os

import pandas as pd

# Load the cleaned workbook. The location can be overridden with the
# AMAZON_WATCH_XLSX environment variable so the notebook is not tied to one
# machine; the original path remains the default.
data = pd.read_excel(
    os.environ.get("AMAZON_WATCH_XLSX", "C:/Users/kural/Amazon_watch_final_excelworkbook_2.xlsx")
)
data

Unnamed: 0,Brand,Name,Price,Slashed_price_tag,Review,Reviews,Review_count,Discount_percentage
0,Fastrack,Fastrack Limitless Glide Advanced UltraVU HD D...,1199,2799,3.9 out of 5 stars,3.9,2392,(57%off)
1,TIMEX,TIMEX Analog Blue Dial Men's Watch-TW000U931,1647,3295,4.2 out of 5 stars,4.2,5379,(50%off)
2,Sonata,Quartz Analog with Day and Date White Dial Sta...,956,1149,4.1 out of 5 stars,4.1,387,(17%off)
3,Titan,Mens Analog Black Dial Black Band Stainless St...,4793,5995,4.3 out of 5 stars,4.3,90,(20%off)
4,Titan,Mens Analog Green Dial Brown Band Leather Watch,14708,0,4.0 out of 5 stars,4.0,26,(0%off)
...,...,...,...,...,...,...,...,...
1157,Sonata,Digital Grey Dial Men's Watch-NL77072PP02/NP77...,576,649,4.3 out of 5 stars,4.3,1809,(11%off)
1158,Fastrack,Analog Unisex-Adult Watch,597,995,4.3 out of 5 stars,4.3,136,(40%off)
1159,Fastrack,Analog Men's Watch (Dial Colored Strap),1396,1850,4.2 out of 5 stars,4.2,2661,(25%off)
1160,Sonata,Quartz Analog Black Dial Stainless Steel Strap...,796,995,4.3 out of 5 stars,4.3,939,(20%off)


In [19]:
# One-time setup (run once in a notebook cell if the packages are missing):
# %pip install pandas mysql-connector-python openpyxl

In [29]:
import os
from getpass import getpass

import pandas as pd
import mysql.connector
from mysql.connector import errorcode

# Load the cleaned dataset. Override the location with AMAZON_WATCH_CSV; the
# original path remains the default.
data = pd.read_csv(
    os.environ.get('AMAZON_WATCH_CSV', 'C:/Users/kural/Amazon_watch_final_csv_1.csv')
)

# MySQL database connection.
# The password is never stored in the notebook: it is read from the
# MYSQL_PASSWORD environment variable, or prompted for when that is unset.
# The other settings can be overridden the same way and default to the
# values this notebook has always used.
config = {
    "user": os.environ.get("MYSQL_USER", "root"),
    "password": os.environ.get("MYSQL_PASSWORD") or getpass("MySQL password: "),
    "host": os.environ.get("MYSQL_HOST", "localhost"),
    "database": os.environ.get("MYSQL_DATABASE", "amazon_data"),
    "raise_on_warnings": True
}

# Columns copied into the Products table, in INSERT order.
PRODUCT_COLUMNS = [
    'Brand',
    'Name',
    'Price',
    'Slashed_price_tag',
    'Review',
    'Reviews',
    'Review_count',
    'Discount_percentage',
]

INSERT_PRODUCT_SQL = """
    INSERT INTO Products (Brand,Name,Price,Slashed_price_tag,Review,Reviews,Review_count,Discount_percentage)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
"""

# One parameter tuple per row. Missing values (NaN) are mapped to None so
# they are stored as SQL NULL instead of being sent to the server as "nan".
product_rows = [
    tuple(None if pd.isna(row[col]) else row[col] for col in PRODUCT_COLUMNS)
    for _, row in data.iterrows()
]

# Pre-bind both handles so the finally block is safe even when connect()
# itself fails (otherwise a NameError there would hide the real error).
connection = None
cursor = None

try:
    # Connect to the database
    connection = mysql.connector.connect(**config)
    cursor = connection.cursor()

    # Insert all rows into the Products table with one parameterised statement
    cursor.executemany(INSERT_PRODUCT_SQL, product_rows)

    # Commit the transaction; nothing is persisted if an error occurs before this
    connection.commit()

except mysql.connector.Error as err:
    if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
        print("Something is wrong with your user name or password")
    elif err.errno == errorcode.ER_BAD_DB_ERROR:
        print("Database does not exist")
    else:
        print(err)
finally:
    if cursor is not None:
        cursor.close()
    if connection is not None:
        connection.close()

# Statistical analysis

## One-way ANOVA

In [49]:
# Significance level used to decide whether to reject the null hypothesis
ALPHA = 0.05

# Grouping prices by brand.
# Rows without a price are dropped first: a single NaN makes f_oneway return
# NaN, and NaN < ALPHA is False, so the cell would otherwise print
# "Fail to reject" without a valid test having been run.
priced = data.dropna(subset=['Price'])
grouped_data = [group['Price'].values for name, group in priced.groupby('Brand')]

# Performing the ANOVA test (stats is scipy.stats, imported in the imports cell)
f_statistic, p_value = stats.f_oneway(*grouped_data)

# Output the results
print(f"F-statistic: {f_statistic:.4f}, P-value: {p_value:.4f}")

# Interpretation
if p_value < ALPHA:
    print("Reject the null hypothesis: There are significant differences in average prices among the brands.")
else:
    print("Fail to reject the null hypothesis: No significant differences in average prices among the brands.")

F-statistic: 195.2426, P-value: 0.0000
Reject the null hypothesis: There are significant differences in average prices among the brands.


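# Hypothesis formulation

The one-way ANOVA above compares the mean price across brands at the 5% significance level.

- **Null hypothesis ($H_0$):** There is no significant difference in average prices among the different brands.
- **Alternative hypothesis ($H_1$):** There is a significant difference in average prices among the different brands, i.e. at least one brand's mean price differs from the others.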