In [108]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import numpy as np

In [152]:
# Function to extract product titles
def extract_titles(soup):
    """Extract product titles from the HTML soup."""
    titles = []
    for h2_tag in soup.find_all("h2"):
        span = h2_tag.find("span")
        if span and span.text.strip():
            titles.append(span.text.strip())
    return titles

# Function to extract product prices
def extract_price(soup):
    """Extract product prices from the HTML soup."""
    prices = []
    for price_tag in soup.find_all("span", attrs={"class": "a-price-whole"}):
        if price_tag.text.strip():
            prices.append(price_tag.text.strip())
    return prices

def extract_image_urls(soup):
    image_urls = []
    for img_tag in soup.find_all("img"):
        img_url = img_tag.get("src")
        if img_url and "https://" in img_url:
            image_urls.append(img_url)
    return image_urls

def extract_ratings(soup):
    ratings = []
    # Find all i tags with the specified data-cy attribute
    for rating_tag in soup.find_all("i", attrs={"data-cy": "reviews-ratings-slot"}):
        # Find the span tag inside the i tag
        span = rating_tag.find("span", class_="a-icon-alt")
        if span and span.text.strip():
            ratings.append(span.text.strip())
    return ratings


In [154]:
# Main section to run the scraping
if __name__ == '__main__':
    # Define URL and Headers to mimic browser requests
    URL = "https://www.amazon.in/s?k=samsung+under+20000+5g+phone&crid=1D9GVMTW0PQ82&sprefix=samsung+under%2Caps%2C366&ref=nb_sb_ss_ts-doa-p_5_13"

    HEADERS = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
        "Accept-Language": "en-US, en;q=0.5"
    }

    try:
        # HTTP Request to get the HTML content of the main search page
        webpage = requests.get(URL, headers=HEADERS)

        if webpage.status_code != 200:
            print(f"Failed to fetch the main page. Status code: {webpage.status_code}")
            exit()

        # Parse the HTML content with BeautifulSoup
        soup = BeautifulSoup(webpage.content, "html.parser")

        # Dictionary to store extracted product details
        d = {
            "Title": [], "Price": [], "Image_URL": [], "Ratings": []
        }

        # Extract product titles and prices using the functions
        product_titles = extract_titles(soup)
        product_prices = extract_price(soup)
        product_image_urls = extract_image_urls(soup)
        product_ratings = extract_ratings(soup)

        # Ensure that the lists have the same length
        min_length = min(len(product_titles), len(product_prices), len(product_image_urls), len(product_ratings))
        d['Title'] = product_titles[:min_length]
        d['Price'] = product_prices[:min_length]
        d['Image_URL'] = product_image_urls[:min_length]
        d['Ratings'] = product_ratings[:min_length]

        # Convert dictionary to a DataFrame
        amazon_df2 = pd.DataFrame.from_dict(d)

        # Save the DataFrame to a CSV file
        amazon_df2.to_csv("amazon_data.csv", header=True, index=False)
        print("Data successfully saved to 'amazon_data.csv'.")

        print(amazon_df2)

    except requests.RequestException as e:
        print(f"An error occurred while making the request: {e}")

    except Exception as ex:
        print(f"An unexpected error occurred: {ex}")


Data successfully saved to 'amazon_data.csv'.
                                                Title   Price  \
0                             1-16 of 446 results for  16,499   
1   Samsung Galaxy A16 5G (Light Green, 6GB RAM, 1...  12,999   
2   Samsung Galaxy M15 5G Prime Edition (Stone Gre...  12,999   
3   Samsung Galaxy M15 5G Prime Edition (Celestial...  17,999   
4   OnePlus Nord CE4 Lite 5G (Super Silver, 8GB RA...  11,609   
5   Samsung Galaxy A14 5G (Dark Red, 4GB, 128GB St...  17,490   
6   Samsung Galaxy M33 5G (Mystique Green, 8GB, 12...  12,999   
7   Samsung Galaxy M15 5G Prime Edition (Blue Topa...  19,287   
8      Samsung Galaxy f55 5g Apricot Crush 8GB 128 GB  14,499   
9   Samsung Galaxy M15 5G Prime Edition (Celestial...  11,999   
10  Samsung Galaxy M15 5G Prime Edition (Stone Gre...  14,499   
11  Samsung Galaxy M15 5G Prime Edition (Blue Topa...  19,990   
12  Samsung Galaxy M53 5G (Deep Ocean Blue, 6GB, 1...  11,999   
13  Samsung Galaxy M15 5G Prime Edition (Cel