In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

# Define functions for extracting details
def get_title(soup):
    """Return the product title text found in ``soup``.

    Looks up the ``<span id="productTitle">`` element and returns its text
    with surrounding whitespace stripped. If the lookup raises
    ``AttributeError`` (for instance because ``find`` returned ``None``),
    an empty string is returned instead.
    """
    try:
        title_tag = soup.find("span", attrs={"id": "productTitle"})
        return title_tag.get_text(strip=True)
    except AttributeError:
        return ""

def get_price(soup):
    """Return the text of the ``a-price-whole`` span found in ``soup``.

    The text is returned with surrounding whitespace stripped. If the
    lookup raises ``AttributeError`` (for instance because ``find``
    returned ``None``), an empty string is returned instead.
    """
    try:
        price_tag = soup.find("span", attrs={"class": "a-price-whole"})
        return price_tag.get_text(strip=True)
    except AttributeError:
        return ""

def get_rating(soup):
    """Return the text of the ``a-icon-alt`` span found in ``soup``.

    The text is returned with surrounding whitespace stripped. If the
    lookup raises ``AttributeError`` (for instance because ``find``
    returned ``None``), an empty string is returned instead.
    """
    try:
        rating_tag = soup.find("span", attrs={"class": "a-icon-alt"})
        return rating_tag.get_text(strip=True)
    except AttributeError:
        return ""

def get_review_count(soup):
    """Return the review-count text found in ``soup``.

    Looks up the ``<span id="acrCustomerReviewText">`` element and returns
    its text with surrounding whitespace stripped. If the lookup raises
    ``AttributeError`` (for instance because ``find`` returned ``None``),
    an empty string is returned instead.
    """
    try:
        reviews_tag = soup.find("span", attrs={"id": "acrCustomerReviewText"})
        return reviews_tag.get_text(strip=True)
    except AttributeError:
        return ""

def get_availability(soup):
    """Return the availability text found in ``soup``.

    Looks up ``<div id="availability">``, then a ``<span>`` inside it, and
    returns that span's text with surrounding whitespace stripped. If any
    step raises ``AttributeError`` (for instance because a ``find`` call
    returned ``None``), the string ``"Not Available"`` is returned instead.
    """
    try:
        availability_box = soup.find("div", attrs={"id": "availability"})
        status_tag = availability_box.find("span")
        return status_tag.get_text(strip=True)
    except AttributeError:
        return "Not Available"

if __name__ == '__main__':

    # Search-result pages to crawl: 1 through 10 inclusive.
    page = range(1, 11)

    # Add your user agent
    HEADERS = ({'User-Agent':'', 'Accept-Language': 'en-US, en;q=0.5'})

    # Search URL template. The {page} placeholder is filled in once per
    # request below; without that substitution the literal text "{page}"
    # is sent to the server and only one page is ever fetched.
    URL = "https://www.amazon.com/s?k=mouse&page={page}"

    # Seconds to wait for a response before giving up on a request. Without
    # a timeout, requests.get can block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 30

    # Product links collected from every search-result page
    links_list = []

    for page_number in page:
        # HTTP Request for this page of search results
        try:
            webpage = requests.get(
                URL.format(page=page_number),
                headers=HEADERS,
                timeout=REQUEST_TIMEOUT,
            )
        except requests.RequestException as exc:
            # Best effort: one failed page should not abort the whole crawl.
            print(f"Skipping search page {page_number}: {exc}")
            continue

        # Soup Object containing all data
        soup = BeautifulSoup(webpage.content, "html.parser")

        # Fetch links as List of Tag Objects
        links = soup.find_all("a", attrs={'class':'a-link-normal s-no-outline'})

        # Loop for extracting links from Tag Objects
        for link in links:
            href = link.get('href')
            # An anchor without an href yields None, which cannot be
            # concatenated onto the site prefix further down.
            if href:
                links_list.append(href)

    # Dictionary to store product details
    d = {"title": [], "price": [], "rating": [], "reviews": [], "availability": []}

    # Loop for extracting product details from each link
    for link in links_list:
        try:
            new_webpage = requests.get(
                "https://www.amazon.com" + link,
                headers=HEADERS,
                timeout=REQUEST_TIMEOUT,
            )
        except requests.RequestException as exc:
            # Skip this product entirely so the column lists stay the same length.
            print(f"Skipping product {link}: {exc}")
            continue

        new_soup = BeautifulSoup(new_webpage.content, "html.parser")

        # Function calls to collect all necessary product information
        d['title'].append(get_title(new_soup))
        d['price'].append(get_price(new_soup))
        d['rating'].append(get_rating(new_soup))
        d['reviews'].append(get_review_count(new_soup))
        d['availability'].append(get_availability(new_soup))

    # Create DataFrame from dictionary
    amazon_df = pd.DataFrame.from_dict(d)

    # Replace empty strings with NaN in the 'title' column
    amazon_df['title'] = amazon_df['title'].replace('', np.nan)

    # Drop rows with NaN in the 'title' column (pages where no title was found)
    amazon_df = amazon_df.dropna(subset=['title'])

    # Export to CSV
    amazon_df.to_csv("amazon_data.csv", header=True, index=False)


In [None]:
# Show the scraped product table as this cell's output.
amazon_df

Unnamed: 0,title,price,rating,reviews,availability
1,Logitech M510 Wireless Computer Mouse for PC w...,27.0,4.6 out of 5 stars,"31,012 ratings",In Stock
2,Logitech MX Master 3S - Wireless Performance M...,83.0,4.6 out of 5 stars,9099,Not Available
4,Logitech G305 LIGHTSPEED Wireless Gaming Mouse...,28.0,4.6 out of 5 stars,31283,Not Available
7,Logitech MX Vertical Wireless Mouse – Ergonomi...,68.0,4.4 out of 5 stars,13570,Not Available
9,Razer DeathAdder Essential Gaming Mouse: 6400 ...,20.0,4.4 out of 5 stars,"11,947 ratings",In Stock
12,Logitech G502 HERO High Performance Wired Gami...,38.0,4.7 out of 5 stars,53153,Not Available
15,"VssoPlor Wireless Mouse, 2.4G Slim Portable Co...",9.0,4.6 out of 5 stars,"22,322 ratings",In Stock


In [None]:
# Colab-only step: pass the exported CSV to google.colab's download helper.
from google.colab import files
files.download('amazon_data.csv')
# NOTE(review): this message is printed unconditionally once the call above
# returns; it does not verify that the file actually reached the user.
# NOTE(review): "sucessfully" is misspelled in the runtime string below.
print("sucessfully downloaded")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

sucessfully downloaded
