# EarBuddies, Ltd.: Scraping Amazon For You

¿Eres todo oídos para los mejores auriculares Bluetooth? ¡No busques más allá de EarBuddies! Hacemos el scraping por ti, analizando los últimos y mejores auriculares del mercado. Di adiós al remordimiento de conciencia y hola a los auriculares perfectos con EarBuddies.

In [2]:
from bs4 import BeautifulSoup as bs
import requests
import pandas as pd
import numpy as np

# Functions used

In [3]:
# Product title extractor
def get_title(soup):
    """Return the product title found in ``soup``.

    Looks up the ``<span>`` whose ``id`` is ``productTitle`` via
    ``soup.find`` and returns its ``.text`` with surrounding whitespace
    stripped.

    Returns an empty string if any step raises ``AttributeError`` (for
    example when ``find`` yields ``None`` and ``.text`` cannot be read).
    """
    try:
        # Lookup, text access and strip are chained; a failure at any
        # step surfaces as AttributeError and falls through to the default.
        return soup.find('span', attrs={'id': 'productTitle'}).text.strip()
    except AttributeError:
        return ''

# Product price extractor
def get_price(soup):
    """Return the product price text found in ``soup``.

    Looks up a ``<span>`` with class ``a-offscreen`` via ``soup.find`` and
    returns its ``.string`` with surrounding whitespace stripped.

    Returns an empty string if any step raises ``AttributeError`` (for
    example when no tag is found, or the tag's ``.string`` is ``None``).
    """
    try:
        price_tag = soup.find('span', attrs={'class': 'a-offscreen'})
        price_text = price_tag.string
        return price_text.strip()
    except AttributeError:
        return ''

# Product rating extractor
def get_rating(soup):
    """Return the product rating text found in ``soup``.

    Looks up a ``<span>`` with class ``a-icon-alt`` via ``soup.find`` and
    returns its ``.string`` with surrounding whitespace stripped.

    Returns an empty string if any step raises ``AttributeError`` (for
    example when no tag is found, or the tag's ``.string`` is ``None``).
    """
    try:
        rating_tag = soup.find('span', attrs={'class': 'a-icon-alt'})
        rating_text = rating_tag.string.strip()
    except AttributeError:
        rating_text = ''
    else:
        # Nothing further to do on success; the else branch only makes the
        # happy path explicit.
        pass
    return rating_text

# Review count extractor
def get_review_count(soup):
    """Return the user-review count text found in ``soup``.

    Looks up the ``<span>`` whose ``id`` is ``acrCustomerReviewText`` via
    ``soup.find`` and returns its ``.string`` with surrounding whitespace
    stripped.

    Returns an empty string if any step raises ``AttributeError`` (for
    example when no tag is found, or the tag's ``.string`` is ``None``).
    """
    try:
        count_tag = soup.find('span', attrs={'id': 'acrCustomerReviewText'})
        count_text = count_tag.string.strip()
    except AttributeError:
        return ''
    return count_text



# Availability status extractor
def get_availability(soup):
    """Return the availability text found in ``soup``.

    Looks up the ``<div>`` whose ``id`` is ``availability`` via
    ``soup.find``, then the ``<span>`` inside it, and returns that span's
    ``.string`` with surrounding whitespace stripped.

    Returns ``'Not Available'`` if any step raises ``AttributeError`` (for
    example when the div or span is missing, or ``.string`` is ``None``).
    """
    try:
        availability_div = soup.find('div', attrs={'id': 'availability'})
        status_span = availability_div.find("span")
        return status_span.string.strip()
    except AttributeError:
        return 'Not Available'
        

In [6]:
# Scrape an Amazon search-results page for Bluetooth earbuds, visit each
# product link, collect title / price / rating / review count / availability,
# and save the rows that have a title to 'amazon_scraping.csv'.

# Request headers: a desktop-browser User-Agent plus a language preference.
# The language key is spelled 'Accept-Language' (the standard HTTP header
# name); it was previously misspelled and therefore never sent as such.
headers = ({'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36', 'Accept-Language':'en-US, en;q=0.5'})

# Seconds to wait on each HTTP request before giving up. Without a timeout
# requests.get can block forever on a stalled connection; with it, a
# requests.exceptions.Timeout is raised instead.
REQUEST_TIMEOUT = 30

# Website's URL
url = 'https://www.amazon.com/s?k=bluetooth+earbuds&i=electronics-intl-ship&crid=9K0VAJEV4J9S&sprefix=bluetooth+earbuds%2Celectronics-intl-ship%2C700&ref=nb_sb_noss_1'

# HTTP request for the search-results page
webpage = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)

# Soup object containing all the data
soup = bs(webpage.content, 'html.parser')

# Fetch product links as a list of tag objects
links = soup.find_all('a', attrs={'class':'a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal'})

# Keep only anchors that actually carry an href: Tag.get returns None when
# the attribute is absent, which would break the URL concatenation below.
links_list = [href for href in (link.get('href') for link in links) if href]

# Column name -> list of scraped values, one entry per visited product page
d = {'title':[],'price':[],'rating':[],'reviews':[], 'availability':[]}

# Loop for extracting product details from each link
for link in links_list:
    new_webpage = requests.get('https://www.amazon.com' + link, headers=headers, timeout=REQUEST_TIMEOUT)
    new_soup = bs(new_webpage.content, 'html.parser')

    # Each helper returns a fallback string when its element is missing, so
    # every list grows by exactly one entry per page and stays aligned.
    d['title'].append(get_title(new_soup))
    d['price'].append(get_price(new_soup))
    d['rating'].append(get_rating(new_soup))
    d['reviews'].append(get_review_count(new_soup))
    d['availability'].append(get_availability(new_soup))

df_amazon = pd.DataFrame.from_dict(d)

# Blank titles mark pages where no title was found; turn them into NaN so
# dropna can remove those rows. Assign the result back instead of calling
# replace(..., inplace=True) on the column selection, which pandas warns
# about and which has no effect under Copy-on-Write.
df_amazon['title'] = df_amazon['title'].replace('', np.nan)
df_amazon = df_amazon.dropna(subset=['title'])
df_amazon.to_csv('amazon_scraping.csv', header=True, index=False)

In [4]:
# Display the df_amazon DataFrame as this cell's output.
df_amazon

Unnamed: 0,title,price,rating,reviews,availability
0,"A40 Pro Wireless Earbuds, 50Hrs Playtime Bluet...",$29.99,4.9 out of 5 stars,47 ratings,In Stock.
1,TOZO A3 Wireless Earbuds Bluetooth 5.3 Half in...,$14.44,4.5 out of 5 stars,549 ratings,In Stock.
2,"Wireless Earbuds, Bluetooth Earbuds Bluetooth ...",$29.99,4.4 out of 5 stars,"2,127 ratings",In Stock.
3,TOZO T6 True Wireless Earbuds Bluetooth 5.3 He...,$25.99,4.4 out of 5 stars,"236,633 ratings",In Stock.
4,TOZO T6 True Wireless Earbuds Bluetooth 5.3 He...,$25.99,4.4 out of 5 stars,"236,633 ratings",In Stock.
5,Bluetooth Headphones V5.3 Wireless Earbuds 50 ...,$25.49,4.7 out of 5 stars,"1,571 ratings",In Stock.
6,"ZIUTY Wireless Earbuds, V5.3 Headphones 50H Pl...",$26.99,4.8 out of 5 stars,"5,597 ratings",In Stock.
7,GOLREX Bluetooth Headphones Wireless Earbuds 3...,$41.99,4.8 out of 5 stars,"3,068 ratings",In Stock.
8,CAPOXO Wireless Earbuds Bluetooth Headphones 5...,$19.99,4.7 out of 5 stars,742 ratings,In Stock.
9,TOZO T10 Bluetooth 5.3 Wireless Earbuds with W...,$22.99,4.3 out of 5 stars,"336,312 ratings",In Stock.
