In [31]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

In [37]:
# Function to extract Product Title
def get_title(soup):

    try:
        # Outer Tag Object
        title = soup.find("span", attrs={"id":'productTitle'})
        
        # Inner NavigatableString Object
        title_value = title.text

        # Title as a string value
        title_string = title_value.strip()

    except AttributeError:
        title_string = ""

    return title_string


def get_price(soup):
    try:
        # Try to get the main price first
        price = soup.find("span", attrs={"class":'a-price-whole'}).text
        currency = soup.find("span", attrs={"class":'a-price-symbol'}).text
        return currency + price
    except AttributeError:
        try:
            # If it's a deal price
            price = soup.find("span", attrs={'id':'priceblock_dealprice'}).text
            return price
        except AttributeError:
            try:
                # Fallback to another price block
                price = soup.find("span", attrs={"class":'a-price'}).find("span", attrs={"class":'a-offscreen'}).text
                return price
            except AttributeError:
                return "Price Not Available"
# Function to extract Product Rating
def get_rating(soup):

    try:
        rating = soup.find("i", attrs={'class':'a-icon a-icon-star a-star-4-5'}).string.strip()
    
    except AttributeError:
        try:
            rating = soup.find("span", attrs={'class':'a-icon-alt'}).string.strip()
        except:
            rating = ""	

    return rating

# Function to extract Number of User Reviews
def get_review_count(soup):
    try:
        review_count = soup.find("span", attrs={'id':'acrCustomerReviewText'}).string.strip()

    except AttributeError:
        review_count = ""	

    return review_count

# Function to extract Availability Status
def get_availability(soup):
    try:
        available = soup.find("div", attrs={'id':'availability'})
        available = available.find("span").string.strip()

    except AttributeError:
        available = "Not Available"	

    return available


In [162]:

if __name__ == '__main__':

    # add your user agent 
    HEADERS =({'User_Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36', 'Accept-Language' : 'en-US, en;q=0.5'})
    # The webpage URL
    URL = "https://www.amazon.in/s?k=smart+locks&page=7&crid=2NARLW8NQXPB1&qid=1722832538&sprefix=smart+lock%2Caps%2C239&ref=sr_pg_7"
    # HTTP Request
    webpage = requests.get(URL, headers=HEADERS)

    # Soup Object containing all data
    soup = BeautifulSoup(webpage.content, "html.parser")

    # Fetch links as List of Tag Objects
    links = soup.find_all("a", attrs={'class':'a-link-normal s-no-outline'})

    # Store the links
    links_list = []

    # Loop for extracting links from Tag Objects
    for link in links:
            links_list.append(link.get('href'))

    d = {"title":[], "price":[], "rating":[], "reviews":[],"availability":[]}
    
    # Loop for extracting product details from each link 
    for link in links_list:
        new_webpage = requests.get("https://www.amazon.in" + link, headers=HEADERS)

        new_soup = BeautifulSoup(new_webpage.content, "html.parser")

        # Function calls to display all necessary product information
        d['title'].append(get_title(new_soup))
        d['price'].append(get_price(new_soup))
        d['rating'].append(get_rating(new_soup))
        d['reviews'].append(get_review_count(new_soup))
        d['availability'].append(get_availability(new_soup))
    
    amazon_df = pd.DataFrame.from_dict(d)
    amazon_df['title'].replace('', np.nan, inplace=True)
    amazon_df = amazon_df.dropna(subset=['title'])
    amazon_df.to_csv("amazon_data.csv", header=True, index=False)


In [164]:
list_7 = amazon_df

In [250]:
list_final4 = pd.concat([list, list_6, list_7, list_4], ignore_index=True)
list_final4

Unnamed: 0,title,price,rating,reviews,availability
0,LAVNA Smart Fingerprint Padlock with 2 way acc...,"₹1,698.",4.2 out of 5 stars,8 ratings,In stock
1,"LAVNA Smart Door Lock LA16 with Fingerprint, B...","₹5,689.",4.2 out of 5 stars,233 ratings,In stock
2,LAVNA LA15 Smart Digital Door Lock with Finger...,"₹3,298.",4.3 out of 5 stars,31 ratings,In stock
3,QUBO Smart Door Lock Essential from Hero Group...,"₹8,990.",4.4 out of 5 stars,725 ratings,In stock
4,QUBO Smart Door Lock Essential from Hero Group...,"₹9,490.",4.2 out of 5 stars,338 ratings,In stock
...,...,...,...,...,...
298,Hafele RE-Size Digital Door Lock for Wooden Do...,"₹3,500.",Previous page,,Only 1 left in stock.
299,"Tuchware XS1100 Smart RFID+BLE, Door Lock - Ke...","₹7,399.",Previous page,,Only 2 left in stock.
300,Lockomatic Twister TBFLV1B Black Chrome Smart ...,"₹28,999.",Previous page,,In stock
301,"Tuchware Smart Lock XS3200 with Fingerprint, P...","₹8,999.",Previous page,,Only 2 left in stock.


In [251]:
list_final4['ranking'] = range(1, len(list_final4) + 1)
list_final4

Unnamed: 0,title,price,rating,reviews,availability,ranking
0,LAVNA Smart Fingerprint Padlock with 2 way acc...,"₹1,698.",4.2 out of 5 stars,8 ratings,In stock,1
1,"LAVNA Smart Door Lock LA16 with Fingerprint, B...","₹5,689.",4.2 out of 5 stars,233 ratings,In stock,2
2,LAVNA LA15 Smart Digital Door Lock with Finger...,"₹3,298.",4.3 out of 5 stars,31 ratings,In stock,3
3,QUBO Smart Door Lock Essential from Hero Group...,"₹8,990.",4.4 out of 5 stars,725 ratings,In stock,4
4,QUBO Smart Door Lock Essential from Hero Group...,"₹9,490.",4.2 out of 5 stars,338 ratings,In stock,5
...,...,...,...,...,...,...
298,Hafele RE-Size Digital Door Lock for Wooden Do...,"₹3,500.",Previous page,,Only 1 left in stock.,299
299,"Tuchware XS1100 Smart RFID+BLE, Door Lock - Ke...","₹7,399.",Previous page,,Only 2 left in stock.,300
300,Lockomatic Twister TBFLV1B Black Chrome Smart ...,"₹28,999.",Previous page,,In stock,301
301,"Tuchware Smart Lock XS3200 with Fingerprint, P...","₹8,999.",Previous page,,Only 2 left in stock.,302


In [257]:
import pandas as pd
import os

def export_to_csv(df, file_path):
  try:
    df.to_csv(file_path, index=False)
    print(f"CSV file exported successfully to: {file_path}")
  except Exception as e:
    print(f"Error exporting CSV: {e}")

# Assuming you have a DataFrame named 'list_final'
output_file = os.path.join('Creative Cloud Files', 'list_final4.csv')
export_to_csv(list_final, output_file)

CSV file exported successfully to: Creative Cloud Files/list_final4.csv


In [2]:
list_final4.head(20)

NameError: name 'list_final4' is not defined

In [263]:

# Assuming you have a DataFrame named 'final_list_4'
list_final4.to_excel('final_list_4.xlsx', index=False)
