In [62]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

In [63]:
# Listing page that the scraper starts from. The trailing "?" is kept so that
# query parameters (such as a page number) can be appended to it.
URL = "https://www.biba.in/kurtas-and-tops/kurtas/?"

In [64]:
# HTTP request headers sent with every page fetch.
# NOTE(review): the original User-Agent string was lost from this cell (the
# literal had become syntactically invalid); the value below is a generic
# browser-style placeholder. Replace it with the intended one.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Accept-Language': 'en-US, en;q=0.5',
}

In [65]:
def get_title(soup):
    """Return the product title text from a piece of parsed markup.

    Looks up the first ``<h3>`` element carrying the ``pdp-link`` class under
    ``soup`` and returns its text with surrounding whitespace stripped.

    Args:
        soup: Parsed markup object exposing ``find`` (e.g. a BeautifulSoup tag).

    Returns:
        str: The title text, or an empty string when the lookup raises
        ``AttributeError`` (for instance because ``find`` returned ``None``).
    """
    try:
        return soup.find('h3', class_='pdp-link').get_text(strip=True)
    except AttributeError:
        # No matching heading: fall back to an empty title.
        return ""

# print(get_title(soup=soup))

In [66]:
def get_pid(soup):
    """Return the product ID attached to the product tile, if there is one.

    Finds the first ``<div>`` with class ``product-tile`` under ``soup`` and
    reads its ``data-pid`` attribute.

    Args:
        soup: Parsed markup object exposing ``find`` (e.g. a BeautifulSoup tag).

    Returns:
        The value of the ``data-pid`` attribute, or ``None`` when either the
        tile or the attribute is missing.
    """
    product_div = soup.find('div', class_='product-tile')
    if product_div is None:
        # A container without a product tile must not abort the whole scrape;
        # report "no ID", the same way the other extractors report missing data.
        return None
    return product_div.get('data-pid')

In [67]:
def get_price(soup):
    """Return the displayed price text from a piece of parsed markup.

    Reads the first ``<span>`` with class ``sales`` under ``soup`` and returns
    its whitespace-stripped text. The text is returned as-is; it is not
    converted to a number.

    Args:
        soup: Parsed markup object exposing ``find`` (e.g. a BeautifulSoup tag).

    Returns:
        str | None: The price text, or ``None`` when the lookup raises
        ``AttributeError`` (for instance because no such span exists).
    """
    try:
        return soup.find('span', class_='sales').get_text(strip=True)
    except AttributeError:
        # Missing price element: signal "unknown" with None.
        return None

In [68]:
def get_discount(soup):
    """Return the discount label text from a piece of parsed markup.

    Reads the first ``<span>`` with class ``product-discount`` under ``soup``
    and returns its whitespace-stripped text.

    Args:
        soup: Parsed markup object exposing ``find`` (e.g. a BeautifulSoup tag).

    Returns:
        str | None: The discount text, or ``None`` when the lookup raises
        ``AttributeError`` (for instance because the product has no discount
        span).
    """
    try:
        badge_text = soup.find('span', class_='product-discount').get_text(strip=True)
    except AttributeError:
        # No discount element present for this product.
        return None
    else:
        return badge_text

In [69]:
# div_lst=soup.findAll("div", class_= "col-6 col-sm-6 col-md-3")
# len(div_lst)

In [70]:
# Function to extract product data
def extract_product_data(url):
    """Download one listing page and return a record for each product on it.

    Args:
        url: Address of the listing page to fetch.

    Returns:
        list[dict]: One dict per product container found on the page, with the
        keys ``'Product_Id'``, ``'Title'``, ``'Price'`` and ``'Discount'``
        filled in by ``get_pid``, ``get_title``, ``get_price`` and
        ``get_discount`` respectively. Empty when no container matches.

    Raises:
        requests.exceptions.RequestException: If the request fails or exceeds
            the timeout.
    """
    # Fetch the page named by the `url` argument (not the module-level URL),
    # so that each paginated call really retrieves its own page. The timeout
    # keeps a stalled connection from hanging the notebook indefinitely.
    webpage = requests.get(url=url, headers=HEADERS, timeout=30)
    soup = BeautifulSoup(webpage.content, "html.parser")

    # Each product sits in one of these grid-column containers.
    products = soup.find_all("div", class_="col-6 col-sm-6 col-md-3")

    return [
        {
            'Product_Id': get_pid(product),
            'Title': get_title(product),
            'Price': get_price(product),
            'Discount': get_discount(product),
        }
        for product in products
    ]

# Extract product data from the base listing page
product_data = extract_product_data(URL)

# Convert the list of dictionaries into a DataFrame
df = pd.DataFrame(product_data)

# Print the DataFrame
print(df)


                  Product_Id  \
0          MNTGE19372AW23BRN   
1        COLORPO19315SS23PNK   
2          ASRTD19754AW23NVY   
3        JAMAWAR20167SS24BLU   
4      ASSORTED20018SS24TEAL   
5        JAMAWAR20171SS24BRN   
6       ASSORTED20616SS24MRN   
7     CHINTZPOP20226SS24OWHT   
8   ASSORTED20405SS24YELTEAL   
9     TOPSTUNICS20193SS24PNK   
10   SUMMERSTPLS20260SS24WHT   
11   BOHOSTORY20139SS24LILAC   

                                                Title      Price Discount  
0   Brown LIVA Straight Printed KurtaBrown LIVA St...     ₹1,399  30% OFF  
1   Pink Cotton Straight Embroidered KurtaPink Cot...     ₹2,099  30% OFF  
2   Navy LIVA Straight Printed KurtaNavy LIVA Stra...       ₹909  30% OFF  
3   Blue Viscose Straight KurtaBlue Viscose Straig...  MRP₹1,799     None  
4   Teal Cotton Straight KurtaTeal Cotton Straight...  MRP₹1,599     None  
5   Brown Viscose Straight KurtaBrown Viscose Stra...  MRP₹1,799     None  
6   Maroon Rayon Straight KurtaMaroon Rayon Straig.

In [71]:
# Function to scrape data from multiple pages
def scrape_all_pages(base_url, num_pages):
    """Scrape pages 1..num_pages of a listing and pool their product records.

    For each page number a URL is built by appending a ``page`` query
    parameter to ``base_url`` and handed to ``extract_product_data``.
    NOTE(review): assumes the site paginates through a ``page`` query
    parameter -- confirm against the live site.

    Args:
        base_url: Listing URL, with or without an existing query string.
        num_pages: Number of pages to request, starting at page 1. Values
            below 1 result in no requests.

    Returns:
        list: All records returned by ``extract_product_data``, in page order.
    """
    # Continue an existing query string with "&"; otherwise open one with "?"
    # so the resulting address is well-formed either way.
    separator = "&" if "?" in base_url else "?"

    all_data = []
    for page in range(1, num_pages + 1):
        page_url = f"{base_url}{separator}page={page}"
        all_data.extend(extract_product_data(page_url))

    return all_data

In [75]:
# Scrape listing pages 1 through 65 and pool every product record in one list.
all_data = scrape_all_pages(base_url=URL, num_pages=65)

In [77]:
# Build a DataFrame from the pooled records and write it to CSV, leaving out
# the row index column.
df = pd.DataFrame(data=all_data)
df.to_csv(path_or_buf='biba_products.csv', index=False)