In [161]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse

#config
base_url = "https://www.avvatarindia.com"
LIMIT = 100 #change limit according to your own needs
to_visit = [base_url]
visited_urls = set()
products = []
seen_products = set()

def crawler():
    crawl_count = 0

    while to_visit and crawl_count < LIMIT:
        current_url = to_visit.pop()
        print(f'Crawling: {current_url}')

        if current_url in visited_urls:
            continue

        try:
            response = requests.get(current_url, timeout=10)
            soup = BeautifulSoup(response.content, "html.parser")
        except Exception as e:
            print(f"Failed to fetch {current_url}: {e}")
            continue

        visited_urls.add(current_url)
        crawl_count += 1

        #extract products
        product_blocks = soup.find_all("div", class_="product-body")
        for block in product_blocks:
            try:
                #getting the product itle
                title_elem = block.find("h3", class_="product-title")
                title = title_elem.get_text(strip=True) if title_elem else "N/A"

                #getting the price
                price_elem = block.find_next("span", class_="new-price")
                price = price_elem.get_text(strip=True) if price_elem else "N/A"

                #getting the product url from <a> inside <figure class="product-media"> 
                figure = block.find_previous("figure", class_="product-media")
                a_tag = figure.find("a", href=True) if figure else None
                product_url = urljoin(base_url, a_tag["href"]) if a_tag else "N/A"

                unique_key = (title, price)

                if unique_key not in seen_products:
                    products.append({
                        "name": title,
                        "price": price,
                        "url": product_url
                    })
                    seen_products.add(unique_key)
                    print(f"{title} | {price} | {product_url}")

            except Exception as e:
                print(f"Error extracting product from {current_url}: {e}")

        #finding links
        for link in soup.find_all('a', href=True):
            href = link['href']
            full_url = urljoin(base_url, href)

            if urlparse(full_url).netloc == urlparse(base_url).netloc:
                if full_url not in visited_urls and full_url not in to_visit:
                    to_visit.append(full_url)

    print(f"\nCrawling complete, visited {crawl_count} pages.")
    print(f"Total Unique Products Scraped: {len(products)}")

# Run the crawler
crawler()


Crawling: https://www.avvatarindia.com
Avvatar Whey Protein | 4 Kg | Chocolate Hazelnut Flavour | ₹ 10499 | https://www.avvatarindia.com/product/whey-protein-chocolate-hazelnut-flavour-4-kg
Avvatar Whey Protein | 2 Kg | Malai Kulfi Flavour | ₹ 5499 | https://www.avvatarindia.com/product/whey-protein-malai-kulfi-flavour-2-kg
Avvatar Whey Protein | 4 Kg | Malai Kulfi Flavour | ₹ 10499 | https://www.avvatarindia.com/product/whey-protein-malai-kulfi-flavour-4-kg
Avvatar Isorich Protein | 1 Kg | Malai Kulfi Flavour | ₹ 3899 | https://www.avvatarindia.com/product/isorich-protein-malai-kulfi-flavour-1-kg
Avvatar Whey Protein | 1 Kg | Raw Unflavoured | ₹ 2299 | https://www.avvatarindia.com/product/whey-protein-raw-unflavoured-1-kg
Avvatar Whey Protein | 4 Kg | Raw Unflavoured | ₹ 8899 | https://www.avvatarindia.com/product/whey-protein-raw-unflavoured-4-kg
Avvatar Creatine Monohydrate | 100G | Unflavoured | ₹ 499 | https://www.avvatarindia.com/product/prepost-workout-series-avvatar-micronised-

Avvatar Nitro Iso Whey | 2 Kg | Belgian Chocolate Flavour | ₹5699 | https://www.avvatarindia.com/product/nitro-iso-whey-belgian-chocolate-flavour-2-kg
Crawling: https://www.avvatarindia.com/blogs
Crawling: https://www.avvatarindia.com/blogs/fitness-trends-and-updates
Crawling: https://www.avvatarindia.com/login.html
Crawling: https://www.avvatarindia.com#page-top
Crawling: https://www.avvatarindia.com/blogs/experts-speaks
Crawling: https://www.avvatarindia.com/blog/abc
Crawling: https://www.avvatarindia.com/blog/abcdt
Crawling: https://www.avvatarindia.com/blog/at-aut-eiusmod-velit
Crawling: https://www.avvatarindia.com/product/protein-wafer-bar-chocolate-flavour-320-g
Crawling: https://www.avvatarindia.com/product/100-performance-whey-assorted-power-pack-175g-2504073931-426
Crawling: https://www.avvatarindia.com/product/100-performance-whey-assorted-power-pack-175g
Crawling: https://www.avvatarindia.com/product/5-sachet-travel-pack-cold-coffee-flavour-175g
Crawling: https://www.avvata

Avvatar Isorich Protein | 1 Kg | Malai Kulfi Flavour | ₹3899 | https://www.avvatarindia.com/product/isorich-protein-malai-kulfi-flavour-1-kg
Crawling: https://www.avvatarindia.com/product/isorich-protein-mango-rush-flavour-2-kg
Failed to fetch https://www.avvatarindia.com/product/isorich-protein-mango-rush-flavour-2-kg: HTTPSConnectionPool(host='www.avvatarindia.com', port=443): Read timed out.
Crawling: https://www.avvatarindia.com/product/isorich-protein-mango-rush-flavour-1-kg
Crawling: https://www.avvatarindia.com/product/isorich-protein-mango-rush-flavour-2-kg
Crawling: https://www.avvatarindia.com/product/isorich-protein-belgian-chocolate-flavour-4-kg
Crawling: https://www.avvatarindia.com/product/isorich-protein-belgian-chocolate-flavour-2-kg
Crawling: https://www.avvatarindia.com/product/isorich-protein-belgian-chocolate-flavour-1-kg
Crawling: https://www.avvatarindia.com/product/isorich-protein-malai-kulfi-flavour-4-kg
Crawling: https://www.avvatarindia.com/product/isorich-pro

In [149]:
import pandas as pd
product_data = pd.DataFrame(products)

In [148]:
#save as csv
# product.to_csv("product_data.csv")

In [170]:
#function to find stuff withing the dataframe
def product_lookup(dataframe, name_lookup):
    df = pd.DataFrame(dataframe)
    
    df['price'] = df['price'].str.replace("₹", "").str.replace(" ", "").astype(int)
    result = df[df['name'].str.contains(name_lookup, case=False)].sort_values('price')
    return pd.DataFrame(result)

In [174]:
product_lookup(product_data, 'Whey')

Unnamed: 0,name,price,url
24,Avvatar 100% Performance Whey | 175G | Raw Unf...,549,https://www.avvatarindia.com/product/100-perfo...
94,Avvatar 100% Performance Whey | 175G | Raw Unf...,549,https://www.avvatarindia.com/product/100-perfo...
22,Avvatar 100% Performance Whey | 175G | Belgian...,599,https://www.avvatarindia.com/product/100-perfo...
21,Avvatar 100% Performance Whey | 175G | Malai K...,599,https://www.avvatarindia.com/product/100-perfo...
25,Avvatar 100% Performance Whey | 175G | Cold Co...,599,https://www.avvatarindia.com/product/5-sachet-...
...,...,...,...
53,Avvatar Whey Protein | 4 Kg | Belgian Chocolat...,10499,https://www.avvatarindia.com/product/whey-prot...
62,Avvatar Whey Protein | 4 Kg | Malai Kulfi Flavour,10499,https://www.avvatarindia.com/product/whey-prot...
57,Avvatar Whey Protein | 4 Kg | Chocolate Hazeln...,10499,https://www.avvatarindia.com/product/whey-prot...
65,Avvatar Whey Protein | 4 Kg | Mango Rush Flavour,10499,https://www.avvatarindia.com/product/whey-prot...
