In [1]:
import time
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# Scrape the Clinique brand listing on Nykaa page by page, then open every
# product page to collect its name, rating, prices and discount. One dict per
# product is appended to `products`, which the following cells consume.

# Set up Selenium options and driver
options = Options()
options.add_argument("--disable-gpu")
# options.add_argument("--headless")  # Optional: Run in headless mode

service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=options)

# Listing URL pieces: page 1 uses only the tracking params, later pages add
# page_no / sort in front of them (see the URL built inside the loop).
base_url = "https://www.nykaa.com/luxe/brands/clinique/c/3814?"
params = "transaction_id=4f155750ba7b2edde1dae2503f6bf1db&intcmp=nykaa:sp:skin-native-desktop:skin:luxe-indulgences-v2-slider:SLIDING_WIDGET_V2:13:Clinique:3814:4f155750ba7b2edde1dae2503f6bf1db"
paginationNumber = 1

# Pause after the single scroll-to-bottom so lazily rendered cards can appear
scroll_pause_time = 2

# Lists to store data
productLinks = []  # per-page scratch list: link + listing-level fields
products = []      # final records, one dict per product

try:
    while True:

        if paginationNumber == 1:
            url = f"{base_url}{params}"
        else:
            url = f"{base_url}page_no={paginationNumber}&sort=popularity&transaction_id=4f155750ba7b2edde1dae2503f6bf1db&intcmp=nykaa:sp:skin-native-desktop:skin:luxe-indulgences-v2-slider:SLIDING_WIDGET_V2:13:Clinique:3814:4f155750ba7b2edde1dae2503f6bf1db&eq=desktop"

        # Load the Page
        driver.get(url)
        print(f"Scraping Page {paginationNumber}: {url}")
        time.sleep(3)

        # Scroll to the bottom of the page
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(scroll_pause_time)

        # Wait until the products are loaded; a timeout is treated as
        # "no more listing pages" and ends the pagination loop.
        try:
            WebDriverWait(driver, 10).until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, '.productWrapper.css-17nge1h'))
            )
        except TimeoutException:
            print(f"Scraped the Total number of pages: {paginationNumber - 1}")
            break

        # Parse the page content with BeautifulSoup
        content = driver.page_source
        soup = BeautifulSoup(content, 'html.parser')

        # Find all product items (find_all is the supported spelling of findAll)
        deygaOrganicProducts = soup.find_all('div', class_='productWrapper css-17nge1h')
        if not deygaOrganicProducts:
            print("No Products found on this page.")
            break

        # Collect the link plus the fields that are read from the listing card
        for product in deygaOrganicProducts:
            # Extract the Stock Status; cards without the tag are recorded as in stock
            stockTag = product.find('span', class_ = "css-lg5xc9")
            if stockTag:
                stock = stockTag.get_text(strip = True)
            else:
                stock = "In Stock"

            # Extract the Product Highlight
            highlightTag = product.find("li", class_ = "custom-tag css-1bse542")
            if highlightTag:
                highlight = highlightTag.get_text(strip = True)
            else:
                highlight = "None"

            # Extract product link
            linkTag = product.find('a', href=True)
            if linkTag:
                productLink = linkTag['href']
                # Check if the link is a relative URL and add the base URL if necessary
                if not productLink.startswith("http"):  # Relative URL
                    productLink = "https://www.nykaa.com" + productLink
                productLinks.append({"link" : productLink, "stock" : stock, "highlight" : highlight})

        for productData in productLinks:

            link = productData["link"]
            stock = productData["stock"]
            highlight = productData["highlight"]

            driver.get(link)
            time.sleep(3)  # Allow time for the page to load

            # Parse individual product page
            productPageContent = driver.page_source
            productSoup = BeautifulSoup(productPageContent, 'html.parser')

            # Extract Product Name
            productTag = productSoup.find('h1', class_='css-1gc4x7i')
            if productTag:
                prodName = productTag.get_text(strip=True)
            else:
                prodName = "No Description Available"

            # Extract Ratings
            ratingTag = productSoup.find('div', class_="css-1m0y15j")
            if ratingTag:
                ratingText = ratingTag.find('div', class_="css-m6n3ou")
                if ratingText:
                    ratings = ratingText.get_text(strip = True)
                else:
                    ratings = "N/A"
            else:
                ratings = "N/A"

            # Extract the Ratings & Reviews Count
            ratingCountTag = productSoup.find('div', class_ = "css-1eip5u4")
            if ratingCountTag:
                ratingCount = ratingCountTag.get_text(strip = True)
            else:
                ratingCount = "None"

            # Extract the Offer Price and Original Price.
            # Reset both for EVERY product before looking at the page: without
            # this, a page lacking the price container would either raise
            # NameError (first product) or reuse the previous product's prices.
            originalPrice = "N/A"
            offerPrice = "N/A"

            productDiv = productSoup.find('div', class_ = "css-1d0jf8e")
            if productDiv:
                spanTags = productDiv.find_all('span')

                # Two spans: only the second is read, as the original price.
                # Four spans: the second is read as the original price and the
                # third as the offer price.
                # Any other count leaves both values at "N/A".
                if len(spanTags) == 2:
                    originalPrice = spanTags[1].get_text(strip = True)
                elif len(spanTags) == 4:
                    originalPrice = spanTags[1].get_text(strip = True)
                    offerPrice = spanTags[2].get_text(strip = True)
            else:
                print("Product div not found, defaulting to N/A for prices")

            # Extract the Discount
            discountTag = productSoup.find('span', class_ = "css-bhhehx")
            if discountTag:
                discount = discountTag.get_text(strip = True)
            else:
                discount = "No Discount"

            products.append({
                "Product Brand" : "Clinique",
                "Product Name" : prodName,
                "Product Rating" : ratings,
                "Product Rating & Review Count" : ratingCount,
                "Product Original Price" : originalPrice,
                "Product Offer Price" : offerPrice,
                "Product Discount" : discount,
                "Product Highlight" : highlight,
                "Product Stock Status" : stock
            })

        # Clear the product links list to avoid the duplication
        productLinks.clear()

        paginationNumber += 1 # Increment the Page Number
        time.sleep(5)
finally:
    # Close the browser even when scraping fails part-way, so no Chrome
    # process is left running; records gathered so far stay in `products`.
    driver.quit()

Scraping Page 1: https://www.nykaa.com/luxe/brands/clinique/c/3814?transaction_id=4f155750ba7b2edde1dae2503f6bf1db&intcmp=nykaa:sp:skin-native-desktop:skin:luxe-indulgences-v2-slider:SLIDING_WIDGET_V2:13:Clinique:3814:4f155750ba7b2edde1dae2503f6bf1db
Scraping Page 2: https://www.nykaa.com/luxe/brands/clinique/c/3814?page_no=2&sort=popularity&transaction_id=4f155750ba7b2edde1dae2503f6bf1db&intcmp=nykaa:sp:skin-native-desktop:skin:luxe-indulgences-v2-slider:SLIDING_WIDGET_V2:13:Clinique:3814:4f155750ba7b2edde1dae2503f6bf1db&eq=desktop
Scraping Page 3: https://www.nykaa.com/luxe/brands/clinique/c/3814?page_no=3&sort=popularity&transaction_id=4f155750ba7b2edde1dae2503f6bf1db&intcmp=nykaa:sp:skin-native-desktop:skin:luxe-indulgences-v2-slider:SLIDING_WIDGET_V2:13:Clinique:3814:4f155750ba7b2edde1dae2503f6bf1db&eq=desktop
Scraping Page 4: https://www.nykaa.com/luxe/brands/clinique/c/3814?page_no=4&sort=popularity&transaction_id=4f155750ba7b2edde1dae2503f6bf1db&intcmp=nykaa:sp:skin-native-desk

In [3]:
# Dump the raw scraped records (list of dicts) as plain text for a quick check.
print("Products: \n {}".format(products))

Products: 
 [{'Product Brand': 'Clinique', 'Product Name': 'Clinique Moisture Surge 100h Auto-replenishing Hydrator (Moisturizer)(30ml)', 'Product Rating': '4.5/5', 'Product Rating & Review Count': '26783ratings&4719reviews', 'Product Original Price': '₹2250', 'Product Offer Price': 'N/A', 'Product Discount': 'No Discount', 'Product Highlight': 'BESTSELLER', 'Product Stock Status': 'In Stock'}, {'Product Brand': 'Clinique', 'Product Name': 'Clinique Take The Day Off Cleansing Balm (Makeup Remover)(125ml)', 'Product Rating': '4.6/5', 'Product Rating & Review Count': '3452ratings&617reviews', 'Product Original Price': '₹3800', 'Product Offer Price': 'N/A', 'Product Discount': 'No Discount', 'Product Highlight': 'BESTSELLER', 'Product Stock Status': 'In Stock'}, {'Product Brand': 'Clinique', 'Product Name': 'Clinique Almost Lipstick In Black Honey (Lip Balm)(1.9g)', 'Product Rating': '4.3/5', 'Product Rating & Review Count': '2068ratings&367reviews', 'Product Original Price': '₹2300', 'Pr

In [4]:
# Tabulate the scraped records: each dict in `products` becomes one row and
# its keys become the column names.
clinique = pd.DataFrame(data=products)
# Bare last expression so the notebook renders the frame.
clinique

Unnamed: 0,Product Brand,Product Name,Product Rating,Product Rating & Review Count,Product Original Price,Product Offer Price,Product Discount,Product Highlight,Product Stock Status
0,Clinique,Clinique Moisture Surge 100h Auto-replenishing...,4.5/5,26783ratings&4719reviews,₹2250,,No Discount,BESTSELLER,In Stock
1,Clinique,Clinique Take The Day Off Cleansing Balm (Make...,4.6/5,3452ratings&617reviews,₹3800,,No Discount,BESTSELLER,In Stock
2,Clinique,Clinique Almost Lipstick In Black Honey (Lip B...,4.3/5,2068ratings&367reviews,₹2300,,No Discount,BESTSELLER,In Stock
3,Clinique,Clinique Pop Longwear Lipstick - Mocha(3.9g),5/5,2ratings&1reviews,₹2400,,No Discount,,In Stock
4,Clinique,Clinique Protect Your Glow Bundle - Sunscreen ...,4.6/5,2693ratings&337reviews,₹7100,₹5680,20% Off,,In Stock
...,...,...,...,...,...,...,...,...,...
122,Clinique,Clinique Smart Clinical MD Multi-Dimensional A...,2.8/5,9ratings,₹7750,,No Discount,,Out Of Stock
123,Clinique,Clinique Anti-Blemish Clinical Clearing Gel Wi...,4.1/5,140ratings&46reviews,₹2450,,No Discount,,Out Of Stock
124,Clinique,Clinique High Impact Kajal - Black(0.14gm),4.2/5,55ratings&5reviews,₹1050,,No Discount,,Out Of Stock
125,Clinique,Clinique Smart SPF 15 Custom-Repair Moisturize...,4.3/5,39ratings&3reviews,₹6900,,No Discount,,Out Of Stock


In [5]:
# clinique.to_csv("C:/Users/Logiya Vidhyapathy/Documents/KGISL Data Science/Project Files and Documents/Capstone Project/Clinique Products_Nykaa.csv", index = False)