# Scraping using Web Scraper

In [None]:
from selenium.webdriver.common.by import By
from selenium import webdriver
import pandas as pd
import time

# Initialize ChromeDriver
wd = webdriver.Chrome()

# Define a function to scrape product details from a given URL
def scrape_product_details(url):
    wd.get(url)
    
    # Brand Name
    brand_name_element = wd.find_element(By.XPATH, '//h1[@class="product-header-name"]/span[@class="perfume-name serif product-name-short"]')
    brand_name = brand_name_element.text
    
    # Number of Reviews
    reviews_element = wd.find_element(By.XPATH, '//span[@class="h3 total-reviews"]')
    num_reviews = reviews_element.text
    
    # Description
    description_element = wd.find_element(By.XPATH, '//div[@class="faq-description"]')
    description = description_element.text
    
    # Prices (as a list)
    price_elements = wd.find_elements(By.XPATH, '//span[@class="price-value"]/bdo/span[@class="base-price-val"]')
    prices = [price_element.text for price_element in price_elements]
    
    # Reviews and Ratings (up to 10)
    reviews = []
    ratings = []
    review_elements = wd.find_elements(By.XPATH, '//div[@class="c3-6-of-12 review-text"]')
    rating_elements = wd.find_elements(By.XPATH, '//span[@itemprop="ratingValue" and @class="rating"]')
    
    for i, review_element in enumerate(review_elements[:10]):
        review_text = review_element.text
        rating_text = rating_elements[i].text if i < len(rating_elements) else "N/A"
        
        reviews.append(review_text)
        ratings.append(rating_text)
    
    product_details = {
        "brand_name": brand_name,
        "num_reviews": num_reviews,
        "description": description,
        "prices": prices,
        "reviews": reviews,
        "ratings": ratings,
    }
    
    return product_details

# Initialize an empty list to store product details
product_details_list = []

# Iterate through the list of links and scrape product details for the first 5 URLs
for product_url in href_list[5:15]:
    try:
        product_details = scrape_product_details(product_url)
        
        # Add a new row for each review
        for review, rating in zip(product_details["reviews"], product_details["ratings"]):
            new_row = product_details.copy()
            new_row["reviews"] = review
            new_row["ratings"] = rating
            product_details_list.append(new_row)
    except Exception as e:
        print(f"Error scraping product details for URL: {product_url}")
        print(e)
    
    # Sleep for a few seconds to avoid overloading the website
    time.sleep(30)

# Convert the list of dictionaries to a Pandas DataFrame
df = pd.DataFrame(product_details_list)

# Now, 'df' contains the scraped product information with a new column for ratings
print(f"Total product details scraped: {len(df)}")

# Don't forget to close the webdriver when done
wd.quit()

# Additional manual scraping to acquire scents description.

In [None]:
from selenium.webdriver.common.by import By
from selenium import webdriver
import pandas as pd
import time

# Initialize ChromeDriver
wd = webdriver.Chrome()

# Define a function to scrape product details from a given URL
def scrape_product_details(url):
    wd.get(url)
    
    # Brand Name
    brand_name_element = wd.find_element(By.XPATH, '//h1[@class="product-header-name"]/span[@class="perfume-name serif product-name-short"]')
    brand_name = brand_name_element.text
    
    # Number of Reviews
    reviews_element = wd.find_element(By.XPATH, '//span[@class="h3 total-reviews"]')
    num_reviews = reviews_element.text
    
    # Description
    description_element = wd.find_element(By.XPATH, '//div[@class="faq-description"]')
    description = description_element.text
    
    # Prices (as a list)
    price_elements = wd.find_elements(By.XPATH, '//span[@class="price-value"]/bdo/span[@class="base-price-val"]')
    prices = [price_element.text for price_element in price_elements]
    
    # Reviews and Ratings (up to 10)
    reviews = []
    ratings = []
    review_elements = wd.find_elements(By.XPATH, '//div[@class="c3-6-of-12 review-text"]')
    rating_elements = wd.find_elements(By.XPATH, '//span[@itemprop="ratingValue" and @class="rating"]')
    
    for i, review_element in enumerate(review_elements[:10]):
        review_text = review_element.text
        rating_text = rating_elements[i].text if i < len(rating_elements) else "N/A"
        
        reviews.append(review_text)
        ratings.append(rating_text)
    
    product_details = {
        "brand_name": brand_name,
        "num_reviews": num_reviews,
        "description": description,
        "prices": prices,
        "reviews": reviews,
        "ratings": ratings,
    }
    
    return product_details

# Initialize an empty list to store product details
product_details_list = []

# Iterate through the list of links and scrape product details for the first 5 URLs
for product_url in href_list[5:15]:
    try:
        product_details = scrape_product_details(product_url)
        
        # Add a new row for each review
        for review, rating in zip(product_details["reviews"], product_details["ratings"]):
            new_row = product_details.copy()
            new_row["reviews"] = review
            new_row["ratings"] = rating
            product_details_list.append(new_row)
    except Exception as e:
        print(f"Error scraping product details for URL: {product_url}")
        print(e)
    
    # Sleep for a few seconds to avoid overloading the website
    time.sleep(30)

# Convert the list of dictionaries to a Pandas DataFrame
df = pd.DataFrame(product_details_list)

# Now, 'df' contains the scraped product information with a new column for ratings
print(f"Total product details scraped: {len(df)}")

# Don't forget to close the webdriver when done
wd.quit()

# Additional manual scraping to acquire scents description.

In [26]:
from webdriver_manager.chrome import ChromeDriverManager
from selenium import webdriver
from selenium.webdriver.common.by import By
import pandas as pd

# Initialize ChromeDriver with the appropriate version
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.implicitly_wait(30)

# Your Selenium code here
# Set the URL
base_url = 'https://www.fragrancex.com/shopping/type/perfume?currentPage='

link_list_popular = []
total_links = 0

# Define the number of pages you want to scrape
num_pages = 25

for page_num in range(1, num_pages + 1):
    url = base_url + str(page_num)
    driver.get(url)

    elements = driver.find_elements(By.CLASS_NAME, "animate")

    for element in elements:
        # Use get_attribute to extract the "href" attribute
        link = element.get_attribute("href")
        if link:
            link_list_popular.append(link)

    # If you reach the desired number of links, break the loop
    if total_links >= 1501:
        break

# Print the total number of links collected
print(f'Total links collected: {len(link_list_popular)}')

  driver = webdriver.Chrome(ChromeDriverManager().install())


Total links collected: 1500


In [30]:
link_list_popular[-1]

'https://www.fragrancex.com/products/montale/montale-aoud-purple-rose-perfume'

In [47]:
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
import time
from tqdm import tqdm

# Initialize Selenium webdriver
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.implicitly_wait(10)

# Initialize a DataFrame to store the results
data = pd.DataFrame(columns=["product name", "Fragrance Family", "Fragrance Classification", 
                             "Top Notes", "Heart Notes", "Base Notes"])

# # Replace this with your actual URLs
# link_list_popular = ["https://www.example.com"]
    
# Loop through each URL
for url in tqdm(link_list_popular):
    driver.get(url)
    time.sleep(5)  # Ensure enough time for page loading
    
    # Get page source and parse with BeautifulSoup
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    
    attributes = {}
    try:
        # Ensure the page has the necessary elements before proceeding
        rows = soup.find('tbody').find_all('tr')
        product_name = soup.find('span', class_='perfume-name').get_text()
        
        # Extract attribute data
        for row in rows:
            attribute_name = row.find_all('td')[0].get_text().strip()
            attribute_value = row.find_all('td')[1].get_text().strip()
            attributes[attribute_name] = attribute_value
            
        
        # Append extracted data to DataFrame
        data = data.append({
            "product name": product_name,
            "Fragrance Family": attributes.get('Fragrance Family', ''),
            "Fragrance Classification": attributes.get('Fragrance Classification', ''),
            "Top Notes": attributes.get('Top Notes', ''),
            "Heart Notes": attributes.get('Heart Notes', ''),
            "Base Notes": attributes.get('Base Notes', '')
        }, ignore_index=True)
    except AttributeError as e:
        print(f"Failed to extract information from {url}: {str(e)}")

# Close the webdriver
driver.quit()

data.to_csv('desc_final.csv', index=False)

  driver = webdriver.Chrome(ChromeDriverManager().install())
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({


  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
  data = data.append({
100%|█████████████████████████████████████| 1500/1500 [2:21:42<00:00,  5.67s/it]


In [157]:
data.to_csv('desc_final.csv', index = False)

desc = pd.read_csv('desc_final.csv')
desc

Unnamed: 0,product name,Fragrance Family,Fragrance Classification,Top Notes,Heart Notes,Base Notes
0,Light Blue Perfume,Citrus,Eau De Toilette (EDT),"Sicilian Lemon, Apple, Cedar, Bellflower","Bamboo, Jasmine, White Rose","Cedar, Musk, Amber"
1,Eternity Perfume,Floral,Eau De Parfum (EDP),"Green Notes, Freesia, Sage, Citruses, Mandarin...","Carnation, Lily, Lily of the valley, Narcissus...","Heliotrope, Musk, Sandalwood, Amber, Patchouli"
2,Bright Crystal by Versace,"Floral, Fruity and Musk",Eau De Toilette (EDT),"Yuzu, Pomegranate, Water Notes","Peony, Lotus, Magnolia","Musk, Mahogany, Amber"
3,Obsession Perfume,Bergamot,"Gift Set, Pure Perfume, Perfume, Body Lotion, ...","Bergamot, Mandarin, Green notes, Vanillin","Jasmine, Orange blossom, Sandalwood, Vetiver, ...","Oakmoss, Amber, Musk, Incense"
4,Elizabeth Arden Red Door Perfume for Women,fall,"Shower Gel, Gift Set, Body Cream, Perfume, Deo...","Rose (JFK), Ylang-ylang, Fruity accord","Carnation, Freesia, Jasmine, Lily, Lily of the...","Sandalwood, Vetiver, Oakmoss, Musk, Honey"
...,...,...,...,...,...,...
1495,Naturally Fierce Perfume,Vetiver,Perfume,,,
1496,Musc Invisible Perfume,,Perfume,,,
1497,Morning Muscs Perfume,Citrus,Perfume,,,
1498,Montale Day Dreams Perfume,fresh,"Perfume, Candles",,,


In [158]:
desc['all_scent'] = desc['Fragrance Family'].fillna('') + ', ' + \
                    desc['Top Notes'].fillna('') + ', ' + \
                    desc['Heart Notes'].fillna('') + ', ' + \
                    desc['Base Notes'].fillna('')

In [164]:
concat = desc[['product name', 'all_scent', 'Fragrance Classification']]
concat.columns = ['product_name', 'all_scent', 'Fragrance_Classification']

In [167]:
df_new = pd.read_csv('df_new.csv', index_col=0).reset_index(drop=True)
df_new

df_new = df_new.iloc[::-1].reset_index(drop=True)

In [168]:
merged_df = pd.merge(df_new, concat, on='product_name')
merged_df

Unnamed: 0,brand_name,product_name,price,total_num_reviews,overall_review,reviews,brand_product,all_scent,Fragrance_Classification
0,Dolce & Gabbana,Light Blue Perfume,34.60,1851.0,4.7,I would buy this fragrance again\nI use this f...,Dolce & Gabbana Light Blue Perfume,"Citrus, Sicilian Lemon, Apple, Cedar, Bellflow...",Eau De Toilette (EDT)
1,Dolce & Gabbana,Light Blue Perfume,34.60,1851.0,4.7,"I would love this product\nI love it, is an ev...",Dolce & Gabbana Light Blue Perfume,"Citrus, Sicilian Lemon, Apple, Cedar, Bellflow...",Eau De Toilette (EDT)
2,Dolce & Gabbana,Light Blue Perfume,34.60,1851.0,4.7,I would buy this product again and again\nVery...,Dolce & Gabbana Light Blue Perfume,"Citrus, Sicilian Lemon, Apple, Cedar, Bellflow...",Eau De Toilette (EDT)
3,Dolce & Gabbana,Light Blue Perfume,34.60,1851.0,4.7,I would buy this product again and again.\nI l...,Dolce & Gabbana Light Blue Perfume,"Citrus, Sicilian Lemon, Apple, Cedar, Bellflow...",Eau De Toilette (EDT)
4,Dolce & Gabbana,Light Blue Perfume,34.60,1851.0,4.7,Love the fragrance\nLove the fragrance! Will b...,Dolce & Gabbana Light Blue Perfume,"Citrus, Sicilian Lemon, Apple, Cedar, Bellflow...",Eau De Toilette (EDT)
...,...,...,...,...,...,...,...,...,...
33012,Juliette Has A Gun,Not A Perfume Superdose Perfume,118.99,6.0,4.2,Would buy again\nLuv this scent,Juliette Has A Gun Not A Perfume Superdose Per...,", , ,",Perfume
33013,Juliette Has A Gun,Not A Perfume Superdose Perfume,118.99,6.0,4.2,Got compliments with it!\nI got compliments an...,Juliette Has A Gun Not A Perfume Superdose Per...,", , ,",Perfume
33014,Juliette Has A Gun,Not A Perfume Superdose Perfume,118.99,6.0,4.2,Terrible\nI love the regular Not A Perfume but...,Juliette Has A Gun Not A Perfume Superdose Per...,", , ,",Perfume
33015,Juliette Has A Gun,Not A Perfume Superdose Perfume,118.99,6.0,4.2,Show Stopper\nEveryone wants to know the name ...,Juliette Has A Gun Not A Perfume Superdose Per...,", , ,",Perfume


In [153]:
merged_df[merged_df['all_scent'] != ', , , ']['product_name'].nunique()

900

In [154]:
merged_df[merged_df['all_scent'] != ', , , ']['brand_name'].nunique()

238

In [151]:
merged_df[merged_df['all_scent'] != ', , , ']

Unnamed: 0,brand_name,product_name,price,total_num_reviews,overall_review,reviews,brand_product,all_scent,Fragrance Classification
0,Dolce & Gabbana,Light Blue Perfume,34.60,1851.0,4.7,I would buy this fragrance again\nI use this f...,Dolce & Gabbana Light Blue Perfume,"Citrus, Sicilian Lemon, Apple, Cedar, Bellflow...",Eau De Toilette (EDT)
1,Dolce & Gabbana,Light Blue Perfume,34.60,1851.0,4.7,"I would love this product\nI love it, is an ev...",Dolce & Gabbana Light Blue Perfume,"Citrus, Sicilian Lemon, Apple, Cedar, Bellflow...",Eau De Toilette (EDT)
2,Dolce & Gabbana,Light Blue Perfume,34.60,1851.0,4.7,I would buy this product again and again\nVery...,Dolce & Gabbana Light Blue Perfume,"Citrus, Sicilian Lemon, Apple, Cedar, Bellflow...",Eau De Toilette (EDT)
3,Dolce & Gabbana,Light Blue Perfume,34.60,1851.0,4.7,I would buy this product again and again.\nI l...,Dolce & Gabbana Light Blue Perfume,"Citrus, Sicilian Lemon, Apple, Cedar, Bellflow...",Eau De Toilette (EDT)
4,Dolce & Gabbana,Light Blue Perfume,34.60,1851.0,4.7,Love the fragrance\nLove the fragrance! Will b...,Dolce & Gabbana Light Blue Perfume,"Citrus, Sicilian Lemon, Apple, Cedar, Bellflow...",Eau De Toilette (EDT)
...,...,...,...,...,...,...,...,...,...
32985,Reminiscence,Patchouli Blanc Perfume,59.49,18.0,4.3,"Love the scent\nThis smells amazing! sweet, fl...",Reminiscence Patchouli Blanc Perfume,"Patchouli, , ,","Perfume, Sample"
32986,Reminiscence,Patchouli Blanc Perfume,59.49,18.0,4.3,"No\nThe patch is very cheap here, doesnt worth...",Reminiscence Patchouli Blanc Perfume,"Patchouli, , ,","Perfume, Sample"
32987,Reminiscence,Patchouli Blanc Perfume,59.49,18.0,4.3,I love patchouli blanc smells wonderful ❤\nUse...,Reminiscence Patchouli Blanc Perfume,"Patchouli, , ,","Perfume, Sample"
32988,Reminiscence,Patchouli Blanc Perfume,59.49,18.0,4.3,Patchouli Blanc\nPatchouli Blanc is a lovely p...,Reminiscence Patchouli Blanc Perfume,"Patchouli, , ,","Perfume, Sample"


In [173]:
merged_df = merged_df[merged_df['all_scent'] != ', , , ']
merged_df

Unnamed: 0,brand_name,product_name,price,total_num_reviews,overall_review,reviews,brand_product,all_scent,Fragrance_Classification
0,Dolce & Gabbana,Light Blue Perfume,34.60,1851.0,4.7,I would buy this fragrance again\nI use this f...,Dolce & Gabbana Light Blue Perfume,"Citrus, Sicilian Lemon, Apple, Cedar, Bellflow...",Eau De Toilette (EDT)
1,Dolce & Gabbana,Light Blue Perfume,34.60,1851.0,4.7,"I would love this product\nI love it, is an ev...",Dolce & Gabbana Light Blue Perfume,"Citrus, Sicilian Lemon, Apple, Cedar, Bellflow...",Eau De Toilette (EDT)
2,Dolce & Gabbana,Light Blue Perfume,34.60,1851.0,4.7,I would buy this product again and again\nVery...,Dolce & Gabbana Light Blue Perfume,"Citrus, Sicilian Lemon, Apple, Cedar, Bellflow...",Eau De Toilette (EDT)
3,Dolce & Gabbana,Light Blue Perfume,34.60,1851.0,4.7,I would buy this product again and again.\nI l...,Dolce & Gabbana Light Blue Perfume,"Citrus, Sicilian Lemon, Apple, Cedar, Bellflow...",Eau De Toilette (EDT)
4,Dolce & Gabbana,Light Blue Perfume,34.60,1851.0,4.7,Love the fragrance\nLove the fragrance! Will b...,Dolce & Gabbana Light Blue Perfume,"Citrus, Sicilian Lemon, Apple, Cedar, Bellflow...",Eau De Toilette (EDT)
...,...,...,...,...,...,...,...,...,...
32985,Reminiscence,Patchouli Blanc Perfume,59.49,18.0,4.3,"Love the scent\nThis smells amazing! sweet, fl...",Reminiscence Patchouli Blanc Perfume,"Patchouli, , ,","Perfume, Sample"
32986,Reminiscence,Patchouli Blanc Perfume,59.49,18.0,4.3,"No\nThe patch is very cheap here, doesnt worth...",Reminiscence Patchouli Blanc Perfume,"Patchouli, , ,","Perfume, Sample"
32987,Reminiscence,Patchouli Blanc Perfume,59.49,18.0,4.3,I love patchouli blanc smells wonderful ❤\nUse...,Reminiscence Patchouli Blanc Perfume,"Patchouli, , ,","Perfume, Sample"
32988,Reminiscence,Patchouli Blanc Perfume,59.49,18.0,4.3,Patchouli Blanc\nPatchouli Blanc is a lovely p...,Reminiscence Patchouli Blanc Perfume,"Patchouli, , ,","Perfume, Sample"


In [172]:
merged_df.to_csv('merged_df.csv')

In [53]:
desc = pd.read_csv('desc_final.csv')
desc.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1500 entries, 0 to 1499
Data columns (total 6 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   product name              1500 non-null   object
 1   Fragrance Family          1050 non-null   object
 2   Fragrance Classification  1474 non-null   object
 3   Top Notes                 496 non-null    object
 4   Heart Notes               492 non-null    object
 5   Base Notes                491 non-null    object
dtypes: object(6)
memory usage: 70.4+ KB
