In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd


In [3]:
def scrape_product_info(url, timeout=30):
    """Download one product page and extract its name, images and description.

    Args:
        url: Address of the product page to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without a timeout a stalled
            connection would block the notebook indefinitely.

    Returns:
        dict with the keys ``'URL'`` (the input url), ``'Product_Name'``
        (str), ``'Product_Images'`` (list of str) and
        ``'Product_Description'`` (str).

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.Timeout: No response arrived within ``timeout`` seconds.
        AttributeError, IndexError, KeyError: The page does not contain the
            markup this scraper looks for (missing title/tabs element, fewer
            than two ``<p>`` tags in the tabs, or an anchor without ``href``).
    """

    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Raise an exception for bad status codes

    soup = BeautifulSoup(response.content, 'html.parser')

    # The title text sits in a <span> nested inside <h1 class="product-title">.
    product_name = soup.find('h1', class_='product-title').span.text.strip()

    # NOTE(review): prefixing 'https:' presumes the gallery hrefs are
    # protocol-relative ('//host/path') - confirm if the site markup changes.
    product_images = ['https:' + a['href'] for a in soup.find_all('a', class_='fancybox')]

    # The second <p> inside <ul class="tabs-content"> is used as the description.
    product_description = soup.find('ul', class_='tabs-content').find_all('p')[1].text.strip()

    return {
        'URL': url,
        'Product_Name': product_name,
        'Product_Images': product_images,
        'Product_Description': product_description
    }

def process_urls(urls, df):
    """Scrape every URL not yet present in ``df`` and append the results.

    Args:
        urls: Iterable of product-page URLs to process.
        df: DataFrame with a ``'URL'`` column holding the pages that were
            already scraped. It is not modified.

    Returns:
        ``df`` itself when nothing new had to be scraped, otherwise a new
        DataFrame consisting of the rows of ``df`` followed by one row per
        newly scraped URL (index renumbered from 0).
    """
    # Exact membership test. ``Series.str.contains(url)`` would interpret the
    # URL as a regular expression, where the '?' that starts the query string
    # is a quantifier, so already-scraped URLs were never recognised.
    seen = set(df['URL'])

    new_rows = []
    for url in urls:
        if url in seen:
            continue
        new_rows.append(scrape_product_info(url))
        seen.add(url)  # also skips repeats inside ``urls`` itself

    if not new_rows:
        return df

    # Concatenate once instead of rebuilding the frame for every row.
    return pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)

In [4]:
# Sample input: six product pages from the pazzion.com best-seller collection.
urls = [
    "https://www.pazzion.com/collections/best-seller/products/1108-2a-karina-studded-bow-ballet-flats?variant=44575817367795",
    "https://www.pazzion.com/collections/best-seller/products/6831-2-della-slip-on-slide-sandals?variant=44932972347635",
    "https://www.pazzion.com/collections/best-seller/products/6188-15-taryn-leather-espadrilles?variant=45258557325555",
    "https://www.pazzion.com/collections/best-seller/products/1389-3-nayeli-leather-sneakers?variant=44133419122931",
    "https://www.pazzion.com/collections/best-seller/products/128-55a-nova-bow-covered-flats?variant=44079692415219",
    "https://www.pazzion.com/collections/best-seller/products/copy-of-aurora-glitter-weaved-flats?variant=43757807436019",
]


In [5]:
# Seed the run with an empty table that already has the expected columns,
# then scrape every URL in `urls` into it; the last line displays the result.
df = process_urls(
    urls,
    pd.DataFrame(columns=['URL', 'Product_Name', 'Product_Images', 'Product_Description']),
)
df

Unnamed: 0,URL,Product_Name,Product_Images,Product_Description
0,https://www.pazzion.com/collections/best-selle...,Karina Studded Bow Ballet Flats,[https://www.pazzion.com/cdn/shop/files/Karina...,Elegantly embellished with chic studded detail...
1,https://www.pazzion.com/collections/best-selle...,Della Slip On Slide Sandals,[https://www.pazzion.com/cdn/shop/files/DellaS...,For the fashion-forward woman seeking both sty...
2,https://www.pazzion.com/collections/best-selle...,Taryn Leather Espadrilles,[https://www.pazzion.com/cdn/shop/files/TarynL...,Taryn - redefining elegance with their refined...
3,https://www.pazzion.com/collections/best-selle...,Nayeli Leather Sneakers,[https://www.pazzion.com/cdn/shop/files/Nayeli...,Nayeli Leather Sneakers is the epitome of dura...
4,https://www.pazzion.com/collections/best-selle...,Nova Bow Covered Flats,[https://www.pazzion.com/cdn/shop/files/NovaBo...,"Nova exemplifies the beauty of simplicity, whe..."
5,https://www.pazzion.com/collections/best-selle...,Aurora Glitter Weaved Flats,[https://www.pazzion.com/cdn/shop/files/Aurora...,Nothing screams bridal beauty like a pair of g...


In [15]:
# Save the scraped table as CSV; index=False leaves the row index out of the file.
df.to_csv(path_or_buf='examples/examples_original.csv', index=False)


In [7]:
# Write the same table to the demo folder, again without the row index.
df.to_csv(path_or_buf='demo/batch_1.csv', index=False)
