In [13]:
import os
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [14]:
def _element_text(parent, tag, class_name=None):
    """Return the stripped text of the first matching descendant, or '' if absent."""
    # Only pass class_ when a class is requested: BeautifulSoup treats an
    # explicit class_=None as a filter of its own rather than "no filter".
    filters = {'class_': class_name} if class_name else {}
    element = parent.find(tag, **filters)
    return element.text.strip() if element is not None else ''


def get_data_from_catalog(catalog_url, timeout=30):
    """Scrape one catalog page and return its products as a list of dicts.

    The page is walked as category -> subcategory -> sub-subcategory ->
    product, and one dict is produced per product. Any heading or product
    field that is missing from the markup is recorded as an empty string
    instead of aborting the whole scrape.

    NOTE(review): the original comments label every selector as an
    "Example"; confirm the tag/class names against the live markup.

    Args:
        catalog_url: URL of the catalog page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of dicts, one per product, keyed by the output column names.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(catalog_url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page into zero rows.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    products_data = []

    for category in soup.find_all('div', class_='category'):
        category_name = _element_text(category, 'h2')

        for subcategory in category.find_all('div', class_='subcategory'):
            subcategory_name = _element_text(subcategory, 'h3')

            for sub_subcategory in subcategory.find_all('div', class_='sub-subcategory'):
                sub_subcategory_name = _element_text(sub_subcategory, 'h4')

                for product in sub_subcategory.find_all('div', class_='product'):
                    brand = _element_text(product, 'span', 'brand')
                    model = _element_text(product, 'span', 'model')

                    # NOTE(review): the 'Atribut ...' and 'Suitable for ...'
                    # columns are filled from the same brand/model spans, as
                    # in the original; confirm whether they should differ.
                    products_data.append({
                        'Catalog': catalog_url,
                        'Category': category_name,
                        'Subcategory': subcategory_name,
                        'Sub-Subcategory': sub_subcategory_name,
                        'Atribut Brand': brand,
                        'Atribut Model': model,
                        'Product Name': _element_text(product, 'span', 'product-name'),
                        'Reference': _element_text(product, 'span', 'reference'),
                        'Other References': _element_text(product, 'span', 'other-references'),
                        'Suitable for brand': brand,
                        'Suitable for model': model,
                        'Price': _element_text(product, 'span', 'price'),
                        'Availability': _element_text(product, 'span', 'availability'),
                    })
    return products_data


In [15]:
def main(base_url='https://landwirtschaft.rw.net/', output_path='products_data.csv'):
    """Scrape every catalog linked from the start page and save the result as CSV.

    Args:
        base_url: Start page that lists the catalogs.
        output_path: Where the combined CSV is written.

    Returns:
        The pandas DataFrame holding one row per scraped product, so the
        caller can keep working with it (e.g. save a second copy).

    Raises:
        requests.HTTPError: If the start page answers with an error status.
    """
    response = requests.get(base_url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    all_products = []

    for catalog in soup.find_all('div', class_='catalog'):
        link = catalog.find('a')
        href = link.get('href') if link is not None else None
        if not href:
            # A catalog tile without a usable link has nothing to scrape.
            continue
        # Resolve relative links against the start page; absolute URLs
        # pass through urljoin unchanged.
        catalog_url = urljoin(base_url, href)
        all_products.extend(get_data_from_catalog(catalog_url))

    df = pd.DataFrame(all_products)
    df.to_csv(output_path, index=False)
    return df


if __name__ == "__main__":
    # main() returns the frame; the original relied on a `df` that only
    # existed inside main(), which raises NameError on a fresh kernel.
    df = main()
    # Save a second copy to the Downloads folder
    downloads_dir = os.path.join(os.path.expanduser("~"), "Downloads")
    os.makedirs(downloads_dir, exist_ok=True)
    downloads_path = os.path.join(downloads_dir, "products_data.csv")
    df.to_csv(downloads_path, index=False)
    print(f"CSV file has been created and saved to {downloads_path}")

CSV file has been created and saved to C:\Users\user\Downloads\products_data.csv
