In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# Définition de l'User-Agent et des en-têtes
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
    'Accept-Language': 'fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7',
    'Accept-Encoding': 'gzip, deflate, br',
    'Referer': 'https://www.alibaba.com/',
    'Connection': 'keep-alive'
}

# Liste des URLs des sites à scraper
urls = [
    'https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=laptop',
    'https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=smartphone',
    'https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=smartwatch',
    'https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=airpods'
]

# Stockage des résultats
all_data = []

# Fonction pour scraper une URL donnée
def scrape_url(url):
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Détection du site cible
        if "alibaba.com" in url:
            site = "Alibaba"
            items = soup.select('.div.organic-list.app-organic-search-mb-20.viewtype-gallery')

            for item in items:
                # Extraction des données
                title = item.select_one('.div.search-card-info__wrapper > div.card-info.gallery-card-layout-info > h2 > a > span')
                price = item.select_one('.div.search-card-info__wrapper > div.card-info.gallery-card-layout-info > a:nth-child(3) > div > div')
                rating = item.select_one('.div.search-card-info__wrapper > div.card-info.gallery-card-layout-info > div > div:nth-child(2) > div:nth-child(2) > a > span > strong')
                reviews = item.select_one('.div.search-card-info__wrapper > div.card-info.gallery-card-layout-info > div > div:nth-child(2) > div:nth-child(2) > a > span > span')
                stock = item.select_one('.div.search-card-info__wrapper > div.card-info.gallery-card-layout-info > a.search-card-e-detail-wrapper.gallery-card-info__sales > div.search-card-e-popper__trigger > div')
                discount = item.select_one('.div.search-card-info__wrapper > div.card-info.gallery-card-layout-info > a:nth-child(3) > div > div.search-card-e-price__list.margin-left-4 > span')  # Élément des rabais

                # Nettoyage des données
                title_text = title.text.strip() if title else "N/A"
                price_text = price.text.strip() if price else "N/A"
                rating_text = float(rating.text.strip()) if rating else "N/A"
                reviews_text = reviews.text.strip() if reviews else "0"
                stock_text = stock.text.strip() if stock else "N/A"
                discount_text = discount.text.strip() if discount else "Aucun rabais"  # Gestion des rabais

                # Ajout des données à la liste
                all_data.append({
                    'Site': site,
                    'Titre': title_text,
                    'Prix (USD)': price_text,
                    'Note': rating_text,
                    'Avis': reviews_text,
                    'Stock': stock_text,
                    'Rabais': discount_text  # Ajout du rabais
                })
        else:
            print(f"⚠️ Site non pris en charge : {url}")

    except requests.exceptions.RequestException as e:
        print(f"❌ Erreur lors de la requête {url}: {e}")

    time.sleep(2)  # Pause pour éviter le blocage

# Scraping de toutes les URLs
for url in urls:
    print(f"Scraping {url}...")
    scrape_url(url)

# Conversion des résultats en DataFrame Pandas
df = pd.DataFrame(all_data)

# Affichage du DataFrame
print(df)

# Sauvegarde en CSV
if not df.empty:
    df.to_csv("resultats_scraping_alibaba.csv", index=False, encoding='utf-8-sig')
    print("\n✅ Données sauvegardées dans 'resultats_scraping_alibaba.csv'")
else:
    print("\n⚠️ Aucune donnée collectée.")

Scraping https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=laptop...
Scraping https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=smartphone...
Scraping https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=smartwatch...
Scraping https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=airpods...
Empty DataFrame
Columns: []
Index: []

⚠️ Aucune donnée collectée.


In [None]:
from selenium import webdriver
from selenium.webdriver.edge.service import Service as EdgeService
from webdriver_manager.microsoft import EdgeChromiumDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

loading_time = 30

driver = webdriver.Edge(service=EdgeService(EdgeChromiumDriverManager().install()))


import pandas as pd
import time

# Liste des URLs des sites à scraper
urls = [
    'https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=laptop',
    'https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=smartphone',
    'https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=smartwatch',
    'https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=airpods'
]

# Stockage des résultats
all_data = []

# Fonction pour scraper une URL donnée
def scrape_url(url):
    try:
        driver.get(url)
        time.sleep(5)  # Attendre que la page se charge

        # Détection du site cible
        if "alibaba.com" in url:
            site = "Alibaba"
            items = driver.find_elements(By.XPATH, '//*[@id="sse-fluent-offerlist-ssr"]/div[2]')

            for item in items:
                # Extraction des données
                title = item.find_element(By.XPATH, './/div/div/div/div[3]').text
                price = item.find_element(By.XPATH, './/div/div/div/div[4]/div/div/div').text
                rating = item.find_element(By.XPATH, './/div/div/div/div[9]/div[2]/div/div/span/strong').text
                reviews = item.find_element(By.XPATH, './/div/div/div/div[9]/div[2]/div/div/span/span').text
                stock = item.find_element(By.XPATH, './/div[2]/div[1]/a[3]/div[2]/div').text
                discount = item.find_element(By.XPATH, './/div[2]/div[1]/a[2]/div/div[2]/span[2]').text

                # Ajout des données à la liste
                all_data.append({
                    'Site': site,
                    'Titre': title,
                    'Prix (USD)': price,
                    'Note': rating,
                    'Avis': reviews,
                    'Stock': stock,
                    'Rabais': discount
                })
        else:
            print(f"⚠️ Site non pris en charge : {url}")

    except Exception as e:
        print(f"❌ Erreur lors du traitement des données pour {url}: {e}")

    time.sleep(2)  # Pause pour éviter le blocage

# Scraping de toutes les URLs
for url in urls:
    print(f"Scraping {url}...")
    scrape_url(url)

# Fermeture du navigateur
driver.quit()

# Conversion des résultats en DataFrame Pandas
df = pd.DataFrame(all_data)

# Affichage du DataFrame
print(df)

# Sauvegarde en CSV
if not df.empty:
    df.to_csv("resultats_scraping_alibaba.csv", index=False, encoding='utf-8-sig')
    print("\n✅ Données sauvegardées dans 'resultats_scraping_alibaba.csv'")
else:
    print("\n⚠️ Aucune donnée collectée.")

Scraping https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=laptop...
❌ Erreur lors du traitement des données pour https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=laptop: Message: no such element: Unable to locate element: {"method":"xpath","selector":".//div/div/div/div[9]/div[2]/div/div/span/strong"}
  (Session info: MicrosoftEdge=133.0.3065.59); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
	GetHandleVerifier [0x00007FF69467E6D5+13397]
	Microsoft::Applications::Events::EventProperty::empty [0x00007FF69490B174+2060308]
	Microsoft::Applications::Events::EventProperty::empty [0x00007FF6948622D8+1368440]
	(No symbol) [0x00007FF694453219]
	(No symbol) [0x00007FF69445340B]
	(No symbol) [0x00007FF69444960C]
	(No symbol) [0x00007FF69447420F]
	(No symbol) [0x00007FF6944495BD]
	(N

In [None]:
from selenium import webdriver
from selenium.webdriver.edge.service import Service as EdgeService
from webdriver_manager.microsoft import EdgeChromiumDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time

# Initialize the WebDriver
driver = webdriver.Edge(service=EdgeService(EdgeChromiumDriverManager().install()))

# List of URLs to scrape
urls = [
    'https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=laptop',
    'https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=smartphone',
    'https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=smartwatch',
    'https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=airpods'
]

# Storage for results
all_data = []

# Function to scrape a given URL
def scrape_url(url):
    try:
        driver.get(url)
        WebDriverWait(driver, 10).until()
        EC.presence_of_element_located((By.XPATH, '//*[@id="sse-fluent-offerlist-ssr"]/div[2]'))
        time.sleep(5)  # Wait for the page to load

        # Detect the target site
        if "alibaba.com" in url:
            site = "Alibaba"
            items = driver.find_elements(By.XPATH, '//*[@id="sse-fluent-offerlist-ssr"]/div[2]')

            for item in items:
                try:
                    # Extract data
                    title = item.find_element(By.XPATH, './/div/div/div/div[3]').text
                    price = item.find_element(By.XPATH, './/div/div/div/div[4]/div/div/div').text
                    rating = item.find_element(By.XPATH, './/div/div/div/div[9]/div[2]/div/div/span/strong').text
                    reviews = item.find_element(By.XPATH, './/div/div/div/div[9]/div[2]/div/div/span/span').text
                    stock = item.find_element(By.XPATH, './/div[2]/div[1]/a[3]/div[2]/div').text
                    discount = item.find_element(By.XPATH, './/div[2]/div[1]/a[2]/div/div[2]/span[2]').text

                    # Add data to the list
                    all_data.append({
                        'Site': site,
                        'Titre': title,
                        'Prix (USD)': price,
                        'Note': rating,
                        'Avis': reviews,
                        'Stock': stock,
                        'Rabais': discount
                    })
                except Exception as e:
                    print(f"❌ Error extracting data from an item: {e}")
        else:
            print(f"⚠️ Site not supported: {url}")

    except Exception as e:
        print(f"❌ Error processing data for {url}: {e}")

    time.sleep(2)  # Pause to avoid blocking

# Scrape all URLs
for url in urls:
    print(f"Scraping {url}...")
    scrape_url(url)

# Close the browser
driver.quit()

# Convert results to a Pandas DataFrame
df = pd.DataFrame(all_data)

# Display the DataFrame
print(df)

# Save to CSV
if not df.empty:
    df.to_csv("resultats_scraping_alibaba.csv", index=False, encoding='utf-8-sig')
    print("\n✅ Data saved to 'resultats_scraping_alibaba.csv'")
else:
    print("\n⚠️ No data collected.")

Scraping https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=laptop...
❌ Error processing data for https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=laptop: WebDriverWait.until() missing 1 required positional argument: 'method'
Scraping https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=smartphone...
❌ Error processing data for https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=smartphone: WebDriverWait.until() missing 1 required positional argument: 'method'
Scraping https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=smartwatch...
❌ Error processing data for https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=smartwatch: WebDriverWait.until() missing 1 required positional argument: 'me

In [None]:
from selenium import webdriver
from selenium.webdriver.edge.service import Service as EdgeService
from webdriver_manager.microsoft import EdgeChromiumDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time

# Initialize the WebDriver
driver = webdriver.Edge(service=EdgeService(EdgeChromiumDriverManager().install()))

# List of URLs to scrape
urls = [
    'https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=laptop',
    'https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=smartphone',
    'https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=smartwatch',
    'https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=airpods'
]

# Storage for results
all_data = []

# Function to scrape a given URL
def scrape_url(url):
    try:
        driver.get(url)
        # Wait for the product list container to load
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, '//*[@id="sse-fluent-offerlist-ssr"]/div[2]'))
        )
        time.sleep(5)  # Additional wait for dynamic content

        # Detect the target site
        if "alibaba.com" in url:
            site = "Alibaba"
            items = driver.find_elements(By.XPATH, '//*[@id="sse-fluent-offerlist-ssr"]/div[2]/div')

            for item in items:
                try:
                    # Extract data
                    title = item.find_element(By.XPATH, './/div/div/div/div[3]').text
                    price = item.find_element(By.XPATH, './/div/div/div/div[4]/div/div/div').text
                    rating = item.find_element(By.XPATH, './/div[2]/div[1]/div/div[2]/div[2]/a/span/strong').text
                    reviews = item.find_element(By.XPATH, './/div/div/div/div[9]/div[2]/div/div/span/span').text
                    stock = item.find_element(By.XPATH, './/div[2]/div[1]/a[3]/div[2]/div').text
                    discount = item.find_element(By.XPATH, './/div[2]/div[1]/a[2]/div/div[2]/span[2]').text

                    # Add data to the list
                    all_data.append({
                        'Site': site,
                        'Titre': title,
                        'Prix (USD)': price,
                        'Note': rating,
                        'Avis': reviews,
                        'Stock': stock,
                        'Rabais': discount
                    })
                except Exception as e:
                    print(f"❌ Error extracting data from an item: {e}")
        else:
            print(f"⚠️ Site not supported: {url}")

    except Exception as e:
        print(f"❌ Error processing data for {url}: {e}")

    time.sleep(2)  # Pause to avoid blocking

# Scrape all URLs
for url in urls:
    print(f"Scraping {url}...")
    scrape_url(url)

# Close the browser
driver.quit()

# Convert results to a Pandas DataFrame
df = pd.DataFrame(all_data)

# Display the DataFrame
print(df)

# Save to CSV
if not df.empty:
    df.to_csv("resultats_scraping_alibaba.csv", index=False, encoding='utf-8-sig')
    print("\n✅ Data saved to 'resultats_scraping_alibaba.csv'")
else:
    print("\n⚠️ No data collected.")

Scraping https://www.alibaba.com/trade/search?spm=a2700.galleryofferlist.0.0.1234567890ABCD&tab=all&searchText=laptop...
❌ Error extracting data from an item: Message: no such element: Unable to locate element: {"method":"xpath","selector":".//div[2]/div[1]/div/div[2]/div[2]/a/span/strong"}
  (Session info: MicrosoftEdge=133.0.3065.59); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
	GetHandleVerifier [0x00007FF69467E6D5+13397]
	Microsoft::Applications::Events::EventProperty::empty [0x00007FF69490B174+2060308]
	Microsoft::Applications::Events::EventProperty::empty [0x00007FF6948622D8+1368440]
	(No symbol) [0x00007FF694453219]
	(No symbol) [0x00007FF69445340B]
	(No symbol) [0x00007FF69444960C]
	(No symbol) [0x00007FF69447420F]
	(No symbol) [0x00007FF6944495BD]
	(No symbol) [0x00007FF69444947D]
	(No symbol) [0x00007FF694474490]
	(No symbol) [0x00007FF6944495BD]
	(No symbol) [0x00