In [1]:
import os
import time
import requests
from urllib.parse import urljoin, urlparse
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options

# === CONFIGURATION ===
# Site root; relative hrefs found in the scraped pages are resolved against it.
BASE_URL = "https://www.mql5.com"
# First listing page. Later listing pages are addressed as f"{START_URL}/!{n}".
START_URL = "https://www.mql5.com/en/code"
# Root output directory (relative path); one sub-folder is created per expert.
DOWNLOAD_FOLDER = "mql5_experts"
# Number of listing pages the main loop iterates over.
PAGES_TO_SCRAPE = 50
DELAY = 2  # seconds slept after every driver.get() call

# === CHROMEDRIVER SETUP ===
# Run Chrome without a visible window.
chrome_options = Options()
chrome_options.add_argument("--headless")
# Service() is created without an explicit chromedriver path.
# NOTE(review): presumably the driver binary is located by Selenium itself or
# via PATH -- confirm for the installed Selenium version.
driver = webdriver.Chrome(service=Service(), options=chrome_options)

# === FILE DOWNLOAD HELPER ===
def download_file(url, folder_path, timeout=30):
    """Download ``url`` into ``folder_path`` and report the outcome on stdout.

    The local file name is the last segment of the URL path. Failures are
    reported with a printed message rather than raised, so one bad link does
    not abort the surrounding scrape.

    Args:
        url: Absolute URL of the file to fetch.
        folder_path: Existing directory in which the file is written.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled server cannot block the run forever.

    Returns:
        None.
    """
    filename = os.path.basename(urlparse(url).path)
    if not filename:
        # A path ending in "/" (or an empty path) has no file name: joining it
        # would yield the folder itself and open() would fail after a wasted
        # HTTP request.
        print(f"[!] Aucun nom de fichier dans l'URL : {url}")
        return
    filepath = os.path.join(folder_path, filename)
    try:
        # The context manager releases the streamed connection on every path.
        with requests.get(url, stream=True, timeout=timeout) as r:
            if r.status_code != 200:
                print(f"[!] Erreur {r.status_code} pour {url}")
                return
            with open(filepath, 'wb') as f:
                for chunk in r.iter_content(1024):
                    f.write(chunk)
        print(f"[+] Fichier téléchargé : {filename}")
    except Exception as e:
        # Best-effort download: log and let the caller continue.
        print(f"[!] Erreur lors du téléchargement : {url} -> {e}")

# === MAIN SCRAPER ===
# Walks the listing pages, opens every linked expert page, and stores that
# page's .mq5/.mq4 files and images in a folder named after its <h1> title.
os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)

# try/finally guarantees the headless Chrome process is shut down even when a
# page load raises or the cell is interrupted.
try:
    for page_num in range(PAGES_TO_SCRAPE):
        print(f"==> Scraping page {page_num+1}")
        # The first listing page is START_URL itself; later ones use "/!<n>".
        page_url = f"{START_URL}/!{page_num}" if page_num > 0 else START_URL
        driver.get(page_url)
        time.sleep(DELAY)  # fixed pause before reading the page source

        soup = BeautifulSoup(driver.page_source, "html.parser")
        items = soup.select(".title > a")

        for link in items:
            href = link.get("href")
            if not href:
                # urljoin(BASE_URL, None) returns BASE_URL, which would make
                # the browser scrape the home page as if it were an expert.
                continue
            expert_url = urljoin(BASE_URL, href)
            driver.get(expert_url)
            time.sleep(DELAY)
            expert_soup = BeautifulSoup(driver.page_source, "html.parser")

            # Expert name: taken from the first <h1>; pages without one are skipped.
            title_tag = expert_soup.select_one("h1")
            if not title_tag:
                continue
            # Path separators are replaced so the title maps to a single folder.
            expert_name = title_tag.text.strip().replace("/", "_").replace("\\", "_")
            expert_folder = os.path.join(DOWNLOAD_FOLDER, expert_name)
            os.makedirs(expert_folder, exist_ok=True)
            print(f"[*] Traitement de : {expert_name}")

            # Download every linked .mq5 or .mq4 file.
            for a in expert_soup.select("a"):
                file_href = a.get("href", "")
                if file_href.endswith((".mq5", ".mq4")):
                    download_file(urljoin(BASE_URL, file_href), expert_folder)

            # Download every image that has a src attribute.
            for img in expert_soup.select("img"):
                src = img.get("src")
                if src:
                    download_file(urljoin(BASE_URL, src), expert_folder)
finally:
    driver.quit()


==> Scraping page 1
==> Scraping page 2
==> Scraping page 3
==> Scraping page 4
==> Scraping page 5
==> Scraping page 6
==> Scraping page 7
==> Scraping page 8
==> Scraping page 9
==> Scraping page 10
==> Scraping page 11
==> Scraping page 12
==> Scraping page 13
==> Scraping page 14
==> Scraping page 15
==> Scraping page 16
==> Scraping page 17
==> Scraping page 18
==> Scraping page 19
==> Scraping page 20
==> Scraping page 21
==> Scraping page 22
==> Scraping page 23
==> Scraping page 24
==> Scraping page 25
==> Scraping page 26
==> Scraping page 27
==> Scraping page 28
==> Scraping page 29
==> Scraping page 30
==> Scraping page 31
==> Scraping page 32
==> Scraping page 33
==> Scraping page 34
==> Scraping page 35
==> Scraping page 36
==> Scraping page 37
==> Scraping page 38
==> Scraping page 39
==> Scraping page 40
==> Scraping page 41
==> Scraping page 42
==> Scraping page 43
==> Scraping page 44
==> Scraping page 45
==> Scraping page 46
==> Scraping page 47
==> Scraping page 48
=

In [2]:
import os
import time
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

# === CONFIGURATION ===
# Site root; relative hrefs found in the scraped pages are resolved against it.
BASE_URL = "https://www.mql5.com"
# First listing page. Later listing pages are addressed as f"{START_URL}/!{n}".
START_URL = "https://www.mql5.com/en/code"
# Root output directory (relative path); one sub-folder is created per expert.
DOWNLOAD_FOLDER = "mql5_experts"
# Number of listing pages the main loop iterates over.
PAGES_TO_SCRAPE = 50
# Seconds slept after every driver.get() call.
DELAY = 2

# === HEADLESS SELENIUM CONFIGURATION ===
chrome_options = Options()
chrome_options.add_argument("--headless=new")
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--no-sandbox")
# Service() is created without an explicit chromedriver path.
# NOTE(review): presumably the driver binary is located by Selenium itself or
# via PATH -- confirm for the installed Selenium version.
driver = webdriver.Chrome(service=Service(), options=chrome_options)

# === CREATE ROOT OUTPUT FOLDER ===
os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)

# === DOWNLOAD A FILE ===
def download_file(url, folder_path, timeout=30):
    """Download ``url`` into ``folder_path`` and report the outcome on stdout.

    The local file name is the last segment of the URL path. Every failure
    (missing file name, non-200 status, network or filesystem error) is
    reported with a printed message rather than raised, so one bad link does
    not abort the surrounding scrape.

    Args:
        url: Absolute URL of the file to fetch.
        folder_path: Existing directory in which the file is written.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled server cannot block the run forever.

    Returns:
        None.
    """
    try:
        filename = os.path.basename(urlparse(url).path)
        if not filename:
            # A path ending in "/" (or an empty path) has no file name:
            # joining it would yield the folder itself and open() would fail
            # after a wasted HTTP request.
            print(f"[!] Aucun nom de fichier dans l'URL : {url}")
            return
        filepath = os.path.join(folder_path, filename)
        # The context manager releases the streamed connection on every path.
        with requests.get(url, stream=True, timeout=timeout) as r:
            if r.status_code != 200:
                print(f"[!] Erreur {r.status_code} pour {url}")
                return
            with open(filepath, 'wb') as f:
                for chunk in r.iter_content(1024):
                    f.write(chunk)
        print(f"[+] Téléchargé : {filename}")
    except Exception as e:
        # Best-effort download: log and let the caller continue.
        print(f"[!] Exception sur téléchargement : {url} -> {e}")

# === MAIN SCRAPER ===
# Walks the listing pages, opens every linked expert page, and stores that
# page's ZIP/.mq5/.mq4 download and presentation images in a folder named
# after its <h1> title.
# try/finally guarantees the headless Chrome process is shut down even when
# the cell is interrupted or an error escapes the per-expert handler.
try:
    for page_num in range(PAGES_TO_SCRAPE):
        # The first listing page is START_URL itself; later ones use "/!<n>".
        page_url = f"{START_URL}/!{page_num}" if page_num > 0 else START_URL
        print(f"==> Scraping page {page_num+1}: {page_url}")
        driver.get(page_url)
        time.sleep(DELAY)  # fixed pause before reading the page source

        soup = BeautifulSoup(driver.page_source, "html.parser")
        links = soup.select("div.title a")

        for link in links:
            try:
                href = link.get("href")
                if not href:
                    # urljoin(BASE_URL, None) returns BASE_URL, which would
                    # make the browser scrape the home page as an expert.
                    continue
                expert_url = urljoin(BASE_URL, href)
                driver.get(expert_url)
                time.sleep(DELAY)

                expert_soup = BeautifulSoup(driver.page_source, "html.parser")

                # === EXPERT NAME ===
                # Taken from the first <h1>; pages without one are skipped.
                title_tag = expert_soup.select_one("h1")
                if not title_tag:
                    continue
                # Path separators are replaced so the title maps to one folder.
                expert_name = title_tag.text.strip().replace("/", "_").replace("\\", "_")
                expert_folder = os.path.join(DOWNLOAD_FOLDER, expert_name)
                os.makedirs(expert_folder, exist_ok=True)
                print(f"[*] Traitement de : {expert_name}")

                # === ZIP OR MQ5/MQ4 DOWNLOAD LINK ===
                # Prefer the "Download as ZIP" anchor; otherwise fall back to
                # the first anchor whose href ends in .mq5 or .mq4.
                download_link = expert_soup.find("a", string=lambda s: s and "Download as ZIP" in s)
                if not download_link:
                    download_link = expert_soup.find("a", href=lambda href: href and (href.endswith(".mq5") or href.endswith(".mq4")))
                # An anchor matched by its text may carry no href; .get()
                # avoids the KeyError that indexing would raise.
                file_href = download_link.get("href") if download_link else None
                if file_href:
                    file_url = urljoin(BASE_URL, file_href)
                    download_file(file_url, expert_folder)
                else:
                    print(f"[!] Aucun fichier à télécharger pour {expert_name}")

                # === DOWNLOAD PRESENTATION IMAGES ===
                # Only sources containing a common raster extension are kept.
                images = expert_soup.select("img")
                for img in images:
                    src = img.get("src")
                    if src and any(ext in src for ext in ['.png', '.jpg', '.jpeg']):
                        img_url = urljoin(BASE_URL, src)
                        download_file(img_url, expert_folder)

            except Exception as e:
                # Best-effort: one failing expert page must not stop the run.
                print(f"[!] Erreur sur expert : {link.get('href')} -> {e}")
finally:
    driver.quit()


==> Scraping page 1: https://www.mql5.com/en/code
==> Scraping page 2: https://www.mql5.com/en/code/!1
==> Scraping page 3: https://www.mql5.com/en/code/!2
==> Scraping page 4: https://www.mql5.com/en/code/!3
==> Scraping page 5: https://www.mql5.com/en/code/!4
==> Scraping page 6: https://www.mql5.com/en/code/!5
==> Scraping page 7: https://www.mql5.com/en/code/!6
==> Scraping page 8: https://www.mql5.com/en/code/!7
==> Scraping page 9: https://www.mql5.com/en/code/!8
==> Scraping page 10: https://www.mql5.com/en/code/!9
==> Scraping page 11: https://www.mql5.com/en/code/!10
==> Scraping page 12: https://www.mql5.com/en/code/!11
==> Scraping page 13: https://www.mql5.com/en/code/!12
==> Scraping page 14: https://www.mql5.com/en/code/!13
==> Scraping page 15: https://www.mql5.com/en/code/!14
==> Scraping page 16: https://www.mql5.com/en/code/!15
==> Scraping page 17: https://www.mql5.com/en/code/!16
==> Scraping page 18: https://www.mql5.com/en/code/!17
==> Scraping page 19: https://ww