In [2]:
%pip install selenium webdriver-manager

Collecting selenium
  Downloading selenium-4.38.0-py3-none-any.whl.metadata (7.5 kB)
Collecting webdriver-manager
  Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl.metadata (12 kB)
Collecting urllib3<3.0,>=2.5.0 (from urllib3[socks]<3.0,>=2.5.0->selenium)
  Downloading urllib3-2.5.0-py3-none-any.whl.metadata (6.5 kB)
Collecting trio<1.0,>=0.31.0 (from selenium)
  Downloading trio-0.32.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket<1.0,>=0.12.2 (from selenium)
  Downloading trio_websocket-0.12.2-py3-none-any.whl.metadata (5.1 kB)
Collecting certifi>=2025.10.5 (from selenium)
  Downloading certifi-2025.11.12-py3-none-any.whl.metadata (2.5 kB)
Collecting typing_extensions<5.0,>=4.15.0 (from selenium)
  Downloading typing_extensions-4.15.0-py3-none-any.whl.metadata (3.3 kB)
Collecting attrs>=23.2.0 (from trio<1.0,>=0.31.0->selenium)
  Downloading attrs-25.4.0-py3-none-any.whl.metadata (10 kB)
Collecting outcome (from trio<1.0,>=0.31.0->selenium)
  Downloading outco

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
httpcore 0.9.1 requires h11<0.10,>=0.8, but you have h11 0.16.0 which is incompatible.
langsmith 0.1.135 requires httpx<1,>=0.23.0, but you have httpx 0.13.3 which is incompatible.
openai 1.51.2 requires httpx<1,>=0.23.0, but you have httpx 0.13.3 which is incompatible.
spyder 5.5.1 requires ipython!=8.17.1,<9.0.0,>=8.13.0; python_version > "3.8", but you have ipython 9.0.2 which is incompatible.
streamlit 1.32.0 requires packaging<24,>=16.8, but you have packaging 24.2 which is incompatible.
tensorflow-intel 2.16.1 requires ml-dtypes~=0.3.1, but you have ml-dtypes 0.5.1 which is incompatible.
tensorflow-intel 2.16.1 requires tensorboard<2.17,>=2.16, but you have tensorboard 2.19.0 which is incompatible.

[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --

In [10]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta
import time

# Yahoo Finance history page to visit for each ticker symbol.
crypto_urls = {
    'BTC-USD': 'https://finance.yahoo.com/quote/BTC-USD/history/',
    'HYPER36428-USD': 'https://finance.yahoo.com/quote/HYPER36428-USD/history/',
    'HOOD37295-USD': 'https://finance.yahoo.com/quote/HOOD37295-USD/history/',
}

# Filled in by the scraping loop: one DataFrame per ticker.
crypto_dataframes = {}

# Chrome launch options; the arguments are added in the order listed.
options = webdriver.ChromeOptions()
for chrome_flag in (
    '--headless',
    '--no-sandbox',
    '--disable-dev-shm-usage',
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
):
    options.add_argument(chrome_flag)

# ChromeDriverManager().install() supplies the driver path handed to the Service.
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service, options=options)

# Cut-off used by the row filter: table rows dated earlier than this are dropped.
one_year_ago = datetime.now() + timedelta(days=-365)

# Scrape the history table of every ticker with the shared browser session.
# The whole loop runs under try/finally so that driver.quit() is reached even when
# a page load or a parsing step raises; otherwise the Chrome process would be left running.
try:
    for ticker, url in crypto_urls.items():
        print(f"Scraping {ticker}...")
        driver.get(url)

        # Consent popup: click the element named "agree" if it becomes clickable within 5 s.
        # This stays best-effort (no popup or a failed click is ignored), but only
        # Exception is caught so KeyboardInterrupt / SystemExit still propagate.
        try:
            consent_button = WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable((By.NAME, "agree"))
            )
            consent_button.click()
        except Exception:
            pass

        # Scroll to the bottom repeatedly (at most 15 times, 2 s pause each) and stop
        # as soon as the page height no longer grows.
        last_height = driver.execute_script("return document.body.scrollHeight")
        for _ in range(15):
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        # Parse the rendered HTML.
        soup = BeautifulSoup(driver.page_source, "html.parser")

        # Pick the first table whose header cells mention both "Date" and "Close".
        target_table = None
        for table in soup.find_all("table"):
            headers = [th.text.strip() for th in table.find_all("th")]
            if any("Date" in h for h in headers) and any("Close" in h for h in headers):
                target_table = table
                break

        if target_table is None:
            print(f"Table non trouvée pour {ticker}")
            continue

        rows = target_table.find_all("tr")
        data_list = []

        # rows[0] is skipped as the header row; rows with fewer than six cells are ignored.
        for row in rows[1:]:
            cols = row.find_all("td")
            if len(cols) < 6:
                continue

            date_str = cols[0].text.strip()
            try:
                date_obj = datetime.strptime(date_str, "%b %d, %Y")
            except ValueError:
                # First cell is not a date in the expected "Mon DD, YYYY" form.
                continue

            if date_obj >= one_year_ago:
                data_list.append({
                    'Date': date_str,
                    'Open': cols[1].text.strip(),
                    'High': cols[2].text.strip(),
                    'Low': cols[3].text.strip(),
                    'Close': cols[4].text.strip(),
                    'Adj Close': cols[5].text.strip(),
                    # A seventh cell may be absent; fall back to '0' in that case.
                    'Volume': cols[6].text.strip() if len(cols) > 6 else '0'
                })

        if data_list:
            df = pd.DataFrame(data_list)
            crypto_dataframes[ticker] = df
            print(f"{ticker}: {len(df)} lignes scrapées")
            print(df.head(3))
        else:
            print(f"Aucune donnée trouvée pour {ticker} (après filtre date)")
finally:
    # Always release the browser, whether the loop finished or raised.
    driver.quit()

# Merge every scraped frame into one table and write it to disk; report when nothing was scraped.
if not crypto_dataframes:
    print(" Aucune donnée n'a pu être scrapée")
else:
    # Tag each frame (in place) with its ticker so rows stay identifiable once stacked.
    for ticker in crypto_dataframes:
        df = crypto_dataframes[ticker]
        df['Ticker'] = ticker

    all_frames = list(crypto_dataframes.values())
    df_all = pd.concat(all_frames, ignore_index=True)

    print(f"Total de lignes : {len(df_all)}")
    print(f"Cryptos : {list(crypto_dataframes.keys())}")

    df_all.to_csv('données_de_lhistorique_des_crypto.csv', index=False)
    print("Données sauvegardées")

Scraping BTC-USD...
BTC-USD: 365 lignes scrapées
           Date       Open       High        Low      Close  Adj Close  \
0  Dec 13, 2025  90,281.64  90,614.31  89,988.05  90,167.33  90,167.33   
1  Dec 12, 2025  92,513.66  92,747.93  89,532.60  90,270.41  90,270.41   
2  Dec 11, 2025  92,011.30  93,554.27  89,335.30  92,511.34  92,511.34   

           Volume  
0  65,443,397,632  
1  80,275,884,583  
2  64,532,834,621  
Scraping HYPER36428-USD...
HYPER36428-USD: 222 lignes scrapées
           Date      Open      High       Low     Close Adj Close     Volume
0  Dec 13, 2025  0.000306  0.000426  0.000137  0.000215  0.000215  1,066,186
1  Dec 12, 2025  0.000025  0.000469  0.000025  0.000319  0.000319    640,925
2  Dec 11, 2025  0.000024  0.000025  0.000023  0.000025  0.000025          -
Scraping HOOD37295-USD...
HOOD37295-USD: 158 lignes scrapées
           Date      Open      High       Low     Close Adj Close  Volume
0  Dec 13, 2025  0.013883  0.013883  0.000008  0.000563  0.000563  2

In [6]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta
import time

# Settings: the instrument to scrape and a look-back window of 5 * 365 = 1825 days.
ticker = 'BTC-USD'
five_years_ago = datetime.now() + timedelta(days=-(5 * 365))
period1 = int(five_years_ago.timestamp())  # window start as whole epoch seconds
period2 = int(datetime.now().timestamp())  # window end as whole epoch seconds

# History page URL carrying the window and a daily interval as query parameters.
url = f"https://finance.yahoo.com/quote/{ticker}/history?period1={period1}&period2={period2}&interval=1d&filter=history&frequency=1d"

# Chrome launch options; the arguments are added in the order listed.
options = webdriver.ChromeOptions()
for chrome_flag in (
    '--headless',
    '--no-sandbox',
    '--disable-dev-shm-usage',
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
):
    options.add_argument(chrome_flag)

# ChromeDriverManager().install() supplies the driver path handed to the Service.
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service, options=options)

# Load the history page, collect its rendered HTML, then parse the price table into data_list.
print(f"Scraping {ticker} sur 5 ans...")

# All browser work runs under try/finally so the Chrome process is shut down even if
# loading or scrolling raises. Parsing happens afterwards on the captured HTML only.
try:
    driver.get(url)

    # Consent popup: click the element named "agree" if it becomes clickable within 5 s.
    # This stays best-effort (no popup or a failed click is ignored), but only
    # Exception is caught so KeyboardInterrupt / SystemExit still propagate.
    try:
        consent_button = WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable((By.NAME, "agree"))
        )
        consent_button.click()
        print("Consentement accepté.")
    except Exception:
        pass

    # Scroll to the bottom repeatedly (at most 30 times, 2 s pause each) and stop
    # as soon as the page height no longer grows.
    last_height = driver.execute_script("return document.body.scrollHeight")
    for _ in range(30):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

    page_html = driver.page_source
finally:
    driver.quit()

soup = BeautifulSoup(page_html, "html.parser")

# Pick the first table whose header cells mention both "Date" and "Close".
target_table = None
for table in soup.find_all("table"):
    headers = [th.text.strip() for th in table.find_all("th")]
    if any("Date" in h for h in headers) and any("Close" in h for h in headers):
        target_table = table
        break

data_list = []
if target_table is not None:
    rows = target_table.find_all("tr")
    # rows[0] is skipped as the header row; rows with fewer than six cells are ignored.
    for row in rows[1:]:
        cols = row.find_all("td")
        if len(cols) < 6:
            continue

        date_str = cols[0].text.strip()
        try:
            date_obj = datetime.strptime(date_str, "%b %d, %Y")
        except ValueError:
            # First cell is not a date in the expected "Mon DD, YYYY" form.
            continue

        if date_obj >= five_years_ago:
            data_list.append({
                'Date': date_str,
                'Open': cols[1].text.strip(),
                'High': cols[2].text.strip(),
                'Low': cols[3].text.strip(),
                'Close': cols[4].text.strip(),
                'Adj Close': cols[5].text.strip(),
                # The guard above admits six-cell rows, so cols[6] may not exist;
                # fall back to '0' instead of raising IndexError.
                'Volume': cols[6].text.strip() if len(cols) > 6 else '0'
            })
else:
    print("Table non trouvée (structure modifiée ?)")

# Turn the collected rows into a DataFrame, preview it, and write it to CSV;
# report when no rows were collected.
if not data_list:
    print("Aucune donnée récupérée.")
else:
    # assign() appends the Ticker column after the scraped columns.
    df = pd.DataFrame(data_list).assign(Ticker=ticker)
    row_count = len(df)
    print(f"{ticker}: {row_count} lignes récupérées")
    print(df.head())
    df.to_csv('btc_historique_5ans.csv', index=False)
    print("Sauvegarde terminée.")

Scraping BTC-USD sur 5 ans...
BTC-USD: 1825 lignes récupérées
           Date       Open       High        Low      Close  Adj Close  \
0  Dec 13, 2025  90,281.64  90,614.31  89,988.05  90,033.09  90,033.09   
1  Dec 12, 2025  92,513.66  92,747.93  89,532.60  90,270.41  90,270.41   
2  Dec 11, 2025  92,011.30  93,554.27  89,335.30  92,511.34  92,511.34   
3  Dec 10, 2025  92,695.23  94,477.16  91,640.13  92,020.95  92,020.95   
4   Dec 9, 2025  90,639.70  94,601.57  89,586.98  92,691.71  92,691.71   

           Volume   Ticker  
0  64,809,832,448  BTC-USD  
1  80,275,884,583  BTC-USD  
2  64,532,834,621  BTC-USD  
3  65,420,694,513  BTC-USD  
4  66,861,721,440  BTC-USD  
Sauvegarde terminée.
