In [1]:
from dotenv import load_dotenv, find_dotenv
import os

# Every setting this notebook reads; all of them are expected to come from the .env file.
REQUIRED_ENV_VARS = ["BASE_URL", "USERNAME", "PASSWORD", "DOWNLOAD_FOLDER", "FIRSTNAME"]

# Remove any value inherited from the process environment first, so that a key
# missing from .env is seen as missing instead of silently picking up an
# unrelated value (some systems predefine USERNAME, for instance).
for var in REQUIRED_ENV_VARS:
    os.environ.pop(var, None)

env_path = find_dotenv()
if not env_path:
    raise FileNotFoundError("⚠️ No .env file found! Check its location.")
load_dotenv(env_path, override=True)

# Fail fast with a clear message: without this check a missing key becomes None
# and only surfaces later as an unrelated TypeError in the scraping code.
missing_vars = [name for name in REQUIRED_ENV_VARS if os.getenv(name) is None]
if missing_vars:
    raise ValueError("Missing variable(s) in .env file: " + ", ".join(missing_vars))

BASE_URL = os.getenv("BASE_URL")            # site root; later code appends relative paths to it
EMAIL = os.getenv("USERNAME")               # typed into the first login field
PASSWORD = os.getenv("PASSWORD")            # typed into the second login field
DOWNLOAD_DIR = os.getenv("DOWNLOAD_FOLDER")  # joined onto the current working directory
FIRST_NAME = os.getenv("FIRSTNAME")         # searched for in the account label to detect login

# SETUP BELOW
`undetected_chromedriver` is set up below.
However, PDF downloading has to be enabled by hand before executing the script.
Headless mode does not work.

In [6]:
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import undetected_chromedriver as uc
import time
import json
import re

def setup_driver():
    """Start a Chrome session through undetected_chromedriver.

    The browser is given a set of download-related preferences whose target
    folder is DOWNLOAD_DIR resolved against the current working directory.

    Returns:
        The ``uc.Chrome`` driver instance that was launched.
    """
    options = uc.ChromeOptions()

    # Preferences handed to Chrome through the "prefs" experimental option.
    download_prefs = {
        "download.default_directory": os.path.join(os.getcwd(), DOWNLOAD_DIR),
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "plugins.always_open_pdf_externally": True,
        "safebrowsing.enabled": True,
    }
    options.add_experimental_option("prefs", download_prefs)

    return uc.Chrome(options=options)

def load_cookies(driver):
    """Open BASE_URL and, when possible, restore the session saved in cookies_chrome.json.

    If the cookie file is missing or cannot be parsed as JSON, the site is
    simply opened without cookies and a warning is printed.

    Args:
        driver: Selenium-compatible driver used to browse the site.
    """
    # Only the file read is guarded: browser errors must not be mistaken for a
    # missing cookie file, and the file is closed before any navigation starts.
    try:
        with open("cookies_chrome.json", "r") as f:
            cookies = json.load(f)
    except FileNotFoundError:
        print("[⚠] No cookies found, manual CAPTCHA required.")
        driver.get(BASE_URL)
        return
    except json.JSONDecodeError:
        # e.g. an empty or partially written file: fall back like a missing file
        print("[⚠] Cookies file is unreadable, manual CAPTCHA required.")
        driver.get(BASE_URL)
        return

    # The page is loaded first, the cookies are added, then the page is refreshed.
    driver.get(BASE_URL)
    for cookie in cookies:
        driver.add_cookie(cookie)
    driver.refresh()
    print("[🍪] Carrefour cookies loaded.")

def is_logged(driver):
    """Tell whether the account label of the current page contains FIRST_NAME.

    On success the driver's cookies are written to cookies_chrome.json so a
    later run can reuse the session.

    Args:
        driver: Selenium-compatible driver showing a page with the main bar.

    Returns:
        True when FIRST_NAME is found in the label, False otherwise.
    """
    account_entry = driver.find_element(By.CLASS_NAME, "mainbar__account")
    label_text = account_entry.find_element(By.CLASS_NAME, "mainbar-item__label").text

    # Guard clause: the name is absent, whether after a login or after loading cookies.
    if FIRST_NAME not in label_text:
        print("[❌] Login failed.")
        return False

    print("[✅] Successfully logged in!")
    # Persist the session cookies to avoid logging in again next time.
    session_cookies = driver.get_cookies()
    with open("cookies_chrome.json", "w") as cookie_file:
        json.dump(session_cookies, cookie_file)
    return True

def login(driver):
    """Log in through the account menu with EMAIL and PASSWORD.

    Two optional overlays are dismissed when present, the account menu is
    opened, its first call-to-action is clicked, then the ``idToken1`` and
    ``idToken2`` fields are filled and the form is submitted with RETURN.

    Args:
        driver: Selenium-compatible driver already showing the site.

    Returns:
        Whatever ``is_logged(driver)`` reports after a 5-second wait.
    """
    optional_overlays = (
        (By.CLASS_NAME, "by_close"),             # may not appear, depending on the profile
        (By.ID, "onetrust-reject-all-handler"),  # should not appear once cookies were loaded
    )
    for locator in optional_overlays:
        try:
            time.sleep(2)
            driver.find_element(*locator).click()
        except Exception:
            # Deliberate best-effort: an overlay that is not shown is not an error.
            pass

    # Open the account menu, then click the first button of the login menu.
    time.sleep(2)
    driver.find_element(By.CLASS_NAME, "mainbar__account").click()
    time.sleep(2)
    driver.find_elements(By.CLASS_NAME, 'mainbar-account-menu__cta')[0].click()

    # Fill in the credentials (the send_keys result is not kept: it was never used).
    time.sleep(5)
    driver.find_element(By.ID, "idToken1").send_keys(EMAIL)
    time.sleep(1)
    driver.find_element(By.ID, "idToken2").send_keys(PASSWORD)
    driver.find_element(By.ID, "idToken2").send_keys(Keys.RETURN)

    time.sleep(5)  # Wait for login
    return is_logged(driver)

# URL SCRAPING AND REWRITING

In [3]:
def rewrite_url(url):
    """Convert a receipt URL into ``[date, invoice_url]``.

    The input is expected to look like
        <prefix>/0000000000000/YYYYMMDD/0000/123-45-6789
    where <prefix> contains no digit. The result is
        ["YYYY-MM-DD", "<prefix>/0000000000000/YYYYMMDD/123-45-6789/facture"]
    i.e. the fourth path segment is dropped and "/facture" is appended.

    Args:
        url: receipt URL as found on the receipts page.

    Returns:
        A two-item list: the date with dashes, then the invoice URL.

    Raises:
        ValueError: if ``url`` does not have the expected shape, or if its
            date segment is not exactly 8 digits.
    """
    base_url_pattern = r"^(\D+)\/([\d]*)\/([\d]*)\/([\d]*)\/([\d-]*)"
    match = re.match(base_url_pattern, url)
    if match is None:
        # Previously this surfaced as an AttributeError on None.
        raise ValueError("Unexpected receipt URL format: " + repr(url))

    # Group 4 is matched but intentionally left out of the invoice URL.
    prefix, first_segment, dateiso, _unused_segment, last_segment = match.groups()
    if len(dateiso) != 8:
        # The pattern accepts any number of digits; anything but YYYYMMDD would
        # produce a meaningless date (and file name) downstream.
        raise ValueError("Unexpected date segment in receipt URL: " + repr(url))

    date = dateiso[0:4] + "-" + dateiso[4:6] + "-" + dateiso[6:]
    invoice_url = "/".join([prefix, first_segment, dateiso, last_segment, "facture"])
    return [date, invoice_url]
    

def scrape_tickets_url(driver, list_bill):
    """Gather the receipt links of the in-store purchases page as invoice entries.

    Each distinct link found on the page is converted by ``rewrite_url`` into
    ``[date, invoice_url]`` and appended to ``list_bill``.

    Args:
        driver: Selenium-compatible driver with an active session.
        list_bill: list that receives the entries; it is modified in place.

    Returns:
        The same ``list_bill`` object, after the entries were appended.
    """
    driver.get(BASE_URL+"mon-compte/mes-achats/en-magasin")
    time.sleep(10)

    # One link is read from every "order-item__footer" inside the receipt list.
    receipt_list = driver.find_element(By.CLASS_NAME, "receipt-list")
    hrefs = [
        footer.find_element(By.TAG_NAME, "a").get_attribute('href')
        for footer in receipt_list.find_elements(By.CLASS_NAME, "order-item__footer")
    ]
    print("[✅] All URLs were copied.")

    # set() removes duplicate links; each remaining one becomes [date, invoice_url].
    for href in set(hrefs):
        list_bill.append(rewrite_url(href))
    print("[✅] All bills URLs were generated.")
    return list_bill


# FILES DOWNLOAD

In [10]:
def latest_download_file(download_dir: str):
    """Return the name of the most recently modified entry of ``download_dir``.

    The process working directory is left untouched: changing it here would
    silently alter every later path built from ``os.getcwd()`` when a cell is
    run again.

    Args:
        download_dir: path of the downloads folder.

    Returns:
        The entry name (not the full path) with the latest modification time.

    Raises:
        FileNotFoundError: if the folder does not exist or contains nothing.
    """
    entries = os.listdir(download_dir)
    if not entries:
        raise FileNotFoundError("No file found in download folder: " + str(download_dir))
    # Modification times are read through the full path instead of via chdir.
    entries.sort(key=lambda name: os.path.getmtime(os.path.join(download_dir, name)))
    return entries[-1]

def download_files(driver, list_bill):
    """Open every invoice URL and rename the downloaded file after its date.

    Args:
        driver: Selenium-compatible driver with an active session.
        list_bill: iterable of ``[date, invoice_url]`` entries.

    Returns:
        Always 1.
    """
    path = os.path.join(os.getcwd(), DOWNLOAD_DIR)
    # exist_ok tolerates an already existing folder while still surfacing real
    # failures, which the previous blanket try/except used to hide.
    os.makedirs(path, exist_ok=True)

    max_extra_wait = 30  # seconds granted on top of the initial pause
    for bill in list_bill:
        driver.get(bill[1])
        time.sleep(3)
        dl_file = latest_download_file(path)

        # NOTE: Chrome keeps an unfinished download under a ".crdownload" name;
        # renaming it right away would leave a truncated PDF, so wait for it.
        waited = 0
        while dl_file.endswith(".crdownload") and waited < max_extra_wait:
            time.sleep(1)
            waited += 1
            dl_file = latest_download_file(path)
        if dl_file.endswith(".crdownload"):
            print("[❌] Download of " + bill[1] + " did not finish, skipped.")
            continue

        os.rename(os.path.join(path,dl_file), os.path.join(path,bill[0]+".pdf"))
        print("[✅] File "+ dl_file +" was copied as : "+bill[0]+".pdf.")
    return 1

In [11]:
# End-to-end run: start the browser, restore or create a session, collect the
# invoice URLs, then download and rename the files.
list_bill = []
driver = setup_driver()
try:
    load_cookies(driver)
    time.sleep(7)
    if is_logged(driver):
        print("[✅] Cookies were OK.")
    else:
        # The result of login() is not checked: scraping is attempted either way.
        login(driver)
    list_bill = scrape_tickets_url(driver, list_bill)
    download_files(driver, list_bill)
finally:
    # Always close the browser, even when one of the steps above raises.
    driver.quit()

[🍪] Carrefour cookies loaded.
[❌] Login failed.
[❌] Login failed.
[✅] All URLs were copied.
[✅] All bills URLs were generated.
[✅] File 100-184-1745_facture.pdf was copied as : 2025-01-15.pdf.
[✅] File 6-6-0849_facture.pdf was copied as : 2025-01-11.pdf.
[✅] File 34-6-0855_facture.pdf was copied as : 2025-03-22.pdf.
[✅] File 104-134-1723_facture.pdf was copied as : 2025-02-05.pdf.
[✅] File 31-11-0903_facture.pdf was copied as : 2025-02-01.pdf.
[✅] File 29-22-1243_facture.pdf was copied as : 2025-03-10.pdf.
[✅] File 35-5-0910_facture.pdf was copied as : 2024-12-21.pdf.
[✅] File 8-11-0859_facture.pdf was copied as : 2025-01-04.pdf.
[✅] File 27-71-1735_facture.pdf was copied as : 2025-03-21.pdf.
[✅] File 32-19-0910_facture.pdf was copied as : 2025-01-18.pdf.
[✅] File 25-27-1254_facture.pdf was copied as : 2025-04-04.pdf.
[✅] File 41-4-0910_facture.pdf was copied as : 2025-03-01.pdf.
[✅] File 27-78-1816_facture.pdf was copied as : 2025-01-24.pdf.
[✅] File 121-4-0958_facture.pdf was copied 