If you are coding from your PC,
<br>
Install the required libraries by tiping this command in the Anaconda Prompt:
<br>
`conda install pandas openpyxl selenium webdriver_manager tqdm ipywidgets requests bs4`

## Selenium cannot easily be used from Colab, so we cannot easily scrape Carrefour.fr without using Python locally

In [1]:
try:
    # if we are on Colab
    from google.colab import files
    SELENIUM = False
except ModuleNotFoundError:
    # if we are not on Colab, we can easily use Selenium
    SELENIUM = True
    from selenium import webdriver
    from selenium.webdriver.chrome.service import Service
    from webdriver_manager.chrome import ChromeDriverManager


from time import sleep           # we can make pauses to wait for the web page to load

import requests                  # to connect to the internet
import pandas as pd              # to create Excel files
from bs4 import BeautifulSoup    # to extract text from HTML pages
from tqdm.auto import tqdm       # loading bar

## Use Selenium to open a Chrome browser and navigate to the website

In [2]:
if SELENIUM:
    # open Chrome
    driver = webdriver.Chrome(
        # install ChromeDriver, if required
        service=Service(ChromeDriverManager().install())
    )
    driver.get('https://www.carrefour.fr/r/boissons/cave-a-vins')
    sleep(10)   # wait 10 s for Carrefour page to load
else:
    response = requests.get(
        'https://raw.githubusercontent.com/ElLorans/PythonCrashCourse/main/ScrapeWines/carrefour.html'
    )

In [3]:
# Use Beautiful Soup to parse the HTML content of the page
if SELENIUM:
    soup = BeautifulSoup(driver.page_source)
else:
    soup = BeautifulSoup(response.text)

products = soup.find_all("li", {"class": "product-grid-item"})

In [6]:
carrefour_wines = dict()
for product in products:
    try:
        liters = product.find(
            'div',
            {'class': 'ds-format'}
        )
        if liters is None:
            # no liters amount, so it's not a wine. Move to next elem in loop
            next
        liters = liters.text.strip()
        wine_name = product.find('h2').text.strip()
        price = product.find(
            'span',
            {'class': 'product-price__amount-value'}
        )
        if price is not None:
            price = float(price.text.replace(',', '.').split(' €', 1)[0].strip())
        else:
            price = 0
        carrefour_wines[wine_name] = {
            'price': price,
            'liters': liters
        }
    except AttributeError as e:
        print(e)

'NoneType' object has no attribute 'text'
'NoneType' object has no attribute 'text'
'NoneType' object has no attribute 'text'
'NoneType' object has no attribute 'text'
'NoneType' object has no attribute 'text'


In [7]:
carrefour_wines

{'Vin Rouge Bordeaux LA PETITE CHAPELLE DE BORDEAUX': {'price': 9.01,
  'liters': '3L'},
 'Vin rouge Bordeaux LE GRAND ECUYER': {'price': 2.74, 'liters': '75cL'},
 'Vin Rouge  Bordeaux AOP Bordeaux La Petite Chapelle De Bordeaux': {'price': 2.77,
  'liters': '75cL'},
 "Vin rouge  merlot sans alcool  GRAIN D'ENVIE": {'price': 2.54,
  'liters': '75cL'},
 'Vin Blanc Moelleux Bordeaux LA PETITE CHAPELLE DE BORDEAUX': {'price': 2.77,
  'liters': '75cL'},
 'Vin Blanc Bordeaux LA PETITE CHAPELLE DE BORDEAUX': {'price': 9.01,
  'liters': '3L'},
 'Vin blanc Bordeaux moelleux  LE GRAND ECUYER': {'price': 2.74,
  'liters': '75cL'},
 'Vin blanc Bordeaux LE GRAND ECUYER': {'price': 2.74, 'liters': '75cL'},
 'Vin blanc de Pays Lichette LES CAVES VERNAUX': {'price': 9.94,
  'liters': '5L'},
 'Vin rosé Bordeaux LE GRAND ECUYER': {'price': 9.83, 'liters': '3L'},
 'Vin rosé de Pays Champlure LES CAVES VERNAUX': {'price': 9.94,
  'liters': '5L'},
 "Vin rosé syrah sans alcool GRAIN D'ENVIE": {'price': 2.5

**Vivino**

In [17]:
def clean_wine_name(stringa: str):
    # add docstrings to function so that help(clean_wine_name) will show
    # what the function does
    """
    Return str without useless text to enhance vivino search.
    """
    clean_wine_name = stringa.lower()
    clean_wine_name = stringa.replace("vin blanc ", "")
    clean_wine_name = clean_wine_name.replace("vin rouge ", "")
    clean_wine_name = clean_wine_name.replace("vin rosé ", "")
    return clean_wine_name

In [18]:
help(clean_wine_name)

Help on function clean_wine_name in module __main__:

clean_wine_name(stringa: str)
    Return str without useless text to enhance vivino search.



In [19]:
def scrape_vivino(wine_name: str):
    base_url = "https://www.vivino.com/search/wines?q="
    url = base_url + clean_wine_name(wine_name.replace(" ", "+"))
    print(url)
    # headers are sent by web browser to websites to specify what info they want
    # without specifying headers, vivino will think we are a bot and will not show us
    # any ratings
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "en-GB,en;q=0.5",
        "Referer": "https://www.google.com/",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:108.0) Gecko/20100101 Firefox/108.0",
    }
    response = requests.get(url,
                            headers=headers)
    soup = BeautifulSoup(response.text)
    wine = soup.find("div", {"class": "default-wine-card"})

    name = wine.find("span", {"class": "header-smaller text-block wine-card__name"}).text.strip()
    price = wine.find("span", {"class": "wine-price-value"}).text.strip().replace("—", "")
    rating = float(wine.find("div", {"class": "text-inline-block light average__number"}).text.replace(",", ".").strip())
    n_reviews = int(wine.find("p", {"class": 'text-micro'}).text.split(" ", 1)[0].strip())

    return {
        "VivinoName": name,
        "VivinoPrice": price,
        "Rating": rating,
        "# Reviews": n_reviews}

## Update the dict carrefour_wines with the info from Vivino

In [25]:
# let's stop to the first 4 wines for now
count = 0

# tqdm will create a loading bar
for wine_name in tqdm(carrefour_wines):
    # add to the dictionary scraped from Carrefour the dictionary Scraped from Vivino
    vivino_dictionary = scrape_vivino(wine_name)
    for key in vivino_dictionary:
        carrefour_wines[key] = vivino_dictionary[key]
    count = count + 1
    if count == 4:
        break

  0%|          | 0/27 [00:00<?, ?it/s]

https://www.vivino.com/search/wines?q=Vin+Rouge+Bordeaux+LA+PETITE+CHAPELLE+DE+BORDEAUX
https://www.vivino.com/search/wines?q=Vin+rouge+Bordeaux+LE+GRAND+ECUYER
https://www.vivino.com/search/wines?q=Vin+Rouge++Bordeaux+AOP+Bordeaux+La+Petite+Chapelle+De+Bordeaux


In [26]:
carrefour_wines

{'Vin Rouge Bordeaux LA PETITE CHAPELLE DE BORDEAUX': {'price': 9.01,
  'liters': '3L',
  'VivinoName': 'La Petite Chapelle Bordeaux Rouge',
  'VivinoPrice': '',
  'Rating': 3.2,
  '# Reviews': 1253},
 'Vin rouge Bordeaux LE GRAND ECUYER': {'price': 2.74,
  'liters': '75cL',
  'VivinoName': 'Bertrand Ravache Le Grand Ecuyer Bordeaux Supérieur',
  'VivinoPrice': '',
  'Rating': 3.3,
  '# Reviews': 314},
 'Vin Rouge  Bordeaux AOP Bordeaux La Petite Chapelle De Bordeaux': {'price': 2.77,
  'liters': '75cL',
  'VivinoName': 'La Petite Chapelle Bordeaux Rouge',
  'VivinoPrice': '',
  'Rating': 3.2,
  '# Reviews': 1253},
 "Vin rouge  merlot sans alcool  GRAIN D'ENVIE": {'price': 2.54,
  'liters': '75cL',
  'VivinoName': "Castel Freres Grain d'Envie Merlot",
  'VivinoPrice': '',
  'Rating': 1.7,
  '# Reviews': 53},
 'Vin Blanc Moelleux Bordeaux LA PETITE CHAPELLE DE BORDEAUX': {'price': 2.77,
  'liters': '75cL'},
 'Vin Blanc Bordeaux LA PETITE CHAPELLE DE BORDEAUX': {'price': 9.01,
  'liters'

In [27]:
# carrefour_wines now has more information from vivino but only for the first
# count wines. So we create a copy "result" of carrefour_wines with only the first count
# key-value pairs

result = dict()
second_count = 0
for key in carrefour_wines:
    result[key] = carrefour_wines[key]
    second_count = second_count + 1
    if second_count == count:
        break

In [30]:
result

{'Vin Rouge Bordeaux LA PETITE CHAPELLE DE BORDEAUX': {'price': 9.01,
  'liters': '3L',
  'VivinoName': 'La Petite Chapelle Bordeaux Rouge',
  'VivinoPrice': '',
  'Rating': 3.2,
  '# Reviews': 1253},
 'Vin rouge Bordeaux LE GRAND ECUYER': {'price': 2.74,
  'liters': '75cL',
  'VivinoName': 'Bertrand Ravache Le Grand Ecuyer Bordeaux Supérieur',
  'VivinoPrice': '',
  'Rating': 3.3,
  '# Reviews': 314},
 'Vin Rouge  Bordeaux AOP Bordeaux La Petite Chapelle De Bordeaux': {'price': 2.77,
  'liters': '75cL',
  'VivinoName': 'La Petite Chapelle Bordeaux Rouge',
  'VivinoPrice': '',
  'Rating': 3.2,
  '# Reviews': 1253},
 "Vin rouge  merlot sans alcool  GRAIN D'ENVIE": {'price': 2.54,
  'liters': '75cL',
  'VivinoName': "Castel Freres Grain d'Envie Merlot",
  'VivinoPrice': '',
  'Rating': 1.7,
  '# Reviews': 53}}

In [28]:
df = pd.DataFrame.from_dict(result, orient='index')

In [29]:
df

Unnamed: 0,price,liters,VivinoName,VivinoPrice,Rating,# Reviews
Vin Rouge Bordeaux LA PETITE CHAPELLE DE BORDEAUX,9.01,3L,La Petite Chapelle Bordeaux Rouge,,3.2,1253
Vin rouge Bordeaux LE GRAND ECUYER,2.74,75cL,Bertrand Ravache Le Grand Ecuyer Bordeaux Supé...,,3.3,314
Vin Rouge Bordeaux AOP Bordeaux La Petite Chapelle De Bordeaux,2.77,75cL,La Petite Chapelle Bordeaux Rouge,,3.2,1253
Vin rouge merlot sans alcool GRAIN D'ENVIE,2.54,75cL,Castel Freres Grain d'Envie Merlot,,1.7,53


In [31]:
df.to_excel("carrefour_wines.xlsx")

try:
    # if we are on Colab
    from google.colab import files
    files.download("carrefour_wines.xlsx")
except ModuleNotFoundError:
    pass