In [None]:
# ActionChains Documentation: 
# https://www.selenium.dev/selenium/docs/api/py/webdriver/selenium.webdriver.common.action_chains.html#selenium.webdriver.common.action_chains.ActionChains.release

# Beautiful Soup Docs: https://www.crummy.com/software/BeautifulSoup/bs4/doc/


from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import time
import pandas as pd

# Set up Selenium
options = webdriver.ChromeOptions()
options.headless = False  # Run Chrome in headless mode (without GUI)
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# Visit Vivino's page
url = "https://www.vivino.com/explore"
driver.get(url)

# Wait for the page to load properly
wait = WebDriverWait(driver, 10)

# Step 1: click the dropdown menu
dropdown = wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "simpleLabel-module__selectedKey--3ngzL")))
dropdown.click()
time.sleep(2)

# Step 2: select "United States" from the list
country_option = wait.until(EC.element_to_be_clickable((By.XPATH, "//a[@data-value='us']")))
country_option.click()
time.sleep(3)

# Step 3: unselect the "Red" wine filter
red_wine_label = wait.until(EC.element_to_be_clickable((By.XPATH, "//span[contains(@class, 'MuiButtonBase-root') and @data-testid='wineTypes_1']")))
driver.execute_script("arguments[0].click();", red_wine_label)
time.sleep(3)

# Step 4: drag slider to select all prices.
# Find the first slider handle (left)
slider_1 = driver.find_element(By.XPATH, "//div[@class='rc-slider-handle rc-slider-handle-1']")

# Use ActionChains to drag the first slider
actions = ActionChains(driver)
actions.click_and_hold(slider_1).move_by_offset(-10, 0).release().perform()

# Find the second slider handle (right)
slider_2 = driver.find_element(By.XPATH, "//div[@class='rc-slider-handle rc-slider-handle-2']")

# Use ActionChains to drag the second slider
actions = ActionChains(driver)
actions.click_and_hold(slider_2).move_by_offset(150, 0).release().perform()

# Step 5: Select Any Rating option
any_rating_label = wait.until(EC.element_to_be_clickable((By.XPATH, "//label[.//input[@name='rating' and @value='1']]")))
driver.execute_script("arguments[0].click();", any_rating_label)


# Step 6: Select Countries
countries_dropdown = driver.find_element(By.XPATH, "//legend[@data-testid='filter-toggle-button'][.//h5[.='Countries']]")
driver.execute_script("arguments[0].click();", countries_dropdown)
time.sleep(2)
countries_ar = wait.until(EC.element_to_be_clickable((By.XPATH, "//span[contains(@class, 'MuiButtonBase-root')]/parent::label[@data-testid='countries_ar']")))
#countries_cl = wait.until(EC.element_to_be_clickable((By.XPATH, "//span[contains(@class, 'MuiButtonBase-root')]/parent::label[@data-testid='countries_cl']")))
#driver.execute_script("arguments[0].click(); arguments[1].click();", countries_ar countries_cl)
driver.execute_script("arguments[0].click();", countries_ar)
countries_search = wait.until(EC.element_to_be_clickable((By.XPATH, "//input[@id='filter-checkboxes-search' and @placeholder='Search countries']")))
countries_search.click()
countries_search.send_keys("Brazil")
time.sleep(1)
countries_br = wait.until(EC.element_to_be_clickable((By.XPATH, "//span[contains(@class, 'MuiButtonBase-root')]/parent::label[@data-testid='countries_br']")))
driver.execute_script("arguments[0].click()", countries_br)

# Wait and see the result
time.sleep(5)

def getPageLinks(list):
    # Get the page source after it's fully loaded
    html = driver.page_source

    # Pass it to BeautifulSoup
    soup = BeautifulSoup(html, 'html.parser')

    links = [a["href"] for a in soup.find_all('a', {"data-testid": "vintagePageLink"})]

    full_links = ["https://www.vivino.com" + link for link in links]

    list.extend(full_links)

    try:
        next_page_btn = wait.until(EC.element_to_be_clickable((By.XPATH, "//a[@data-trackingid='buttonDefault'][.//span[.='Next']]")))
        driver.execute_script("arguments[0].click()", next_page_btn)
        time.sleep(7)
    except:
        print("No hay más páginas disponibles!")

link_list = []
counter = 0
while counter < 1:
    getPageLinks(link_list)
    counter += 1

link_df = pd.DataFrame(link_list, columns=["wine_link"])
link_df.to_csv("wine_links.csv", index=False)

driver.quit()

In [32]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import time
import pandas as pd
import json

csv = pd.read_csv("wine_test.csv")
wine_df = pd.DataFrame(csv, columns=["wine_link"])
wine_df["name"] = None
wine_df["year"] = None
wine_df["winery"] = None
wine_df["rating"] = None
wine_df["rating_qty"] = None
wine_df["body"] = None
wine_df["tannis"] = None
wine_df["sweetness"] = None
wine_df["acidity"] = None
wine_df["notes"] = None

options = webdriver.ChromeOptions()
options.headless = False  
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# Iterar en las columnas del DF
for index, row in wine_df.iterrows():
    link = row["wine_link"]
    driver.get(link)
    time.sleep(5)  

    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")

    # DATA 1: NOMBRE (name)
    wine_name_tag = soup.find("a", {"data-cartitemsource": "wine-page-master-link"})
    wine_name = wine_name_tag.get_text(strip=True) if wine_name_tag else None

    # DATA 2: AÑO (year)
    wine_year = None
    if wine_name_tag:
        parent_div = wine_name_tag.find_parent("div")
        if parent_div:
            year_text = parent_div.get_text(strip=True)
            wine_year = year_text.replace(wine_name, "").strip()

    # DATA 3: BODEGA (WINERY)
    winery = soup.find("h1").find("div").find("a").find("div").get_text(strip=True)

    # DATA 4: REVIEWS (Rating)
    rating_tag = soup.find("a", {"href": "#all_reviews"})
    rating_value = rating_tag.find("div").find("div").get_text(strip=True)
    rating_qty = rating_tag.find_all("div")[-1].get_text(strip=True).split(" ")[0]

    # DATA 5: TASTE (Sabor)

    wine_taste = soup.find_all("tr", {"class": "tasteStructure__tasteCharacteristic--jLtsE"})

    #taste_dict = {
    #    "body": "",
    #    "tannis": "",
    #    "sweetness": "",
    #    "acidity": ""
    #}

    taste_val_list = []
    for i in range(4):
        bar = wine_taste[i].find("span", {"class", "indicatorBar__progress--3aXLX"})["style"]
        val = bar.split(";")[1].split(":")[1]
        taste_val_list.append(val)

    # 5.1 : Light/Bold - Body (Cuerpo)   
    body = taste_val_list[0]
    # 5.2 : Smooth/Tannic - Tannis (Tanicidad)
    tannis = taste_val_list[1]
    # 5.3 : Dry/Sweet - Sweetness (Dulzura)
    sweetness = taste_val_list[2]
    # 5.4 : Soft/Acid - Acidity (Acidez)
    acidity = taste_val_list[3]


    # DATA 6 : NOTE MENTIONS (menciones de notas)

    mentions = soup.find_all("div", {"data-testid": "mentions"})

    mentions_dict = {}

    for mention in mentions:
        note = mention.find("span").get_text(strip=True)
        n_mentions = mention.get_text().split(" ")[0]
        mentions_dict[str(note)] = str(n_mentions)


    # ÚLTIMO PASO: GUARDAR EN DATAFRAME
    wine_df.at[index, "name"] = wine_name
    wine_df.at[index, "year"] = wine_year
    wine_df.at[index, "winery"] = winery
    wine_df.at[index, "rating"] = rating_value
    wine_df.at[index, "rating_qty"] = rating_qty
    wine_df.at[index, "body"] = body
    wine_df.at[index, "tannis"] = tannis
    wine_df.at[index, "sweetness"] = sweetness
    wine_df.at[index, "acidity"] = acidity
    wine_df["notes"] = str(mentions_dict)
    wine_df["notes"] = wine_df["notes"].apply(json.dumps)
# Guardar el CSV actualizado
wine_df.to_csv("updated_test.csv", index=False)

driver.quit()

wine_df

Unnamed: 0,wine_link,name,year,winery,rating,rating_qty,body,tannis,sweetness,acidity,notes
0,https://www.vivino.com/US/en/luigi-bosca-parai...,Paraiso,2020,Luigi Bosca,4.8,574,73.43435%,50.905894999999994%,13.619732499999998%,44.74824%,"""{'black fruit': '14', 'oaky': '13', 'earthy':..."
