In [3]:
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Scrape the red-wine listing of wine.com: for each listed wine, collect the
# data shown in the listing tile, then open the product page for size,
# producer and images, and print the resulting record.

baseUrl = 'https://www.wine.com'
imageBaseUrl = 'https://www.wine.com/product/images/w_600,h_600,c_fit,q_auto:good,fl_progressive/'

maxPages = 1            # number of listing pages to walk
maxTrials = 5           # attempts per wine before giving up
requestTimeout = 30     # seconds; without a timeout requests.get can block forever


def _collectImageUrls(thumbsDiv):
    """Return the 600x600 image URL for every <img> found inside thumbsDiv.

    Only the file name (last path segment) of each thumbnail `src` is kept
    and appended to imageBaseUrl. Returns an empty list when thumbsDiv is
    None, so a product page without a thumbnail strip does not fail the wine.
    """
    if thumbsDiv is None:
        return []
    return [imageBaseUrl + image['src'].split('/')[-1]
            for image in thumbsDiv.find_all('img')]


page = requests.get(f'{baseUrl}/list/wine/red-wine/7155-124?showOutOfStock=true',
                    timeout=requestTimeout)
page.raise_for_status()     # do not try to parse an HTTP error page
cont = 0
while cont < maxPages:
    soup = BeautifulSoup(page.text, 'html.parser')

    wineList = soup.find('ul', class_='listGridLayout_list')
    wineElements = wineList.find_all('li', class_='listGridLayout_listItem')

    # iterate <li> items (only the first three)
    for wineElem in wineElements[:3]:
        # Try up to maxTrials times to load and parse the product page
        lastError = None
        for trials in range(maxTrials):
            try:
                # is out of stock?
                outOfStock = wineElem.find('div', class_='productUnavailable')

                wineInfo = {'color': 'red'}   # Iterate over red, white, ...

                # ----- Getting info from main page -----
                # Name and vintage: the last four characters of the name are
                # parsed as the year; int() raises (and the wine is reported
                # as failed) when the name does not end in digits.
                name = wineElem.find('span', itemprop='name').text
                vintage = name[-4:]
                wineInfo['name'] = name
                wineInfo['vintage'] = int(vintage)

                # Variety
                variety = wineElem.find('span', class_='listGridItemOrigin_varietal').text
                wineInfo['variety'] = variety

                # Origin
                region = wineElem.find('span', class_='listGridItemOrigin_text').text
                wineInfo['region'] = region

                # Price
                price = wineElem.find('meta', itemprop='price')['content']
                wineInfo['price'] = float(price)

                # Ratings
                ulRatings = wineElem.find('ul', class_='wineRatings_list')
                if ulRatings:
                    ratings = []
                    for ratingElem in ulRatings.find_all('li'):
                        # The title is cut at fixed offsets from its end: the
                        # critic name is everything before the last 16
                        # characters, the score the two characters at
                        # [-15:-13].
                        # NOTE(review): assumes a fixed-length suffix and a
                        # two-digit score - confirm against the live markup.
                        ratingName = ratingElem['title'][:-16].strip()
                        ratingValue = ratingElem['title'][-15:-13].strip()

                        ratings.append({'name': ratingName, 'rating': int(ratingValue)})

                    wineInfo['ratings'] = ratings

                # ----- Getting info from wine page -----

                # Get wine detail URL
                a = wineElem.find('a', class_='event_productClick', href=True)
                wineUrl = f"{baseUrl}{a['href']}"

                # Open wine page; an HTTP error counts as a failed trial
                winePage = requests.get(wineUrl, timeout=requestTimeout)
                winePage.raise_for_status()
                wineSoup = BeautifulSoup(winePage.text, 'html.parser')

                if outOfStock:  # If out of stock, page is different
                    # Size
                    size = wineSoup.find('span', class_='prodAlcoholVolume_text').text
                    wineInfo['size'] = size

                    # Images
                    images = _collectImageUrls(wineSoup.find('div', class_='pipThumbs'))
                else:
                    # Getting info from product details table: titles and
                    # values are matched by position.
                    productDetails = wineSoup.find('section', class_='pipProdDetails')
                    titles = productDetails.find_all('div', class_='pipProdDetails_title')
                    values = productDetails.find_all('div', class_='pipProdDetails_name')
                    for i, title in enumerate(titles):
                        label = title.text.strip()
                        if label == 'Size':         # Size (last two characters dropped)
                            wineInfo['size'] = values[i].text[:-2]
                        elif label == 'Producer':   # Producer
                            wineInfo['producer'] = values[i].text

                    # Images
                    images = _collectImageUrls(wineSoup.find('div', class_='pipProdThumbs'))

                wineInfo['image'] = images

                # Print wine
                print(wineUrl)
                for key, value in wineInfo.items():
                    print(f"{key}: {value}")
                print()

                break
            except Exception as err:
                # Remember why this trial failed so the final report is useful
                lastError = err
        else:
            # All trials failed: report the cause instead of a bare marker
            print(f"FAIL: {lastError!r}")

    cont += 1
    if cont >= maxPages:
        # No further page will be parsed, so do not download one
        break

    nextLink = soup.find('a', class_='listPageNextUrl', href=True)
    if nextLink is None:
        # Last listing page reached
        break
    # urljoin accepts both absolute and site-relative hrefs
    page = requests.get(urljoin(baseUrl, nextLink['href']), timeout=requestTimeout)
    page.raise_for_status()

https://www.wine.com/product/antinori-badia-a-passignano-chianti-classico-gran-selezione-2018/806034
color: red
name: Antinori Badia a Passignano Chianti Classico Gran Selezione 2018
vintage: 2018
variety: Sangiovese
region: Chianti Classico, Chianti, Tuscany, Italy
price: 47.98
ratings: [{'name': 'James Suckling', 'rating': 95}, {'name': 'Wine Spectator', 'rating': 94}, {'name': 'Wine & Spirits', 'rating': 92}, {'name': 'Wine Enthusiast', 'rating': 90}]
producer: Antinori
size: 750
image: ['https://www.wine.com/product/images/w_600,h_600,c_fit,q_auto:good,fl_progressive/tict3jscz7n7xdtvreyw.jpg', 'https://www.wine.com/product/images/w_600,h_600,c_fit,q_auto:good,fl_progressive/d7lpaxzi8mdvly1e1hfq.jpg']

https://www.wine.com/product/le-cellier-des-princes-cotes-du-rhone-la-couronne-du-prince-2018/1079083
color: red
name: Le Cellier des Princes Cotes du Rhone La Couronne du Prince 2018
vintage: 2018
variety: Rhone Red Blends
region: Cotes du Rhone, Rhone, France
price: 14.98
ratings: [

In [1]:
import mysql.connector

# ----- MySQL Connection -----
# NOTE(review): connection settings, including the password, are hardcoded
# here; consider loading them from the environment before sharing the notebook.
dbSettings = dict(
    host='localhost',
    database='thewinegame',
    user='user',
    password='password',
    port=6033,
)
conn = mysql.connector.connect(**dbSettings)
cursor = conn.cursor(buffered=True)

# Hand-written sample record used to exercise the insertion logic below.
# Unlike a raw scraped record, 'variety' is a list and 'price' is a
# {'value', 'currency'} mapping, and a 'store' key is present.
wineInfo = dict(
    color='Red',
    name='Antica Mountain Select Cabernet Sauvignon',
    vintage=2018,
    variety=['Cabernet Sauvignon'],
    region='Atlas Peak, Napa Valley, California',
    price=dict(value=59.99, currency='dolar'),
    store='wine.com',
    ratings=[
        {'name': critic, 'rating': score}
        for critic, score in (
            ('James Suckling', 94),
            ("Robert Parkers Wine Advocate", 93),
            ('Jeb Dunnuck', 92),
            ('Wine Spectator', 92),
        )
    ],
    producer='Antica',
    size=750,
    image=[
        'https://www.wine.com/product/images/w_600,h_600,c_fit,q_auto:good,fl_progressive/vaqmy9xth7r9l69hsxdj.jpg',
        'https://www.wine.com/product/images/w_600,h_600,c_fit,q_auto:good,fl_progressive/ykuvjgp7ax3829j6alip.jpg',
    ],
)

# Store `wineInfo` in the database: find an existing matching wine (or insert
# a new one), then attach its images, ratings and price. Everything runs in
# one transaction that is committed at the end.
#
# All values are passed as bound parameters (%s) instead of being formatted
# into the SQL text, so names containing apostrophes cannot break or inject
# into the statements. Only fixed SQL fragments and column names defined in
# this cell are ever interpolated into a query string.
try:
    hasProducer = 'producer' in wineInfo

    # ----- Wine Deduplication -----
    # The similarity score is the root-mean-square of the per-field
    # similarities (overlap / levenshtein_ratio are SQL functions expected to
    # exist in the database). The producer term is used only when known.
    similarityTerms = ["POW(overlap(name, %s), 2)"]
    similarityParams = [wineInfo['name']]
    if hasProducer:
        similarityTerms.append("POW(levenshtein_ratio(producer, %s), 2)")
        similarityParams.append(wineInfo['producer'])
    similarityTerms.append("POW(levenshtein_ratio(region, %s), 2)")
    similarityParams.append(wineInfo['region'])

    distanceExpr = f"SQRT({' + '.join(similarityTerms)})/SQRT({len(similarityTerms)})"
    cursor.execute(
        "SELECT id FROM ( "
        f"SELECT *, {distanceExpr} AS distance FROM wine "
        ") AS subquery "
        "WHERE color = %s AND vintage = %s AND size = %s AND distance > 0.8 "
        "ORDER BY distance DESC LIMIT 1",
        (*similarityParams, wineInfo['color'], wineInfo['vintage'], wineInfo['size']))
    wine_id = cursor.fetchone()

    # If wine was not found, insert
    if wine_id:
        wine_id = wine_id[0]
    else:
        wineColumns = ['color', 'name', 'region', 'vintage', 'size']
        if hasProducer:
            wineColumns.insert(2, 'producer')
        placeholders = ', '.join(['%s'] * len(wineColumns))
        cursor.execute(
            f"INSERT INTO wine ({', '.join(wineColumns)}) VALUES ({placeholders})",
            [wineInfo[column] for column in wineColumns])

        # Read back the id of the row that was just inserted
        cursor.execute(
            "SELECT id FROM wine WHERE color=%s AND name=%s AND region=%s "
            "AND vintage=%s AND size=%s",
            (wineInfo['color'], wineInfo['name'], wineInfo['region'],
             wineInfo['vintage'], wineInfo['size']))
        wine_id = cursor.fetchone()[0]

        # Wine Variety (only recorded for newly inserted wines)
        for variety in wineInfo['variety']:
            cursor.execute("INSERT INTO wine_variety VALUES (%s, %s)", (wine_id, variety))

    # ----- Dependent Attributes -----

    # Images: insert each link once per wine. The derived-table columns are
    # aliased so their names can never collide.
    for image in wineInfo['image']:
        cursor.execute(
            "INSERT INTO image (link, wine) "
            "SELECT * FROM (SELECT %s AS link, %s AS wine) AS tmp "
            "WHERE NOT EXISTS (SELECT wine FROM image WHERE link=%s AND wine=%s)",
            (image, wine_id, image, wine_id))

    # Ratings: insert one rating per critic and wine. The existence check
    # compares the critic name with `type`; comparing the numeric score (as
    # before) never matched and duplicated ratings on every run.
    if 'ratings' in wineInfo:
        for rating in wineInfo['ratings']:
            cursor.execute(
                "INSERT INTO rating (type, value, wine) "
                "SELECT * FROM (SELECT %s AS type, %s AS value, %s AS wine) AS tmp "
                "WHERE NOT EXISTS (SELECT id FROM rating WHERE wine=%s AND type=%s)",
                (rating['name'], rating['rating'], wine_id, wine_id, rating['name']))

    # Price
    cursor.execute(
        "INSERT INTO price (value, currency, store, wine) VALUES (%s, %s, %s, %s)",
        (wineInfo['price']['value'], wineInfo['price']['currency'],
         wineInfo['store'], wine_id))

    conn.commit()
    # NOTE(review): on success the connection is left open, as before,
    # presumably for later cells - close it once it is no longer needed.
except Exception as err:
    print(err)
    # Discard the partially applied transaction before giving up the connection
    try:
        conn.rollback()
    finally:
        conn.close()