In [34]:
# import library to request data from URL
import requests 
# import "regular expressions" for removing extra spaces 
import re
# import pandas (staple data science library!)
import pandas as pd
# import Soup for scraping 
from bs4 import BeautifulSoup

# Request the page of red wines from BottleRocket
red_wine_data = requests.get("https://www.bottlerocket.com/wine/category/red-wine.html?product_list_limit=all").content
# Create a Soup object using the HTML data that comes back 
soup = BeautifulSoup(red_wine_data, 'html.parser')
# Find all of the products on the page 
products = find_all('div', class_="product-item-details")

In [56]:
# Empty list that we will add to as we loop 
rows = []

# Iterate over each product that Soup found 
for p in products:
    # Get the name
    name = p.find('span', itemprop="name").text
    # Get the vintage
    vintage = p.find("li", class_="product_vintage").find('a').text
    # Replace extra spaces in middle of sentence using re library, then strip the trailing and leading spaces off the sentence as well
    # strip("   Hello There!     ") = "Hello There!"
    vintage = re.sub(' +', ' ', vintage).strip()
    # Get the variety of grape
    grapes = p.find("li", class_="product_grapes").text.strip()
    # Get the country/region 
    region = p.find("li", class_="product_country").text.strip()
    # Get the details of the wine, the "product_brief"
    brief = p.find("li", class_="product_brief").text
    brief = re.sub(' +', ' ', brief).strip()
    # Separate/split on the comma into separate variables 
    color, dry_level, body, taste1 = brief.split(',')[0:4]
    # Add the row to list we declared earlier (think of this add adding a row to a spread sheet)
    rows.append( [name, vintage, grapes, country, brief, color, dry_level, body, taste1, taste2] )

# Create a DataFrame (Spreadsheet equivalent in code)
# Just specify the data and the names of the columns 
df = pd.DataFrame(data=rows, columns=['name', 'vintage', 'grape_variety', 'region', 'tags', 'color', 'dryness', 'body', 'taste1', 'taste2'])

In [57]:
# Display the data!
df

Unnamed: 0,name,vintage,grape_variety,origin,tags,color,dryness,body,taste1,taste2
0,Camins 2 Dreams,Syrah Spear Vineyards 2018,Syrah,U.S.A. > California,"Red, Dry, Medium-Bodied, Spicy, Vibrant",Red,Dry,Medium-Bodied,Spicy,Fresh
1,Domaine Faiveley,Chambolle-Musigny 2017,Pinot Noir,France > Burgundy,"Red, Dry, Medium-Bodied, Floral, Rich",Red,Dry,Medium-Bodied,Floral,Fresh
2,Bodegas Ponce,Pino 2018,Bobal,Spain,"Red, Dry, Full-Bodied, Mineral, Muscular",Red,Dry,Full-Bodied,Mineral,Fresh
3,Evening Land,Pinot Noir Seven Springs 2017,Pinot Noir,U.S.A. > Oregon,"Red, Dry, Medium-Bodied, Floral, Rich",Red,Dry,Medium-Bodied,Floral,Fresh
4,Maison Noir,Horseshoes and Handgrenades NV,"Syrah, Cabernet Sauvignon, Merlot",U.S.A. > Oregon,"Red, Dry, Full-Bodied, Fruity, Hearty",Red,Dry,Full-Bodied,Fruity,Fresh
5,Nomadica,Red Wine Blend NV,"Sangiovese, Grenache, Zinfandel",U.S.A. > California,"Red, Dry, Medium-Bodied, Fruity, Vibrant",Red,Dry,Medium-Bodied,Fruity,Fresh
6,Kita,Red NV,"Grenache, Syrah",U.S.A. > California,"Red, Dry, Medium-Bodied, Fruity, Vibrant",Red,Dry,Medium-Bodied,Fruity,Fresh
7,Domaine de la Vieille Julienne,Chateauneuf-du-Pape les Trois Sources 2,"Grenache, Cinsault, Syrah, Mourvedre",France > Rhone,"Red, Dry, Full-Bodied, Fruity, Hearty",Red,Dry,Full-Bodied,Fruity,Fresh
8,Folly of the Beast,Pinot Noir 2018,Pinot Noir,U.S.A. > California,"Red, Dry, Medium-Bodied, Fruity, Fresh",Red,Dry,Medium-Bodied,Fruity,Fresh
9,Felsina,Fontalloro 2017,Sangiovese,Italy > Tuscany,"Red, Dry, Full-Bodied, Fruity",Red,Dry,Full-Bodied,Fruity,Fresh
