In [1]:
import pandas as pd
import urllib.request
from urllib.parse import urlparse
from bs4 import BeautifulSoup

In [2]:
def get_page(url):
    """Fetch a URL and parse the response body with Beautiful Soup.

    Args:
        url (string): Fully qualified URL of a page.

    Returns:
        soup (BeautifulSoup): Parsed document built from the response using
            the 'html.parser' backend.

    Raises:
        Any exception raised by ``urllib.request.urlopen`` propagates
        unchanged; nothing is caught here.
    """

    # The context manager closes the HTTP response once parsing is done, so
    # calling this in a loop over many URLs does not leak open connections.
    with urllib.request.urlopen(url) as response:
        # Pass the charset declared in the response headers (None when the
        # header has no charset parameter) as the encoding hint.
        soup = BeautifulSoup(response,
                             'html.parser',
                             from_encoding=response.info().get_param('charset'))

    return soup

In [7]:
# Fetch and parse the homepage once; the get_description / get_title
# checks in the following cells run against this `soup`.
soup = get_page('https://usualwines.com/')

In [6]:
def get_description(soup):
    """Return the content of the page's meta description tag.

    Args:
        soup: HTML code from Beautiful Soup (any object exposing its
            ``find`` method).

    Returns:
        value (string): The ``content`` attribute of the first
            ``<meta name="description">`` tag, or None when the page has no
            such tag or the tag has no ``content`` attribute.
    """

    # A single lookup is enough: find() returns None when nothing matches,
    # so there is no need to search the tree once to test and again to read.
    tag = soup.find("meta", attrs={"name": "description"})
    if tag is None:
        return None

    return tag.get("content")

In [8]:
# Try get_description on the current `soup`; the bare name on the last
# line makes the notebook display the extracted value.
meta = get_description(soup)
meta

'Our California wines are made with minimal intervention and no added sugars, shipped in single serve bottles for a fresh pour every time.'

In [10]:
def get_title(soup):
    """Return the page title.

    Args:
        soup: HTML code from Beautiful Soup (any object exposing its
            ``find`` method).

    Returns:
        value (string): The ``.string`` value of the first ``<title>`` tag,
            returned as-is (surrounding whitespace is not stripped), or None
            when the page has no ``<title>`` tag.
    """

    # A single lookup is enough: find() returns None when nothing matches,
    # so there is no need to search the tree once to test and again to read.
    tag = soup.find("title")
    if tag is None:
        return None

    return tag.string

In [11]:
# Try get_title on the current `soup`; the bare name on the last line
# makes the notebook display the extracted value.
title = get_title(soup)
title

'\n  Usual Wines - Wine you can feel good about\n  \n  \n  \n'

In [12]:
# Empty frame that fixes the column layout used for the scraped page data.
df_pages = pd.DataFrame(columns=['url', 'title', 'description'])

In [None]:
# Scrape every URL listed in `df` and collect one record per page.
# NOTE(review): `df` is not defined in the cells visible here; presumably a
# DataFrame with a 'loc' column of page URLs built earlier — confirm.
page_records = []

for index, row in df.iterrows():

    soup = get_page(row['loc'])
    title = get_title(soup)
    description = get_description(soup)

    page = {
        'url': row['loc'],
        'title': title,
        'description': description
    }

    page_records.append(page)

# Build the frame once from the collected records. DataFrame.append was
# removed in pandas 2.0, and growing a frame row by row copies it on every
# iteration. Rebuilding from the list also makes this cell safe to re-run
# without duplicating rows. Column order is taken from the empty `df_pages`
# defined above.
df_pages = pd.DataFrame(page_records, columns=df_pages.columns)