In [1]:
from bs4 import BeautifulSoup

In [2]:
# Sample HTML page used as the parsing input: a heading, a blockquote, and
# three <div class="section"> blocks (ids: start, documentation, community),
# each holding an <h3> title and a <ul> of links.
html = """
<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Document</title>
</head>

<body>
    <h1>pandas</h1>
    <blockquote>pandas is a powerful Python data analysis library. </blockquote>

    <div id="start" class="section">
        <h3 class="section-header">Getting Started</h3>
        <ul class="pandas-ul">
            <a href="https://pandas.pydata.org/getting_started.html">
                <li>Install pandas</li>
            </a>
            <a href="https://pandas.pydata.org/docs/getting_started/index.html">
                <li>Getting started</li>
            </a>
        </ul>
    </div>

    <div id="documentation" class="section">
        <h3 class="section-header">Documentation</h3>
        <ul class="pandas-ul">
            <a href="https://pandas.pydata.org/docs/user_guide/index.html">
                <li>User guide</li>
            </a>
            <a href="https://pandas.pydata.org/docs/reference/index.html">
                <li>API reference</li>
            </a>
            <a href="https://pandas.pydata.org/docs/development/index.html">
                <li>How to contribute to pandas</li>
            </a>
        </ul>
    </div>

    <div id="community" class="section">
        <h3 class="section-header">The pandas Community</h3>
        <ul class="pandas-ul">
            <a href="https://pandas.pydata.org/about/index.html">
                <li>More about pandas</li>
            </a>
            <a href="https://stackoverflow.com/questions/tagged/pandas">
                <li>Have questions?</li>
            </a>
            <a href="https://pandas.pydata.org/community/ecosystem.html">
                <li>The pandas ecosystem</li>
            </a>
        </ul>
    </div>
</body>

</html>
"""

In [3]:
# Parse the sample markup with Python's built-in HTML parser backend.
soup = BeautifulSoup(markup=html, features='html.parser')

In [4]:
# Collect every <h3> tag found in the parsed document.
all_titles = soup.find_all(name='h3')
all_titles

[<h3 class="section-header">Getting Started</h3>,
 <h3 class="section-header">Documentation</h3>,
 <h3 class="section-header">The pandas Community</h3>]

In [5]:
# Reduce each <h3> tag to just its text content.
titles = [heading.get_text() for heading in all_titles]
titles

['Getting Started', 'Documentation', 'The pandas Community']

In [6]:
# Start section: the first <div> whose id attribute equals "start"
start_section = soup.find('div', attrs={'id': 'start'})
start_section

<div class="section" id="start">
<h3 class="section-header">Getting Started</h3>
<ul class="pandas-ul">
<a href="https://pandas.pydata.org/getting_started.html">
<li>Install pandas</li>
</a>
<a href="https://pandas.pydata.org/docs/getting_started/index.html">
<li>Getting started</li>
</a>
</ul>
</div>

In [7]:
# Title of the start section: text of the first <h3> inside it.
start_description = start_section.h3.get_text()
start_description

'Getting Started'

In [8]:
# Every <a> tag nested anywhere inside the start section.
start_links = start_section.find_all(name='a')
start_links

[<a href="https://pandas.pydata.org/getting_started.html">
 <li>Install pandas</li>
 </a>,
 <a href="https://pandas.pydata.org/docs/getting_started/index.html">
 <li>Getting started</li>
 </a>]

In [9]:
# Pull the href attribute out of each link in the start section.
start_urls = [
    anchor['href']
    for anchor in start_links
]
start_urls

['https://pandas.pydata.org/getting_started.html',
 'https://pandas.pydata.org/docs/getting_started/index.html']

In [10]:
# All URLs: gather the link target of every anchor in the document.
# href=True restricts the search to <a> tags that actually carry an href
# attribute, so an anchor without one cannot raise KeyError on link['href'].
all_urls = [link['href'] for link in soup.find_all('a', href=True)]
all_urls

['https://pandas.pydata.org/getting_started.html',
 'https://pandas.pydata.org/docs/getting_started/index.html',
 'https://pandas.pydata.org/docs/user_guide/index.html',
 'https://pandas.pydata.org/docs/reference/index.html',
 'https://pandas.pydata.org/docs/development/index.html',
 'https://pandas.pydata.org/about/index.html',
 'https://stackoverflow.com/questions/tagged/pandas',
 'https://pandas.pydata.org/community/ecosystem.html']

In [11]:
# BONUS: Compile a dictionary of titles and URLs.
# Start from an empty mapping of section title -> list of URLs.
pandas_info_dict = dict()

In [12]:
# Fill pandas_info_dict with one entry per <div>: the <h3> text is the key,
# the hrefs of the links inside that <div> are the value.
all_divs = soup.find_all('div')
for div in all_divs:
    # A <div> without an <h3> has no title to use as a key; skip it rather
    # than fail on None.text.
    header = div.find('h3')
    if header is None:
        continue
    div_description = header.text
    # href=True keeps only anchors that actually carry an href attribute.
    urls = [link['href'] for link in div.find_all('a', href=True)]
    pandas_info_dict[div_description] = urls

In [13]:
# Display the compiled mapping of section title -> list of URLs.
pandas_info_dict

{'Getting Started': ['https://pandas.pydata.org/getting_started.html',
  'https://pandas.pydata.org/docs/getting_started/index.html'],
 'Documentation': ['https://pandas.pydata.org/docs/user_guide/index.html',
  'https://pandas.pydata.org/docs/reference/index.html',
  'https://pandas.pydata.org/docs/development/index.html'],
 'The pandas Community': ['https://pandas.pydata.org/about/index.html',
  'https://stackoverflow.com/questions/tagged/pandas',
  'https://pandas.pydata.org/community/ecosystem.html']}

In [14]:
# Using a for-loop: build the same title -> URLs mapping with explicit
# loops instead of a list comprehension.
alternate_dict = {}
for div in all_divs:
    # Skip any <div> that has no <h3>, since there is no title to key on
    # (and None.text would raise AttributeError).
    header = div.find('h3')
    if header is None:
        continue
    div_desc = header.text
    # href=True keeps only anchors that actually carry an href attribute,
    # so url['href'] below cannot raise KeyError.
    div_urls = div.find_all('a', href=True)
    url_list = []
    for url in div_urls:
        url_list.append(url['href'])
    alternate_dict[div_desc] = url_list

In [15]:
# Display the mapping built with the explicit for-loop.
alternate_dict

{'Getting Started': ['https://pandas.pydata.org/getting_started.html',
  'https://pandas.pydata.org/docs/getting_started/index.html'],
 'Documentation': ['https://pandas.pydata.org/docs/user_guide/index.html',
  'https://pandas.pydata.org/docs/reference/index.html',
  'https://pandas.pydata.org/docs/development/index.html'],
 'The pandas Community': ['https://pandas.pydata.org/about/index.html',
  'https://stackoverflow.com/questions/tagged/pandas',
  'https://pandas.pydata.org/community/ecosystem.html']}