### Imports

In [1]:
from bs4 import BeautifulSoup
import requests

### HTTP Request

In [2]:
url = 'http://automationpractice.com/index.php?id_category=3&controller=category'

In [3]:
# Fetch the category page. The timeout stops the cell from hanging indefinitely
# if the server never answers, and raise_for_status() turns a 4xx/5xx reply into
# an immediate error instead of letting the parsing cells fail later.
response = requests.get(url, timeout=10)
response.raise_for_status()

In [4]:
response.status_code

200

### Soup Object

In [5]:
# Parse the raw response bytes with the 'html.parser' backend; the cells below
# run all of their lookups against this `soup` object.
soup = BeautifulSoup(response.content, 'html.parser')

### Important Functions

#### find() 

Returns the first matching element.

In [10]:
# FIND BY ID

In [9]:
# Locate the element with id="contact-link", then take its text with every
# newline removed (replace() or strip() both work here).
contact_element = soup.find(id='contact-link')
contact = contact_element.get_text().replace('\n', '')
contact

'Contact us'

In [13]:
contact = soup.find(id = 'contact-link').find('a')
contact

<a href="http://automationpractice.com/index.php?controller=contact" title="Contact Us">Contact us</a>

In [None]:
# FIND BY CLASS

In [15]:
# Find the first product tile, then the price container nested inside it.
product_tile = soup.find(class_='ajax_block_product')
result = product_tile.find(class_='content_price')
result

<div class="content_price" itemprop="offers" itemscope="" itemtype="http://schema.org/Offer">
<span class="price product-price" itemprop="price">
										$16.51									</span>
<meta content="USD" itemprop="priceCurrency"/>
</div>

#### find_all() & findAll()

In [23]:
result = soup.find_all(class_ = 'ajax_block_product')
len(result)

7

`find_all()` returns a list of every matching element; `findAll()` is an older alias for the same method.

#### select_one()

Selects by CSS selector and returns the first matching element - the CSS-selector counterpart of `find()`.

In [28]:
soup.select_one('.shop-phone').get_text().strip() # . for class

'Call us now: 0123-456-789'

In [30]:
soup.select_one('#contact-link').get_text().strip() # # for id

'Contact us'

#### select()

Selects by CSS selector and returns a list of all matching elements - the CSS-selector counterpart of `find_all()` / `findAll()`.

In [35]:
result = soup.select('.ajax_block_product')

In [34]:
len(result)

7

#### get value of attributes vs. get text

In [36]:
soup.find(itemprop="name")

<h5 itemprop="name">
<a class="product-name" href="http://automationpractice.com/index.php?id_product=1&amp;controller=product" itemprop="url" title="Faded Short Sleeve T-shirts">
							Faded Short Sleeve T-shirts
						</a>
</h5>

In [39]:
soup.find(itemprop="name").find('a').get('title')

'Faded Short Sleeve T-shirts'

In [40]:
soup.find(itemprop="name").find('a').get_text().strip()

'Faded Short Sleeve T-shirts'

### Siblings and Parents

In [43]:
# Start at the first product tile, step to the sibling that follows it,
# and display that sibling's product title.
first_product = soup.select_one('.ajax_block_product')
next_product = first_product.find_next_sibling()
next_product.select_one('.product-name').get('title').strip()

'Blouse'

In [45]:
# Go to the last product ([-1] works for any number of products, unlike a
# hard-coded index) and show the title of the product just before it.
soup.select('.ajax_block_product')[-1].find_previous_sibling().select_one('.product-name').get('title').strip()

'Printed Summer Dress'

In [48]:
# Parent of the last product tile; [-1] replaces the hard-coded index so the
# cell keeps working if the page lists a different number of products.
soup.select('.ajax_block_product')[-1].find_parent()

<ul class="product_list grid row">
<li class="ajax_block_product col-xs-12 col-sm-6 col-md-4 first-in-line first-item-of-tablet-line first-item-of-mobile-line">
<div class="product-container" itemscope="" itemtype="http://schema.org/Product">
<div class="left-block">
<div class="product-image-container">
<a class="product_img_link" href="http://automationpractice.com/index.php?id_product=1&amp;controller=product" itemprop="url" title="Faded Short Sleeve T-shirts">
<img alt="Faded Short Sleeve T-shirts" class="replace-2x img-responsive" height="250" itemprop="image" src="http://automationpractice.com/img/p/1/1-home_default.jpg" title="Faded Short Sleeve T-shirts" width="250"/>
</a>
<div class="quick-view-wrapper-mobile">
<a class="quick-view-mobile" href="http://automationpractice.com/index.php?id_product=1&amp;controller=product" rel="http://automationpractice.com/index.php?id_product=1&amp;controller=product">
<i class="icon-eye-open"></i>
</a>
</div>
<a class="quick-view" href="http:

### Extract Links

#### Single Link

In [60]:
soup.find(id = 'contact-link').find('a').get('href')

'http://automationpractice.com/index.php?controller=contact'

In [61]:
# Same lookup with dict-style indexing instead of .get(). Per the Beautiful Soup
# docs, ['href'] raises KeyError when the attribute is missing, whereas
# .get('href') returns None.
soup.find(id = 'contact-link').find('a')['href']

'http://automationpractice.com/index.php?controller=contact'

#### Multiple Links

In [57]:
link = soup.find_all(itemprop = 'name')
len(link)

7

In [58]:
# Collect the product-page URL from the <a> inside every itemprop="name" element.
# A comprehension builds the list in one step and does not leave a stray loop
# variable behind in the notebook namespace.
links = [name_tag.find('a')['href'] for name_tag in link]

In [59]:
links

['http://automationpractice.com/index.php?id_product=1&controller=product',
 'http://automationpractice.com/index.php?id_product=2&controller=product',
 'http://automationpractice.com/index.php?id_product=3&controller=product',
 'http://automationpractice.com/index.php?id_product=4&controller=product',
 'http://automationpractice.com/index.php?id_product=5&controller=product',
 'http://automationpractice.com/index.php?id_product=6&controller=product',
 'http://automationpractice.com/index.php?id_product=7&controller=product']

### Find Elements - Alternative Syntax

#### Alternative 1: keyword argument (find by id)

In [62]:
soup.find(id = 'contact-link').get_text().strip()

'Contact us'

#### Alternative 2: tag name + attribute dict (find by id)

In [66]:
soup.find('div',{'id':'contact-link'}).get_text().strip()

'Contact us'

#### Alternative 1: keyword argument (find by class)

In [65]:
sub = soup.find(class_ = 'subcategory-name').get_text()
sub

'Tops'

#### Alternative 2: tag name + attribute dict (find by class)

In [67]:
sub = soup.find('a',{'class':'subcategory-name'}).get_text()
sub

'Tops'