# Web Scraping Examples

In [2]:
# import needed libraries
import requests
from bs4 import BeautifulSoup
import csv

In [3]:
# defining the URL for the HTTP request
url = 'http://quotes.toscrape.com/'
# timeout (in seconds) keeps the cell from hanging forever if the server never answers
response = requests.get(url, timeout=10)
# stop here on a 4xx/5xx answer instead of silently parsing an error page later
response.raise_for_status()

In [4]:
# parse the body of the HTTP response into a navigable tree,
# using the HTML parser that ships with Python
soup = BeautifulSoup(markup=response.text, features='html.parser')

In [5]:
# displaying tree structure
# Only a preview is printed: the prettified page puts every tag on its own line,
# so dumping it in full would bury the rest of the notebook under raw HTML.
PREVIEW_LINES = 40
print('\n'.join(soup.prettify().splitlines()[:PREVIEW_LINES]))

<!DOCTYPE html>
<html lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   Quotes to Scrape
  </title>
  <link href="/static/bootstrap.min.css" rel="stylesheet"/>
  <link href="/static/main.css" rel="stylesheet"/>
 </head>
 <body>
  <div class="container">
   <div class="row header-box">
    <div class="col-md-8">
     <h1>
      <a href="/" style="text-decoration: none">
       Quotes to Scrape
      </a>
     </h1>
    </div>
    <div class="col-md-4">
     <p>
      <a href="/login">
       Login
      </a>
     </p>
    </div>
   </div>
   <div class="row">
    <div class="col-md-8">
     <div class="quote" itemscope="" itemtype="http://schema.org/CreativeWork">
      <span class="text" itemprop="text">
       “The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”
      </span>
      <span>
       by
       <small class="author" itemprop="author">
        Albert Einstein
       </small>
       <a href="/author/Albert

In [6]:
# show the markup of the first <div> on the page whose CSS class is "quote"
first_quote = soup.find('div', attrs={'class': 'quote'})
print(first_quote.prettify())

<div class="quote" itemscope="" itemtype="http://schema.org/CreativeWork">
 <span class="text" itemprop="text">
  “The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”
 </span>
 <span>
  by
  <small class="author" itemprop="author">
   Albert Einstein
  </small>
  <a href="/author/Albert-Einstein">
   (about)
  </a>
 </span>
 <div class="tags">
  Tags:
  <meta class="keywords" content="change,deep-thoughts,thinking,world" itemprop="keywords"/>
  <a class="tag" href="/tag/change/page/1/">
   change
  </a>
  <a class="tag" href="/tag/deep-thoughts/page/1/">
   deep-thoughts
  </a>
  <a class="tag" href="/tag/thinking/page/1/">
   thinking
  </a>
  <a class="tag" href="/tag/world/page/1/">
   world
  </a>
 </div>
</div>



In [7]:
# collect every <div> with CSS class "quote" from the parsed page
quotes = soup.find_all('div', attrs={'class': 'quote'})

In [8]:
# print one "<quote text> - <author>" line per extracted quote
for quote in quotes:
    text = quote.find('span', attrs={'class': 'text'}).get_text()
    author = quote.find('small', attrs={'class': 'author'}).get_text()
    print(text, author, sep=' - ')

“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.” - Albert Einstein
“It is our choices, Harry, that show what we truly are, far more than our abilities.” - J.K. Rowling
“There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.” - Albert Einstein
“The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.” - Jane Austen
“Imperfection is beauty, madness is genius and it's better to be absolutely ridiculous than absolutely boring.” - Marilyn Monroe
“Try not to become a man of success. Rather become a man of value.” - Albert Einstein
“It is better to be hated for what you are than to be loved for what you are not.” - André Gide
“I have not failed. I've just found 10,000 ways that won't work.” - Thomas A. Edison
“A woman is like a tea bag; you never know how strong it is until it's in hot water.” - Eleanor Roos

In [9]:
# show each quote together with its author and tags, one delimited block per quote
for quote in quotes:
    # quote text
    text = quote.find('span', attrs={'class': 'text'}).get_text()
    # author name
    author = quote.find('small', attrs={'class': 'author'}).get_text()
    # text of every tag link attached to this quote
    tags = [link.get_text() for link in quote.find_all('a', attrs={'class': 'tag'})]
    # one labelled line per field, closed by a 50-dash rule
    print(
        f'Quote: {text}',
        f'Author: {author}',
        f'Tags: {", ".join(tags)}',
        '-' * 50,
        sep='\n',
    )

Quote: “The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”
Author: Albert Einstein
Tags: change, deep-thoughts, thinking, world
--------------------------------------------------
Quote: “It is our choices, Harry, that show what we truly are, far more than our abilities.”
Author: J.K. Rowling
Tags: abilities, choices
--------------------------------------------------
Quote: “There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.”
Author: Albert Einstein
Tags: inspirational, life, live, miracle, miracles
--------------------------------------------------
Quote: “The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.”
Author: Jane Austen
Tags: aliteracy, books, classic, humor
--------------------------------------------------
Quote: “Imperfection is beauty, madness is genius and it's better to be absolute

In [10]:
# ALTERNATIVE VERSION # (saving extracted data as csv)

# Print every quote and, in the same pass, store it as a row of quotes.csv.

# newline='' lets the csv module control line endings itself;
# utf-8 is set explicitly rather than relying on the platform default encoding
with open('quotes.csv', mode='w', newline='', encoding='utf-8') as csvfile:
    # writer bound to the freshly opened file
    csvwriter = csv.writer(csvfile)
    # header row
    csvwriter.writerow(['Quote', 'Author', 'Tags'])

    # every <div> with CSS class "quote" in the parsed page
    quotes = soup.find_all('div', attrs={'class': 'quote'})

    for quote in quotes:
        # quote text
        text = quote.find('span', attrs={'class': 'text'}).get_text()
        # author name
        author = quote.find('small', attrs={'class': 'author'}).get_text()
        # tag names flattened into a single comma-separated field
        tags = ', '.join(
            link.get_text() for link in quote.find_all('a', attrs={'class': 'tag'})
        )

        # show the record on screen
        print(
            f'QUOTE: {text}',
            f'AUTHOR: {author}',
            f'TAGS: {tags}',
            '-' * 50,
            sep='\n',
        )

        # append the record to the csv file
        csvwriter.writerow((text, author, tags))

QUOTE: “The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”
AUTHOR: Albert Einstein
TAGS: change, deep-thoughts, thinking, world
--------------------------------------------------
QUOTE: “It is our choices, Harry, that show what we truly are, far more than our abilities.”
AUTHOR: J.K. Rowling
TAGS: abilities, choices
--------------------------------------------------
QUOTE: “There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.”
AUTHOR: Albert Einstein
TAGS: inspirational, life, live, miracle, miracles
--------------------------------------------------
QUOTE: “The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.”
AUTHOR: Jane Austen
TAGS: aliteracy, books, classic, humor
--------------------------------------------------
QUOTE: “Imperfection is beauty, madness is genius and it's better to be absolute