# Import libraries required for web scraping

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Send a request to fetch data from the website

In [178]:
# Browser-like User-Agent header sent along with the request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36'}
# Without a timeout, requests waits indefinitely on an unresponsive server;
# raise_for_status() stops the cell instead of letting an HTTP error page
# be parsed later as if it were the quotes listing.
response = requests.get("http://quotes.toscrape.com/page/1/", headers=headers, timeout=10)
response.raise_for_status()
webpage = response.text

# Parse the response into a well-formatted HTML tree using Beautiful Soup with the lxml parser

In [183]:
# Parse the downloaded HTML with the lxml parser, then print an indented
# dump of the resulting tree.
soup = BeautifulSoup(webpage, features='lxml')
print(soup.prettify())

<!DOCTYPE html>
<html lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   Quotes to Scrape
  </title>
  <link href="/static/bootstrap.min.css" rel="stylesheet"/>
  <link href="/static/main.css" rel="stylesheet"/>
 </head>
 <body>
  <div class="container">
   <div class="row header-box">
    <div class="col-md-8">
     <h1>
      <a href="/" style="text-decoration: none">
       Quotes to Scrape
      </a>
     </h1>
    </div>
    <div class="col-md-4">
     <p>
      <a href="/login">
       Login
      </a>
     </p>
    </div>
   </div>
   <div class="row">
    <div class="col-md-8">
     <div class="quote" itemscope="" itemtype="http://schema.org/CreativeWork">
      <span class="text" itemprop="text">
       “The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”
      </span>
      <span>
       by
       <small class="author" itemprop="author">
        Albert Einstein
       </small>
       <a href="/author/Albert

# Finding the h1 and p tags in the webpage

In [186]:
# Text content of the first <h1> element on the page.
soup.find_all('h1')[0].get_text()

'\nQuotes to Scrape\n'

In [188]:
# Every <p> element found on the page.
soup.find_all(name='p')


[<p>
 <a href="/login">Login</a>
 </p>,
 <p class="text-muted">
                 Quotes by: <a href="https://www.goodreads.com/quotes">GoodReads.com</a>
 </p>,
 <p class="copyright">
                 Made with <span class="zyte">❤</span> by <a class="zyte" href="https://www.zyte.com">Zyte</a>
 </p>]

# Getting quotes, authors, and tags from a single webpage

In [191]:
# Each quote on the page sits in its own <div class="quote"> block.
quotes = soup.find_all('div', class_='quote')

# Quote text and author name, with surrounding whitespace stripped.
quote = [block.find('span', class_="text").text.strip() for block in quotes]
author = [block.find('small', class_="author").text.strip() for block in quotes]

# One list of tag names per quote, read from the links inside <div class="tags">.
tags = [
    [link.text for link in block.find('div', class_="tags").find_all('a', class_='tag')]
    for block in quotes
]

In [193]:
# Column label -> list of values, one entry per scraped quote.
d = {
    'Name of Author': author,
    'Quote': quote,
    'Tags': tags,
}

In [195]:
# Turn the column dictionary into a table.
df = pd.DataFrame(data=d)

In [197]:
# Display the DataFrame built from the scraped lists.
df

Unnamed: 0,Name of Author,Quote,Tags
0,Albert Einstein,“The world as we have created it is a process ...,"[change, deep-thoughts, thinking, world]"
1,J.K. Rowling,"“It is our choices, Harry, that show what we t...","[abilities, choices]"
2,Albert Einstein,“There are only two ways to live your life. On...,"[inspirational, life, live, miracle, miracles]"
3,Jane Austen,"“The person, be it gentleman or lady, who has ...","[aliteracy, books, classic, humor]"
4,Marilyn Monroe,"“Imperfection is beauty, madness is genius and...","[be-yourself, inspirational]"
5,Albert Einstein,“Try not to become a man of success. Rather be...,"[adulthood, success, value]"
6,André Gide,“It is better to be hated for what you are tha...,"[life, love]"
7,Thomas A. Edison,"“I have not failed. I've just found 10,000 way...","[edison, failure, inspirational, paraphrased]"
8,Eleanor Roosevelt,“A woman is like a tea bag; you never know how...,[misattributed-eleanor-roosevelt]
9,Steve Martin,"“A day without sunshine is like, you know, nig...","[humor, obvious, simile]"


In [199]:
# (rows, columns) of the single-page DataFrame.
df.shape


(10, 3)

# Getting all data from the website using a loop (10 webpages)

In [202]:
# Scrape listing pages 1-10 and combine every quote into one DataFrame.

# The User-Agent is identical for every request, so build it once
# instead of on every loop iteration.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36'}

# Per-page frames are collected here and concatenated once after the loop;
# concatenating inside the loop re-copies all previously gathered rows each time.
page_frames = []
for j in range(1, 11):
    url = "http://quotes.toscrape.com/page/{}".format(j)
    # The timeout prevents an unresponsive server from hanging the cell, and
    # raise_for_status() aborts rather than parsing an HTTP error page.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    webpage = response.text

    soup = BeautifulSoup(webpage, 'lxml')
    quotes = soup.find_all('div', class_='quote')
    quote = []
    author = []
    tags = []
    for i in quotes:
        quote.append(i.find('span', class_="text").text.strip())
        author.append(i.find('small', class_="author").text.strip())
        # All tag links for this quote live inside its <div class="tags">.
        tags_container = i.find('div', class_="tags")
        all_tags = [tag.text for tag in tags_container.find_all('a', class_='tag')]
        tags.append(all_tags)

    d = {'Name of Author': author, 'Quote': quote, 'Tags': tags}
    df = pd.DataFrame(d)
    page_frames.append(df)

# ignore_index=True renumbers the rows 0..N-1 across all pages.
final = pd.concat(page_frames, ignore_index=True)

In [203]:
# (rows, columns) of the combined DataFrame.
final.shape

(100, 3)

In [204]:
# Display the combined DataFrame holding the quotes from all scraped pages.
final

Unnamed: 0,Name of Author,Quote,Tags
0,Albert Einstein,“The world as we have created it is a process ...,"[change, deep-thoughts, thinking, world]"
1,J.K. Rowling,"“It is our choices, Harry, that show what we t...","[abilities, choices]"
2,Albert Einstein,“There are only two ways to live your life. On...,"[inspirational, life, live, miracle, miracles]"
3,Jane Austen,"“The person, be it gentleman or lady, who has ...","[aliteracy, books, classic, humor]"
4,Marilyn Monroe,"“Imperfection is beauty, madness is genius and...","[be-yourself, inspirational]"
...,...,...,...
95,Harper Lee,“You never really understand a person until yo...,[better-life-empathy]
96,Madeleine L'Engle,“You have to write the book that wants to be w...,"[books, children, difficult, grown-ups, write,..."
97,Mark Twain,“Never tell the truth to people who are not wo...,[truth]
98,Dr. Seuss,"“A person's a person, no matter how small.”",[inspirational]


# Saving the results to a CSV file

In [206]:
# Write the combined table to quotes.csv as UTF-8, leaving out the index column.
final.to_csv(path_or_buf="quotes.csv", encoding="utf-8", index=False)