In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Step 1: Target URL
url = "https://quotes.toscrape.com/"

# Step 2: Send GET request to fetch HTML.
# requests never times out by default, so pass an explicit timeout to keep
# the cell from hanging on an unresponsive server.
response = requests.get(url, timeout=10)
print("Status Code:", response.status_code)  # 200 means success
# Abort on 4xx/5xx responses; otherwise an error page would be parsed,
# yield zero quotes, and the cell would still report success.
response.raise_for_status()

# Step 3: Parse HTML with BeautifulSoup
soup = BeautifulSoup(response.text, "lxml")

# Step 4: Extract all quotes and authors
quotes_data = []
quotes = soup.find_all("div", class_="quote")

for quote in quotes:
    text = quote.find("span", class_="text").get_text(strip=True)
    author = quote.find("small", class_="author").get_text(strip=True)
    quotes_data.append({"Quote": text, "Author": author})

# Step 5: Save to CSV.
# Explicit columns keep the header row in the file even when nothing was scraped.
df = pd.DataFrame(quotes_data, columns=["Quote", "Author"])
df.to_csv("quotes.csv", index=False, encoding="utf-8")

print("Scraping Completed! Quotes saved to quotes.csv")


Status Code: 200
Scraping Completed! Quotes saved to quotes.csv


In [2]:
# Display the DataFrame of scraped quotes built in the previous cell.
df

Unnamed: 0,Quote,Author
0,“The world as we have created it is a process ...,Albert Einstein
1,"“It is our choices, Harry, that show what we t...",J.K. Rowling
2,“There are only two ways to live your life. On...,Albert Einstein
3,"“The person, be it gentleman or lady, who has ...",Jane Austen
4,"“Imperfection is beauty, madness is genius and...",Marilyn Monroe
5,“Try not to become a man of success. Rather be...,Albert Einstein
6,“It is better to be hated for what you are tha...,André Gide
7,"“I have not failed. I've just found 10,000 way...",Thomas A. Edison
8,“A woman is like a tea bag; you never know how...,Eleanor Roosevelt
9,"“A day without sunshine is like, you know, nig...",Steve Martin


In [3]:
# List the column labels of the quotes DataFrame.
df.columns

Index(['Quote', 'Author'], dtype='object')

In [4]:
# Preview the first five rows of the quotes DataFrame.
df.head()

Unnamed: 0,Quote,Author
0,“The world as we have created it is a process ...,Albert Einstein
1,"“It is our choices, Harry, that show what we t...",J.K. Rowling
2,“There are only two ways to live your life. On...,Albert Einstein
3,"“The person, be it gentleman or lady, who has ...",Jane Austen
4,"“Imperfection is beauty, madness is genius and...",Marilyn Monroe


Optimised code to fetch data from all the pages

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

all_quotes = []
base_url = "https://quotes.toscrape.com/page/{}/"

# A single Session reuses the underlying connection for every page request
# instead of opening a new one each time.
with requests.Session() as session:
    # Loop through 10 pages
    for page in range(1, 11):
        print(page)
        url = base_url.format(page)
        print(url)
        # requests never times out by default; bound each request explicitly.
        response = session.get(url, timeout=10)

        if response.status_code != 200:
            print(f"Page {page} not found. Skipping...")
            continue

        soup = BeautifulSoup(response.text, "lxml")
        quotes = soup.find_all("div", class_="quote")

        # If no quotes found, break the loop
        if not quotes:
            break

        for quote in quotes:
            text = quote.find("span", class_="text").get_text(strip=True)
            author = quote.find("small", class_="author").get_text(strip=True)
            all_quotes.append({"Quote": text, "Author": author})

        print(f"Scraped page {page} successfully")

# Save all quotes to CSV.
# Explicit columns keep the header row in the file even when nothing was scraped.
df = pd.DataFrame(all_quotes, columns=["Quote", "Author"])
df.to_csv("all_quotes.csv", index=False, encoding="utf-8")
print("Scraping Completed! Total quotes scraped:", len(df))


1
https://quotes.toscrape.com/page/1/
Scraped page 1 successfully
2
https://quotes.toscrape.com/page/2/
Scraped page 2 successfully
3
https://quotes.toscrape.com/page/3/
Scraped page 3 successfully
4
https://quotes.toscrape.com/page/4/
Scraped page 4 successfully
5
https://quotes.toscrape.com/page/5/
Scraped page 5 successfully
6
https://quotes.toscrape.com/page/6/
Scraped page 6 successfully
7
https://quotes.toscrape.com/page/7/
Scraped page 7 successfully
8
https://quotes.toscrape.com/page/8/
Scraped page 8 successfully
9
https://quotes.toscrape.com/page/9/
Scraped page 9 successfully
10
https://quotes.toscrape.com/page/10/
Scraped page 10 successfully
Scraping Completed! Total quotes scraped: 100


In [2]:
# Display the DataFrame holding the quotes collected from every scraped page.
df

Unnamed: 0,Quote,Author
0,“The world as we have created it is a process ...,Albert Einstein
1,"“It is our choices, Harry, that show what we t...",J.K. Rowling
2,“There are only two ways to live your life. On...,Albert Einstein
3,"“The person, be it gentleman or lady, who has ...",Jane Austen
4,"“Imperfection is beauty, madness is genius and...",Marilyn Monroe
...,...,...
95,“You never really understand a person until yo...,Harper Lee
96,“You have to write the book that wants to be w...,Madeleine L'Engle
97,“Never tell the truth to people who are not wo...,Mark Twain
98,"“A person's a person, no matter how small.”",Dr. Seuss


In [3]:
# NOTE: `soup` is not created in this cell; it is left over from the page loop
# in the scraping cell, so it holds only the last page that loop parsed.
# Run that cell first on a fresh kernel.
quotes = soup.find_all("div", class_="quote")

In [6]:
# Peek at the first matched element and its Python type.
# The one-item slice means an empty result set prints nothing.
for i in quotes[:1]:
    print(i)
    print(type(i), "type")

<div class="quote" itemscope="" itemtype="http://schema.org/CreativeWork">
<span class="text" itemprop="text">“The truth." Dumbledore sighed. "It is a beautiful and terrible thing, and should therefore be treated with great caution.”</span>
<span>by <small class="author" itemprop="author">J.K. Rowling</small>
<a href="/author/J-K-Rowling">(about)</a>
</span>
<div class="tags">
            Tags:
            <meta class="keywords" content="truth" itemprop="keywords"/>
<a class="tag" href="/tag/truth/page/1/">truth</a>
</div>
</div>
<class 'bs4.element.Tag'> type
