In [1]:
import os

import requests
from bs4 import BeautifulSoup

In [2]:
# Fetch every listing page of quotes.toscrape.com and store the raw HTML.
#
# Pages are requested in order starting at 1; the loop ends at the first
# response whose markup contains no `div.quote` element.

# Create the output folder up front so the cell also works on a fresh checkout
# (open(..., "w") does not create missing parent directories).
os.makedirs("scraped_data", exist_ok=True)

page_count = 1

while True:
    URL = f"https://quotes.toscrape.com/page/{page_count}/"
    # Explicit timeout (seconds): without one, requests waits indefinitely on a
    # stalled connection and the kernel would hang here.
    res = requests.get(URL, timeout=10)

    soup = BeautifulSoup(res.text, "lxml")
    quotes = soup.select("div.quote")

    # A page without any quote block marks the end of the listing.
    if not quotes:
        print("no valid pages anymore...")
        break

    with open(f"scraped_data/page{page_count}.html", "w", encoding="utf-8") as f:
        f.write(res.text)
        print(f"downloaded data from page {page_count}")

    page_count += 1

downloaded data from page 1
downloaded data from page 2
downloaded data from page 3
downloaded data from page 4
downloaded data from page 5
downloaded data from page 6
downloaded data from page 7
downloaded data from page 8
downloaded data from page 9
downloaded data from page 10
no valid pages anymore...


In [3]:
# Fetch every listing page for the "love" tag and store the raw HTML.
#
# Same paging scheme as the full listing: request pages in order starting at 1
# and stop at the first response whose markup contains no `div.quote` element.

# Create the output folder up front so the cell also works on a fresh checkout
# (open(..., "w") does not create missing parent directories).
os.makedirs("love_quotes_scraped_data", exist_ok=True)

page_count = 1

while True:
    URL = f"https://quotes.toscrape.com/tag/love/page/{page_count}/"
    # Explicit timeout (seconds): without one, requests waits indefinitely on a
    # stalled connection and the kernel would hang here.
    res = requests.get(URL, timeout=10)

    soup = BeautifulSoup(res.text, "lxml")
    quotes = soup.select("div.quote")

    # A page without any quote block marks the end of the tag listing.
    if not quotes:
        print("no valid pages anymore...")
        break

    with open(f"love_quotes_scraped_data/page{page_count}.html", "w", encoding="utf-8") as f:
        f.write(res.text)
        print(f"downloaded data from page {page_count}")

    page_count += 1

downloaded data from page 1
downloaded data from page 2
no valid pages anymore...


In [4]:
# Extract useful info - Single Page
# Read the saved copy of page 1 from disk and parse it with the lxml parser.

with open("scraped_data/page1.html", mode="r", encoding="utf-8") as saved_page:
    html_content = saved_page.read()

soup = BeautifulSoup(markup=html_content, features="lxml")
    

In [5]:
# Collect [text, author] pairs for every quote on the parsed page that
# carries the tag "life".
all_quotes = soup.select("div.quote")
life_quotes = []

for quote_div in all_quotes:
    # Tag labels attached to this quote block.
    tag_names = [tag_el.get_text() for tag_el in quote_div.select(".tags .tag")]

    # Skip anything that is not tagged exactly "life".
    if "life" not in tag_names:
        continue

    quote_text = quote_div.select_one("span.text").get_text()
    quote_author = quote_div.select_one("small.author").get_text()
    life_quotes.append([quote_text, quote_author])

print(life_quotes)

[['“There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.”', 'Albert Einstein'], ['“It is better to be hated for what you are than to be loved for what you are not.”', 'André Gide']]


In [6]:
import pandas as pd

# Tabulate the single-page [text, author] pairs; the bare name on the last
# line makes the notebook render the frame.
df = pd.DataFrame(data=life_quotes, columns=["Quotes", "Author"])
df

Unnamed: 0,Quotes,Author
0,“There are only two ways to live your life. On...,Albert Einstein
1,“It is better to be hated for what you are tha...,André Gide


In [9]:
# Extract useful info - Multiple Pages
import os
from bs4 import BeautifulSoup

# Walk the saved pages in numeric order (page1.html, page2.html, ...) and
# collect [text, author] for every quote tagged "life".
all_life_quotes = []
page_count = 1
file_path = f"scraped_data/page{page_count}.html"

# The first page number without a saved file ends the walk.
while os.path.exists(file_path):
    with open(file_path, mode="r", encoding="utf-8") as saved_page:
        html_content = saved_page.read()

    soup = BeautifulSoup(html_content, "lxml")
    all_quotes = soup.select("div.quote")

    for quote_div in all_quotes:
        # Tag labels attached to this quote block.
        tag_names = [tag_el.get_text() for tag_el in quote_div.select(".tags .tag")]

        if "life" not in tag_names:
            continue

        quote_text = quote_div.select_one("span.text").get_text()
        quote_author = quote_div.select_one("small.author").get_text()
        all_life_quotes.append([quote_text, quote_author])

    # Advance to the next saved page.
    page_count += 1
    file_path = f"scraped_data/page{page_count}.html"

print(all_life_quotes)
    

[['“There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.”', 'Albert Einstein'], ['“It is better to be hated for what you are than to be loved for what you are not.”', 'André Gide'], ["“This life is what you make it. No matter what, you're going to mess up sometimes, it's a universal truth. But the good part is you get to decide how you're going to mess it up. Girls will be your friends - they'll act like it anyway. But just remember, some come, some go. The ones that stay with you through everything - they're your true best friends. Don't let go of them. Also remember, sisters make the best friends in the world. As for lovers, well, they'll come and go too. And baby, I hate to say it, most of them - actually pretty much all of them are going to break your heart, but you can't give up because if you give up, you'll never find your soulmate. You'll never find that half who makes you whole and that goes for every

In [10]:
# Lift the column-width limit so the quote text is displayed in full,
# then tabulate the quotes gathered from all saved pages.
pd.set_option("display.max_colwidth", None)

df = pd.DataFrame(data=all_life_quotes, columns=["Quotes", "Author"])
df

Unnamed: 0,Quotes,Author
0,“There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.”,Albert Einstein
1,“It is better to be hated for what you are than to be loved for what you are not.”,André Gide
2,"“This life is what you make it. No matter what, you're going to mess up sometimes, it's a universal truth. But the good part is you get to decide how you're going to mess it up. Girls will be your friends - they'll act like it anyway. But just remember, some come, some go. The ones that stay with you through everything - they're your true best friends. Don't let go of them. Also remember, sisters make the best friends in the world. As for lovers, well, they'll come and go too. And baby, I hate to say it, most of them - actually pretty much all of them are going to break your heart, but you can't give up because if you give up, you'll never find your soulmate. You'll never find that half who makes you whole and that goes for everything. Just because you fail once, doesn't mean you're gonna fail at everything. Keep trying, hold on, and always, always, always believe in yourself, because if you don't, then who will, sweetie? So keep your head high, keep your chin up, and most importantly, keep smiling, because life's a beautiful thing and there's so much to smile about.”",Marilyn Monroe
3,"“I may not have gone where I intended to go, but I think I have ended up where I needed to be.”",Douglas Adams
4,"“Good friends, good books, and a sleepy conscience: this is the ideal life.”",Mark Twain
5,“Life is what happens to us while we are making other plans.”,Allen Saunders
6,"“Today you are You, that is truer than true. There is no one alive who is Youer than You.”",Dr. Seuss
7,"“Life is like riding a bicycle. To keep your balance, you must keep moving.”",Albert Einstein
8,“Life isn't about finding yourself. Life is about creating yourself.”,George Bernard Shaw
9,“Finish each day and be done with it. You have done what you could. Some blunders and absurdities no doubt crept in; forget them as soon as you can. Tomorrow is a new day. You shall begin it serenely and with too high a spirit to be encumbered with your old nonsense.”,Ralph Waldo Emerson
