In [1]:
import pandas as pandas
from bs4 import BeautifulSoup
import requests


#### EXTRACTION PROCESS BY WEBSCRAPPING

* Scrape all quotes across all pages with pagination
* Use requests and BeautifulSoup to extract: Quote text, Author name, Tags as a list or comma-separated string.

In [2]:
url = "https://quotes.toscrape.com/"
response = requests.get(url)

In [3]:
# html
soup = BeautifulSoup(response.text, "html.parser")

print(soup.title.string) 

Quotes to Scrape


In [4]:
# Extracting quote block container
quote_blocks = soup.find_all("div", class_="quote")

# Print how many quotes were found on the first page
print(f"Quotes found on page 1: {len(quote_blocks)}")

Quotes found on page 1: 10


In [5]:
# Extracting quotes from first page
page_quotes = []

for i, quote in enumerate(quote_blocks, start=1):
    text = quote.find("span", class_="text").get_text(strip=True)
    author = quote.find("small", class_="author").get_text(strip=True)
    tags = [tag.get_text(strip=True) for tag in quote.find_all("a", class_="tag")]

    print(f"Quote {i}")
    print(f"Text: {text}")
    print(f"Author: {author}")
    print(f"Tags: {tags}")
    print("------")

    page_quotes.append({
        "text": text,
        "author": author,
        "tags": tags
    })


Quote 1
Text: “The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”
Author: Albert Einstein
Tags: ['change', 'deep-thoughts', 'thinking', 'world']
------
Quote 2
Text: “It is our choices, Harry, that show what we truly are, far more than our abilities.”
Author: J.K. Rowling
Tags: ['abilities', 'choices']
------
Quote 3
Text: “There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.”
Author: Albert Einstein
Tags: ['inspirational', 'life', 'live', 'miracle', 'miracles']
------
Quote 4
Text: “The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.”
Author: Jane Austen
Tags: ['aliteracy', 'books', 'classic', 'humor']
------
Quote 5
Text: “Imperfection is beauty, madness is genius and it's better to be absolutely ridiculous than absolutely boring.”
Author: Marilyn Monroe
Tags: ['be-yourself', 'inspirational']
--

In [6]:
# Get the URL for the next page if it exists after the second page
next_btn = soup.find("li", class_="next")

if next_btn:
    next_page = next_btn.find("a")["href"]
    print(f"Next page path: {next_page}")
else:
    next_page = None
    print("No more pages.")

Next page path: /page/2/


In [7]:

# Loading the second page using the path just extracted
response = requests.get(url + next_page)
soup = BeautifulSoup(response.text, "html.parser")

In [8]:
# Extract quote blocks from page 2
quote_blocks = soup.find_all("div", class_="quote")

print(f"Quotes found on page 2: {len(quote_blocks)}")

Quotes found on page 2: 10


In [9]:
# Extracting quotes from second page
page_quotes2 = []

for i, quote in enumerate(quote_blocks, start=1):
    text = quote.find("span", class_="text").get_text(strip=True)
    author = quote.find("small", class_="author").get_text(strip=True)
    tags = [tag.get_text(strip=True) for tag in quote.find_all("a", class_="tag")]

    print(f"Quote {i}")
    print(f"Text: {text}")
    print(f"Author: {author}")
    print(f"Tags: {tags}")
    print("------")

    page_quotes2.append({
        "text": text,
        "author": author,
        "tags": tags
    })

Quote 1
Text: “This life is what you make it. No matter what, you're going to mess up sometimes, it's a universal truth. But the good part is you get to decide how you're going to mess it up. Girls will be your friends - they'll act like it anyway. But just remember, some come, some go. The ones that stay with you through everything - they're your true best friends. Don't let go of them. Also remember, sisters make the best friends in the world. As for lovers, well, they'll come and go too. And baby, I hate to say it, most of them - actually pretty much all of them are going to break your heart, but you can't give up because if you give up, you'll never find your soulmate. You'll never find that half who makes you whole and that goes for everything. Just because you fail once, doesn't mean you're gonna fail at everything. Keep trying, hold on, and always, always, always believe in yourself, because if you don't, then who will, sweetie? So keep your head high, keep your chin up, and mos

In [10]:
# Get the URL for the next page if it exists after the third page
next_btn = soup.find("li", class_="next")

if next_btn:
    next_page = next_btn.find("a")["href"]
    print(f"Next page path: {next_page}")
else:
    next_page = None
    print("No more pages.")

Next page path: /page/3/


In [11]:
# Loading the second page using the path just extracted
response = requests.get(url + next_page)
soup = BeautifulSoup(response.text, "html.parser")

In [12]:
# Extract quote blocks from page 3
quote_blocks = soup.find_all("div", class_="quote")

print(f"Quotes found on page 3: {len(quote_blocks)}")

Quotes found on page 3: 10


In [13]:
page_quotes3 = []

for i, quote in enumerate(quote_blocks, start=1):
    text = quote.find("span", class_="text").get_text(strip=True)
    author = quote.find("small", class_="author").get_text(strip=True)
    tags = [tag.get_text(strip=True) for tag in quote.find_all("a", class_="tag")]

    print(f"Quote {i}")
    print(f"Text: {text}")
    print(f"Author: {author}")
    print(f"Tags: {tags}")
    print("------")

    page_quotes3.append({
        "text": text,
        "author": author,
        "tags": tags
    })


Quote 1
Text: “I love you without knowing how, or when, or from where. I love you simply, without problems or pride: I love you in this way because I do not know any other way of loving but this, in which there is no I or you, so intimate that your hand upon my chest is my hand, so intimate that when I fall asleep your eyes close.”
Author: Pablo Neruda
Tags: ['love', 'poetry']
------
Quote 2
Text: “For every minute you are angry you lose sixty seconds of happiness.”
Author: Ralph Waldo Emerson
Tags: ['happiness']
------
Quote 3
Text: “If you judge people, you have no time to love them.”
Author: Mother Teresa
Tags: ['attributed-no-source']
------
Quote 4
Text: “Anyone who thinks sitting in church can make you a Christian must also think that sitting in a garage can make you a car.”
Author: Garrison Keillor
Tags: ['humor', 'religion']
------
Quote 5
Text: “Beauty is in the eye of the beholder and it may be necessary from time to time to give a stupid or misinformed beholder a black eye.”

In [14]:
# Get the URL for the next page if it exists after the fourth page
next_btn = soup.find("li", class_="next")

if next_btn:
    next_page = next_btn.find("a")["href"]
    print(f"Next page path: {next_page}")
else:
    next_page = None
    print("No more pages.")

Next page path: /page/4/


In [15]:
# Loading the second page using the path just extracted
response = requests.get(url + next_page)
soup = BeautifulSoup(response.text, "html.parser")

In [16]:
# Extract quote blocks from page 4
quote_blocks = soup.find_all("div", class_="quote")

print(f"Quotes found on page 4: {len(quote_blocks)}")

Quotes found on page 4: 10


In [17]:
page_quotes4 = []

for i, quote in enumerate(quote_blocks, start=1):
    text = quote.find("span", class_="text").get_text(strip=True)
    author = quote.find("small", class_="author").get_text(strip=True)
    tags = [tag.get_text(strip=True) for tag in quote.find_all("a", class_="tag")]

    print(f"Quote {i}")
    print(f"Text: {text}")
    print(f"Author: {author}")
    print(f"Tags: {tags}")
    print("------")

    page_quotes4.append({
        "text": text,
        "author": author,
        "tags": tags
    })

Quote 1
Text: “The more that you read, the more things you will know. The more that you learn, the more places you'll go.”
Author: Dr. Seuss
Tags: ['learning', 'reading', 'seuss']
------
Quote 2
Text: “Of course it is happening inside your head, Harry, but why on earth should that mean that it is not real?”
Author: J.K. Rowling
Tags: ['dumbledore']
------
Quote 3
Text: “The truth is, everyone is going to hurt you. You just got to find the ones worth suffering for.”
Author: Bob Marley
Tags: ['friendship']
------
Quote 4
Text: “Not all of us can do great things. But we can do small things with great love.”
Author: Mother Teresa
Tags: ['misattributed-to-mother-teresa', 'paraphrased']
------
Quote 5
Text: “To the well-organized mind, death is but the next great adventure.”
Author: J.K. Rowling
Tags: ['death', 'inspirational']
------
Quote 6
Text: “All you need is love. But a little chocolate now and then doesn't hurt.”
Author: Charles M. Schulz
Tags: ['chocolate', 'food', 'humor']
------
Q

In [18]:
# Get the URL for the next page if it exists after the fifth page
next_btn = soup.find("li", class_="next")

if next_btn:
    next_page = next_btn.find("a")["href"]
    print(f"Next page path: {next_page}")
else:
    next_page = None
    print("No more pages.")

Next page path: /page/5/


In [19]:
# Loading the second page using the path just extracted
response = requests.get(url + next_page)
soup = BeautifulSoup(response.text, "html.parser")

In [20]:
# Extract quote blocks from page 5
quote_blocks = soup.find_all("div", class_="quote")

print(f"Quotes found on page 5: {len(quote_blocks)}")

Quotes found on page 5: 10


In [21]:
page_quotes5 = []

for i, quote in enumerate(quote_blocks, start=1):
    text = quote.find("span", class_="text").get_text(strip=True)
    author = quote.find("small", class_="author").get_text(strip=True)
    tags = [tag.get_text(strip=True) for tag in quote.find_all("a", class_="tag")]

    print(f"Quote {i}")
    print(f"Text: {text}")
    print(f"Author: {author}")
    print(f"Tags: {tags}")
    print("------")

    page_quotes5.append({
        "text": text,
        "author": author,
        "tags": tags
    })

Quote 1
Text: “A reader lives a thousand lives before he dies, said Jojen. The man who never reads lives only one.”
Author: George R.R. Martin
Tags: ['read', 'readers', 'reading', 'reading-books']
------
Quote 2
Text: “You can never get a cup of tea large enough or a book long enough to suit me.”
Author: C.S. Lewis
Tags: ['books', 'inspirational', 'reading', 'tea']
------
Quote 3
Text: “You believe lies so you eventually learn to trust no one but yourself.”
Author: Marilyn Monroe
Tags: []
------
Quote 4
Text: “If you can make a woman laugh, you can make her do anything.”
Author: Marilyn Monroe
Tags: ['girls', 'love']
------
Quote 5
Text: “Life is like riding a bicycle. To keep your balance, you must keep moving.”
Author: Albert Einstein
Tags: ['life', 'simile']
------
Quote 6
Text: “The real lover is the man who can thrill you by kissing your forehead or smiling into your eyes or just staring into space.”
Author: Marilyn Monroe
Tags: ['love']
------
Quote 7
Text: “A wise girl kisses bu

In [22]:
# Get the URL for the next page if it exists after the fifth page
next_btn = soup.find("li", class_="next")

if next_btn:
    next_page = next_btn.find("a")["href"]
    print(f"Next page path: {next_page}")
else:
    next_page = None
    print("No more pages.")

Next page path: /page/6/


In [23]:
# Loading the second page using the path just extracted
response = requests.get(url + next_page)
soup = BeautifulSoup(response.text, "html.parser")

In [24]:
# Extract quote blocks from page 6
quote_blocks = soup.find_all("div", class_="quote")

print(f"Quotes found on page 6: {len(quote_blocks)}")

Quotes found on page 6: 10


In [25]:
page_quotes6 = []

for i, quote in enumerate(quote_blocks, start=1):
    text = quote.find("span", class_="text").get_text(strip=True)
    author = quote.find("small", class_="author").get_text(strip=True)
    tags = [tag.get_text(strip=True) for tag in quote.find_all("a", class_="tag")]

    print(f"Quote {i}")
    print(f"Text: {text}")
    print(f"Author: {author}")
    print(f"Tags: {tags}")
    print("------")

    page_quotes6.append({
        "text": text,
        "author": author,
        "tags": tags
    })

Quote 1
Text: “There is nothing I would not do for those who are really my friends. I have no notion of loving people by halves, it is not my nature.”
Author: Jane Austen
Tags: ['friendship', 'love']
------
Quote 2
Text: “Do one thing every day that scares you.”
Author: Eleanor Roosevelt
Tags: ['attributed', 'fear', 'inspiration']
------
Quote 3
Text: “I am good, but not an angel. I do sin, but I am not the devil. I am just a small girl in a big world trying to find someone to love.”
Author: Marilyn Monroe
Tags: ['attributed-no-source']
------
Quote 4
Text: “If I were not a physicist, I would probably be a musician. I often think in music. I live my daydreams in music. I see my life in terms of music.”
Author: Albert Einstein
Tags: ['music']
------
Quote 5
Text: “If you only read the books that everyone else is reading, you can only think what everyone else is thinking.”
Author: Haruki Murakami
Tags: ['books', 'thought']
------
Quote 6
Text: “The difference between genius and stupidity

In [26]:
next_btn = soup.find("li", class_="next")

if next_btn:
    next_page = next_btn.find("a")["href"]
    print("Next page path:", next_page)
else:
    next_page = None
    print("No more pages.")


Next page path: /page/7/


In [27]:
# Loading the second page using the path just extracted
response = requests.get(url + next_page)
soup = BeautifulSoup(response.text, "html.parser")

In [28]:
# Extract quote blocks from page 7
quote_blocks = soup.find_all("div", class_="quote")

print(f"Quotes found on page 7: {len(quote_blocks)}")

Quotes found on page 7: 10


In [29]:
page_quotes7 = []

for i, quote in enumerate(quote_blocks, start=1):
    text = quote.find("span", class_="text").get_text(strip=True)
    author = quote.find("small", class_="author").get_text(strip=True)
    tags = [tag.get_text(strip=True) for tag in quote.find_all("a", class_="tag")]

    print(f"Quote {i}")
    print(f"Text: {text}")
    print(f"Author: {author}")
    print(f"Tags: {tags}")
    print("------")

    page_quotes7.append({
        "text": text,
        "author": author,
        "tags": tags
    })


Quote 1
Text: “That's the problem with drinking, I thought, as I poured myself a drink. If something bad happens you drink in an attempt to forget; if something good happens you drink in order to celebrate; and if nothing happens you drink to make something happen.”
Author: Charles Bukowski
Tags: ['alcohol']
------
Quote 2
Text: “You don’t forget the face of the person who was your last hope.”
Author: Suzanne Collins
Tags: ['the-hunger-games']
------
Quote 3
Text: “Remember, we're madly in love, so it's all right to kiss me anytime you feel like it.”
Author: Suzanne Collins
Tags: ['humor']
------
Quote 4
Text: “To love at all is to be vulnerable. Love anything and your heart will be wrung and possibly broken. If you want to make sure of keeping it intact you must give it to no one, not even an animal. Wrap it carefully round with hobbies and little luxuries; avoid all entanglements. Lock it up safe in the casket or coffin of your selfishness. But in that casket, safe, dark, motionless,

In [30]:
next_btn = soup.find("li", class_="next")

if next_btn:
    next_page = next_btn.find("a")["href"]
    print("Next page path:", next_page)
else:
    next_page = None
    print("No more pages.")

Next page path: /page/8/


In [31]:
# Loading the second page using the path just extracted
response = requests.get(url + next_page)
soup = BeautifulSoup(response.text, "html.parser")

In [32]:
# Extract quote blocks from page 8
quote_blocks = soup.find_all("div", class_="quote")

print(f"Quotes found on page 8: {len(quote_blocks)}")

Quotes found on page 8: 10


In [33]:
page_quotes8 = []

for i, quote in enumerate(quote_blocks, start=1):
    text = quote.find("span", class_="text").get_text(strip=True)
    author = quote.find("small", class_="author").get_text(strip=True)
    tags = [tag.get_text(strip=True) for tag in quote.find_all("a", class_="tag")]

    print(f"Quote {i}")
    print(f"Text: {text}")
    print(f"Author: {author}")
    print(f"Tags: {tags}")
    print("------")

    page_quotes8.append({
        "text": text,
        "author": author,
        "tags": tags
    })


Quote 1
Text: “If I had a flower for every time I thought of you...I could walk through my garden forever.”
Author: Alfred Tennyson
Tags: ['friendship', 'love']
------
Quote 2
Text: “Some people never go crazy. What truly horrible lives they must lead.”
Author: Charles Bukowski
Tags: ['humor']
------
Quote 3
Text: “The trouble with having an open mind, of course, is that people will insist on coming along and trying to put things in it.”
Author: Terry Pratchett
Tags: ['humor', 'open-mind', 'thinking']
------
Quote 4
Text: “Think left and think right and think low and think high. Oh, the thinks you can think up if only you try!”
Author: Dr. Seuss
Tags: ['humor', 'philosophy']
------
Quote 5
Text: “What really knocks me out is a book that, when you're all done reading it, you wish the author that wrote it was a terrific friend of yours and you could call him up on the phone whenever you felt like it. That doesn't happen much, though.”
Author: J.D. Salinger
Tags: ['authors', 'books', 'lit

In [34]:
next_btn = soup.find("li", class_="next")

if next_btn:
    next_page = next_btn.find("a")["href"]
    print("Next page path:", next_page)
else:
    next_page = None
    print("No more pages.")

Next page path: /page/9/


In [35]:
# Loading the second page using the path just extracted
response = requests.get(url + next_page)
soup = BeautifulSoup(response.text, "html.parser")

In [36]:
# Extract quote blocks from page 9
quote_blocks = soup.find_all("div", class_="quote")

print(f"Quotes found on page 9: {len(quote_blocks)}")

Quotes found on page 9: 10


In [37]:
page_quotes9 = []

for i, quote in enumerate(quote_blocks, start=1):
    text = quote.find("span", class_="text").get_text(strip=True)
    author = quote.find("small", class_="author").get_text(strip=True)
    tags = [tag.get_text(strip=True) for tag in quote.find_all("a", class_="tag")]

    print(f"Quote {i}")
    print(f"Text: {text}")
    print(f"Author: {author}")
    print(f"Tags: {tags}")
    print("------")

    page_quotes9.append({
        "text": text,
        "author": author,
        "tags": tags
    })


Quote 1
Text: “Anyone who has never made a mistake has never tried anything new.”
Author: Albert Einstein
Tags: ['mistakes']
------
Quote 2
Text: “A lady's imagination is very rapid; it jumps from admiration to love, from love to matrimony in a moment.”
Author: Jane Austen
Tags: ['humor', 'love', 'romantic', 'women']
------
Quote 3
Text: “Remember, if the time should come when you have to make a choice between what is right and what is easy, remember what happened to a boy who was good, and kind, and brave, because he strayed across the path of Lord Voldemort. Remember Cedric Diggory.”
Author: J.K. Rowling
Tags: ['integrity']
------
Quote 4
Text: “I declare after all there is no enjoyment like reading! How much sooner one tires of any thing than of a book! -- When I have a house of my own, I shall be miserable if I have not an excellent library.”
Author: Jane Austen
Tags: ['books', 'library', 'reading']
------
Quote 5
Text: “There are few people whom I really love, and still fewer of w

In [38]:
next_btn = soup.find("li", class_="next")

if next_btn:
    next_page = next_btn.find("a")["href"]
    print("Next page path:", next_page)
else:
    next_page = None
    print("No more pages.")

Next page path: /page/10/


In [39]:
# Loading the second page using the path just extracted
response = requests.get(url + next_page)
soup = BeautifulSoup(response.text, "html.parser")

In [40]:
# Extract quote blocks from page 10
quote_blocks = soup.find_all("div", class_="quote")

print(f"Quotes found on page 10: {len(quote_blocks)}")

Quotes found on page 10: 10


In [41]:
page_quotes10 = []

for i, quote in enumerate(quote_blocks, start=1):
    text = quote.find("span", class_="text").get_text(strip=True)
    author = quote.find("small", class_="author").get_text(strip=True)
    tags = [tag.get_text(strip=True) for tag in quote.find_all("a", class_="tag")]

    print(f"Quote {i}")
    print(f"Text: {text}")
    print(f"Author: {author}")
    print(f"Tags: {tags}")
    print("------")

    page_quotes10.append({
        "text": text,
        "author": author,
        "tags": tags
    })


Quote 1
Text: “The truth." Dumbledore sighed. "It is a beautiful and terrible thing, and should therefore be treated with great caution.”
Author: J.K. Rowling
Tags: ['truth']
------
Quote 2
Text: “I'm the one that's got to die when it's time for me to die, so let me live my life the way I want to.”
Author: Jimi Hendrix
Tags: ['death', 'life']
------
Quote 3
Text: “To die will be an awfully big adventure.”
Author: J.M. Barrie
Tags: ['adventure', 'love']
------
Quote 4
Text: “It takes courage to grow up and become who you really are.”
Author: E.E. Cummings
Tags: ['courage']
------
Quote 5
Text: “But better to get hurt by the truth than comforted with a lie.”
Author: Khaled Hosseini
Tags: ['life']
------
Quote 6
Text: “You never really understand a person until you consider things from his point of view... Until you climb inside of his skin and walk around in it.”
Author: Harper Lee
Tags: ['better-life-empathy']
------
Quote 7
Text: “You have to write the book that wants to be written. An

In [42]:
next_btn = soup.find("li", class_="next")

if next_btn:
    next_page = next_btn.find("a")["href"]
    print("Next page path:", next_page)
else:
    next_page = None
    print("No more pages.")

No more pages.


#### TRANSFORMATION OF THE WEBSCRAPED DATA 

* Clean quote text, remove extra quotes, line breaks.
* Normalize author names (title case).
* Store tags as a comma-separated string or JSON array.

In [45]:
# Combining all Page-Quotes lists into one
all_quotes = (
    page_quotes +
    page_quotes2 +
    page_quotes3 +
    page_quotes4 +
    page_quotes5 +
    page_quotes6 +
    page_quotes7 +
    page_quotes8 +
    page_quotes9 +
    page_quotes10
)

# Convert to DataFrame
import pandas as pd

df_quotes = pd.DataFrame(all_quotes)
df_quotes.head()



Unnamed: 0,text,author,tags
0,“The world as we have created it is a process ...,Albert Einstein,"[change, deep-thoughts, thinking, world]"
1,"“It is our choices, Harry, that show what we t...",J.K. Rowling,"[abilities, choices]"
2,“There are only two ways to live your life. On...,Albert Einstein,"[inspirational, life, live, miracle, miracles]"
3,"“The person, be it gentleman or lady, who has ...",Jane Austen,"[aliteracy, books, classic, humor]"
4,"“Imperfection is beauty, madness is genius and...",Marilyn Monroe,"[be-yourself, inspirational]"


##### Step 1: Cleaning quote text

In [46]:

# Cleaning the text data
df = df_quotes.copy()
df['text'] = df['text'].str.replace('\n', ' ', regex=False)
df['text'] = df['text'].str.replace('“', '', regex=False)
df['text'] = df['text'].str.replace('”', '', regex=False)
df['text'] = df['text'].str.strip()


##### Step 2: Normalizing author Names

In [47]:
# Normalizing author names to title case
df['author'] = df['author'].str.title()

In [48]:
# converting tags to a comma-separated string
df['tags'] = df['tags'].apply(lambda tag_list: ', '.join(tag_list))

df.head(10)

Unnamed: 0,text,author,tags
0,The world as we have created it is a process o...,Albert Einstein,"change, deep-thoughts, thinking, world"
1,"It is our choices, Harry, that show what we tr...",J.K. Rowling,"abilities, choices"
2,There are only two ways to live your life. One...,Albert Einstein,"inspirational, life, live, miracle, miracles"
3,"The person, be it gentleman or lady, who has n...",Jane Austen,"aliteracy, books, classic, humor"
4,"Imperfection is beauty, madness is genius and ...",Marilyn Monroe,"be-yourself, inspirational"
5,Try not to become a man of success. Rather bec...,Albert Einstein,"adulthood, success, value"
6,It is better to be hated for what you are than...,André Gide,"life, love"
7,"I have not failed. I've just found 10,000 ways...",Thomas A. Edison,"edison, failure, inspirational, paraphrased"
8,A woman is like a tea bag; you never know how ...,Eleanor Roosevelt,misattributed-eleanor-roosevelt
9,"A day without sunshine is like, you know, night.",Steve Martin,"humor, obvious, simile"


#### Designing schema on PostgreSQL 

* Define the Table Schema in SQL
* Insert Data into PostgreSQL Using psycopg2

In [58]:
# Connecting to PostgreSQL
import psycopg2

In [59]:
conn = psycopg2.connect(
    dbname="quotes_db",      
    user="postgres",         
    password="12345",
    host="127.0.0.1",        
    port="5432"      
)
cur = conn.cursor()

print("Connection successful!")


Connection successful!


##### Inserting the Cleaned Data Into PostgreSQL

In [60]:
# Inserting each quote into the PostgreSQL table
for _, row in df.iterrows():
    cur.execute("""
        INSERT INTO quotes (text, author, tags)
        VALUES (%s, %s, %s)
    """, (row['text'], row['author'], row['tags']))

# Save the changes
conn.commit()

print("Data inserted successfully!")


Data inserted successfully!


In [61]:
cur.close()
conn.close()
print("Connection closed.")

Connection closed.


#### ANALYSIS

* Task 1: Top 5 Most Quoted Authors

In [56]:
# Top 5 most quoted authors
top_authors = df['author'].value_counts().head(5)
print("Top 5 Most Quoted Authors:")
print(top_authors)


Top 5 Most Quoted Authors:
author
Albert Einstein    10
J.K. Rowling        9
Marilyn Monroe      7
Dr. Seuss           6
Mark Twain          6
Name: count, dtype: int64


##### Task 2: Count total quotes and distinct tags

In [62]:
# Total number of quotes
total_quotes = len(df)

# Make sure tags are lists
df['tags'] = df['tags'].apply(lambda x: x.split(', ') if isinstance(x, str) else x)

# Flatten all tags into one list and count unique
all_tags = df['tags'].explode()
distinct_tag_count = all_tags.nunique()

print(f"Total Quotes: {total_quotes}")
print(f"Distinct Tags: {distinct_tag_count}")


Total Quotes: 100
Distinct Tags: 138


##### Task 3. Find Quotes with Tag "life" or "inspirational"

In [63]:
# Filter quotes that contain either 'life' or 'inspirational'
filtered_quotes = df[df['tags'].apply(lambda tags: 'life' in tags or 'inspirational' in tags)]

filtered_quotes[['text', 'author', 'tags']].head()


Unnamed: 0,text,author,tags
2,There are only two ways to live your life. One...,Albert Einstein,"[inspirational, life, live, miracle, miracles]"
4,"Imperfection is beauty, madness is genius and ...",Marilyn Monroe,"[be-yourself, inspirational]"
6,It is better to be hated for what you are than...,André Gide,"[life, love]"
7,"I have not failed. I've just found 10,000 ways...",Thomas A. Edison,"[edison, failure, inspirational, paraphrased]"
10,"This life is what you make it. No matter what,...",Marilyn Monroe,"[friends, heartbreak, inspirational, life, lov..."


##### Task 4. Count the number of quotes per tag hint: explode tags

In [64]:
# Count the number of quotes per tag
df['tags'] = df['tags'].apply(lambda x: x.split(', ') if isinstance(x, str) else x)

# Explode the list of tags so each tag has its own row
exploded_tags = df.explode('tags')
tag_counts = exploded_tags['tags'].value_counts()

print("Top tags by number of quotes:")
print(tag_counts.head())

Top tags by number of quotes:
tags
love             14
inspirational    13
life             13
humor            12
books            11
Name: count, dtype: int64


##### Task 5.  Find Authors With More Than One Quote

In [65]:
multi_quote_authors = df['author'].value_counts()
multi_quote_authors = multi_quote_authors[multi_quote_authors > 1]

print("Authors with more than one quote:")
print(multi_quote_authors)


Authors with more than one quote:
author
Albert Einstein        10
J.K. Rowling            9
Marilyn Monroe          7
Dr. Seuss               6
Mark Twain              6
C.S. Lewis              5
Jane Austen             5
Bob Marley              3
Eleanor Roosevelt       2
Charles Bukowski        2
Suzanne Collins         2
George R.R. Martin      2
Ralph Waldo Emerson     2
Mother Teresa           2
Ernest Hemingway        2
Name: count, dtype: int64


#### Export Results to CSV 

In [67]:
# Export the cleaned quotes
df.to_csv("cleaned_quotes.csv", index=False)

# Export tag counts
tag_counts.to_csv("tag_counts.csv")

# Export authors with more than one quote
multi_quote_authors.to_csv("multi_quote_authors.csv")
