# Web Scraping Project: Quotes to Scrape

## Objective
Scrape the quotes, author names, and tags from the first page of the Quotes to Scrape website (https://quotes.toscrape.com) and save them to a CSV file.


In [23]:
# Use the requests library to download web pages
import requests

# Define the URL of the first page of quotes
url = "https://quotes.toscrape.com/page/1/"

# Download the web page.
# - timeout: requests waits indefinitely by default, so a stalled connection
#   would hang this cell; give up after 10 seconds instead.
# - raise_for_status(): fail here on an HTTP 4xx/5xx response rather than
#   handing an error page to the parsing step as if it were the quotes page.
response = requests.get(url, timeout=10)
response.raise_for_status()
html_content = response.text

In [24]:
# Use Beautiful Soup to parse and extract information
from bs4 import BeautifulSoup

# Build a parse tree from the downloaded HTML with Python's built-in parser
soup = BeautifulSoup(html_content, 'html.parser')

# For each CSS class, collect the whitespace-trimmed text of every matching
# element on the page: authors, quote bodies, then tags.
# NOTE: '.tag' is matched across the whole page, so `tags` is one flat list of
# individual tags rather than one entry per quote.
names, quotes, tags = (
    [element.text.strip() for element in soup.select(css_class)]
    for css_class in ('.author', '.text', '.tag')
)



In [25]:
import pandas as pd

# Helper that writes the scraped columns to a CSV file
def save_to_csv(names, quotes, tags, filename='quotes.csv'):
    """Save parallel lists of author names, quotes and tags as a CSV file.

    The three lists are treated as the columns of one table, so they must all
    have the same number of entries. When they do not, nothing is written and
    the individual lengths are printed instead.

    Args:
        names: Values for the 'Name' column.
        quotes: Values for the 'Quote' column.
        tags: Values for the 'Tag' column.
        filename: Path of the CSV file to write.

    Returns:
        None. The outcome is reported through printed messages.
    """
    lengths_match = len(names) == len(quotes) == len(tags)

    # Guard clause: report the mismatch and stop before touching the disk.
    if not lengths_match:
        print("Error: All lists must have the same length.")
        print(f"Length of 'names': {len(names)}")
        print(f"Length of 'quotes': {len(quotes)}")
        print(f"Length of 'tags': {len(tags)}")
        return

    # Column order in the file follows the order of the mapping's keys.
    columns = {'Name': names, 'Quote': quotes, 'Tag': tags}
    table = pd.DataFrame(columns)

    # index=False keeps the DataFrame's row index out of the file.
    table.to_csv(filename, index=False)
    print(f"Data saved to {filename}")

# First attempt at saving: pass the three lists scraped above as-is.
# `tags` was collected from every `.tag` element on the page, so it is not
# one entry per quote and the length check in save_to_csv rejects the input.
save_to_csv(names, quotes, tags)


Error: All lists must have the same length.
Length of 'names': 10
Length of 'quotes': 10
Length of 'tags': 40


In [26]:
def extract_data(html_content):
    """Extract author names, quote texts and per-quote tag strings from HTML.

    Args:
        html_content: HTML markup of a page, as a string.

    Returns:
        A tuple ``(names, quotes, tags)`` of three lists of strings:
        the stripped text of every ``.author`` element, the stripped text of
        every ``.text`` element, and, for every ``.tags`` container, the
        stripped text of its ``.tag`` elements joined with ``', '``.
    """
    page = BeautifulSoup(html_content, 'html.parser')

    names = [node.text.strip() for node in page.select('.author')]
    quotes = [node.text.strip() for node in page.select('.text')]

    # Collapse each `.tags` container into a single comma-separated string so
    # that the result has one entry per container instead of one per tag.
    tags = []
    for container in page.select('.tags'):
        labels = [link.text.strip() for link in container.select('.tag')]
        tags.append(', '.join(labels))

    return names, quotes, tags


In [27]:
# Re-extract with extract_data, which joins each quote's tags into a single
# string, and rebind names/quotes/tags to the new lists before saving.
names, quotes, tags = extract_data(html_content)
save_to_csv(names, quotes, tags)

Data saved to quotes.csv


In [28]:
# Read the CSV file back using Pandas to check what was written
df = pd.read_csv('quotes.csv')
# Preview only the first rows instead of printing the whole table
print(df.head())


              Name                                              Quote  \
0  Albert Einstein  “The world as we have created it is a process ...   
1     J.K. Rowling  “It is our choices, Harry, that show what we t...   
2  Albert Einstein  “There are only two ways to live your life. On...   
3      Jane Austen  “The person, be it gentleman or lady, who has ...   
4   Marilyn Monroe  “Imperfection is beauty, madness is genius and...   

                                            Tag  
0        change, deep-thoughts, thinking, world  
1                            abilities, choices  
2  inspirational, life, live, miracle, miracles  
3              aliteracy, books, classic, humor  
4                    be-yourself, inspirational  


## Conclusion

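In this project, we successfully scraped the author names, quotes, and tags from the first page of the Quotes to Scrape website and saved them to a CSV file. The project demonstrates web scraping in Python, using Requests to download the page, Beautiful Soup to parse it, and pandas to write the CSV and read it back.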
