### Web Scraping, Data Transformation, and Loading using Python (ETL Process)

In [21]:
import sqlite3
from datetime import datetime
from pathlib import Path

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Step 1: Scraping Data (Extract) from multiple pages
#
# Fetches the paginated quote listing and collects one row per quote into
# `quotes_data` as [quote text, author name, comma-separated tags].
base_url = 'http://quotes.toscrape.com/page/{}/'
request_timeout = 10  # Seconds; without a timeout requests.get() can block forever on a stalled server
quotes_data = []

# Loop through the first 5 pages (you can change this to scrape more pages)
for page_num in range(1, 6):
    url = base_url.format(page_num)

    # Connection-level failures (DNS, refused connection, timeout) are reported
    # and skipped exactly like a non-200 response, so one bad page does not
    # abort the whole run.
    try:
        response = requests.get(url, timeout=request_timeout)
    except requests.RequestException as exc:
        print(f"Failed to retrieve page {page_num}: {exc}")
        continue

    if response.status_code == 200:
        print(f"Successfully fetched page {page_num}")
    else:
        print(f"Failed to retrieve page {page_num}, status code: {response.status_code}")
        continue

    # Parse the page content with BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')

    # Each quote on the page is wrapped in <div class="quote">
    quotes = soup.find_all('div', class_='quote')

    for quote in quotes:
        text = quote.find('span', class_='text').get_text()  # Extract quote text
        author = quote.find('small', class_='author').get_text()  # Extract author name
        tags = [tag.get_text() for tag in quote.find_all('a', class_='tag')]  # Extract tags

        # Tags are flattened to a single string so the row fits a flat table/CSV
        quotes_data.append([text, author, ', '.join(tags)])

# Step 2: Transform Data (tabulate the scraped rows as a DataFrame)
df = pd.DataFrame(
    data=quotes_data,
    columns=['Quote', 'Author', 'Tags'],
)

# Step 3: Load Data (Save to SQLite and CSV)

# Make sure the output directory exists; on a fresh checkout sqlite3.connect()
# would otherwise fail with "unable to open database file".
output_dir = Path('./output')
output_dir.mkdir(parents=True, exist_ok=True)

# Save data to SQLite database (the 'quotes' table is replaced on every run)
conn = sqlite3.connect(output_dir / 'quotes_data.db')
try:
    df.to_sql('quotes', conn, if_exists='replace', index=False)
    print("Data saved to SQLite database.")
finally:
    # Close SQLite connection even if the write above raised
    conn.close()

# Save data to CSV
df.to_csv(output_dir / 'quotes_data.csv', index=False)
print("Data saved to CSV file.")

print("ETL Process Completed!")


Successfully fetched page 1
Successfully fetched page 2
Successfully fetched page 3
Successfully fetched page 4
Successfully fetched page 5
Data saved to SQLite database.
Data saved to CSV file.
ETL Process Completed!
