# <b>Web Scraping To Data ETL</b>

In [30]:
# Importing necessary libraries
import requests
from bs4 import BeautifulSoup
import sqlite3

## <b>Extract Data</b>

In [31]:
def scrape_data(url, timeout=10):
    """Scrape book titles and prices from a product listing page.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, ``requests.get`` can block indefinitely.

    Returns:
        A list of ``(title, price)`` tuples, one per
        ``<article class="product_pod">`` element, in page order. ``title``
        is the ``title`` attribute of the link inside the article's
        ``<h3>``; ``price`` is the stripped text of its
        ``<p class="price_color">`` element.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status.
    """
    response = requests.get(url, timeout=timeout)

    # Fail loudly on an error response: parsing an error page would yield an
    # empty list, which a later load step would then write over good data.
    response.raise_for_status()

    # Hand BeautifulSoup the raw bytes so it can detect the page encoding
    # itself. `response.text` relies on the charset Requests derives from the
    # HTTP headers, which can mis-decode non-ASCII characters such as the
    # currency symbol in the price.
    soup = BeautifulSoup(response.content, 'html.parser')

    return [
        (
            book.h3.a['title'],
            book.find('p', class_='price_color').get_text(strip=True),
        )
        for book in soup.find_all('article', class_='product_pod')
    ]

## <b>Transform Data</b>

In [32]:
def transform_data(raw_data):
    """Return a new list holding each raw record as a ``(title, price)`` tuple.

    Every item of ``raw_data`` is unpacked into exactly two values, so an
    item of any other length raises ``ValueError``. The values themselves
    are passed through unchanged.
    """
    return [(title, price) for title, price in raw_data]

## <b>Load Data</b>

In [33]:
def load_data(data, db_path='books.db'):
    """Replace the ``books`` table in a SQLite database with ``data``.

    Any existing ``books`` table is dropped and recreated, so each call
    leaves the table containing only the rows passed in.

    Args:
        data: Iterable of ``(title, price)`` pairs to insert.
        db_path: Path of the SQLite database file. Defaults to
            ``'books.db'`` in the current working directory.
    """
    conn = sqlite3.connect(db_path)
    try:
        # Used as a context manager, the connection commits when the block
        # succeeds and rolls back the open transaction when it raises.
        with conn:
            # Drop the table if it already exists
            conn.execute('DROP TABLE IF EXISTS books')

            # Create a fresh table
            conn.execute('''
                CREATE TABLE books (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    title TEXT,
                    price TEXT
                )
            ''')

            # Insert the data into the table
            conn.executemany(
                'INSERT INTO books (title, price) VALUES (?, ?)', data
            )
    finally:
        # `with conn` does not close the connection; always release it,
        # even when one of the statements above fails.
        conn.close()

## <b>Main Function</b>

In [34]:
def main():
    """Run the pipeline: scrape the listing page, transform, load into SQLite."""
    # Page to scrape
    source_url = 'https://books.toscrape.com/index.html'
    records = transform_data(scrape_data(source_url))
    load_data(records)


if __name__ == '__main__':
    main()