Let's turn this mapped process into code to see how it works!

## Import what we will need

In [2]:
# import time
# import random
# import json
from bs4 import BeautifulSoup
import requests
import pandas as pd
# import math

import logging, sys

## Create utility functions that isolate the work

In [27]:
def get_site_data(session, url_to_scrape, header, logger, timeout=30):
    """
    Download one page and return it parsed as a BeautifulSoup document.

    Parameters
    ----------
    session : object with a ``get(url, headers=..., timeout=...)`` method
        whose return value is usable as a context manager (``main`` passes
        a ``requests.Session``).
    url_to_scrape : str
        Address of the page to download.
    header : dict
        HTTP headers sent with the request.
    logger : logging.Logger
        Receives an INFO message before the request is made.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    BeautifulSoup
        The page parsed with the ``html.parser`` backend.

    Raises
    ------
    Whatever ``raise_for_status()`` raises for a 4xx/5xx answer, and
    whatever ``session.get`` raises on connection problems or timeouts.
    """
    logger.info("Getting data from the site")
    with session.get(url_to_scrape, headers=header, timeout=timeout) as res:
        # Fail here on an HTTP error instead of parsing an error page and
        # crashing later with an unrelated IndexError/AttributeError.
        res.raise_for_status()
        # requests falls back to ISO-8859-1 for text responses that do not
        # declare a charset, which garbles non-ASCII characters such as
        # currency symbols. Hand BeautifulSoup the raw bytes and only force
        # an encoding when the server explicitly declared one; otherwise
        # BeautifulSoup detects it from the document itself.
        content_type = res.headers.get("Content-Type", "")
        declared_encoding = res.encoding if "charset" in content_type.lower() else None
        response = BeautifulSoup(
            res.content, "html.parser", from_encoding=declared_encoding
        )
    return response

def get_and_parse_product_page(product_page_url, session, logger, header):
    """
    Fetch a product page and extract its UPC, title, description and price.

    The page is downloaded exactly once, through ``session`` and with
    ``header``; the title, the description and the details table are all
    read from that single response.

    Parameters
    ----------
    product_page_url : str
        Address of the product page.
    session : object accepted by ``get_site_data``
    logger : logging.Logger
    header : dict
        HTTP headers sent with the request.

    Returns
    -------
    tuple
        ``(UPC, title, description, price including tax)``.

    Raises
    ------
    IndexError / AttributeError / KeyError / ValueError
        If the page does not contain the expected title, ``sub-header``
        block, table, or the ``UPC`` / ``Price (incl. tax)`` rows.
    """
    # Local import so this function does not depend on the file's import cell.
    from io import StringIO

    response = get_site_data(
        session=session,
        url_to_scrape=product_page_url,
        logger=logger,
        header=header)
    logger.info("Parsing product info")
    # get product/book name: the text of <title> before the first "|"
    title = response.title.text.split("|")[0].strip()
    # get product description: first <p> following the first "sub-header" div
    description = response.find_all("div", class_="sub-header")[0].find_next('p').text
    # get product details from the document we already downloaded; passing the
    # URL to read_html would trigger a second request without our headers
    all_tables = pd.read_html(StringIO(str(response)))
    # first table: column 0 holds the labels, column 1 the values
    data_dict = all_tables[0].set_index(0).to_dict()[1]
    # return the data in the format of UPC, title, description, and price
    return (data_dict['UPC'], title, description, data_dict['Price (incl. tax)'],)


def initiate_logging(path_to_save_logs):
    """
    Configure logging to a timestamped file and to stdout, and return a logger.

    Parameters
    ----------
    path_to_save_logs : str
        Prefix for the log file. The current time formatted as
        ``YYYY-MM-DD_HH-MM`` and the ``.log`` extension are appended to it,
        so pass a directory path ending in a separator (e.g. ``"logs/"``).
        The directory part is created if it does not exist.

    Returns
    -------
    logging.Logger
        The logger named after this module (``__name__``).

    Notes
    -----
    ``logging.basicConfig`` only configures the root logger when it has no
    handlers yet, so repeated calls keep the configuration of the first one.
    """
    # Imported here because the module-level imports do not bring these
    # names into scope.
    import os
    import time

    log_file = f"{path_to_save_logs}{time.strftime('%Y-%m-%d_%H-%M')}.log"
    # FileHandler cannot create missing directories itself.
    log_dir = os.path.dirname(log_file)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler(log_file),
            logging.StreamHandler(sys.stdout),
        ],
    )
    logger = logging.getLogger(__name__)
    return logger



## Create a main function for orchestrating the logic

In [28]:
def main(user_agent_string, email, path_to_save_logs, product_url_to_scape):
    """
    Scrape a single product page and return its key fields.

    Sets up logging, builds the request headers, opens an HTTP session and
    delegates the download and parsing to ``get_and_parse_product_page``.

    Parameters
    ----------
    user_agent_string : str
        Value sent as the ``User-Agent`` header.
    email : str
        Value sent as the ``email`` header.
    path_to_save_logs : str
        Log file prefix handed to ``initiate_logging``.
    product_url_to_scape : str
        Address of the product page to scrape.

    Returns
    -------
    The value returned by ``get_and_parse_product_page``.
    """
    logger = initiate_logging(path_to_save_logs)
    logger.info("Starting the scrape of the following page: %s", product_url_to_scape)

    heads = {
        'User-Agent': user_agent_string,
        'email': email,
        'Accept-Language': 'en-US, en;q=0.5',
    }
    # Use the session as a context manager so its pooled connections are
    # released even if the download or the parsing raises.
    with requests.Session() as session:
        return get_and_parse_product_page(
            product_page_url=product_url_to_scape,
            session=session,
            logger=logger,
            header=heads)
    

## Now let's try it!

We can try it on two products and see what we get.

In [29]:
# First sample product; the returned tuple is displayed as the cell output.
main(
    user_agent_string="ESCAP Webscraping RAP demo scraper 1.0",
    email="example@email.com",
    path_to_save_logs="../data/logs/",
    product_url_to_scape="https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html",
)

2024-09-16 23:21:37,233 - __main__ - INFO - Starting the scrape of the following page: https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html
2024-09-16 23:21:37,234 - __main__ - INFO - Getting data from the site
2024-09-16 23:21:37,406 - __main__ - INFO - Parsing product info


('a897fe39b1053632',
 'A Light in the Attic',
 "It's hard to imagine a world without A Light in the Attic. This now-classic collection of poetry and drawings from Shel Silverstein celebrates its 20th anniversary with this special edition. Silverstein's humorous and creative verse can amuse the dowdiest of readers. Lemon-faced adults and fidgety kids sit still and read these rhythmic words and laugh and smile and love th It's hard to imagine a world without A Light in the Attic. This now-classic collection of poetry and drawings from Shel Silverstein celebrates its 20th anniversary with this special edition. Silverstein's humorous and creative verse can amuse the dowdiest of readers. Lemon-faced adults and fidgety kids sit still and read these rhythmic words and laugh and smile and love that Silverstein. Need proof of his genius? RockabyeRockabye baby, in the treetopDon't you know a treetopIs no safe place to rock?And who put you up there,And your cradle, too?Baby, I think someone down 

In [30]:
# Second sample product, same settings; only the product URL differs.
main(
    user_agent_string="ESCAP Webscraping RAP demo scraper 1.0",
    email="example@email.com",
    path_to_save_logs="../data/logs/",
    product_url_to_scape="https://books.toscrape.com/catalogue/tipping-the-velvet_999/index.html",
)

2024-09-16 23:22:19,137 - __main__ - INFO - Starting the scrape of the following page: https://books.toscrape.com/catalogue/tipping-the-velvet_999/index.html
2024-09-16 23:22:19,137 - __main__ - INFO - Getting data from the site
2024-09-16 23:22:19,300 - __main__ - INFO - Parsing product info


('90fa61229261140a',
 'Tipping the Velvet',
 '"Erotic and absorbing...Written with starling power."--"The New York Times Book Review " Nan King, an oyster girl, is captivated by the music hall phenomenon Kitty Butler, a male impersonator extraordinaire treading the boards in Canterbury. Through a friend at the box office, Nan manages to visit all her shows and finally meet her heroine. Soon after, she becomes Kitty\'s "Erotic and absorbing...Written with starling power."--"The New York Times Book Review " Nan King, an oyster girl, is captivated by the music hall phenomenon Kitty Butler, a male impersonator extraordinaire treading the boards in Canterbury. Through a friend at the box office, Nan manages to visit all her shows and finally meet her heroine. Soon after, she becomes Kitty\'s dresser and the two head for the bright lights of Leicester Square where they begin a glittering career as music-hall stars in an all-singing and dancing double act. At the same time, behind closed door

Success! We now have a good way to scrape the site!