In [3]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime
import logging

# Browser-like request headers passed to requests.get() in get_page_html.
# The two longest values are assembled from adjacent string literals purely
# for readability; the resulting strings are unchanged.
_USER_AGENT = (
    "Mozilla/5.0 (X11; CrOS x86_64 8172.45.0) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/51.0.2704.64 Safari/537.36"
)
_ACCEPT = (
    "text/html,application/xhtml+xml,application/xml;q=0.9,"
    "image/webp,image/apng,*/*;q=0.8,"
    "application/signed-exchange;v=b3;q=0.9"
)

headers = {
    "authority": "www.amazon.com",
    # Ask for a fresh copy rather than a cached one.
    "pragma": "no-cache",
    "cache-control": "no-cache",
    "dnt": "1",
    "upgrade-insecure-requests": "1",
    "user-agent": _USER_AGENT,
    "accept": _ACCEPT,
    "sec-fetch-site": "none",
    "sec-fetch-mode": "navigate",
    "sec-fetch-dest": "document",
    "accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
}

# Every review-listing URL to scrape for the single product B01N09ZDEI.
# The first entry is the unpaginated landing URL; the remaining entries carry
# an explicit pageNumber from 2 through _LAST_REVIEW_PAGE.
_REVIEWS_BASE_URL = (
    "https://www.amazon.com/Soucolor-72-Color-Coloring-Crafting-72-Colors"
    "/product-reviews/B01N09ZDEI"
)
_LAST_REVIEW_PAGE = 146  # inclusive upper bound of the paginated URLs

URLS = [
    f"{_REVIEWS_BASE_URL}/ref=cm_cr_dp_d_show_all_btm"
    "?ie=UTF8&reviewerType=all_reviews"
]
# A generator keeps the page counter out of module scope.
URLS.extend(
    f"{_REVIEWS_BASE_URL}/ref=cm_cr_arp_d_paging_btm_next_{page}"
    f"?ie=UTF8&reviewerType=all_reviews&pageNumber={page}"
    for page in range(2, _LAST_REVIEW_PAGE + 1)
)

def get_page_html(page_url: str, timeout: float = 30.0) -> str:
    """Fetch ``page_url`` and return the response body as text.

    The module-level ``headers`` dict is sent with the request.

    Args:
        page_url: URL to GET.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout, ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The decoded response body. A non-success HTTP status is logged as a
        warning, but the body is still returned.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            ``timeout`` expires.
    """
    resp = requests.get(page_url, headers=headers, timeout=timeout)
    if not resp.ok:
        # Surface error responses in the log; the caller still receives the
        # body exactly as it did before.
        logging.warning("HTTP %s for %s", resp.status_code, page_url)
    return resp.text


def get_reviews_from_html(page_html: str) -> list:
    """Parse a page and return the ``<div>`` elements treated as reviews.

    Args:
        page_html: Raw HTML of a page.

    Returns:
        The list-like result of ``find_all`` for ``<div>`` tags selected by
        the class string ``"a-section celwidget"``. It is empty when nothing
        matches.
    """
    soup = BeautifulSoup(page_html, "lxml")
    return soup.find_all("div", {"class": "a-section celwidget"})


def get_review_date(soup_object: BeautifulSoup) -> str:
    """Return the text of the first ``review-date`` span, unmodified.

    Args:
        soup_object: Parsed element to search within.

    Returns:
        The span's text exactly as ``get_text()`` yields it (not stripped).

    Raises:
        AttributeError: If no matching span is found, because ``find`` then
            returns ``None``.
    """
    date_node = soup_object.find("span", {"class": "review-date"})
    return date_node.get_text()


def get_review_text(soup_object: BeautifulSoup) -> str:
    """Return the review body text with surrounding whitespace removed.

    Looks up the first ``<span>`` selected by the class string
    ``"a-size-base review-text review-text-content"`` inside ``soup_object``.

    Raises:
        AttributeError: If no such span is found (``find`` returns ``None``).
    """
    body_classes = "a-size-base review-text review-text-content"
    body_node = soup_object.find("span", {"class": body_classes})
    raw_text = body_node.get_text()
    return raw_text.strip()


def get_review_header(soup_object: BeautifulSoup) -> str:
    """Return the review title text with surrounding whitespace removed.

    The title is taken from the first ``<a>`` element selected by the full
    class string used below.

    Raises:
        AttributeError: If no such link is found (``find`` returns ``None``).
    """
    title_classes = (
        "a-size-base a-link-normal review-title "
        "a-color-base review-title-content a-text-bold"
    )
    title_node = soup_object.find("a", {"class": title_classes})
    return title_node.get_text().strip()


def get_number_stars(soup_object: BeautifulSoup) -> str:
    """Return the stripped text of the first ``a-icon-alt`` span.

    The value is returned as the raw label string; no numeric parsing is
    performed here.

    Raises:
        AttributeError: If no such span is found (``find`` returns ``None``).
    """
    rating_node = soup_object.find("span", {"class": "a-icon-alt"})
    rating_label = rating_node.get_text()
    return rating_label.strip()

def get_name(soup_object: BeautifulSoup) -> str:
    """Return the stripped text of the first ``a-profile-name`` span.

    Raises:
        AttributeError: If no such span is found (``find`` returns ``None``).
    """
    profile_node = soup_object.find("span", {"class": "a-profile-name"})
    reviewer_name = profile_node.get_text()
    return reviewer_name.strip()

# def get_product_name(soup_object: BeautifulSoup) -> str:
#     product = soup_object.find(
#         "a", {"class": "a-size-mini a-link-normal a-color-secondary"}
#     ).get_text()
#     return product.strip()


def orchestrate_data_gathering(single_review: BeautifulSoup) -> dict:
    """Collect every extracted field of one review into a single record.

    Args:
        single_review: Parsed element for one review.

    Returns:
        A dict with the keys ``review_name``, ``review_title``,
        ``review_text``, ``review_date`` and ``review_stars``, in that order.
        Each value is produced by the matching extractor function.
    """
    # (output key, extractor) pairs; the order fixes both the key order of
    # the record and the order in which the extractors run.
    field_extractors = (
        ("review_name", get_name),
        ("review_title", get_review_header),
        ("review_text", get_review_text),
        ("review_date", get_review_date),
        ("review_stars", get_number_stars),
        # "review_flavor" (get_product_name) is currently disabled.
    )
    return {key: extract(single_review) for key, extract in field_extractors}


if __name__ == '__main__':
    # Entry point: fetch every URL in URLS, extract one record per review
    # element, and write all records to a CSV file.
    logging.basicConfig(level=logging.INFO)
    all_results = []

    for u in URLS:
        logging.info(u)
        html = get_page_html(u)
        reviews = get_reviews_from_html(html)
        for rev in reviews:
            try:
                data = orchestrate_data_gathering(rev)
            except AttributeError:
                # An extractor's find() returned None because this element
                # lacks an expected child. Skip it rather than abort the run
                # and lose every record collected so far.
                logging.warning("skipping unparseable review block on %s", u)
                continue
            all_results.append(data)

    out = pd.DataFrame.from_records(all_results)
    # out.shape[0] is the number of rows, i.e. the number of reviews collected.
    logging.info("%s Is the shape of the dataframe", out.shape[0])
    save_name = "Amazon-com ART Webscraping.csv"
    logging.info("saving to %s", save_name)
    out.to_csv(save_name)
    logging.info('Done yayy')

INFO:root:https://www.amazon.com/Soucolor-72-Color-Coloring-Crafting-72-Colors/product-reviews/B01N09ZDEI/ref=cm_cr_dp_d_show_all_btm?ie=UTF8&reviewerType=all_reviews
INFO:root:https://www.amazon.com/Soucolor-72-Color-Coloring-Crafting-72-Colors/product-reviews/B01N09ZDEI/ref=cm_cr_arp_d_paging_btm_next_2?ie=UTF8&reviewerType=all_reviews&pageNumber=2
INFO:root:https://www.amazon.com/Soucolor-72-Color-Coloring-Crafting-72-Colors/product-reviews/B01N09ZDEI/ref=cm_cr_arp_d_paging_btm_next_3?ie=UTF8&reviewerType=all_reviews&pageNumber=3
INFO:root:https://www.amazon.com/Soucolor-72-Color-Coloring-Crafting-72-Colors/product-reviews/B01N09ZDEI/ref=cm_cr_arp_d_paging_btm_next_4?ie=UTF8&reviewerType=all_reviews&pageNumber=4
INFO:root:https://www.amazon.com/Soucolor-72-Color-Coloring-Crafting-72-Colors/product-reviews/B01N09ZDEI/ref=cm_cr_arp_d_paging_btm_next_5?ie=UTF8&reviewerType=all_reviews&pageNumber=5
INFO:root:https://www.amazon.com/Soucolor-72-Color-Coloring-Crafting-72-Colors/product-re

INFO:root:https://www.amazon.com/Soucolor-72-Color-Coloring-Crafting-72-Colors/product-reviews/B01N09ZDEI/ref=cm_cr_arp_d_paging_btm_next_45?ie=UTF8&reviewerType=all_reviews&pageNumber=45
INFO:root:https://www.amazon.com/Soucolor-72-Color-Coloring-Crafting-72-Colors/product-reviews/B01N09ZDEI/ref=cm_cr_arp_d_paging_btm_next_46?ie=UTF8&reviewerType=all_reviews&pageNumber=46
INFO:root:https://www.amazon.com/Soucolor-72-Color-Coloring-Crafting-72-Colors/product-reviews/B01N09ZDEI/ref=cm_cr_arp_d_paging_btm_next_47?ie=UTF8&reviewerType=all_reviews&pageNumber=47
INFO:root:https://www.amazon.com/Soucolor-72-Color-Coloring-Crafting-72-Colors/product-reviews/B01N09ZDEI/ref=cm_cr_arp_d_paging_btm_next_48?ie=UTF8&reviewerType=all_reviews&pageNumber=48
INFO:root:https://www.amazon.com/Soucolor-72-Color-Coloring-Crafting-72-Colors/product-reviews/B01N09ZDEI/ref=cm_cr_arp_d_paging_btm_next_49?ie=UTF8&reviewerType=all_reviews&pageNumber=49
INFO:root:https://www.amazon.com/Soucolor-72-Color-Coloring-

INFO:root:https://www.amazon.com/Soucolor-72-Color-Coloring-Crafting-72-Colors/product-reviews/B01N09ZDEI/ref=cm_cr_arp_d_paging_btm_next_89?ie=UTF8&reviewerType=all_reviews&pageNumber=89
INFO:root:https://www.amazon.com/Soucolor-72-Color-Coloring-Crafting-72-Colors/product-reviews/B01N09ZDEI/ref=cm_cr_arp_d_paging_btm_next_90?ie=UTF8&reviewerType=all_reviews&pageNumber=90
INFO:root:https://www.amazon.com/Soucolor-72-Color-Coloring-Crafting-72-Colors/product-reviews/B01N09ZDEI/ref=cm_cr_arp_d_paging_btm_next_91?ie=UTF8&reviewerType=all_reviews&pageNumber=91
INFO:root:https://www.amazon.com/Soucolor-72-Color-Coloring-Crafting-72-Colors/product-reviews/B01N09ZDEI/ref=cm_cr_arp_d_paging_btm_next_92?ie=UTF8&reviewerType=all_reviews&pageNumber=92
INFO:root:https://www.amazon.com/Soucolor-72-Color-Coloring-Crafting-72-Colors/product-reviews/B01N09ZDEI/ref=cm_cr_arp_d_paging_btm_next_93?ie=UTF8&reviewerType=all_reviews&pageNumber=93
INFO:root:https://www.amazon.com/Soucolor-72-Color-Coloring-

INFO:root:https://www.amazon.com/Soucolor-72-Color-Coloring-Crafting-72-Colors/product-reviews/B01N09ZDEI/ref=cm_cr_arp_d_paging_btm_next_133?ie=UTF8&reviewerType=all_reviews&pageNumber=133
INFO:root:https://www.amazon.com/Soucolor-72-Color-Coloring-Crafting-72-Colors/product-reviews/B01N09ZDEI/ref=cm_cr_arp_d_paging_btm_next_134?ie=UTF8&reviewerType=all_reviews&pageNumber=134
INFO:root:https://www.amazon.com/Soucolor-72-Color-Coloring-Crafting-72-Colors/product-reviews/B01N09ZDEI/ref=cm_cr_arp_d_paging_btm_next_135?ie=UTF8&reviewerType=all_reviews&pageNumber=135
INFO:root:https://www.amazon.com/Soucolor-72-Color-Coloring-Crafting-72-Colors/product-reviews/B01N09ZDEI/ref=cm_cr_arp_d_paging_btm_next_136?ie=UTF8&reviewerType=all_reviews&pageNumber=136
INFO:root:https://www.amazon.com/Soucolor-72-Color-Coloring-Crafting-72-Colors/product-reviews/B01N09ZDEI/ref=cm_cr_arp_d_paging_btm_next_137?ie=UTF8&reviewerType=all_reviews&pageNumber=137
INFO:root:https://www.amazon.com/Soucolor-72-Color