In [49]:
import csv
from urllib.parse import quote_plus

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

from product import Product

# Chrome options passed to webdriver.Chrome by search(): start the browser
# with the "--headless" and "--disable-gpu" command-line flags.
options = Options()
options.add_argument("--headless")
options.add_argument("--disable-gpu")

In [None]:
# Exploratory walk-through: type a search into the amazon.de search box,
# snapshot the result page and pick the fields of one result apart by hand.
driver = webdriver.Chrome()
try:
    driver.get("https://www.amazon.de")
    assert "Amazon" in driver.title
    elem = driver.find_element(By.NAME, "field-keywords")
    elem.clear()
    elem.send_keys("Höhenverstellbarer Tisch")
    elem.send_keys(Keys.RETURN)
    # Parse a static snapshot; the browser is not needed after this line.
    soup = BeautifulSoup(driver.page_source, 'html.parser')
finally:
    # Always end the browser session, even when the assert or a lookup above
    # fails. quit() shuts the whole session down, close() only closes the window.
    driver.quit()

results = soup.find_all('div', {'data-component-type':'s-search-result'})
# Arbitrary sample result; raises IndexError when fewer than 11 results came back.
item = results[10]
atag = item.h2.a
description = atag.text.strip()
url = "https://www.amazon.de" + atag.get('href')
price_parent = item.find('span', 'a-price')
price = price_parent.find('span', 'a-offscreen').text

In [50]:
def search(searchterm: str):
    """
    Open the amazon.de search page for `searchterm` and return the result tags.

    Args:
        searchterm: Free-text query. It is URL-encoded with quote_plus, so
            spaces become '+' and characters such as '&' or '#' cannot break
            out of the `k` query parameter.

    Returns:
        The bs4 result set of all <div data-component-type="s-search-result">
        elements found in the page source.
    """
    # init webdriver
    driver = webdriver.Chrome(options=options)
    try:
        # open amazon.de search results and snapshot the page
        driver.get(f"https://www.amazon.de/s?k={quote_plus(searchterm)}")
        soup = BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        # End the browser session even if loading or parsing raised.
        driver.quit()

    results = soup.find_all('div', {'data-component-type':'s-search-result'})
    print(f"Found {len(results)} results for {searchterm}")
    return results

def create_product(product) -> "Product | None":
    """
    Takes a bs4.element.Tag and returns a Product object.

    Args:
        product: One search-result element as returned by search().

    Returns:
        A Product built from the name, image URL, star text, ratings text,
        price text and absolute URL read from the element, or None when any
        of the expected sub-elements is missing. The failure is printed.
    """
    try:
        name = product.img.get('alt')
        image = product.find('img', {'class':'s-image'}).get('src')
        stars = product.i.text
        # NOTE(review): in the recorded run this selector returned the product
        # title instead of a review count; it probably needs a narrower match.
        # Confirm against the live markup.
        ratings = product.find('span', {'class':'a-size-base'}).text
        price = product.find('span', {'class':'a-price'}).find('span', {'class':'a-offscreen'}).text
        url = "https://www.amazon.de" + product.h2.a.get('href')
    except (AttributeError, TypeError) as e:
        # AttributeError: a lookup returned None (element not present).
        # TypeError: the link has no href, so str + None failed.
        print("Error:", str(e), product)
        return None

    created = Product(
        name,
        image,
        stars,
        ratings,
        price,
        url
    )
    print("Created product:", created)
    return created
    
def filter_four_stars_and_more(results: list) -> list:
    """
    Intended to keep only the search results rated higher than 4 stars.

    TODO: not implemented yet. The body is a placeholder, so every call
    currently returns None regardless of `results`.
    """
    return None

In [51]:
# Fetch the search results for one term and run create_product over every hit.
results = search("Küchenlampe")
products = [create_product(result) for result in results]

Found 48 results for Küchenlampe
Created product: Name: Deckenlampe LED Deckenleuchte 36W, SUNZOS 4000K 4250LM Deckenlampe Led Panel für Lampe Wohnzimmer, Schlafzimmer, Küchenlam..., Image: https://m.media-amazon.com/images/I/51oDrHMPqCS._AC_UL320_.jpg, Stars: 4,5 von 5 Sternen, Ratings: Deckenlampe LED Deckenleuchte 36W, SUNZOS 4000K 4250LM Deckenlampe Led Panel für Lampe Wohnzimmer, Schlafzimmer, Küchenlampe, Flur, Balkon, Esszimmer, Neutralweiß Deckenleuchte Led / 23 x 4 cm, price: 22,99 €, URL: https://www.amazon.de/Deckenleuchte-SUNZOS-Deckenlampe-Schlafzimmer-Neutralwei%C3%9F/dp/B0872LYB9S/ref=sr_1_1?keywords=K%C3%BCchenlampe&qid=1664353635&qu=eyJxc2MiOiI4LjU4IiwicXNhIjoiOC4yMiIsInFzcCI6IjcuNDQifQ%3D%3D&sr=8-1
Created product: Name: LED Deckenleuchte 24W 2400LM Deckenlampe LED 3000k/4000k/6000k Einstellbar Küchenlampe, Wohnzimmerlampe Badezimmer Lampe, ..., Image: https://m.media-amazon.com/images/I/61Ak6VYkTdL._AC_UL320_.jpg, Stars: 4,4 von 5 Sternen, Ratings: LED Deckenleuchte 

In [52]:
# Spot check: print the first entry of `products`.
print(products[0])

Name: Deckenlampe LED Deckenleuchte 36W, SUNZOS 4000K 4250LM Deckenlampe Led Panel für Lampe Wohnzimmer, Schlafzimmer, Küchenlam..., Image: https://m.media-amazon.com/images/I/51oDrHMPqCS._AC_UL320_.jpg, Stars: 4,5 von 5 Sternen, Ratings: Deckenlampe LED Deckenleuchte 36W, SUNZOS 4000K 4250LM Deckenlampe Led Panel für Lampe Wohnzimmer, Schlafzimmer, Küchenlampe, Flur, Balkon, Esszimmer, Neutralweiß Deckenleuchte Led / 23 x 4 cm, price: 22,99 €, URL: https://www.amazon.de/Deckenleuchte-SUNZOS-Deckenlampe-Schlafzimmer-Neutralwei%C3%9F/dp/B0872LYB9S/ref=sr_1_1?keywords=K%C3%BCchenlampe&qid=1664353635&qu=eyJxc2MiOiI4LjU4IiwicXNhIjoiOC4yMiIsInFzcCI6IjcuNDQifQ%3D%3D&sr=8-1


In [None]:
# Text of the first <i> tag (used as the rating) and of the first
# <span class="a-size-base"> (used as the review count) of the sample `item`.
# NOTE(review): relies on `item` already being defined by an earlier cell;
# on a fresh kernel this cell raises NameError unless that cell ran first.
rating = item.i.text
review_count = item.find('span', {'class': 'a-size-base'}).text


In [None]:
## Generalize the Pattern

In [None]:
def extract_record(item):
    """Extract and return data from a single search-result record.

    Args:
        item: One search-result element (a bs4 Tag in this notebook).

    Returns:
        A tuple ``(description, price, rating, review_count, url)``.
        ``rating`` and ``review_count`` are empty strings when the item has
        no rating markup. Returns ``None`` when the item has no title link
        or no price, so callers can skip it with a truthiness check.
    """
    # description and url
    heading = item.h2
    atag = heading.a if heading is not None else None
    if atag is None:
        return None
    href = atag.get('href')
    if href is None:
        return None
    description = atag.text.strip()
    url = "https://www.amazon.de" + href

    # price: items without one are skipped entirely
    price_parent = item.find('span', 'a-price')
    price_tag = price_parent.find('span', 'a-offscreen') if price_parent is not None else None
    if price_tag is None:
        return None
    price = price_tag.text

    # rating and review count: optional, so fall back to empty strings
    # instead of dropping the record
    try:
        rating = item.i.text
        review_count = item.find('span', {'class': 'a-size-base'}).text
    except AttributeError:
        rating = ""
        review_count = ""

    return (description, price, rating, review_count, url)


In [None]:
# Run extract_record over every search-result container in `soup` and keep
# only the truthy results (extract_record returns None for items it skips).
# NOTE(review): `soup` must already exist from an earlier cell.
records = []
results = soup.find_all('div', {'data-component-type':'s-search-result'})

for item in results:
    record = extract_record(item)
    if record:
        records.append(record)

In [None]:
# Number of records that were kept.
print(len(records))

In [None]:
# Index 1 of each record tuple is the price field built by extract_record.
for record in records:
    print(record[1])

In [None]:
## Getting to next page