In [43]:
import pandas as pd
import bs4 as bs
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
import re
import time

# Globale Konstanten
product_item_class = "styles__StyledCardWrapper-sc-z8946b-0 lkvTCP h-padding-a-tight"
button_next_class = "BaseButton-sc-3v3oog-0 ButtonWithArrow__StyledButton-sc-nijk55-0 xuSzc jkTVyV Pagination__NextButtonWithArrow-sc-1nywsxy-1 kayfPw"
url = "https://www.target.com/c/tvs-home-theater-electronics/all-deals/-/N-5xtdjZakkos?Nao"

In [64]:
# Aktuelle URL f+r Electronics/TV & Home Theater/All Deals: TVs (03.07.2022)
browser = webdriver.Chrome()  # https://chromedriver.chromium.org/downloads
browser.get(url)
browser.maximize_window()

In [65]:
results = {
    "name": [],
    "brand": [],
    "deal_price": [],
    "regular_price": [],
    "product_highlights": [],
    "avg_overall_rating": [],
    "amount_of_ratings": []
}

In [66]:
def is_sales_deal(product_div):
    span_text_class_name = "h-display-inline-block h-text-red h-text-sm"
    try:
        deal_text_span = product_div.find_all("span", {"class":span_text_class_name})[0]
        return "Sale" in deal_text_span.get_text()
    except Exception:
        return False

def get_bullets(product_div):
    shown_bullets = [bullet.get_text() for bullet in product_div.find_all("li")]
    return shown_bullets

def get_rating(product_div):
    text = product_div.get_text()
    try:
        overall_rating = re.findall(r'Overall rating(\d\.*\d*) out of 5 stars with (\d+) ratings', text)[0]
    except IndexError:
        return None, None
    return overall_rating

def get_name(product_div):
    title_list = [title_tag["aria-label"] for title_tag in product_div.find_all("a") if title_tag.get("aria-label")]
    return title_list[0]

def get_brand(product_div):
    brand_class_name ="ProductCardBrandAndRibbonMessage__BrandAndRibbonWrapper-sc-zjf8tq-0 coUgrE"
    brand = product_div.find_all("div", {"class": brand_class_name})[0].get_text()
    return brand

def get_deal_price(product_div):
    deal_price_class = "h-text-red"
    return product_div.find_all("div", {"class": deal_price_class})[0].get_text()

def get_reg_price(product_div):
    reg_price_class = "h-text-grayDark h-text-sm"
    return product_div.find_all("div", {"class": reg_price_class})[0].get_text().split("reg ")[1]

def scrape_products(product_div_list, results):
    for product_div in product_div_list:
        # Deal könnte auch mit Target Gutscheinkarte sein, kein Direktrabatt
        if is_sales_deal(product_div):
            results["name"] += [get_name(product_div)]
            results["brand"] += [get_brand(product_div)]
            results["deal_price"] += [get_deal_price(product_div)]
            results["regular_price"] += [get_reg_price(product_div)]
            results["product_highlights"] += [get_bullets(product_div)]
            avg_overall_rating, amount_of_ratings = get_rating(product_div)
            results["avg_overall_rating"] += [avg_overall_rating]
            results["amount_of_ratings"] += [amount_of_ratings]

1080

In [67]:
while True:
    screen_height = browser.execute_script("return window.screen.availHeight")
    current = screen_height + browser.execute_script("return document.documentElement.scrollTop")
    bottom = browser.execute_script("return document.body.scrollHeight")
    print("Total height: " + str(bottom))
    print("Screen height: " + str(screen_height))
    while current < bottom:
        new_height = current
        print(new_height)
        if new_height > bottom:
            new_height = bottom
        browser.execute_script("window.scrollTo(0, " + str(new_height) + ");")
        current = screen_height + browser.execute_script("return document.documentElement.scrollTop")
        time.sleep(2) # warte bis Elemente geladen haben bevor weiter gescrollt wird

    # Hole Produktkacheln:
    html = browser.page_source
    soup = bs.BeautifulSoup(html, 'lxml')
    product_div_list = soup.find_all("div", {"class": product_item_class})

    # Scraping der Daten
    scrape_products(product_div_list, results)

    # Prüfe, ob es weitere Seiten gibt
    button_next = soup.find_all("button", {"class": button_next_class})[0]
    if button_next.get("disabled") != '':
        buttons = browser.find_elements_by_tag_name("button")
        button = [button for button in buttons if button.get_attribute("class") == button_next_class][0]
        # Button in Viewport bringen:
        browser.execute_script("arguments[0].scrollIntoView();", button)
        goal_height = browser.execute_script("return document.documentElement.scrollTop") - screen_height/2
        browser.execute_script("window.scrollTo(0, " + str(goal_height) + ");")
        # Klicken, warten, hochscrollen, warten
        button.click()
        time.sleep(2)
        browser.execute_script("window.scrollTo(0,0);")
        time.sleep(0.5)
    else:
        break

Total height: 10498
Screen height: 1080
1080
2160
3240
4320
5400
6480
7560
8640
9720
Total height: 10178
Screen height: 1080
1080
2160
3240
4320
5400
6480
7560
8640
9720
Total height: 10194
Screen height: 1080
1080
2160
3240
4320
5400
6480
7560
8640
9720
Total height: 5878
Screen height: 1080
1080
2160
3240
4320
5400


In [68]:
df = pd.DataFrame.from_dict(results)
df

Unnamed: 0,name,brand,deal_price,regular_price,product_highlights,avg_overall_rating,amount_of_ratings
0,"TCL 50"" Class 4-Series 4K UHD HDR Smart Roku T...",TCL,$299.99,$469.99,[Stunning 4K Ultra HD – 4K resolution delivers...,4,350
1,"VIZIO D-Series 40"" Class 1080p Full-Array LED ...",VIZIO,$199.99,$249.99,"[1080p High-Definition - Watch TV in crisp, cl...",4.4,10674
2,"Hisense 55"" Class- A6G Series 4K UHD Android S...",Hisense,$309.99,$429.99,"[4K Ultra HD, 1080p’s bigger, better looking b...",4,41
3,"VIZIO V-Series 50"" Class 4K HDR Smart TV - V50...",VIZIO,$299.99,$379.99,[4K Ultra HD - Over 8 million pixels for breat...,4.5,3848
4,"VIZIO D-Series 32"" Class 720p Full-Array LED H...",VIZIO,$159.99,$169.99,[720p HD Resolution - Watch TV in crystal-clea...,4.4,4409
...,...,...,...,...,...,...,...
73,Supersonic SC-2813 13.3-Inch Portable LED TV w...,Supersonic,$140.99,$199.99,"[13.3 in. widescreen LED TV, Built-in digital ...",,
74,Supersonic SC-195 7 TFT Portable Digital LCD T...,Supersonic,$97.99,$129.99,"[7 in. widescreen LCD TV, Built-in digital TV ...",,
75,"LG 43"" Class 4K UHD Smart LED TV - 43NANO75",LG Electronics,$449.99,$479.99,"[Real 4K NanoCell Display, α5 Gen 5 AI Process...",3.8,6
76,"LG 55"" Class 4K UHD Smart Mini LED TV - 55QNED...",LG ElectronicsNew at target¬,"$1,299.99","$1,399.99","[α7 Gen5 AI Processor 4K, Quantum Dot NanoCell...",4.3,4


In [69]:
df.to_csv('target_tvdeals_20220703.csv', index=False)