In [6]:
# One-time setup: uncomment to install the scraping dependencies into this kernel's environment.
# %pip install selenium
# %pip install webdriver_manager

In [7]:
import base64
import hashlib
import time
import requests
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from datetime import date
import pandas as pd

In [8]:
# Get data
# https://www.davichmarket.com/products?category=04&ctgNo=4
#
# Opens the product listing in Chrome, keeps clicking the button at
# LOAD_MORE_XPATH until it can no longer be clicked, then parses the final page
# source into `soup`. The browser is always closed afterwards, even on failure.

# Absolute XPath of the button that is clicked repeatedly to extend the list.
# NOTE(review): an absolute path breaks on any layout change - consider a
# class- or text-based locator once the button's markup is confirmed.
LOAD_MORE_XPATH = "/html/body/div[1]/div/div/div[2]/main/section[2]/div[5]/div/button"

options = webdriver.ChromeOptions()
# options.add_argument("--headless")
options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123 Safari/537.36")
options.add_argument("--lang=en")
options.add_experimental_option('prefs', {'intl.accept_languages': 'en,en_US'})
# Use the `Service` class imported at the top of the notebook.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

try:
    driver.set_page_load_timeout(500)
    driver.get("https://www.davichmarket.com/products?category=04&ctgNo=4")
    # Fixed pause after the initial load (presumably to let the page render - TODO confirm).
    time.sleep(10)

    while True:
        try:
            # Wait up to 10 s for the button to become clickable, then click it.
            WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, LOAD_MORE_XPATH))).click()
            # Fixed pause before looking for the button again.
            time.sleep(10)

        except TimeoutException:
            # The wait expired without a clickable button: stop paging.
            print("Button not found. Exiting Loop...")
            break
        except NoSuchElementException:
            print("Button disappeared, stopping...")
            break

    soup = BeautifulSoup(driver.page_source, "html.parser")
finally:
    # Release the browser and chromedriver process; `soup` holds everything
    # the following cells need.
    driver.quit()

Button not found. Exiting Loop...


In [16]:
# Helper functions

def get_code(soup):
    """Return the code text of every product card, in document order.

    Args:
        soup: Parsed page exposing ``find_all`` (e.g. a BeautifulSoup object).

    Returns:
        list: One entry per ``div.product-list-v2`` card - the text of its
        ``div.product-list-v2__code`` child, or ``'NA'`` when the card has no
        such child.
    """
    cards = soup.find_all('div', {"class": "product-list-v2"})
    code_nodes = (card.find('div', {"class": "product-list-v2__code"}) for card in cards)
    return [node.text if node else 'NA' for node in code_nodes]


def get_sku_name(soup):
    """Return the title text of every product card, in document order.

    Args:
        soup: Parsed page exposing ``find_all`` (e.g. a BeautifulSoup object).

    Returns:
        list: One entry per ``div.product-list-v2`` card - the text of its
        ``div.product-list-v2__ttl`` child, or ``'NA'`` when that child is
        absent.
    """
    def title_of(card):
        # A card without a title element is reported as 'NA'.
        title_node = card.find('div', {"class": "product-list-v2__ttl"})
        return title_node.text if title_node else 'NA'

    return list(map(title_of, soup.find_all('div', {"class": "product-list-v2"})))


def get_price(soup):
    """Return the price text of every product card, in document order.

    Args:
        soup: Parsed page exposing ``find_all`` (e.g. a BeautifulSoup object).

    Returns:
        list: One entry per ``div.product-list-v2`` card - the text of the
        ``span`` with class ``price f-spo`` inside the card's
        ``div.product-list-v2__price``, or ``'NA'`` when either element is
        missing or the text is empty.
    """
    arr = []
    for card in soup.find_all('div', {"class": "product-list-v2"}):
        price_box = card.find('div', {"class": "product-list-v2__price"})
        # find() returns None for a missing element; guard every step so one
        # incomplete card yields 'NA' instead of aborting the whole scrape.
        price_span = None
        if price_box is not None:
            price_span = price_box.find('span', {"class": "price f-spo"})
        text = price_span.text if price_span is not None else ''
        arr.append(text or 'NA')
    return arr


def get_discount(soup):
    """Return the discount text of every product card, in document order.

    Args:
        soup: Parsed page exposing ``find_all`` (e.g. a BeautifulSoup object).

    Returns:
        list: One entry per ``div.product-list-v2`` card - the text of the
        ``span`` with class ``sale f-spo`` inside the card's
        ``div.product-list-v2__price``, or ``'NA'`` when either element is
        missing. The span's text is returned as-is, even when empty.
    """
    arr = []
    for card in soup.find_all('div', {"class": "product-list-v2"}):
        price_box = card.find('div', {"class": "product-list-v2__price"})
        # Explicit None checks instead of a bare `except:` - a missing element
        # still maps to 'NA', but unrelated errors (and Ctrl-C) are no longer
        # silently swallowed.
        sale_span = None
        if price_box is not None:
            sale_span = price_box.find('span', {"class": "sale f-spo"})
        arr.append('NA' if sale_span is None else sale_span.text)
    return arr


def get_image(soup):
    """Return the image URL of every product card, in document order.

    Args:
        soup: Parsed page exposing ``find_all`` (e.g. a BeautifulSoup object).

    Returns:
        list: One entry per ``div.product-list-v2`` card - the ``data-src``
        attribute of the first ``img.product-list-v2__img`` found inside the
        card's ``div.product-list-v2__top-wrap``, or ``'NA'`` when the wrapper,
        the image, or the attribute is missing or empty.
    """
    arr = []
    for card in soup.find_all('div', {"class": "product-list-v2"}):
        top_wrap = card.find('div', {"class": "product-list-v2__top-wrap"})
        # NOTE(review): get_image_discount looks up an img with this same class
        # inside the badge div; find() returns the first match within top-wrap -
        # confirm that is the product photo and not the badge.
        img = None
        if top_wrap is not None:
            img = top_wrap.find('img', {"class": "product-list-v2__img"})
        # .get() avoids a KeyError when the tag has no data-src attribute.
        url = img.get('data-src') if img is not None else None
        arr.append(url or 'NA')
    return arr


def get_image_discount(soup):
    """Return the badge image URL of every product card, in document order.

    Args:
        soup: Parsed page exposing ``find_all`` (e.g. a BeautifulSoup object).

    Returns:
        list: One entry per ``div.product-list-v2`` card - the ``data-src``
        attribute of the ``img.product-list-v2__img`` nested in
        ``div.product-list-v2__top-wrap`` > ``div.product-list-v2__badge``, or
        ``'NA'`` when any element on that path, or the attribute itself, is
        missing or empty.
    """
    # (tag, class) steps walked from the card down to the badge image.
    path = (
        ('div', "product-list-v2__top-wrap"),
        ('div', "product-list-v2__badge"),
        ('img', "product-list-v2__img"),
    )
    arr = []
    for card in soup.find_all('div', {"class": "product-list-v2"}):
        node = card
        for tag, css_class in path:
            node = node.find(tag, {"class": css_class})
            if node is None:
                # A missing wrapper, badge, or image all mean "no badge image".
                break
        # .get() avoids a KeyError when the img carries no data-src attribute.
        url = node.get('data-src') if node is not None else None
        arr.append(url or 'NA')
    return arr


def get_heart_count(soup):
    """Return the heart-count text of every product card, in document order.

    Args:
        soup: Parsed page exposing ``find_all`` (e.g. a BeautifulSoup object).

    Returns:
        list: One entry per ``div.product-list-v2`` card - the text of the
        ``span.f-spo`` nested in ``div.product-list-v2__txt`` >
        ``div.heart-count``, or ``'NA'`` when any element on that path is
        missing or the text is empty.
    """
    # (tag, class) steps walked from the card down to the counter span.
    path = (
        ('div', "product-list-v2__txt"),
        ('div', "heart-count"),
        ('span', "f-spo"),
    )
    arr = []
    for card in soup.find_all('div', {"class": "product-list-v2"}):
        node = card
        for tag, css_class in path:
            node = node.find(tag, {"class": css_class})
            if node is None:
                # find() returned None: stop before dereferencing it.
                break
        text = node.text if node is not None else ''
        arr.append(text or 'NA')
    return arr

def get_star_rating(soup):
    """Return the star-rating text of every product card, in document order.

    Args:
        soup: Parsed page exposing ``find_all`` (e.g. a BeautifulSoup object).

    Returns:
        list: One entry per ``div.product-list-v2`` card - the text of the
        ``span.f-spo`` nested in ``div.product-list-v2__txt`` >
        ``div.star-rating``, or ``'NA'`` when any element on that path is
        missing or the text is empty.
    """
    arr = []
    for card in soup.find_all('div', {"class": "product-list-v2"}):
        txt_box = card.find('div', {"class": "product-list-v2__txt"})
        # Each find() may return None; check before descending so a card
        # without a rating is reported as 'NA' rather than raising.
        rating_box = txt_box.find('div', {"class": "star-rating"}) if txt_box is not None else None
        rating_span = rating_box.find('span', {"class": "f-spo"}) if rating_box is not None else None
        text = rating_span.text if rating_span is not None else ''
        arr.append(text or 'NA')
    return arr


In [17]:
# Put data into csv file and save

def all_together(soup):
    """Run every extractor on the page and gather the results by column name.

    Args:
        soup: Parsed page handed unchanged to each ``get_*`` helper.

    Returns:
        dict: Column name -> whatever the matching helper returns, in the
        order code, name, price, discount, image, discount_image,
        heart_count, star_rating, followed by ``date_retrieved`` holding
        ``date.today()``.
    """
    # Column name paired with the helper that fills it; order fixes both the
    # call order and the resulting column order.
    extractors = (
        ("code", get_code),
        ("name", get_sku_name),
        ("price", get_price),
        ("discount", get_discount),
        ("image", get_image),
        ("discount_image", get_image_discount),
        ("heart_count", get_heart_count),
        ("star_rating", get_star_rating),
    )
    columns = {column: extract(soup) for column, extract in extractors}
    columns["date_retrieved"] = date.today()
    return columns

# Gather every scraped column, tabulate it, and save a CSV named after today's date.
d = all_together(soup)
df = pd.DataFrame.from_dict(d)

csv_path = f"retrieved_davich_{date.today()}.csv"
# 'utf-8-sig' writes a UTF-8 byte-order mark at the start of the file
# (presumably so spreadsheet tools detect the encoding of the Korean text - TODO confirm).
df.to_csv(csv_path, index=False, encoding='utf-8-sig')
df

Unnamed: 0,code,name,price,discount,image,discount_image,heart_count,star_rating,date_retrieved
0,쿠퍼비전,쿠퍼비젼 1DAY - 클래리티 난시 (30P),"24,000원",47%,https://img.davichmarket.com/image/image-view?...,https://img.davichmarket.com/image/image-view?...,2068,4.5,2024-05-13
1,쿠퍼비전,쿠퍼비젼 1DAY - 클래리티 근시 대용량 (90P),"49,000원",49%,https://img.davichmarket.com/image/image-view?...,https://img.davichmarket.com/image/image-view?...,1662,5,2024-05-13
2,쿠퍼비전,쿠퍼비젼 1DAY - 클래리티 원데이 근시 (30P),"21,000원",48%,https://img.davichmarket.com/image/image-view?...,https://img.davichmarket.com/image/image-view?...,1580,0,2024-05-13
3,알콘,알콘 1DAY - 토탈원 워터렌즈 근시 (30P),"36,000원",43%,https://img.davichmarket.com/image/image-view?...,https://img.davichmarket.com/image/image-view?...,762,5,2024-05-13
4,바슈롬,바슈롬 1DAY - 바이오트루 난시 (30P),"30,000원",40%,https://img.davichmarket.com/image/image-view?...,https://img.davichmarket.com/image/image-view?...,798,5,2024-05-13
...,...,...,...,...,...,...,...,...,...
83,네온,하드렌즈 - 네온 CLASSIC (1P),"220,000원",,https://img.davichmarket.com/image/image-view?...,,2,0,2024-05-13
84,네온,하드렌즈 - 네온 TEAR (1P),"145,000원",,https://img.davichmarket.com/image/image-view?...,,1,0,2024-05-13
85,네온,하드렌즈 - 네온 GREEN (1P),"185,000원",,https://img.davichmarket.com/image/image-view?...,,1,0,2024-05-13
86,,방패 렌즈 케이스,"2,500원",,https://img.davichmarket.com/image/image-view?...,https://img.davichmarket.com/image/image-view?...,3,0,2024-05-13
