In [1]:
# Chrome for testing (to replace driver)
# https://googlechromelabs.github.io/chrome-for-testing/

In [2]:
# !pip install selenium
# !pip install webdriver_manager

In [3]:
import base64
import hashlib
import time
import requests
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from datetime import date
import pandas as pd

In [4]:
# Get data
# https://www.davichmarket.com/products?category=04&ctgNo=4


# Configure a headless Chrome session with a desktop user agent and an
# English language preference.
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123 Safari/537.36")
options.add_argument("--lang=en")
options.add_experimental_option('prefs', {'intl.accept_languages': 'en,en_US'})

# Alternative kept for reference: have webdriver_manager fetch the driver
# instead of pointing at a local binary.
# driver = webdriver.Chrome(service=webdriver.ChromeService(ChromeDriverManager().install()), options=options)

# Specify the path to your local chromedriver executable
chrome_driver_path = r"C:\Users\rm_an\OneDrive\Documents\Alcon\korea_scripts\chromedriver.exe"

# Create a Service object with the path to the local chromedriver
service = Service(chrome_driver_path)

# Initialize the Chrome WebDriver with the Service object and options
driver = webdriver.Chrome(service=service, options=options)

# Everything that uses the browser lives inside try/finally so the Chrome and
# chromedriver processes are always shut down, even if loading or clicking fails.
try:
    driver.set_window_size(1920, 1080)
    driver.set_page_load_timeout(500)
    driver.get("https://www.davichmarket.com/products?category=04&ctgNo=4")

    # Fixed pause after the initial load (presumably to let the product list render).
    time.sleep(10)

    # Keep clicking the button at this XPath until it stops becoming clickable
    # within 10 seconds. NOTE(review): the absolute XPath is tied to the current
    # page layout and will need updating if the markup changes.
    while True:
        try:
            WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, "/html/body/div[1]/div/div/div[2]/main/section[2]/div[5]/div/button"))).click()
            # Fixed pause after each click before looking for the button again.
            time.sleep(10)

        except TimeoutException:
            print("Button not found. Exiting Loop...")
            break
        except NoSuchElementException:
            print("Button disappeared, stopping...")
            break

    # Snapshot the fully expanded page while the browser is still alive.
    soup = BeautifulSoup(driver.page_source, "html.parser")
finally:
    # Release the browser; `soup` is a detached parse tree and stays usable.
    driver.quit()

Button not found. Exiting Loop...


In [5]:
# Helper functions

def get_code(soup):
    """Return the code text of every product card, in document order.

    For each ``div.product-list-v2`` in ``soup`` the first nested
    ``div.product-list-v2__code`` is looked up; its ``.text`` is used when the
    element exists, otherwise the placeholder ``'NA'``.

    Returns:
        list: one entry per product card.
    """
    code_nodes = (
        card.find('div', {"class": "product-list-v2__code"})
        for card in soup.find_all('div', {"class": "product-list-v2"})
    )
    return [node.text if node else 'NA' for node in code_nodes]


def get_sku_name(soup):
    """Return the title text of every product card, in document order.

    Each ``div.product-list-v2`` contributes the ``.text`` of its first
    ``div.product-list-v2__ttl`` child, or ``'NA'`` when no such element exists.

    Returns:
        list: one entry per product card.
    """
    names = []
    cards = soup.find_all('div', {"class": "product-list-v2"})
    for card in cards:
        title_node = card.find('div', {"class": "product-list-v2__ttl"})
        # A missing title still produces an entry so all columns stay aligned.
        names.append(title_node.text if title_node else 'NA')
    return names


def get_price(soup):
    """Return the displayed price text of every product card, in document order.

    For each ``div.product-list-v2`` the function looks for
    ``div.product-list-v2__price`` and, inside it, ``span`` with the class
    string ``"price f-spo"``.

    A card that lacks either element, or whose price text is empty, yields
    ``'NA'`` instead of raising, so one malformed card cannot abort the scrape
    and the result stays aligned with the other column helpers.

    Returns:
        list: one entry per product card.
    """
    prices = []
    for card in soup.find_all('div', {"class": "product-list-v2"}):
        price_box = card.find('div', {"class": "product-list-v2__price"})
        price_tag = None
        if price_box is not None:
            price_tag = price_box.find('span', {"class": "price f-spo"})
        text = price_tag.text if price_tag is not None else ''
        prices.append(text or 'NA')
    return prices


def get_discount(soup):
    """Return the discount label of every product card, in document order.

    For each ``div.product-list-v2`` the function looks for
    ``div.product-list-v2__price`` and, inside it, ``span`` with the class
    string ``"sale f-spo"``. Cards without that element yield ``'NA'``.

    Missing elements are detected with explicit ``None`` checks rather than a
    bare ``except``, so unrelated errors (and KeyboardInterrupt) are no longer
    silently swallowed.

    Returns:
        list: one entry per product card.
    """
    discounts = []
    for card in soup.find_all('div', {"class": "product-list-v2"}):
        price_box = card.find('div', {"class": "product-list-v2__price"})
        sale_tag = None
        if price_box is not None:
            sale_tag = price_box.find('span', {"class": "sale f-spo"})
        # The text is passed through unchanged when the element exists.
        discounts.append(sale_tag.text if sale_tag is not None else 'NA')
    return discounts


def get_image(soup):
    """Return the ``data-src`` image URL of every product card, in document order.

    For each ``div.product-list-v2`` the function looks for
    ``div.product-list-v2__top-wrap`` and, inside it, the first
    ``img.product-list-v2__img``; the value of that tag's ``data-src``
    attribute is collected.

    A card missing the wrapper, the image, or the attribute (or carrying an
    empty attribute) yields ``'NA'`` instead of raising, which keeps the result
    aligned with the other column helpers.

    Returns:
        list: one entry per product card.
    """
    urls = []
    for card in soup.find_all('div', {"class": "product-list-v2"}):
        wrapper = card.find('div', {"class": "product-list-v2__top-wrap"})
        img = None
        if wrapper is not None:
            img = wrapper.find('img', {"class": "product-list-v2__img"})
        # .get() returns None for an absent attribute instead of raising KeyError.
        src = img.get('data-src') if img is not None else None
        urls.append(src or 'NA')
    return urls


def get_image_discount(soup):
    """Return the badge image URL of every product card, in document order.

    For each ``div.product-list-v2`` the lookup path is
    ``div.product-list-v2__top-wrap`` -> ``div.product-list-v2__badge`` ->
    ``img.product-list-v2__img``, and the ``data-src`` attribute of that image
    is collected.

    If any step of the path is missing, or the attribute is absent or empty,
    the card yields ``'NA'`` instead of raising, so cards without a badge do
    not abort the scrape.

    Returns:
        list: one entry per product card.
    """
    urls = []
    for card in soup.find_all('div', {"class": "product-list-v2"}):
        wrapper = card.find('div', {"class": "product-list-v2__top-wrap"})
        badge = None
        if wrapper is not None:
            badge = wrapper.find('div', {"class": "product-list-v2__badge"})
        img = None
        if badge is not None:
            img = badge.find('img', {"class": "product-list-v2__img"})
        # .get() returns None for an absent attribute instead of raising KeyError.
        src = img.get('data-src') if img is not None else None
        urls.append(src or 'NA')
    return urls


def get_heart_count(soup):
    """Return the heart-count text of every product card, in document order.

    For each ``div.product-list-v2`` the lookup path is
    ``div.product-list-v2__txt`` -> ``div.heart-count`` -> ``span.f-spo``, and
    the ``.text`` of the final span is collected (as a string, unparsed).

    If any step of the path is missing, or the text is empty, the card yields
    ``'NA'`` instead of raising, which keeps the result aligned with the other
    column helpers.

    Returns:
        list: one entry per product card.
    """
    lookup_path = (
        ('div', "product-list-v2__txt"),
        ('div', "heart-count"),
        ('span', "f-spo"),
    )
    counts = []
    for card in soup.find_all('div', {"class": "product-list-v2"}):
        node = card
        for tag_name, css_class in lookup_path:
            node = node.find(tag_name, {"class": css_class})
            if node is None:
                # Path is broken for this card; stop descending.
                break
        text = node.text if node is not None else ''
        counts.append(text or 'NA')
    return counts

def get_star_rating(soup):
    """Return the star-rating text of every product card, in document order.

    For each ``div.product-list-v2`` the lookup path is
    ``div.product-list-v2__txt`` -> ``div.star-rating`` -> ``span.f-spo``, and
    the ``.text`` of the final span is collected (as a string, unparsed).

    If any step of the path is missing, or the text is empty, the card yields
    ``'NA'`` instead of raising, which keeps the result aligned with the other
    column helpers.

    Returns:
        list: one entry per product card.
    """
    lookup_path = (
        ('div', "product-list-v2__txt"),
        ('div', "star-rating"),
        ('span', "f-spo"),
    )
    ratings = []
    for card in soup.find_all('div', {"class": "product-list-v2"}):
        node = card
        for tag_name, css_class in lookup_path:
            node = node.find(tag_name, {"class": css_class})
            if node is None:
                # Path is broken for this card; stop descending.
                break
        text = node.text if node is not None else ''
        ratings.append(text or 'NA')
    return ratings


In [6]:
from datetime import datetime

# Input date written day/month/year, together with the strptime pattern that reads it
date_str = '24/03/2025'
date_format = '%d/%m/%Y'

# Parse into a datetime, then render it as 'YYYY-MM-DD'
datetime_object = datetime.strptime(date_str, date_format)
formatted_date = f"{datetime_object:%Y-%m-%d}"

# Show the result
print(formatted_date)

2025-03-24


In [7]:
# Put data into csv file and save



def all_together(soup):
    """Collect every scraped column for the page into a single dict.

    Each list-valued entry comes from the matching ``get_*`` helper applied to
    ``soup``; ``"date_retrieved"`` is the scalar ``date.today()``.

    Returns:
        dict: column name -> list of per-card values (plus the scalar date).
    """
    return {
        "code": get_code(soup),
        "name": get_sku_name(soup),
        "price": get_price(soup),
        "discount": get_discount(soup),
        "image": get_image(soup),
        "discount_image": get_image_discount(soup),
        "heart_count": get_heart_count(soup),
        "star_rating": get_star_rating(soup),
        # Swap in `formatted_date` here to stamp a fixed date instead of today.
        "date_retrieved": date.today(),
    }

# Gather all scraped columns from the parsed page
d = all_together(soup)

# Build the table and write it next to the notebook, named after today's date.
# 'utf-8-sig' writes a BOM-prefixed UTF-8 file.
df = pd.DataFrame(d)
df.to_csv(
    f"retrieved_davich_{date.today()}.csv",
    index=False,
    encoding='utf-8-sig',
)
# Fixed-date variant of the export:
# df.to_csv(f"retrieved_davich_{formatted_date}.csv", encoding='utf-8-sig', index=False)
df

Unnamed: 0,code,name,price,discount,image,discount_image,heart_count,star_rating,date_retrieved
0,쿠퍼비전,쿠퍼비젼 1DAY - 클래리티 난시 (30P),"27,000원",50%,https://img.davichmarket.com/image/image-view?...,https://img.davichmarket.com/image/image-view?...,2629,4.5,2025-05-26
1,라이트 컬렉션,1MONTH - 라이트 셀레네 블랙 (1P),"5,000원",,https://img.davichmarket.com/image/image-view?...,https://img.davichmarket.com/image/image-view?...,3480,0,2025-05-26
2,라이트 컬렉션,1MONTH - 라이트 오브제 초코 (1P),"5,000원",,https://img.davichmarket.com/image/image-view?...,https://img.davichmarket.com/image/image-view?...,6014,0,2025-05-26
3,쿠퍼비전,쿠퍼비젼 1DAY - 클래리티 근시 대용량 (90P),"58,000원",50%,https://img.davichmarket.com/image/image-view?...,https://img.davichmarket.com/image/image-view?...,2181,5,2025-05-26
4,라이트 컬렉션,1MONTH - 라이트 셀레네 브라운 (1P),"5,000원",,https://img.davichmarket.com/image/image-view?...,https://img.davichmarket.com/image/image-view?...,3094,0,2025-05-26
...,...,...,...,...,...,...,...,...,...
284,MYFiPN,1M 나비드 베이지 (2P),"20,000원",,https://img.davichmarket.com/image/image-view?...,https://img.davichmarket.com/image/image-view?...,5,0,2025-05-26
285,MYFiPN,1M 리아 옐로 (1P),"25,000원",,https://img.davichmarket.com/image/image-view?...,https://img.davichmarket.com/image/image-view?...,7,0,2025-05-26
286,MYFiPN,1M 티어 애쉬 (2P),"20,000원",,https://img.davichmarket.com/image/image-view?...,https://img.davichmarket.com/image/image-view?...,6,0,2025-05-26
287,MYFiPN,1M 티어 코퍼 (2P),"20,000원",,https://img.davichmarket.com/image/image-view?...,https://img.davichmarket.com/image/image-view?...,8,0,2025-05-26
