In [10]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import NoSuchWindowException
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import time
import re

In [26]:
def get_chrome_driver():
    """Start a Chrome WebDriver session with default options.

    The chromedriver path passed to ``Service`` is whatever
    ``ChromeDriverManager().install()`` returns.

    Returns:
        The ``webdriver.Chrome`` instance. Nothing in this function closes
        it, so the caller has to call ``quit()`` when finished.
    """
    # A plain ChromeOptions object; no flags are set on it.
    options = webdriver.ChromeOptions()

    # Resolve the driver binary first, then wrap it in a Service.
    driver_path = ChromeDriverManager().install()
    chrome_service = Service(driver_path)

    return webdriver.Chrome(service=chrome_service, options=options)


In [27]:
def get_unique_product_urls(driver):
    """Collect the ``href`` values of fund links from result pages 1 to 10.

    For every page number the fund search page is loaded again, the
    pagination anchor whose text equals that number is clicked through
    JavaScript, and every element with class ``fund-name`` found in the
    page source is inspected for an ``href`` attribute.

    Args:
        driver: Selenium WebDriver used to load and drive the search page.

    Returns:
        list[str]: ``href`` values in first-seen order, each one appearing
        only once across all pages. Elements that carry no ``href`` are
        skipped instead of being recorded as placeholder text.

    Raises:
        selenium.common.exceptions.TimeoutException: if a pagination anchor
            is not present within 10 seconds.
    """
    # The address is constant, so build it once outside the loop.
    search_url = "https://www.kiwoom.com/wm/fnd/fs010/fndSrchView?dummyVal=0"

    unique_urls = []
    seen = set()  # membership test for de-duplication across ALL pages

    for page_num in range(1, 11):  # pages 1 through 10
        driver.get(search_url)
        time.sleep(1)  # wait for the page to load

        # Click the pagination link for this page number.
        page_link = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, f"//a[text()='{page_num}']"))
        )
        driver.execute_script("arguments[0].click();", page_link)

        # Without a pause, page_source can still hold the content shown
        # before the click. NOTE(review): presumably the list is refreshed
        # asynchronously; confirm and replace with an explicit wait if the
        # page exposes a reliable "loaded" signal.
        time.sleep(1)

        soup = BeautifulSoup(driver.page_source, 'html.parser')
        for fund in soup.find_all(class_='fund-name'):
            if not fund.has_attr('href'):
                continue  # not a link; nothing to collect
            href = fund['href']
            if href not in seen:
                seen.add(href)
                unique_urls.append(href)

    return unique_urls

In [28]:
def extract_numbers_from_urls(urls):
    """Extract the digit run that sits right before a trailing ``');``.

    Each string is searched for one or more digits immediately followed by
    ``');`` at the end of the string. Strings without such a match are
    skipped.

    Args:
        urls: Iterable of strings to search.

    Returns:
        list[str]: The captured digit strings, in input order. Values stay
        strings, so leading zeros are preserved.
    """
    trailing_digits = re.compile(r"(\d+)(?:'\);$)")

    numbers = []
    for url in urls:
        found = trailing_digits.search(url)
        if found:
            numbers.append(found.group(1))
    return numbers


In [32]:
def visit_popup_pages(numbers):
    """Build the fund detail pop-up URL for every given fund code.

    Despite its name, this function loads no page; it only formats each
    value into the ``salFundCd`` query parameter of the detail pop-up URL.

    Args:
        numbers: Iterable of fund codes.

    Returns:
        list[str]: One URL per input value, in input order.
    """
    return [
        f"https://www.kiwoom.com/wm/fnd/fs010/fndDetailPop?salFundCd={number}"
        for number in numbers
    ]

In [30]:
def download_documents(driver, popup_urls):
    """Open each fund detail page and click its two document links.

    For every URL the page is loaded, then the first and second entries of
    the PDF list (terms and conditions, then prospectus) are clicked. Each
    link is attempted on its own, so a missing or unclickable terms link no
    longer prevents the prospectus from being tried.

    Failures are reported with ``print`` and processing continues with the
    next link or page; nothing is raised for a failed click.

    NOTE(review): nothing here waits for a download to finish after the
    click. The 3-second pause at the start of the next page is the only
    delay, and there is none after the final page.

    Args:
        driver: Selenium WebDriver used to load the pages and click links.
        popup_urls: Iterable of detail-page URLs to process.

    Returns:
        None
    """
    # Both links live in the same <ul>; only the <li> position differs.
    pdf_list = (
        "#contentMain > div > div > section > div.modal-body > div > "
        "div:nth-child(4) > div > div.pdf-list-content > ul"
    )
    link_selectors = (
        f"{pdf_list} > li:nth-child(1) > a",  # terms and conditions
        f"{pdf_list} > li:nth-child(2) > a",  # prospectus
    )

    for url in popup_urls:
        driver.get(url)
        time.sleep(3)

        for selector in link_selectors:
            try:
                # Wait until the link can actually be clicked, not merely
                # until it exists in the DOM.
                link = WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable((By.CSS_SELECTOR, selector))
                )
                link.click()
            except Exception as e:
                # Deliberately broad: this is a best-effort crawl and one bad
                # page must not abort the remaining downloads.
                print(f"페이지 {url}에서 약관 또는 투자 설명서를 찾을 수 없습니다.")
                print(e)

In [33]:
# visit_popup_pages() is called with the fund codes only; the WebDriver is
# not passed to it.
def main():
    """Run the whole scrape: collect links, derive codes, download documents.

    Steps:
        1. Start a Chrome WebDriver.
        2. Collect fund link hrefs from the search result pages.
        3. Extract the fund codes from those hrefs.
        4. Build the detail pop-up URL for each code.
        5. Open every pop-up URL and click the document download links.

    The driver is always shut down, including when one of the steps raises,
    so no browser process is left behind.
    """
    # Get a Chrome WebDriver instance.
    driver = get_chrome_driver()
    try:
        # Collect the fund product hrefs.
        unique_urls = get_unique_product_urls(driver)

        # Extract the fund product codes.
        numbers = extract_numbers_from_urls(unique_urls)

        # Build the pop-up page URLs.
        popup_urls = visit_popup_pages(numbers)

        # Download the documents.
        download_documents(driver, popup_urls)
    finally:
        # Quit the WebDriver even if a step above failed.
        driver.quit()

# Run the main function when this code is executed as the main module.
if __name__ == "__main__":
    main()