In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from datetime import datetime
from selenium.common.exceptions import NoSuchElementException
import csv
import time
import re
import os

def safe_find_element(article, by, selector):
    """Look up a single child element, returning None when it does not exist.

    Args:
        article: Object exposing ``find_element(by, selector)``.
        by: Locator strategy forwarded to ``find_element``.
        selector: Locator value forwarded to ``find_element``.

    Returns:
        Whatever ``article.find_element`` returns, or None if it raises
        ``NoSuchElementException``.
    """
    try:
        found = article.find_element(by, selector)
    except NoSuchElementException:
        return None
    return found

def _element_text(element):
    """Return the stripped text of ``element``, or None when it is missing."""
    if element is None:
        return None
    return element.text.strip()


def _element_attribute(element, name):
    """Return the stripped attribute ``name`` of ``element``.

    Returns None when the element is missing or when ``get_attribute``
    itself yields None, so a missing attribute never raises.
    """
    if element is None:
        return None
    value = element.get_attribute(name)
    return value.strip() if value is not None else None


def get_news(driver, data, urls, wait_seconds=3):
    """Collect article summaries from each section page and append them to ``data``.

    For every URL the page is opened, the function sleeps ``wait_seconds``,
    and each ``#list_W > li`` item is turned into a dict with the keys
    ``title``, ``thumbnail``, ``short_content``, ``write_time``, ``link``
    and ``company``. Any field whose element or attribute is missing is
    stored as None instead of discarding the article.

    Args:
        driver: Selenium WebDriver used to load the pages.
        data: List that receives one dict per article (mutated in place).
        urls: Iterable of section page URLs to visit.
        wait_seconds: Pause after each page load, in seconds. Defaults to 3,
            the previously hard-coded value.

    Returns:
        None. Results are appended to ``data``.
    """
    for url in urls:
        # Open the page, then pause (presumably so the article list can render).
        driver.get(url)
        time.sleep(wait_seconds)

        articles = driver.find_elements(By.CSS_SELECTOR, '#list_W > li')

        for article in articles:
            try:
                # The headline anchor carries both the title text and the article link,
                # so it is looked up once and reused.
                headline_element = safe_find_element(article, By.CSS_SELECTOR, '.cluster_text_headline21.t_reduce > a')
                thumbnail_element = safe_find_element(article, By.CSS_SELECTOR, '.cluster_thumb_link21 > a > img')
                short_content_element = safe_find_element(article, By.CSS_SELECTOR, '.cluster_text_lede21.link_text2')
                write_time_element = safe_find_element(article, By.CSS_SELECTOR, '.cluster_text_press21')

                # Every field is allowed to be None.
                data.append({
                    "title": _element_text(headline_element),
                    "thumbnail": _element_attribute(thumbnail_element, 'src'),
                    "short_content": _element_text(short_content_element),
                    "write_time": _element_text(write_time_element),
                    "link": _element_attribute(headline_element, 'href'),
                    "company": "이투데이"
                })

            except Exception as e:
                # Best effort: report the failure and keep processing the remaining articles.
                print(f"데이터 수집 중 오류 발생: {e}")
                continue


def save_to_csv(data, fieldnames):
    """Write the collected rows to ``./Completed_csv/Etoday.csv``.

    The output folder is created (and a notice printed) when it does not
    exist yet. The file is overwritten, encoded as UTF-8 with a BOM, and
    starts with a header row built from ``fieldnames``. Any exception raised
    while writing is caught and reported via ``print``.

    Args:
        data: Iterable of dicts, one per CSV row.
        fieldnames: Column names, in output order.
    """
    output_dir = './Completed_csv'
    target = os.path.join(output_dir, 'Etoday.csv')

    # Create the output folder on first use.
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print(f"{output_dir} 폴더가 생성되었습니다.")

    try:
        with open(target, mode='w', newline='', encoding='utf-8-sig') as handle:
            csv_writer = csv.DictWriter(handle, fieldnames=fieldnames)
            csv_writer.writeheader()
            csv_writer.writerows(data)
        print("데이터가 저장되었습니다.")

    except Exception as error:
        print(f"CSV 저장 중 오류 발생: {error}")

def main():
    """Scrape the configured Etoday section pages and save the results as CSV.

    Starts a headless Chrome session, passes it to ``get_news`` together
    with the section URLs, and hands the collected rows to ``save_to_csv``.
    The browser is always shut down, even when scraping raises; in that case
    the exception propagates and nothing is saved.
    """
    options = Options()
    options.add_argument('--headless')  # run without a GUI
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    # Let webdriver_manager provide the ChromeDriver binary.
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    print("ChromeDriver 실행 완료")

    data = []
    urls = [
        'https://www.etoday.co.kr/news/section/?MID=1200', # finance / securities
        'https://www.etoday.co.kr/news/section/?MID=1400', # real estate
        'https://www.etoday.co.kr/news/section/?MID=1300', # companies
        'https://www.etoday.co.kr/news/section/?MID=1100', # politics / economy
        'https://www.etoday.co.kr/news/section/?MID=1800', # culture
    ]

    fieldnames = ["title", "thumbnail", "short_content", "write_time", "link", "company"]

    try:
        get_news(driver, data, urls)
    finally:
        # Quit unconditionally so a failed page load does not leave a
        # headless Chrome process running.
        driver.quit()

    # Save the collected rows as a CSV file.
    save_to_csv(data, fieldnames)

# Run the scraper only when this file is executed directly, not when it is imported.
if __name__ == '__main__':
    main()

ChromeDriver 실행 완료
데이터가 저장되었습니다.
