# Notebook cell In [35]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from datetime import datetime
from selenium.common.exceptions import NoSuchElementException
import csv
import time
import re
import os

def safe_find_element(article, by, selector):
    """Look up a child of *article*, yielding None instead of raising.

    Args:
        article: Object exposing ``find_element(by, selector)``.
        by: Locator strategy forwarded to ``find_element``.
        selector: Locator value forwarded to ``find_element``.

    Returns:
        Whatever ``find_element`` returns, or None when it raises
        ``NoSuchElementException``.
    """
    try:
        found = article.find_element(by, selector)
    except NoSuchElementException:
        found = None
    return found

# The listing URL's ``p`` query parameter advances in steps of 20 (1, 21,
# 41, ...) -- presumably the 1-based index of the first article on the page.
_LIST_PAGE_STEP = 20

# Fixed pause after each page load before the article cards are queried.
_PAGE_LOAD_WAIT_SECONDS = 3


def _stripped_text(element):
    """Return the element's stripped text, or None when the element is missing."""
    return element.text.strip() if element is not None else None


def _stripped_attribute(element, name):
    """Return attribute *name* stripped of whitespace, or None if unavailable.

    ``get_attribute`` returns None when the attribute is absent; guarding
    against that keeps one missing attribute (e.g. an ``<img>`` without
    ``src``) from discarding the whole article.
    """
    if element is None:
        return None
    value = element.get_attribute(name)
    return value.strip() if value is not None else None


def _extract_article(article):
    """Build one record dict from a ``.news_card`` element.

    Returns None when the card has no title link, in which case it is skipped.
    """
    title_element = safe_find_element(article, By.CSS_SELECTOR, '.tit > a')
    if title_element is None:
        return None

    thumbnail_element = safe_find_element(article, By.CSS_SELECTOR, '.img_area > img')
    summary_element = safe_find_element(article, By.CSS_SELECTOR, '.desc')
    date_element = safe_find_element(article, By.CSS_SELECTOR, '.date')

    return {
        "title": _stripped_text(title_element),
        "thumbnail": _stripped_attribute(thumbnail_element, 'src'),
        "short_content": _stripped_text(summary_element),
        "write_time": _stripped_text(date_element),
        # The article URL lives on the title anchor's href.
        "link": _stripped_attribute(title_element, 'href'),
        "company": "DongAh",
    }


def _collect_section(driver, data, section, pages=1):
    """Scrape *pages* listing pages of *section* and append records to *data*.

    Args:
        driver: Selenium WebDriver used to load the listing pages.
        data: List that receives one dict per article (mutated in place).
        section: Section name as it appears in the URL path (e.g. ``Politics``).
        pages: Number of consecutive listing pages to visit, starting at the first.
    """
    for page_index in range(pages):
        offset = _LIST_PAGE_STEP * page_index + 1
        driver.get(f'https://www.donga.com/news/{section}/List?p={offset}')
        time.sleep(_PAGE_LOAD_WAIT_SECONDS)

        for article in driver.find_elements(By.CSS_SELECTOR, '.news_card'):
            # Best effort per card: a failure on one article is reported and
            # must not abort the rest of the page.
            try:
                record = _extract_article(article)
            except Exception as e:
                print(f"데이터 수집 중 오류 발생: {e}")
                continue
            if record is not None:
                data.append(record)


def get_politics(driver, data):
    """Append articles from the first three Politics listing pages to *data*."""
    _collect_section(driver, data, 'Politics', pages=3)


def get_economy(driver, data):
    """Append articles from the first Economy listing page to *data*."""
    _collect_section(driver, data, 'Economy')


def get_entertainment(driver, data):
    """Append articles from the first Entertainment listing page to *data*."""
    _collect_section(driver, data, 'Entertainment')


def get_sports(driver, data):
    """Append articles from the first Sports listing page to *data*."""
    _collect_section(driver, data, 'Sports')


def get_culture(driver, data):
    """Append articles from the first Culture listing page to *data*."""
    _collect_section(driver, data, 'Culture')


def save_to_csv(data, fieldnames):
    """Write the collected rows to ``../Completed_csv/DongAh.csv``.

    The output folder is created (and reported) when it does not exist yet.
    The file is overwritten on every call and encoded as UTF-8 with a BOM.
    Any error raised while writing is printed rather than propagated.

    Args:
        data: Iterable of dicts, one per CSV row.
        fieldnames: Column names, in output order, used for the header row.
    """
    target_dir = '../Completed_csv'
    csv_path = os.path.join(target_dir, 'DongAh.csv')

    # Make sure the destination folder is there before opening the file.
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
        print(f"{target_dir} 폴더가 생성되었습니다.")

    try:
        with open(csv_path, mode='w', newline='', encoding='utf-8-sig') as out:
            sheet = csv.DictWriter(out, fieldnames=fieldnames)
            sheet.writeheader()
            sheet.writerows(data)
    except Exception as err:
        print(f"CSV 저장 중 오류 발생: {err}")
    else:
        print("데이터가 저장되었습니다.")

def main():
    """Scrape every section with one Chrome session and save the rows to CSV.

    The browser is always closed, even when a scraper raises part-way
    through; in that case the exception propagates and no CSV is written.
    """
    # NOTE(review): the ``executable_path`` keyword was deprecated in
    # Selenium 4 and removed in 4.10; newer installs need a ``Service``
    # object instead -- confirm the pinned Selenium version before upgrading.
    driver = webdriver.Chrome(executable_path='../chromedriver-win64/chromedriver')
    print("실행 완료")

    data = []
    fieldnames = ["title", "thumbnail", "short_content", "write_time", "link", "company"]

    try:
        get_politics(driver, data)
        get_economy(driver, data)
        get_entertainment(driver, data)
        get_sports(driver, data)
        get_culture(driver, data)
    finally:
        # Release the browser process regardless of how scraping ended.
        driver.quit()

    # Persist everything that was collected.
    save_to_csv(data, fieldnames)

if __name__ == '__main__':
    main()

# Notebook cell output:
# 실행 완료
# 데이터가 저장되었습니다.
