In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from datetime import datetime
import csv
import time
import os

def _parse_article(article):
    """Build one CSV row from a single ranking-list entry.

    Args:
        article: Selenium element for one ``#newsList > li`` item.

    Returns:
        dict with the keys ``title``, ``thumbnail``, ``short_content``,
        ``write_time``, ``link`` and ``company``.

    Any exception raised by the lookups or by ``.strip()`` on a missing
    attribute propagates to the caller, which decides whether to skip the
    entry.
    """
    # The title anchor is mandatory: it supplies both the title and the link.
    title_element = article.find_element(By.CSS_SELECTOR, '.report_tit > a')
    title = title_element.text.strip()

    # Thumbnail and timestamp are optional; find_elements returns an empty
    # list instead of raising when nothing matches.
    thumbnails = article.find_elements(By.CSS_SELECTOR, '.thumb > a > span > img')
    thumbnail = thumbnails[0].get_attribute('src').strip() if thumbnails else None

    timestamps = article.find_elements(By.CSS_SELECTOR, '.time')
    write_time = timestamps[0].text.strip() if timestamps else None

    link = title_element.get_attribute('href').strip()

    return {
        "title": title,
        "thumbnail": thumbnail,
        # The ranking list carries no summary text, so the column is always
        # empty; a placeholder selector lookup would only cost a WebDriver
        # round trip per article.
        "short_content": None,
        "write_time": write_time,
        "link": link,
        "company": "서울경제",
    }


def get_news(driver):
    """Crawl today's ranking pages 1-10 on m.sedaily.com and save them as CSV.

    Each page ``https://m.sedaily.com/RankAll/AL/<YYYYMMDD>/<page>`` is
    loaded, every ``#newsList > li`` entry is turned into a row, and the
    collected rows are handed to ``save_to_csv``.

    Args:
        driver: Selenium WebDriver used to load the pages. It is quit before
            this function returns or raises.

    Returns:
        None. Entries that fail to parse are reported on stdout and skipped.
    """
    fieldnames = ["title", "thumbnail", "short_content", "write_time", "link", "company"]
    data = []
    today = datetime.now().strftime('%Y%m%d')

    try:
        for i in range(1, 11):
            # Open the ranking page.
            url = f'https://m.sedaily.com/RankAll/AL/{today}/{i}'
            driver.get(url)
            time.sleep(2)  # fixed pause before reading the list

            articles = driver.find_elements(By.CSS_SELECTOR, '#newsList > li')

            for article in articles:
                try:
                    data.append(_parse_article(article))
                except Exception as e:
                    # Best effort: one malformed entry must not stop the crawl.
                    print(f"데이터 수집 중 오류 발생: {e}")
                    continue

        # Persist everything that was collected.
        save_to_csv(data, fieldnames)
    finally:
        # Release the browser even when a page load or the save step raises.
        driver.quit()

def save_to_csv(data, fieldnames):
    """Write the collected rows to ``./Completed_csv/SeoulGyeongJae.csv``.

    The output folder is created (and reported on stdout) when it does not
    exist yet. The file is overwritten on every call and encoded as
    UTF-8 with a BOM.

    Args:
        data: iterable of dicts, one per CSV row.
        fieldnames: column names, written as the header row and used to
            order each row's values.

    Returns:
        None. Errors while writing the file are printed, not raised.
    """
    out_dir = './Completed_csv'

    # Create the destination folder on first use.
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
        print(f"{out_dir} 폴더가 생성되었습니다.")

    target = os.path.join(out_dir, 'SeoulGyeongJae.csv')

    try:
        with open(target, 'w', newline='', encoding='utf-8-sig') as fh:
            writer = csv.DictWriter(fh, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(data)
        print("데이터가 저장되었습니다.")
    except Exception as err:
        print(f"CSV 저장 중 오류 발생: {err}")

def main():
    """Start headless Chrome, run the crawl, and always shut the browser down."""
    options = Options()
    options.add_argument('--headless')  # run without a GUI
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    # Let webdriver_manager provide the ChromeDriver binary.
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    print("ChromeDriver 실행 완료")

    try:
        get_news(driver)
    finally:
        # Guarantee cleanup regardless of how the crawl ended, so a failed
        # run does not leave a browser process behind.
        driver.quit()

# Run the crawler only when this file is executed directly, not on import.
if __name__ == '__main__':
    main()


ChromeDriver 실행 완료
데이터가 저장되었습니다.
