In [1]:
import os
import time
from datetime import datetime
from urllib.parse import urljoin

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment so that
# the os.getenv("USER_AGENT") lookup further down can find its value.
# The call returns True when at least one variable was set.
load_dotenv()

True

In [2]:
# Today's date (local clock) in ISO form; it selects which day's main-news
# listing is requested.
today = f"{datetime.today():%Y-%m-%d}"

# Listing page for that day on Naver Finance.
base_url = "https://finance.naver.com/news/mainnews.naver?date=" + today

In [3]:
# Request headers modelled on a Chrome navigation to the main-news page.
#
# Notes on what is deliberately NOT here:
# - "authority", "method", "path" and "scheme" are HTTP/2 pseudo-headers
#   (":authority", ":method", ...) that browser dev tools display alongside the
#   real headers. Sent without the colon they are just meaningless ordinary
#   headers, and the copied "path" carried a stale hard-coded date, so they
#   are dropped.
# - "br" and "zstd" are not advertised in accept-encoding: requests/urllib3 only
#   decode those when optional packages are installed, and an undecoded body
#   would make `req.text` unusable. gzip/deflate are always supported.
base_header = {
    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "accept-encoding": "gzip, deflate",
    "accept-language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
    "cache-control": "no-cache",
    "cookie": "",
    "pragma": "no-cache",
    "priority": "u=0, i",
    # Same page we are about to request, so the referer always matches `today`.
    "referer": f"https://finance.naver.com/news/mainnews.naver?date={today}",
    "sec-ch-ua": '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"',
    "sec-ch-ua-mobile": "?0",
    # Client-hint string values are quoted on the wire, like sec-ch-ua above.
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "same-origin",
    "sec-fetch-user": "?1",
    "upgrade-insecure-requests": "1",
    # Read from the environment (.env). If USER_AGENT is unset this is None,
    # and requests omits None-valued headers from the outgoing request.
    "user-agent": os.getenv("USER_AGENT"),
}

In [4]:
# Fetch the main-news listing page.
# - timeout: without one, requests can wait forever on an unresponsive server
#   and the cell never finishes.
# - raise_for_status: fail loudly on 4xx/5xx instead of silently parsing an
#   error page and ending up with an empty news list further down.
req = requests.get(base_url, headers=base_header, timeout=10)
req.raise_for_status()

# Parse the HTML with the pure-Python parser bundled with the standard library.
soup = BeautifulSoup(req.text, "html.parser")

In [6]:
# Show only the beginning of the parsed page: enough to sanity-check that the
# right document came back, without dumping the whole HTML into the notebook.
print(soup.prettify()[:2000])

<!--  global include -->
<html lang="ko">
<head>
<title>네이버페이 증권</title>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<meta content="text/javascript" http-equiv="Content-Script-Type"/>
<meta content="text/css" http-equiv="Content-Style-Type"/>
<meta content="네이버페이 증권" name="apple-mobile-web-app-title">
<meta content="http://finance.naver.com/news/mainnews.naver" property="og:url">
<meta content="주요뉴스 : 네이버페이 증권" property="og:title"/>
<meta content="관심종목의 실시간 주가를 가장 빠르게 확인하는 곳" property="og:description"/>
<meta content="https://ssl.pstatic.net/static/m/stock/im/2016/08/og_stock-200.png" property="og:image"/>
<meta content="article" property="og:type"/>
<meta content="" property="og:article:thumbnailUrl"/>
<meta content="네이버페이 증권" property="og:article:author"/>
<meta content="http://FINANCE.NAVER.COM" property="og:article:author:url"/>
<link href="https://ssl.pstatic.net/imgstock/static.pc/20250716142544/css/finance_header.css" rel="stylesheet" type="text/css"/>
<

In [7]:
# Extract title, link, date/time and press name for every entry in the parsed
# main-news page and collect them as a list of dicts.

# Root used to turn the site-relative article hrefs into absolute URLs.
NAVER_FINANCE_ROOT = "https://finance.naver.com"


def _select_text(parent, selector):
    """Return the stripped text of the first `selector` match under `parent`.

    Returns "" when `parent` is None or nothing matches, so callers never have
    to special-case missing markup.
    """
    tag = parent.select_one(selector) if parent is not None else None
    return tag.get_text(strip=True) if tag is not None else ""


def parse_news_item(item):
    """Turn one news-list ``<li>`` element into a record.

    Parameters
    ----------
    item : bs4.element.Tag
        An ``<li>`` element from the main-news list.

    Returns
    -------
    dict or None
        ``{"title", "link", "date", "press"}`` with "" for any field whose tag
        is missing, or None when the item has no ``<dl>`` block at all.
    """
    dl_tag = item.select_one("dl")
    if dl_tag is None:
        return None

    # Title and link live in <dd class="articleSubject"> > a.
    subject_tag = dl_tag.select_one("dd.articleSubject > a")
    title = subject_tag.get_text(strip=True) if subject_tag is not None else ""
    if subject_tag is not None and subject_tag.has_attr("href"):
        # urljoin handles site-relative hrefs like plain concatenation did, but
        # also copes with hrefs that are already absolute or lack the leading "/".
        link = urljoin(NAVER_FINANCE_ROOT, subject_tag["href"])
    else:
        link = ""

    # Press name and date/time live inside <dd class="articleSummary">.
    summary_tag = dl_tag.select_one("dd.articleSummary")

    return {
        "title": title,
        "link": link,
        "date": _select_text(summary_tag, "span.wdate"),
        "press": _select_text(summary_tag, "span.press"),
    }


# Locate the entries of the main-news list; the selector may need adjusting if
# the page structure changes.
news_items = soup.select("ul.newsList > li")

news_list = [
    record
    for record in map(parse_news_item, news_items)
    if record is not None
]

# Preview only the first few records instead of dumping the whole list.
news_list[:5]

[{'title': '美법안 통과에 날개 단 가상화폐, 시가총액 4조달러 첫 돌파',
  'link': 'https://finance.naver.com/news/news_read.naver?article_id=0015517229&office_id=001&mode=mainnews&type=&date=2025-07-19&page=1',
  'date': '2025-07-19 20:58:04',
  'press': '연합뉴스'},
 {'title': '"초보자도 돈 벌 수 있다" 떠들썩…요즘 뜨는 투자법',
  'link': 'https://finance.naver.com/news/news_read.naver?article_id=0005160038&office_id=015&mode=mainnews&type=&date=2025-07-19&page=1',
  'date': '2025-07-19 20:35:10',
  'press': '한국경제'},
 {'title': '아바타 3D 영상도 실시간으로 본다…XR 기술 본격화',
  'link': 'https://finance.naver.com/news/news_read.naver?article_id=0001897712&office_id=057&mode=mainnews&type=&date=2025-07-19&page=1',
  'date': '2025-07-19 20:19:06',
  'press': 'MBN'},
 {'title': '"조종사가 엔진 잘못 끈 듯"…제주항공 참사 유족 \'반발\'',
  'link': 'https://finance.naver.com/news/news_read.naver?article_id=0001897709&office_id=057&mode=mainnews&type=&date=2025-07-19&page=1',
  'date': '2025-07-19 20:13:06',
  'press': 'MBN'},
 {'title': '"車 대신 도로가 주도"…역발상 자율주행 현장 가보니 [영상]',


In [None]:
# URL of a single article on n.news.naver.com.
# NOTE(review): presumably a sample target for fetching one article's body in a
# later cell — confirm, since nothing visible here uses it yet.
news_url = "https://n.news.naver.com/mnews/article/243/0000081564"