In [None]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from IPython.display import Image, display

# Maps the section display name to the value sent as the `mid` query
# parameter of the news.nate.com "recent" listing URL.
section_dict = {
    "최신뉴스": "n0100",
    "정치": "n0200",
    "경제": "n0300",
    "사회": "n0400",
    "세계": "n0500",
    "IT/과학": "n0600",
}

def print_news_with_images(section_name):
    """Print title, link and thumbnail of every article that has an image.

    Downloads the "recent" listing page of the given section, walks every
    ``div.mlt01`` item and, for items whose first anchor contains an
    ``<img>`` with a ``src``, prints the title, the absolute article link
    and the absolute image URL, then renders the image inline. Items
    without an anchor, a link or an image are skipped.

    Args:
        section_name: A key of ``section_dict`` (e.g. ``'정치'``).

    Raises:
        KeyError: If ``section_name`` is not a key of ``section_dict``.
        requests.RequestException: If the request fails or times out.
    """
    base_url = "https://news.nate.com"
    section_code = section_dict[section_name]
    url = f"{base_url}/recent?mid={section_code}"

    headers = {
        'User-Agent': 'Mozilla/5.0'
    }

    # Without a timeout, requests would wait forever on an unresponsive server.
    res = requests.get(url, headers=headers, timeout=10)

    if not res.ok:
        # Report the failure instead of silently printing nothing.
        print(f"에러코드 = {res.status_code}")
        return

    # Decode the body as EUC-KR before reading res.text.
    res.encoding = 'EUC-KR'
    soup = BeautifulSoup(res.text, 'html.parser')

    for news in soup.select('div.mlt01'):
        a_tag = news.find('a')
        if not a_tag:
            continue

        # An anchor without href cannot be turned into an article link.
        href = a_tag.get('href')
        if not href:
            continue

        # Only items that carry a thumbnail are printed.
        img_tag = a_tag.find('img')
        img_src = img_tag.get('src') if img_tag else None
        if not img_src:
            continue

        title_tag = a_tag.find('h2', class_='tit')
        title = title_tag.text.strip() if title_tag else "제목 없음"

        # urljoin resolves relative and protocol-relative URLs against the site.
        link = urljoin(base_url, href)
        img_url = urljoin(base_url, img_src)

        print(f"제목: {title}")
        print(f"링크: {link}")
        print(f"이미지 URL: {img_url}")
        display(Image(url=img_url))
        print("-" * 50)

# Usage example: print the image news of every section, one after another.
for _section in ('최신뉴스', '정치', '경제', '사회', '세계', 'IT/과학'):
    print_news_with_images(_section)

## 2-1. Nate 최신뉴스 추출하기 정답코드

In [None]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from IPython.display import Image, display

# Fetch the "latest news" listing page and print link, thumbnail and title
# of every item found in it.
url = 'https://news.nate.com/recent?mid=n0100'
print(url)

req_header = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'
}

# Without a timeout, requests would wait forever on an unresponsive server.
res = requests.get(url, headers=req_header, timeout=10)
print(type(res))
print(res.status_code)

if res.ok:
    # Decode the body as EUC-KR before reading res.text.
    res.encoding = 'euc-kr'
    html = res.text
    soup = BeautifulSoup(html, 'html.parser')

    tags = soup.select("div.postListType.noListTitle div.mlt01")
    print(len(tags))

    for idx, div_tag in enumerate(tags, 1):
        print(f'============>> {idx}')

        # Guard every lookup: one malformed item must not abort the loop.
        a_tag = div_tag.find('a')
        if a_tag and a_tag.get('href'):
            a_join_url = urljoin(url, a_tag['href'])
            print(a_join_url)

        img_tag = div_tag.select_one('span.ib img')
        if img_tag and img_tag.get('src'):
            photo_url = urljoin(url, img_tag['src'])
            print(photo_url)
            display(Image(url=photo_url))

        h2_tag = div_tag.select_one('span.tb h2.tit')
        title = h2_tag.text if h2_tag else '제목 없음'
        print(title)

else:
    print(f'에러코드 = {res.status_code}')

## 2-2. Nate 뉴스제목과 Image 출력하기
함수로 선언하여 처리하기

In [None]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from IPython.display import Image, display

# Maps the section display name to the value sent as the `mid` query
# parameter of the news.nate.com "recent" listing URL.
section_dict = {
    '최신뉴스': 'n0100',
    '정치': 'n0200',
    '경제': 'n0300',
    '사회': 'n0400',
    '세계': 'n0500',
    'IT/과학': 'n0600',
}
print(section_dict)

def print_news(section_name):
    """Print link, thumbnail and title of every item in a news section.

    Downloads the "recent" listing page of the given section and, for each
    ``div.mlt01`` item inside ``div.postListType.noListTitle``, prints the
    absolute article link, the absolute thumbnail URL (also rendered
    inline) when one exists, and the title. On a non-OK HTTP response only
    the status code is printed.

    Args:
        section_name: A key of ``section_dict`` (e.g. ``'IT/과학'``).

    Raises:
        KeyError: If ``section_name`` is not a key of ``section_dict``.
        requests.RequestException: If the request fails or times out.
    """
    m_id = section_dict[section_name]

    url = f'https://news.nate.com/recent?mid={m_id}'
    print(url)

    req_header = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'
    }

    # Without a timeout, requests would wait forever on an unresponsive server.
    res = requests.get(url, headers=req_header, timeout=10)
    print(type(res))
    print(res.status_code)

    if not res.ok:
        print(f'에러코드 = {res.status_code}')
        return

    # Decode the body as EUC-KR before reading res.text.
    res.encoding = 'euc-kr'
    soup = BeautifulSoup(res.text, 'html.parser')

    print(f'======> {url} {section_name} 뉴스 <======')

    tags = soup.select("div.postListType.noListTitle div.mlt01")
    print(len(tags))

    for idx, div_tag in enumerate(tags, 1):
        print(f'============>> {idx}')

        # Guard every lookup: one malformed item must not abort the loop.
        a_tag = div_tag.find('a')
        if a_tag and a_tag.get('href'):
            print(urljoin(url, a_tag['href']))

        img_tag = div_tag.select_one('span.ib img')
        if img_tag and img_tag.get('src'):
            photo_url = urljoin(url, img_tag['src'])
            print(photo_url)
            display(Image(url=photo_url))

        h2_tag = div_tag.select_one('span.tb h2.tit')
        print(h2_tag.text if h2_tag else '제목 없음')

# Example run: print the news list of the IT/science section.
print_news('IT/과학')