In [8]:
import re
from datetime import datetime, timedelta

import pandas as pd
import requests
from bs4 import BeautifulSoup
from openpyxl import load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows

# Matches "<number><unit>전" ("<number> <unit> ago"), tolerating optional
# whitespace around the unit, e.g. "5분전", "3시간 전", "약 2일전".
_RELATIVE_TIME_PATTERN = re.compile(r"(\d+)\s*(분|시간|일|주)\s*전")

# Korean time unit -> name of the matching ``timedelta`` keyword argument.
_TIMEDELTA_KWARG_BY_UNIT = {
    "분": "minutes",
    "시간": "hours",
    "일": "days",
    "주": "weeks",
}


def parse_relative_time(time_str):
    """Convert a Korean relative-time label into an absolute datetime.

    Recognised labels are a number followed by one of the units
    "분" (minutes), "시간" (hours), "일" (days) or "주" (weeks) and the
    suffix "전" (ago). Surrounding text and whitespace between the parts
    are ignored, so "5분전", "5분 전" and "약 5분전" are all accepted.

    Args:
        time_str: The label text, e.g. "3시간전".

    Returns:
        ``datetime.now()`` minus the described offset. If the label is not
        recognised (or the offset is too large to represent), the current
        time is returned instead of raising.
    """
    now = datetime.now()

    match = _RELATIVE_TIME_PATTERN.search(time_str)
    if match is None:
        return now  # Default to now if unknown format

    amount, unit = match.groups()
    try:
        return now - timedelta(**{_TIMEDELTA_KWARG_BY_UNIT[unit]: int(amount)})
    except OverflowError:
        # An absurdly large number cannot be a real article age; treat the
        # label like any other unknown format.
        return now

def extract_data(soup):
    """Collect one row per photo headline found in the news list page.

    Looks up the ``div`` with class "list_body newsflash_body" and, for every
    ``dt`` with class "photo" inside it, reads:

    * the link from the first ``a`` tag's ``href``,
    * the title from the ``alt`` text of the ``img`` inside that ``a`` tag,
    * the news agency from the ``span.writing`` in the following ``dd``,
    * the posting time from the ``span`` with class "date is_new" in the same
      ``dd``, converted with ``parse_relative_time`` and formatted as
      ``YYYY-MM-DD HH:MM:00``.

    Any field that cannot be found is reported as "N/A".

    Args:
        soup: Parsed page exposing the BeautifulSoup ``find`` API.

    Returns:
        A list of ``[title, link, news_agency, date]`` lists; empty when the
        container ``div`` is missing (a notice is printed in that case).
    """
    container = soup.find("div", class_="list_body newsflash_body")
    if not container:
        print("No div with class 'list_body newsflash_body' found")
        return []

    rows = []
    for photo_dt in container.find_all("dt", class_="photo"):
        # Every field starts as "N/A" and is overwritten only when found.
        headline = url = press = posted = "N/A"

        anchor = photo_dt.find("a")
        if anchor and "href" in anchor.attrs:
            url = anchor["href"]

            # The headline text is taken from the thumbnail's alt attribute.
            thumbnail = anchor.find("img")
            if thumbnail and "alt" in thumbnail.attrs:
                headline = thumbnail["alt"]

        details = photo_dt.find_next_sibling("dd")
        if details:
            press_span = details.find("span", class_="writing")
            if press_span:
                press = press_span.get_text(strip=True)

            date_span = details.find("span", class_="date is_new")
            if date_span:
                posted_at = parse_relative_time(date_span.get_text(strip=True))
                posted = posted_at.strftime('%Y-%m-%d %H:%M:00')

        rows.append([headline, url, press, posted])

    return rows

def get_request(section, timeout=10):
    """Fetch the Naver News list page(s) for the given section.

    The section name is mapped to its ``sid1`` query value and requested from
    ``https://news.naver.com/main/list.nhn``. Passing "전체" (all) fetches
    every individual section in turn.

    Args:
        section: One of "전체", "정치", "경제", "사회", "생활", "세계", "과학".
        timeout: Seconds to wait for each HTTP request before giving up, so a
            stalled connection cannot hang the program indefinitely.

    Returns:
        A dict mapping each fetched section name to its ``requests.Response``,
        or ``None`` if the section name is invalid or any request fails
        (a message is printed in both cases).
    """
    custom_header = {
        'referer' : 'https://www.naver.com/',
        'user-agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36'
    }

    url = "https://news.naver.com/main/list.nhn"

    sections = {
        "전체" : 0,
        "정치" : 100,
        "경제" : 101,
        "사회" : 102,
        "생활" : 103,
        "세계" : 104,
        "과학" : 105
    }

    if section not in sections:
        print("Invalid section. Please choose from: 전체, 정치, 경제, 사회, 생활, 세계, 과학.")
        return None

    # Code 0 ("전체") is not a real sid1 value; it stands for every other section.
    if sections[section] == 0:
        targets = [name for name, code in sections.items() if code != 0]
    else:
        targets = [section]

    req = {}
    try:
        for name in targets:
            response = requests.get(
                url,
                headers=custom_header,
                params={"sid1": sections[name]},
                timeout=timeout,
            )
            # Raise an error for bad status codes
            response.raise_for_status()
            req[name] = response
    except requests.RequestException as e:
        print(f"Request failed: {e}")
        return None
    return req
    
def save_to_excel(dic_act, excel_path="result.xlsx"):
    """Write each section's rows to its own sheet and size the columns to fit.

    Args:
        dic_act: Mapping of sheet title to a list of
            ``[title, link, news_agency, date]`` rows.
        excel_path: Destination workbook path; overwritten if it exists.
    """
    with pd.ExcelWriter(excel_path, engine="openpyxl") as writer:
        for title, data in dic_act.items():
            df = pd.DataFrame(data, columns=['뉴스제목', '링크', '뉴스사', '게시일'])
            df.to_excel(writer, sheet_name=title, index=False)

    # Re-open the saved workbook to widen each column to its longest cell.
    wb = load_workbook(excel_path)
    for sheet_name in wb.sheetnames:
        ws = wb[sheet_name]
        for col in ws.columns:
            # Measure the text form so non-string cells count as well;
            # empty cells contribute nothing.
            max_length = max(
                (len(str(cell.value)) for cell in col if cell.value is not None),
                default=0,
            )
            column_letter = col[0].column_letter
            ws.column_dimensions[column_letter].width = max_length + 5  # Add some extra space
    wb.save(excel_path)
    print(f"Data has been saved to '{excel_path}' with multiple sheets.")

def main():
    """Prompt for a news section, scrape it, and save the result to Excel.

    Prints a failure message when no responses could be retrieved.
    """
    section = input('"전체","정치","경제","사회","생활","세계","과학" 중 하나를 입력하세요.\n > ')

    req = get_request(section)
    if not req:
        print("Failed to retrieve data.")
        return

    # One entry per fetched section: section name -> extracted rows.
    dic_act = {
        name: extract_data(BeautifulSoup(response.text, "html.parser"))
        for name, response in req.items()
    }
    if dic_act:
        save_to_excel(dic_act)

# Run the scraper only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()

Data has been saved to 'result.xlsx' with multiple sheets.
