In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta
from openpyxl import load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows

def parse_relative_time(time_str):
    """Convert a Korean time label into an absolute ``datetime``.

    Recognised inputs:
      * Relative labels ``"<n>분 전"`` (minutes), ``"<n>시간 전"`` (hours),
        ``"<n>일 전"`` (days) and ``"<n>주 전"`` (weeks); the amount is
        subtracted from the current local time.
      * Absolute dates written as ``"YYYY.MM.DD."``; midnight of that day is
        returned.

    Args:
        time_str: The label text. ``None`` or an empty string is tolerated.

    Returns:
        A naive ``datetime``. Unrecognised or out-of-range input falls back
        to the current local time instead of raising.
    """
    import re  # local import so the block does not depend on file-level changes

    now = datetime.now()
    text = (time_str or "").strip()

    # Keyword-argument names for timedelta, keyed by the Korean unit word.
    unit_to_kwarg = {"분": "minutes", "시간": "hours", "일": "days", "주": "weeks"}

    # Search (rather than strip-and-int) so surrounding words such as "약"
    # do not make the integer conversion fail.
    relative = re.search(r"(\d+)\s*(분|시간|일|주)\s*전", text)
    if relative:
        amount, unit = relative.groups()
        try:
            return now - timedelta(**{unit_to_kwarg[unit]: int(amount)})
        except OverflowError:
            return now

    absolute = re.search(r"(\d{4})\.\s*(\d{1,2})\.\s*(\d{1,2})", text)
    if absolute:
        year, month, day = (int(part) for part in absolute.groups())
        try:
            return datetime(year, month, day)
        except ValueError:
            # Digits matched the pattern but do not form a real calendar date.
            return now

    return now  # Default to now if unknown format

def _looks_like_timestamp(label):
    """Return True if *label* resembles a relative time or a YYYY.MM.DD date."""
    return label.endswith("전") or label[:4].isdigit()


def extract_data(soup):
    """Collect one row per article from a parsed news search result page.

    Args:
        soup: Parsed document (BeautifulSoup) of a search result page.

    Returns:
        A list of ``[title, news_agency, date, link]`` lists, one per
        ``div.news_area`` inside ``div.group_news``. Fields that cannot be
        found are the string ``"N/A"``. An empty list is returned (and a
        notice printed) when the ``group_news`` container is missing.
    """
    result = []

    container = soup.find("div", class_="group_news")
    if not container:
        # Name the class that was actually searched for.
        print("No div with class 'group_news' found")
        return result

    for article in container.find_all("div", class_="news_area"):
        link = title = news_agency = date = "N/A"

        # Link and title are only taken when the anchor carries both attributes.
        title_tag = article.find("a", class_="news_tit")
        if title_tag and "href" in title_tag.attrs and "title" in title_tag.attrs:
            link = title_tag["href"]
            title = title_tag["title"]

        info_tag = article.find("div", class_="info_group")
        if info_tag:
            press_tag = info_tag.find("a", class_="info press")
            if press_tag:
                news_agency = press_tag.get_text(strip=True)

            # The info group can hold several span.info labels and the first
            # one is not necessarily the timestamp (NOTE(review): e.g. a
            # newspaper page label - confirm against live markup). Prefer a
            # label that looks like a time; otherwise keep the first label.
            labels = [
                span.get_text(strip=True)
                for span in info_tag.find_all("span", class_="info")
            ]
            if labels:
                time_label = next(
                    (label for label in labels if _looks_like_timestamp(label)),
                    labels[0],
                )
                # Minute/hour labels keep their time of day; coarser ones are
                # reported at midnight.
                if "분" in time_label or "시간" in time_label:
                    fmt = '%Y-%m-%d %H:%M:00'
                else:
                    fmt = '%Y-%m-%d 00:00:00'
                date = parse_relative_time(time_label).strftime(fmt)

        result.append([title, news_agency, date, link])

    return result

def get_request(keyword, sortidx, start=1, timeout=10):
    """Fetch one page of Naver news search results.

    Args:
        keyword: Search query text.
        sortidx: Value sent as the ``sort`` query parameter.
        start: Value sent as the ``start`` query parameter (1-based offset
            as used by the caller).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block indefinitely.

    Returns:
        The ``requests.Response`` on success, or ``None`` if the request
        failed or returned an error status (the failure is printed).
    """
    custom_header = {
        'referer': 'https://www.naver.com/',
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36',
    }

    url = "https://search.naver.com/search.naver"
    # All query parameters go through `params` so requests encodes them.
    query_params = {
        "where": "news",
        "sort": sortidx,
        "query": keyword,
        "start": start,
    }

    try:
        req = requests.get(
            url, headers=custom_header, params=query_params, timeout=timeout
        )
        # Raise an error for bad status codes
        req.raise_for_status()
    except requests.RequestException as e:
        print(f"Request failed: {e}")
        return None
    return req
    
def save_to_excel(list_act, keyword):
    """Write the scraped rows to ``result.xlsx`` and auto-size its columns.

    The workbook is created fresh on every call (any existing file is
    overwritten) and contains a single sheet named after the keyword.

    Args:
        list_act: Iterable of ``[title, news_agency, date, link]`` rows.
        keyword: Search keyword; its alphanumeric characters (at most 31,
            the Excel sheet-name limit) become the sheet name.
    """
    excel_path = "result.xlsx"
    # A keyword made only of punctuation would yield an empty, invalid sheet
    # name, so fall back to a generic one.
    sheet_name = ''.join(char for char in keyword if char.isalnum())[:31] or "Sheet1"

    df = pd.DataFrame(list_act, columns=['뉴스제목', '뉴스사', '게시일', '링크'])
    with pd.ExcelWriter(excel_path, engine="openpyxl") as writer:
        df.to_excel(writer, sheet_name=sheet_name, index=False)

    wb = load_workbook(excel_path)
    # Iterate the worksheet objects themselves so each sheet is resized.
    for ws in wb.worksheets:
        for col in ws.columns:
            column_letter = col[0].column_letter
            # Measure the text form of every non-empty cell; empty cells
            # contribute nothing to the width.
            max_length = max(
                (len(str(cell.value)) for cell in col if cell.value is not None),
                default=0,
            )
            ws.column_dimensions[column_letter].width = max_length + 5  # extra padding
    wb.save(excel_path)
    print(f"Data has been saved to '{excel_path}' (sheet: '{sheet_name}').")

def main():
    """Prompt for a keyword, scrape the result pages, and save the rows.

    Pages are fetched in order; the first failed request stops the crawl,
    and whatever was collected up to that point is still saved.
    """
    keyword = input('키워드를 입력하세요.\n > ')
    max_pages = 3  # number of result pages to fetch
    results_per_page = 10
    sortidx = 0  # sort option; per the original note, 1 selects newest-first

    collected = []
    # Offsets are 1-based: 1, 11, 21, ... one per page.
    last_offset = max_pages * results_per_page
    for offset in range(1, last_offset + 1, results_per_page):
        response = get_request(keyword, sortidx, start=offset)
        if not response:
            print("Failed to retrieve data.")
            break

        page = BeautifulSoup(response.text, "html.parser")
        collected.extend(extract_data(page))

    if not collected:
        print("No data to save.")
        return
    save_to_excel(collected, keyword)

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()