In [1]:
import os
from dotenv import load_dotenv
from slack_sdk import WebClient

# Load environment variables via python-dotenv (typically from a local .env file).
load_dotenv()

# Slack bot token and target channel; os.getenv returns None if a variable is unset.
SLACK_BOT_TOKEN = os.getenv("SLACK_BOT_TOKEN")
CHANNEL_NAME = os.getenv("CHANNEL_NAME")


In [None]:
import time
import re
import datetime
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
import json

# Initialize the Slack client.
client = WebClient(token=SLACK_BOT_TOKEN)  # token is the SLACK_BOT_TOKEN value read with os.getenv

# Send a message to Slack.
def send_message(message: str, channel: str = CHANNEL_NAME) -> bool:
    """Post ``message`` to a Slack channel using the module-level ``client``.

    Args:
        message: Text to post.
        channel: Channel to post to; defaults to ``CHANNEL_NAME``.

    Returns:
        True if ``chat_postMessage`` completed without raising, False if it
        raised. Errors are printed rather than propagated, so a failed post
        never interrupts the caller.
    """
    try:
        client.chat_postMessage(channel=channel, text=message)
    except Exception as e:
        # Deliberately best-effort: report the failure and let the caller go on.
        print(f"오류 발생: {e}")
        return False
    return True

오류 발생: The request to the Slack API failed. (url: https://slack.com/api/files.completeUploadExternal)
The server responded with: {'ok': False, 'error': 'channel_not_found'}


In [None]:
# Web crawling
def crawl_with_selenium() -> str:
    """Return the visible text of the article list on coinness.com.

    Starts a Chrome WebDriver, opens the article page, waits a fixed five
    seconds for the JavaScript-rendered content, and reads the text of the
    article-list container.

    Returns:
        The container's text, or an empty string if the container element
        could not be found or read.

    Raises:
        Exceptions raised while loading the page propagate to the caller.
        The browser is shut down in every case.
    """
    # NOTE(review): the selector relies on generated class names
    # (e.g. "hwmGSB"); it presumably breaks when the site is rebuilt.
    css = "#root > div > div.Wrap-sc-v065lx-0.hwmGSB > div > main > div.ContentContainer-sc-91rcal-0.jJHYjq > div.ArticleListContainer-sc-cj3rkv-0.fkxjqP"

    driver = webdriver.Chrome()
    try:
        # Navigation happens inside the try so that a failed page load cannot
        # leave the Chrome process running.
        driver.get("https://coinness.com/article")
        time.sleep(5)  # wait for JS rendering

        try:
            container = driver.find_element(By.CSS_SELECTOR, css)
            return container.text
        except Exception as e:
            print(f"[ERROR] 요소 찾기 실패: {e}")
            return ""
    finally:
        driver.quit()

# Parsing
def parse_news_text(raw_text: str) -> pd.DataFrame:
    """Turn the crawled article-list text into one DataFrame row per article.

    Blank lines are ignored and every line is stripped. A line consisting of
    exactly ``HH:MM`` starts a new article; the next line is taken as its
    date, the one after as its title, and all following lines (up to the next
    ``HH:MM`` line) as its content. Articles that lack a date or a title are
    dropped, as are any lines that appear before the first ``HH:MM`` line.

    Args:
        raw_text: Text of the article-list container.

    Returns:
        DataFrame with columns ``time``, ``date``, ``title``, ``content``.
    """
    columns = ["time", "date", "title", "content"]
    starts_article = re.compile(r'^\d{2}:\d{2}$').match

    # Step 1: bucket the non-empty lines into groups, each opened by a time line.
    groups = []
    for raw_line in raw_text.splitlines():
        text = raw_line.strip()
        if not text:
            continue
        if starts_article(text):
            groups.append([text])
        elif groups:
            groups[-1].append(text)

    # Step 2: keep only groups that carry at least a date and a title.
    records = []
    for stamp, *rest in groups:
        if len(rest) < 2:
            continue
        day, headline, *body = rest
        records.append({
            "time": stamp,
            "date": day,
            "title": headline,
            "content": "\n".join(body).strip(),
        })

    return pd.DataFrame(records, columns=columns)

def process_and_send(new_df: pd.DataFrame, csv_path: str, channel_name: str):
    """Post articles not yet stored in the CSV to Slack, then update the CSV.

    An article is identified by its full (time, date, title, content) tuple.
    Articles from ``new_df`` whose tuple is not already in the CSV are joined
    into one message and passed to ``send_message``; afterwards the CSV is
    rewritten with the stored and the new articles combined.

    Args:
        new_df: Freshly parsed articles with columns time/date/title/content.
        csv_path: CSV file holding previously seen articles. It may be
            missing or empty; it is (re)written unless ``new_df`` is empty.
        channel_name: Slack channel forwarded to ``send_message``.
    """
    columns = ["time", "date", "title", "content"]

    if new_df.empty:
        print("[INFO] 새로 추출된 기사가 없습니다.")
        return

    if os.path.exists(csv_path):
        try:
            # Read every cell back as the exact string that was written:
            # with default NA handling an empty content (or a title such as
            # "NA") becomes NaN, never matches the parsed value, and the same
            # article would be re-sent on every cycle. The file is written as
            # utf-8-sig, so read it with the same encoding.
            existing_df = pd.read_csv(
                csv_path,
                dtype=str,
                keep_default_na=False,
                encoding="utf-8-sig",
            )
        except pd.errors.EmptyDataError:
            # The file exists but has no content: start from an empty frame.
            existing_df = pd.DataFrame(columns=columns)
    else:
        existing_df = pd.DataFrame(columns=columns)

    # Signatures of everything already stored.
    seen = set(existing_df[columns].itertuples(index=False, name=None))

    # Keep the articles that are new, also de-duplicating within new_df itself.
    fresh_articles = []
    for signature in new_df[columns].itertuples(index=False, name=None):
        if signature in seen:
            continue
        seen.add(signature)
        fresh_articles.append(dict(zip(columns, signature)))

    if not fresh_articles:
        print("[INFO] 추가된 새 기사가 없습니다.")
    else:
        # All new articles go out as a single Slack message.
        final_msg = "".join(
            f"[{article['time']}][{article['date']}]\n"
            f"{article['title']}\n"
            f"{article['content']}\n"
            for article in fresh_articles
        )
        send_message(final_msg, channel_name)
        print(f"[INFO] 새 기사 {len(fresh_articles)}건이 Slack에 전송되었습니다.")

    # Write the merged result back; empty frames are skipped before concat.
    new_articles_df = pd.DataFrame(fresh_articles, columns=columns)
    frames = [frame for frame in (existing_df, new_articles_df) if not frame.empty]
    if frames:
        final_df = pd.concat(frames, ignore_index=True)
    else:
        final_df = pd.DataFrame(columns=columns)
    final_df = final_df.drop_duplicates(subset=columns)
    final_df.to_csv(csv_path, index=False, encoding="utf-8-sig")
    print(f"[INFO] CSV 저장 완료 (총 {len(final_df)}건).")



def run_autoupdate(interval = 300, channel_name: str = CHANNEL_NAME):
    """Poll the news page forever, posting new articles to Slack.

    Each cycle crawls the page, parses it, and hands the result to
    ``process_and_send`` together with ``NEWS/my_news.csv``. When the local
    date changes between cycles the CSV is deleted first. The loop runs until
    a KeyboardInterrupt is received.

    Args:
        interval: Seconds to sleep between cycles.
        channel_name: Slack channel passed on to ``process_and_send``.
    """
    output_dir = "NEWS"
    os.makedirs(output_dir, exist_ok=True)
    csv_path = os.path.join(output_dir, "my_news.csv")

    minutes = int(interval/60)
    print(f"[INFO] 자동 업데이트 시작 ({minutes}분 간격)")
    tracked_day = datetime.date.today()

    try:
        while True:
            cycle_start = datetime.datetime.now()
            current_day = cycle_start.date()

            # (A) Day rollover: drop the stored CSV so the new day starts clean.
            if current_day != tracked_day:
                if os.path.exists(csv_path):
                    os.remove(csv_path)
                    print(f"[INFO] 날짜 변경으로 CSV({csv_path})를 초기화했습니다.")
                tracked_day = current_day

            stamp = cycle_start.strftime('%Y-%m-%d %H:%M:%S')
            print(f"\n[INFO] 뉴스 확인 중... ({stamp})")

            # (B) Crawl and parse, (C) send what is new and merge into the CSV.
            parsed = parse_news_text(crawl_with_selenium())
            process_and_send(parsed, csv_path, channel_name)

            print(f"[INFO] {minutes}분 뒤 다음 업데이트 진행...")
            time.sleep(interval)

    except KeyboardInterrupt:
        print("[INFO] 자동 업데이트 종료 (KeyboardInterrupt).")

# def convert_articles_to_json(df):
#     """
#     DataFrame을 JSON 문자열로 변환.
#     (한글 깨짐 방지를 위해 ensure_ascii=False 사용)
#     """
#     # DataFrame -> list of dict
#     data_list = df.to_dict(orient="records")

#     # dict/list -> JSON
#     json_str = json.dumps(data_list, ensure_ascii=False, indent=2)
#     return json_str


# Main entry point: start the update loop with a 600-second interval.
if __name__ == "__main__":
    run_autoupdate(interval=600, channel_name=CHANNEL_NAME)


[INFO] 자동 업데이트 시작 (10분 간격)

[INFO] 뉴스 확인 중... (2025-01-26 17:15:02)
[INFO] 새 기사 1건이 Slack에 전송되었습니다.
[INFO] CSV 저장 완료 (총 11건).
[INFO] 10분 뒤 다음 업데이트 진행...
