# Data Scheduling 실습

Python `schedule` 모듈은 작업(함수)을 일정 주기로 자동 실행할 수 있게 해주는 간단하고 직관적인 스케줄러 라이브러리입니다.

In [None]:
import schedule
import time
from datetime import datetime

def say_hello():
    """Print a greeting prefixed with the current local timestamp.

    Serves as the demo job that the scheduler invokes periodically.
    """
    timestamp = datetime.now()
    greeting = "안녕하세요! 3초마다 실행 중입니다 😊"
    print(f"[{timestamp}] {greeting}")

# Register say_hello to run every 3 seconds.
schedule.every(3).seconds.do(say_hello)
# Alternative: run a job once a day at a fixed time, e.g.
# schedule.every().day.at("09:00").do(job)

# Poll the scheduler once per second. The loop has no exit condition, so it
# keeps running until the kernel/process is interrupted.
while True:
    schedule.run_pending()  # run every job whose scheduled time has arrived
    time.sleep(1)           # wait one second between polls

In [None]:
from dotenv import load_dotenv
import os
load_dotenv()
import requests

# API key read from the environment (load_dotenv() above loads a .env file
# first); os.getenv returns None when the variable is not set.
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
# Base URL to which the YouTube Data API v3 endpoint paths are appended.
YOUTUBE_API_URL = 'https://www.googleapis.com/youtube/v3'

def _video_card(item: dict) -> dict:
    """Flatten one item of a ``videos`` API response into a plain record."""
    snippet = item.get('snippet', {})
    statistics = item.get('statistics', {})
    high_thumbnail = snippet.get('thumbnails', {}).get('high', {})
    view_count = statistics.get('viewCount')
    like_count = statistics.get('likeCount')

    return {
        "title": snippet.get('title', 'N/A'),
        "publishedDate": snippet.get('publishedAt', ''),
        "channelName": snippet.get('channelTitle', 'N/A'),
        "channelId": snippet.get('channelId', ''),
        "thumbnailUrl": high_thumbnail.get('url', ''),
        # Counts arrive as strings and may be absent from the response.
        "viewCount": int(view_count) if view_count is not None else None,
        "likeCount": int(like_count) if like_count is not None else None,
        "url": f"https://www.youtube.com/watch?v={item.get('id', '')}",
    }


def search_youtube_videos(query: str, max_results: int = 5) -> list:
    """Search YouTube for videos matching ``query`` and return their details.

    Two YouTube Data API v3 requests are made: ``search`` (ordered by date)
    to obtain video ids, then ``videos`` to obtain the snippet and statistics
    of those ids.

    Args:
        query: Search keyword.
        max_results: Maximum number of videos requested from the search
            endpoint. Defaults to 5.

    Returns:
        A list of dicts with the keys ``title``, ``publishedDate``,
        ``channelName``, ``channelId``, ``thumbnailUrl``, ``viewCount``,
        ``likeCount`` and ``url``. ``viewCount``/``likeCount`` are ``int`` or
        ``None`` when the API omits them. An empty list is returned when
        nothing is found, when the API key is missing, or when a request
        fails (the error is printed, not raised).
    """
    if not YOUTUBE_API_KEY:
        # Fail fast instead of sending a request with key=None.
        print("Error: YOUTUBE_API_KEY is not set.")
        return []

    # Without a timeout a stalled connection would block the caller forever.
    timeout_seconds = 10

    try:
        # 1. Search for videos. The query string is built by requests via
        #    `params`, and the URL is not printed so the key stays out of logs.
        print(f"Searching YouTube for: {query}")
        search_response = requests.get(
            f"{YOUTUBE_API_URL}/search",
            params={
                "part": "snippet",
                "q": query,
                "type": "video",
                "maxResults": max_results,
                "order": "date",
                "key": YOUTUBE_API_KEY,
            },
            timeout=timeout_seconds,
        )
        # Surface HTTP errors (quota, bad key) instead of reporting "no videos".
        search_response.raise_for_status()
        search_data = search_response.json()
        video_ids = [
            item['id']['videoId']
            for item in search_data.get('items', [])
            if item.get('id', {}).get('videoId')
        ]

        if not video_ids:
            print("No videos found for the query.")
            return []

        print(f"영상 정보 가져오는 중: {','.join(video_ids)}")
        details_response = requests.get(
            f"{YOUTUBE_API_URL}/videos",
            params={
                "part": "snippet,statistics",
                "id": ",".join(video_ids),
                "key": YOUTUBE_API_KEY,
            },
            timeout=timeout_seconds,
        )
        details_response.raise_for_status()
        details_data = details_response.json()

        # 2. Extract title, publish date, channel name/id, thumbnail URL,
        #    view count, like count and watch URL for each video.
        videos = [_video_card(item) for item in details_data.get('items', [])]

        if not videos:
            print("No video details could be fetched.")
            return []

        return videos

    except (requests.RequestException, ValueError, KeyError, TypeError, AttributeError) as e:
        # Best effort: network failures and malformed payloads yield [].
        # requests embeds the request URL (including the key parameter) in
        # its error messages, so redact the key before printing.
        print(f"Error: {str(e).replace(YOUTUBE_API_KEY, '***')}")
        return []

In [None]:
import os
import time
import requests
import pandas as pd
import schedule
from datetime import datetime

def auto_collect_youtube_news(keyword: str = "챌린지", csv_path: str = "youtube_ai_news.csv") -> None:
    """Fetch the latest videos for ``keyword`` and append unseen ones to a CSV.

    Videos whose ``url`` is already present in ``csv_path`` are skipped. A
    ``collected_at`` column holding the collection time is added to every new
    row. The header row is written only when the file is new or empty.

    Args:
        keyword: Search keyword passed to ``search_youtube_videos``.
        csv_path: Path of the CSV file that accumulates the results.
    """
    print(f"[{datetime.now()}] '{keyword}' 관련 영상 수집 중...")

    videos = search_youtube_videos(keyword)
    if not videos:
        print("새로운 영상이 없습니다.")
        return

    df_new = pd.DataFrame(videos)
    df_new["collected_at"] = datetime.now()

    # Treat a zero-byte file like a missing one: pd.read_csv raises
    # EmptyDataError on it, and it still needs a header row.
    has_rows = os.path.exists(csv_path) and os.path.getsize(csv_path) > 0

    if has_rows:
        # Only the url column is needed to detect already-saved videos.
        known_urls = pd.read_csv(csv_path, usecols=["url"])["url"]
        df_new = df_new[~df_new["url"].isin(known_urls)]

    if df_new.empty:
        print("저장할 새로운 영상이 없습니다.")
        return

    df_new.to_csv(csv_path, mode="a", index=False, header=not has_rows)
    print(f"{len(df_new)}개의 새 영상 저장 완료.")

# Remove every job currently registered on the default scheduler, then
# register the collector to run once a minute.
schedule.clear()
schedule.every(1).minutes.do(auto_collect_youtube_news)

# Poll forever. A failing job is reported and the loop keeps going; either
# way the next poll happens 60 seconds later.
while True:
    try:
        schedule.run_pending()
    except Exception as err:
        print(f"에러 발생: {err}")
    time.sleep(60)
