<a href="https://colab.research.google.com/github/SeohyeonSunny/Crawling/blob/main/Youtube_%ED%81%AC%EB%A1%A4%EB%9F%AC_%EB%B9%84%EB%94%94%EC%98%A4_%EC%A0%95%EB%B3%B4_%2B_%EB%8C%93%EA%B8%80_%EC%A0%95%EB%B3%B4(id%2C_like).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import time
import itertools
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google.colab import files
import calendar
from datetime import datetime

# 여러 프로젝트의 API 키 설정
API_KEYS = ['AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c', 'AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg', 'AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw', 'AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U']  # 필요한 만큼 추가
api_key_iter = itertools.cycle(API_KEYS)  # API 키 순환

# 각 키의 사용량 추적
api_usage = {key: 0 for key in API_KEYS}
api_quota = 10000  # YouTube Data API 할당량 (일반적으로 10,000)

def build_youtube_api():
    api_key = next(api_key_iter)
    print(f"Using API key: {api_key} (usage: {api_usage[api_key]}/{api_quota})")
    return build('youtube', 'v3', developerKey=api_key), api_key

def search_videos(query, start_date, end_date, region_code='KR'):
    videos = []
    next_page_token = None
    page_count = 0
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.search().list(
                part='snippet',
                q=query,
                type='video',
                publishedAfter=start_date.isoformat() + 'Z',
                publishedBefore=end_date.isoformat() + 'Z',
                maxResults=50,
                pageToken=next_page_token,
                regionCode=region_code  # 한국으로 제한
            )
            response = request.execute()
            videos.extend(response['items'])
            next_page_token = response.get('nextPageToken')
            api_usage[api_key] += 100  # 예시: 한 번의 요청이 100 쿼리를 사용한다고 가정
            page_count += 1
            print(f"Page {page_count} processed, total videos collected: {len(videos)}")
            if not next_page_token:
                break
            time.sleep(1)
        except HttpError as e:
            if e.resp.status == 403:
                print("Quota exceeded for the current API key. Switching API key.")
                time.sleep(5)  # 잠시 대기 후 재시도
            elif e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)  # 잠시 대기 후 재시도
            else:
                raise e
    return videos

def get_video_info(video_id):
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.videos().list(
                part='snippet,statistics',
                id=video_id
            )
            response = request.execute()
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            return response['items'][0]
        except HttpError as e:
            if e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e

def get_channel_info(channel_id):
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.channels().list(
                part='snippet,statistics',
                id=channel_id
            )
            response = request.execute()
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            return response['items'][0]
        except HttpError as e:
            if e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e

def extract_comment_data(comments):
    comment_data = []
    for comment in comments:
        top_comment = comment['snippet']['topLevelComment']['snippet']
        comment_text = top_comment['textDisplay']
        comment_date = top_comment['publishedAt']
        author_channel_id = top_comment.get('authorChannelId', {}).get('value', None)
        like_count = top_comment.get('likeCount', 0)
        comment_data.append({
            'Comment': comment_text,
            'Comment Published At': comment_date,
            'Author Channel ID': author_channel_id,
            'Comment Likes': like_count
        })

        # 대댓글 가져오기
        if 'replies' in comment:
            for reply in comment['replies']['comments']:
                reply_snippet = reply['snippet']
                reply_text = reply_snippet['textDisplay']
                reply_date = reply_snippet['publishedAt']
                reply_author_channel_id = reply_snippet.get('authorChannelId', {}).get('value', None)
                reply_like_count = reply_snippet.get('likeCount', 0)
                comment_data.append({
                    'Comment': reply_text,
                    'Comment Published At': reply_date,
                    'Author Channel ID': reply_author_channel_id,
                    'Comment Likes': reply_like_count
                })

    return comment_data

def get_comments(video_id, max_comments=None):
    comments = []
    next_page_token = None
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.commentThreads().list(
                part='snippet,replies',
                videoId=video_id,
                maxResults=100,  # 한 페이지에 최대 100개의 댓글
                pageToken=next_page_token
            )
            response = request.execute()
            comments.extend(response['items'])
            next_page_token = response.get('nextPageToken')
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            if not next_page_token or (max_comments and len(comments) >= max_comments):
                break
            time.sleep(1)
        except HttpError as e:
            error_reason = e.error_details[0]['reason']
            if error_reason == 'commentsDisabled':
                print(f"Comments are disabled for video ID {video_id}.")
                break
            elif e.resp.status == 403:
                print("Quota exceeded for the current API key. Switching API key.")
                time.sleep(5)
            elif e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e
    return comments

def save_data_to_csv(video_data, filename='youtube_combined_data.csv'):
    all_data = []
    for video in video_data:
        base_info = {
            'Video ID': video['Video ID'],
            'Video Title': video['Video Title'],
            'Channel Title': video['Channel Title'],
            'Views': video['Views'],
            'Likes': video['Likes'],
            'Comments': video['Comments'],
            'Published At': video['Published At'],
            'Subscribers': video['Subscribers']
        }
        for comment in video['Comments Data']:
            data = base_info.copy()
            data.update(comment)
            all_data.append(data)
        if not video['Comments Data']:  # 댓글이 없는 경우 기본 정보만 추가
            all_data.append(base_info)

    df_combined = pd.DataFrame(all_data)

    # CSV 파일로 저장 (UTF-8 인코딩, lineterminator 설정)
    df_combined.to_csv(filename, index=False, encoding='utf-8', lineterminator='\n')

    # 파일 다운로드
    files.download(filename)

def read_csv_in_chunks(filename, chunksize=10000):
    chunks = []
    for chunk in pd.read_csv(filename, chunksize=chunksize, encoding='utf-8', engine='python'):
        chunks.append(chunk)
    return pd.concat(chunks, ignore_index=True)

def contains_query(text, query):
    return query in text

def main():
    search_query = '난민'  # 검색할 키워드
    year = 2021  # 검색할 연도
    month = 8  # 검색할 월

    start_date = datetime(year, month, 1)
    last_day = calendar.monthrange(year, month)[1]
    end_date = datetime(year, month, last_day)

    print(f"Searching videos from {start_date} to {end_date}")

    # 새로운 데이터 수집을 위한 비어 있는 리스트 초기화
    all_videos = []
    total_videos_found = 0
    filtered_videos_found = 0

    try:
        videos = search_videos(search_query, start_date, end_date, region_code='KR')
        query_videos = [video for video in videos if contains_query(video['snippet']['title'], search_query)]
        all_videos.extend(query_videos)
        total_videos_found += len(videos)  # 검색된 모든 비디오 개수
        filtered_videos_found += len(query_videos)  # 쿼리에 포함된 비디오 개수
        print(f"Total videos found: {total_videos_found}")
        print(f"Filtered videos found: {filtered_videos_found}")

        # 주기적으로 데이터를 저장
        if len(all_videos) % 300 == 0:  # 예시로 300개 단위로 저장
            save_data_to_csv(all_videos, f'youtube_partial_data_{year}_{month}.csv')
    except HttpError as e:
        print(f"An error occurred: {e}")
        print("Saving collected data so far...")
        save_data_to_csv(all_videos, f'youtube_partial_data_{year}_{month}.csv')
        return

    video_data = []
    try:
        for index, video in enumerate(all_videos):
            video_info = {}
            video_id = video['id']['videoId']
            snippet_info = video['snippet']
            statistics_info = get_video_info(video_id)['statistics']
            channel_info = get_channel_info(snippet_info['channelId'])['statistics']

            # 비디오 정보 추가
            video_info['Video ID'] = video_id
            video_info['Video Title'] = snippet_info['title']
            video_info['Channel Title'] = snippet_info['channelTitle']
            video_info['Views'] = statistics_info.get('viewCount', 0)
            video_info['Likes'] = statistics_info.get('likeCount', 0)
            video_info['Comments'] = statistics_info.get('commentCount', 0)
            video_info['Published At'] = snippet_info['publishedAt']
            video_info['Subscribers'] = channel_info.get('subscriberCount', 0)

            print(f"Video Title: {video_info['Video Title']}")

            # 댓글 정보 추가
            comments = get_comments(video_id)
            video_comments = extract_comment_data(comments)

            video_info['Comments Data'] = video_comments
            video_data.append(video_info)

            # 주기적으로 데이터를 저장
            if (index + 1) % 100 == 0:  # 예시로 100개 단위로 저장
                save_data_to_csv(video_data, f'youtube_partial_video_data_{year}_{month}.csv')

    except HttpError as e:
        print(f"An error occurred while processing videos: {e}")
        print("Saving collected video data so far...")
        save_data_to_csv(video_data, f'youtube_partial_video_data_{year}_{month}.csv')
        return

    # 모든 비디오 정보를 데이터프레임으로 저장하고 파일 다운로드
    save_data_to_csv(video_data, f'youtube_combined_data_{year}_{month}.csv')

    # CSV 파일에 저장된 비디오 개수 확인
    df_combined = read_csv_in_chunks(f'youtube_combined_data_{year}_{month}.csv')
    csv_video_count = len(df_combined['Video ID'].unique())

    # 총 비디오 개수와 CSV 파일에 저장된 비디오 개수 비교
    print(f"Filtered videos found: {filtered_videos_found}")
    print(f"Total unique videos saved in CSV: {csv_video_count}")

if __name__ == "__main__":
    main()


Searching videos from 2021-08-01 00:00:00 to 2021-08-31 00:00:00
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 0/10000)
Page 1 processed, total videos collected: 50
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 0/10000)
Page 2 processed, total videos collected: 100
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 0/10000)
Page 3 processed, total videos collected: 150
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 0/10000)
Page 4 processed, total videos collected: 200
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 100/10000)
Page 5 processed, total videos collected: 250
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 100/10000)
Page 6 processed, total videos collected: 300
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 100/10000)
Page 7 processed, total videos collected: 350
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 100/10000)
Page 8 processed, total videos

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 406/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 307/10000)
Video Title: [평택]아프간 난민 평택으로 들끓는 지역 여론
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 307/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 407/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 407/10000)
Video Title: 미 “아프간 난민, 한국 등 미군기지 수용”…주한미군 “지시 받은 바 없어” / KBS 2021.08.22.
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 308/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 308/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 408/10000)
Video Title: [김주하의 &#39;그런데&#39;] 사전 분양이 만든 &#39;청약 난민&#39; - 2021.08.27 [MBN 종합뉴스]
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 408/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 309/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 



Video Title: 아프간 사태+난민+엠바고까지 논리의 흐름을 알아봅시다. | 로스쿨 면접 시사이슈 |  메가로스쿨 김종수 라이브 방송 | 로스쿨 준비생 | 법학전문대학원 면접 준비
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 427/10000)
Comments are disabled for video ID kpBKJLxhKys.
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 328/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 328/10000)
Video Title: 아프간 난민 한국 오나…수용 의견 엇갈려 [GOODTV NEWS 20210823]
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 428/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 427/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 329/10000)
Video Title: 아프카니스탄 난민 수용지는 / 중국 출산장려정책으로 전환 / 바이든 정부 한국주재대사는 누가? [황기식의 국제시장][210823 부산MBC 라디오 자갈치아지매]
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 329/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 429/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 428/10000)
Video Title: 올해만 55만



Video Title: 주일학교 친구들을 위한 난민과 이민 이야기(feat.동화책 읽어주는 신부님).
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 439/10000)
Comments are disabled for video ID yZFyD6bAIEw.
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 438/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 340/10000)
Video Title: [이슈톡] 에어비앤비의 선행, 아프간 난민 2만 명에 임시 숙소 (2021.08.26/뉴스투데이/MBC)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 340/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 439/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 439/10000)
Video Title: 위험지역인데 왜 선교하냐구요? 우리도 아프간 난민 받자구요?(아프간 선교 전문가 박종상 목사 인터뷰④)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 341/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 341/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 440/10000)
Video Title: 정부, 한국 도운 아프간인 국내 이송 대책 강구 중...난민 수용은 신중 / YTN
Using API key: AIzaSyCWyty



Video Title: [What is a refugee?]#refugee #난민 #영어책읽기
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 360/10000)
Comments are disabled for video ID qqDvAaTjYt0.
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 360/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 459/10000)
Video Title: [사사건건 플러스] 아프간 협력자 내일 입국…난민은?_210825(수)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 459/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 360/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 361/10000)
Video Title: 오현주 &quot;&#39;난민&#39; 두 글자 지우는 법무부 출입국관리법 시행령 개정은 꼼수이자 비겁한 행위&quot;
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 460/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 460/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 361/10000)
Video Title: 독서논술책이랑:난민 소년과 수상한 이웃-난민소년을 캠프로 돌려보내지 않고 함께 살려는 변호사와 이웃주민들의 따뜻한 마음~☺
Using API key: A



Comments are disabled for video ID pcFyKYF9Geo.
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 482/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 482/10000)
Video Title: 고향을 잃은 난민들 | 트위치 클립 일일 하이라이트 | 클립라이트
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 383/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 383/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 483/10000)
Video Title: [2021-08-23 월요 시사] 시청자 배심원 : &#39;아프가니스탄 난민 국내수용&#39; 찬반 토론
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 483/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 384/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 384/10000)
Video Title: [Busan Worldwide] 미얀마 난민 소녀가수 #완이화 | The Interview
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 484/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 484/10000)
Using API key: AIzaSyAIuT

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 489/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 390/10000)
Video Title: 평온한 일상 브이로그 | 난민 친구들이랑 영어 공부. 메밀국수. MANGO 언박싱. 투움바 파스타. 여름의 끝자락에 마치 폭풍전야같은 일상
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 390/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 490/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 490/10000)
Video Title: 버지니아, 매릴랜드 등 20여개주 아프간 난민 도착 시작
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 391/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 391/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 491/10000)
Video Title: [국회톡] 아프간 난민 수용 협의... &quot;가능성 전혀 없다&quot;
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 491/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 392/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 392/

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Filtered videos found: 290
Total unique videos saved in CSV: 611


In [None]:
import pandas as pd
import time
import itertools
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google.colab import files
import calendar
from datetime import datetime

# 여러 프로젝트의 API 키 설정
API_KEYS = ['AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c', 'AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg', 'AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw', 'AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U']  # 필요한 만큼 추가
api_key_iter = itertools.cycle(API_KEYS)  # API 키 순환

# 각 키의 사용량 추적
api_usage = {key: 0 for key in API_KEYS}
api_quota = 10000  # YouTube Data API 할당량 (일반적으로 10,000)

def build_youtube_api():
    api_key = next(api_key_iter)
    print(f"Using API key: {api_key} (usage: {api_usage[api_key]}/{api_quota})")
    return build('youtube', 'v3', developerKey=api_key), api_key

def search_videos(query, start_date, end_date, region_code='KR'):
    videos = []
    next_page_token = None
    page_count = 0
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.search().list(
                part='snippet',
                q=query,
                type='video',
                publishedAfter=start_date.isoformat() + 'Z',
                publishedBefore=end_date.isoformat() + 'Z',
                maxResults=50,
                pageToken=next_page_token,
                regionCode=region_code  # 한국으로 제한
            )
            response = request.execute()
            videos.extend(response['items'])
            next_page_token = response.get('nextPageToken')
            api_usage[api_key] += 100  # 예시: 한 번의 요청이 100 쿼리를 사용한다고 가정
            page_count += 1
            print(f"Page {page_count} processed, total videos collected: {len(videos)}")
            if not next_page_token:
                break
            time.sleep(1)
        except HttpError as e:
            if e.resp.status == 403:
                print("Quota exceeded for the current API key. Switching API key.")
                time.sleep(5)  # 잠시 대기 후 재시도
            elif e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)  # 잠시 대기 후 재시도
            else:
                raise e
    return videos

def get_video_info(video_id):
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.videos().list(
                part='snippet,statistics',
                id=video_id
            )
            response = request.execute()
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            return response['items'][0]
        except HttpError as e:
            if e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e

def get_channel_info(channel_id):
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.channels().list(
                part='snippet,statistics',
                id=channel_id
            )
            response = request.execute()
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            return response['items'][0]
        except HttpError as e:
            if e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e

def extract_comment_data(comments):
    comment_data = []
    for comment in comments:
        top_comment = comment['snippet']['topLevelComment']['snippet']
        comment_text = top_comment['textDisplay']
        comment_date = top_comment['publishedAt']
        author_channel_id = top_comment.get('authorChannelId', {}).get('value', None)
        like_count = top_comment.get('likeCount', 0)
        comment_data.append({
            'Comment': comment_text,
            'Comment Published At': comment_date,
            'Author Channel ID': author_channel_id,
            'Comment Likes': like_count
        })

        # 대댓글 가져오기
        if 'replies' in comment:
            for reply in comment['replies']['comments']:
                reply_snippet = reply['snippet']
                reply_text = reply_snippet['textDisplay']
                reply_date = reply_snippet['publishedAt']
                reply_author_channel_id = reply_snippet.get('authorChannelId', {}).get('value', None)
                reply_like_count = reply_snippet.get('likeCount', 0)
                comment_data.append({
                    'Comment': reply_text,
                    'Comment Published At': reply_date,
                    'Author Channel ID': reply_author_channel_id,
                    'Comment Likes': reply_like_count
                })

    return comment_data

def get_comments(video_id, max_comments=None):
    comments = []
    next_page_token = None
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.commentThreads().list(
                part='snippet,replies',
                videoId=video_id,
                maxResults=100,  # 한 페이지에 최대 100개의 댓글
                pageToken=next_page_token
            )
            response = request.execute()
            comments.extend(response['items'])
            next_page_token = response.get('nextPageToken')
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            if not next_page_token or (max_comments and len(comments) >= max_comments):
                break
            time.sleep(1)
        except HttpError as e:
            error_reason = e.error_details[0]['reason']
            if error_reason == 'commentsDisabled':
                print(f"Comments are disabled for video ID {video_id}.")
                break
            elif e.resp.status == 403:
                print("Quota exceeded for the current API key. Switching API key.")
                time.sleep(5)
            elif e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e
    return comments

def save_data_to_csv(video_data, filename='youtube_combined_data.csv'):
    all_data = []
    for video in video_data:
        base_info = {
            'Video ID': video['Video ID'],
            'Video Title': video['Video Title'],
            'Channel Title': video['Channel Title'],
            'Views': video['Views'],
            'Likes': video['Likes'],
            'Comments': video['Comments'],
            'Published At': video['Published At'],
            'Subscribers': video['Subscribers']
        }
        for comment in video['Comments Data']:
            data = base_info.copy()
            data.update(comment)
            all_data.append(data)
        if not video['Comments Data']:  # 댓글이 없는 경우 기본 정보만 추가
            all_data.append(base_info)

    df_combined = pd.DataFrame(all_data)

    # CSV 파일로 저장 (UTF-8 인코딩, lineterminator 설정)
    df_combined.to_csv(filename, index=False, encoding='utf-8', lineterminator='\n')

    # 파일 다운로드
    files.download(filename)

def read_csv_in_chunks(filename, chunksize=10000):
    chunks = []
    for chunk in pd.read_csv(filename, chunksize=chunksize, encoding='utf-8', engine='python'):
        chunks.append(chunk)
    return pd.concat(chunks, ignore_index=True)

def contains_query(text, query):
    return query in text

def main():
    search_query = '난민'  # 검색할 키워드
    year = 2017  # 검색할 연도
    month = 4  # 검색할 월

    start_date = datetime(year, month, 1)
    last_day = calendar.monthrange(year, month)[1]
    end_date = datetime(year, month, last_day)

    print(f"Searching videos from {start_date} to {end_date}")

    # 새로운 데이터 수집을 위한 비어 있는 리스트 초기화
    all_videos = []
    total_videos_found = 0
    filtered_videos_found = 0

    try:
        videos = search_videos(search_query, start_date, end_date, region_code='KR')
        query_videos = [video for video in videos if contains_query(video['snippet']['title'], search_query)]
        all_videos.extend(query_videos)
        total_videos_found += len(videos)  # 검색된 모든 비디오 개수
        filtered_videos_found += len(query_videos)  # 쿼리에 포함된 비디오 개수
        print(f"Total videos found: {total_videos_found}")
        print(f"Filtered videos found: {filtered_videos_found}")

        # 주기적으로 데이터를 저장
        if len(all_videos) % 300 == 0:  # 예시로 300개 단위로 저장
            save_data_to_csv(all_videos, f'youtube_partial_data_{year}_{month}.csv')
    except HttpError as e:
        print(f"An error occurred: {e}")
        print("Saving collected data so far...")
        save_data_to_csv(all_videos, f'youtube_partial_data_{year}_{month}.csv')
        return

    video_data = []
    try:
        for index, video in enumerate(all_videos):
            video_info = {}
            video_id = video['id']['videoId']
            snippet_info = video['snippet']
            statistics_info = get_video_info(video_id)['statistics']
            channel_info = get_channel_info(snippet_info['channelId'])['statistics']

            # 비디오 정보 추가
            video_info['Video ID'] = video_id
            video_info['Video Title'] = snippet_info['title']
            video_info['Channel Title'] = snippet_info['channelTitle']
            video_info['Views'] = statistics_info.get('viewCount', 0)
            video_info['Likes'] = statistics_info.get('likeCount', 0)
            video_info['Comments'] = statistics_info.get('commentCount', 0)
            video_info['Published At'] = snippet_info['publishedAt']
            video_info['Subscribers'] = channel_info.get('subscriberCount', 0)

            print(f"Video Title: {video_info['Video Title']}")

            # 댓글 정보 추가
            comments = get_comments(video_id)
            video_comments = extract_comment_data(comments)

            video_info['Comments Data'] = video_comments
            video_data.append(video_info)

            # 주기적으로 데이터를 저장
            if (index + 1) % 100 == 0:  # 예시로 100개 단위로 저장
                save_data_to_csv(video_data, f'youtube_partial_video_data_{year}_{month}.csv')

    except HttpError as e:
        print(f"An error occurred while processing videos: {e}")
        print("Saving collected video data so far...")
        save_data_to_csv(video_data, f'youtube_partial_video_data_{year}_{month}.csv')
        return

    # 모든 비디오 정보를 데이터프레임으로 저장하고 파일 다운로드
    save_data_to_csv(video_data, f'youtube_combined_data_{year}_{month}.csv')

    # CSV 파일에 저장된 비디오 개수 확인
    df_combined = read_csv_in_chunks(f'youtube_combined_data_{year}_{month}.csv')
    csv_video_count = len(df_combined['Video ID'].unique())

    # 총 비디오 개수와 CSV 파일에 저장된 비디오 개수 비교
    print(f"Filtered videos found: {filtered_videos_found}")
    print(f"Total unique videos saved in CSV: {csv_video_count}")

if __name__ == "__main__":
    main()


Searching videos from 2017-04-01 00:00:00 to 2017-04-30 00:00:00
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 0/10000)
Page 1 processed, total videos collected: 50
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 0/10000)
Page 2 processed, total videos collected: 100
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 0/10000)
Page 3 processed, total videos collected: 150
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 0/10000)
Page 4 processed, total videos collected: 200
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 100/10000)
Page 5 processed, total videos collected: 250
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 100/10000)
Page 6 processed, total videos collected: 300
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 100/10000)
Page 7 processed, total videos collected: 350
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 100/10000)
Page 8 processed, total videos

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Filtered videos found: 28
Total unique videos saved in CSV: 27


In [None]:
import pandas as pd
import time
import itertools
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google.colab import files
import calendar
from datetime import datetime

# 여러 프로젝트의 API 키 설정
API_KEYS = ['AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c', 'AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg', 'AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw', 'AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U']  # 필요한 만큼 추가
api_key_iter = itertools.cycle(API_KEYS)  # API 키 순환

# 각 키의 사용량 추적
api_usage = {key: 0 for key in API_KEYS}
api_quota = 10000  # YouTube Data API 할당량 (일반적으로 10,000)

def build_youtube_api():
    api_key = next(api_key_iter)
    print(f"Using API key: {api_key} (usage: {api_usage[api_key]}/{api_quota})")
    return build('youtube', 'v3', developerKey=api_key), api_key

def search_videos(query, start_date, end_date, region_code='KR'):
    videos = []
    next_page_token = None
    page_count = 0
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.search().list(
                part='snippet',
                q=query,
                type='video',
                publishedAfter=start_date.isoformat() + 'Z',
                publishedBefore=end_date.isoformat() + 'Z',
                maxResults=50,
                pageToken=next_page_token,
                regionCode=region_code  # 한국으로 제한
            )
            response = request.execute()
            videos.extend(response['items'])
            next_page_token = response.get('nextPageToken')
            api_usage[api_key] += 100  # 예시: 한 번의 요청이 100 쿼리를 사용한다고 가정
            page_count += 1
            print(f"Page {page_count} processed, total videos collected: {len(videos)}")
            if not next_page_token:
                break
            time.sleep(1)
        except HttpError as e:
            if e.resp.status == 403:
                print("Quota exceeded for the current API key. Switching API key.")
                time.sleep(5)  # 잠시 대기 후 재시도
            elif e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)  # 잠시 대기 후 재시도
            else:
                raise e
    return videos

def get_video_info(video_id):
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.videos().list(
                part='snippet,statistics',
                id=video_id
            )
            response = request.execute()
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            return response['items'][0]
        except HttpError as e:
            if e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e

def get_channel_info(channel_id):
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.channels().list(
                part='snippet,statistics',
                id=channel_id
            )
            response = request.execute()
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            return response['items'][0]
        except HttpError as e:
            if e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e

def extract_comment_data(comments):
    comment_data = []
    for comment in comments:
        top_comment = comment['snippet']['topLevelComment']['snippet']
        comment_text = top_comment['textDisplay']
        comment_date = top_comment['publishedAt']
        author_channel_id = top_comment.get('authorChannelId', {}).get('value', None)
        like_count = top_comment.get('likeCount', 0)
        comment_data.append({
            'Comment': comment_text,
            'Comment Published At': comment_date,
            'Author Channel ID': author_channel_id,
            'Comment Likes': like_count
        })

        # 대댓글 가져오기
        if 'replies' in comment:
            for reply in comment['replies']['comments']:
                reply_snippet = reply['snippet']
                reply_text = reply_snippet['textDisplay']
                reply_date = reply_snippet['publishedAt']
                reply_author_channel_id = reply_snippet.get('authorChannelId', {}).get('value', None)
                reply_like_count = reply_snippet.get('likeCount', 0)
                comment_data.append({
                    'Comment': reply_text,
                    'Comment Published At': reply_date,
                    'Author Channel ID': reply_author_channel_id,
                    'Comment Likes': reply_like_count
                })

    return comment_data

def get_comments(video_id, max_comments=None):
    comments = []
    next_page_token = None
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.commentThreads().list(
                part='snippet,replies',
                videoId=video_id,
                maxResults=100,  # 한 페이지에 최대 100개의 댓글
                pageToken=next_page_token
            )
            response = request.execute()
            comments.extend(response['items'])
            next_page_token = response.get('nextPageToken')
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            if not next_page_token or (max_comments and len(comments) >= max_comments):
                break
            time.sleep(1)
        except HttpError as e:
            error_reason = e.error_details[0]['reason']
            if error_reason == 'commentsDisabled':
                print(f"Comments are disabled for video ID {video_id}.")
                break
            elif e.resp.status == 403:
                print("Quota exceeded for the current API key. Switching API key.")
                time.sleep(5)
            elif e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e
    return comments

def save_data_to_csv(video_data, filename='youtube_combined_data.csv'):
    all_data = []
    for video in video_data:
        base_info = {
            'Video ID': video['Video ID'],
            'Video Title': video['Video Title'],
            'Channel Title': video['Channel Title'],
            'Views': video['Views'],
            'Likes': video['Likes'],
            'Comments': video['Comments'],
            'Published At': video['Published At'],
            'Subscribers': video['Subscribers']
        }
        for comment in video['Comments Data']:
            data = base_info.copy()
            data.update(comment)
            all_data.append(data)
        if not video['Comments Data']:  # 댓글이 없는 경우 기본 정보만 추가
            all_data.append(base_info)

    df_combined = pd.DataFrame(all_data)

    # CSV 파일로 저장 (UTF-8 인코딩, lineterminator 설정)
    df_combined.to_csv(filename, index=False, encoding='utf-8', lineterminator='\n')

    # 파일 다운로드
    files.download(filename)

def read_csv_in_chunks(filename, chunksize=10000):
    chunks = []
    for chunk in pd.read_csv(filename, chunksize=chunksize, encoding='utf-8', engine='python'):
        chunks.append(chunk)
    return pd.concat(chunks, ignore_index=True)

def contains_query(text, query):
    return query in text

def main():
    search_query = '난민'  # 검색할 키워드
    year = 2017  # 검색할 연도
    month = 5  # 검색할 월

    start_date = datetime(year, month, 1)
    last_day = calendar.monthrange(year, month)[1]
    end_date = datetime(year, month, last_day)

    print(f"Searching videos from {start_date} to {end_date}")

    # 새로운 데이터 수집을 위한 비어 있는 리스트 초기화
    all_videos = []
    total_videos_found = 0
    filtered_videos_found = 0

    try:
        videos = search_videos(search_query, start_date, end_date, region_code='KR')
        query_videos = [video for video in videos if contains_query(video['snippet']['title'], search_query)]
        all_videos.extend(query_videos)
        total_videos_found += len(videos)  # 검색된 모든 비디오 개수
        filtered_videos_found += len(query_videos)  # 쿼리에 포함된 비디오 개수
        print(f"Total videos found: {total_videos_found}")
        print(f"Filtered videos found: {filtered_videos_found}")

        # 주기적으로 데이터를 저장
        if len(all_videos) % 300 == 0:  # 예시로 300개 단위로 저장
            save_data_to_csv(all_videos, f'youtube_partial_data_{year}_{month}.csv')
    except HttpError as e:
        print(f"An error occurred: {e}")
        print("Saving collected data so far...")
        save_data_to_csv(all_videos, f'youtube_partial_data_{year}_{month}.csv')
        return

    video_data = []
    try:
        for index, video in enumerate(all_videos):
            video_info = {}
            video_id = video['id']['videoId']
            snippet_info = video['snippet']
            statistics_info = get_video_info(video_id)['statistics']
            channel_info = get_channel_info(snippet_info['channelId'])['statistics']

            # 비디오 정보 추가
            video_info['Video ID'] = video_id
            video_info['Video Title'] = snippet_info['title']
            video_info['Channel Title'] = snippet_info['channelTitle']
            video_info['Views'] = statistics_info.get('viewCount', 0)
            video_info['Likes'] = statistics_info.get('likeCount', 0)
            video_info['Comments'] = statistics_info.get('commentCount', 0)
            video_info['Published At'] = snippet_info['publishedAt']
            video_info['Subscribers'] = channel_info.get('subscriberCount', 0)

            print(f"Video Title: {video_info['Video Title']}")

            # 댓글 정보 추가
            comments = get_comments(video_id)
            video_comments = extract_comment_data(comments)

            video_info['Comments Data'] = video_comments
            video_data.append(video_info)

            # 주기적으로 데이터를 저장
            if (index + 1) % 100 == 0:  # 예시로 100개 단위로 저장
                save_data_to_csv(video_data, f'youtube_partial_video_data_{year}_{month}.csv')

    except HttpError as e:
        print(f"An error occurred while processing videos: {e}")
        print("Saving collected video data so far...")
        save_data_to_csv(video_data, f'youtube_partial_video_data_{year}_{month}.csv')
        return

    # 모든 비디오 정보를 데이터프레임으로 저장하고 파일 다운로드
    save_data_to_csv(video_data, f'youtube_combined_data_{year}_{month}.csv')

    # CSV 파일에 저장된 비디오 개수 확인
    df_combined = read_csv_in_chunks(f'youtube_combined_data_{year}_{month}.csv')
    csv_video_count = len(df_combined['Video ID'].unique())

    # 총 비디오 개수와 CSV 파일에 저장된 비디오 개수 비교
    print(f"Filtered videos found: {filtered_videos_found}")
    print(f"Total unique videos saved in CSV: {csv_video_count}")

if __name__ == "__main__":
    main()


Searching videos from 2017-05-01 00:00:00 to 2017-05-31 00:00:00
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 0/10000)
Page 1 processed, total videos collected: 50
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 0/10000)
Page 2 processed, total videos collected: 100
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 0/10000)
Page 3 processed, total videos collected: 150
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 0/10000)
Page 4 processed, total videos collected: 174
Total videos found: 174
Filtered videos found: 16
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 100/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 100/10000)
Video Title: [세상교과서] 지금 난민에게 가장 필요한 것은? / YTN
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 100/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 100/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 101/10000

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Filtered videos found: 16
Total unique videos saved in CSV: 16


In [None]:
import pandas as pd
import time
import itertools
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google.colab import files
import calendar
from datetime import datetime

# 여러 프로젝트의 API 키 설정
API_KEYS = ['AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c', 'AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg', 'AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw', 'AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U']  # 필요한 만큼 추가
api_key_iter = itertools.cycle(API_KEYS)  # API 키 순환

# 각 키의 사용량 추적
api_usage = {key: 0 for key in API_KEYS}
api_quota = 10000  # YouTube Data API 할당량 (일반적으로 10,000)

def build_youtube_api():
    api_key = next(api_key_iter)
    print(f"Using API key: {api_key} (usage: {api_usage[api_key]}/{api_quota})")
    return build('youtube', 'v3', developerKey=api_key), api_key

def search_videos(query, start_date, end_date, region_code='KR'):
    videos = []
    next_page_token = None
    page_count = 0
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.search().list(
                part='snippet',
                q=query,
                type='video',
                publishedAfter=start_date.isoformat() + 'Z',
                publishedBefore=end_date.isoformat() + 'Z',
                maxResults=50,
                pageToken=next_page_token,
                regionCode=region_code  # 한국으로 제한
            )
            response = request.execute()
            videos.extend(response['items'])
            next_page_token = response.get('nextPageToken')
            api_usage[api_key] += 100  # 예시: 한 번의 요청이 100 쿼리를 사용한다고 가정
            page_count += 1
            print(f"Page {page_count} processed, total videos collected: {len(videos)}")
            if not next_page_token:
                break
            time.sleep(1)
        except HttpError as e:
            if e.resp.status == 403:
                print("Quota exceeded for the current API key. Switching API key.")
                time.sleep(5)  # 잠시 대기 후 재시도
            elif e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)  # 잠시 대기 후 재시도
            else:
                raise e
    return videos

def get_video_info(video_id):
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.videos().list(
                part='snippet,statistics',
                id=video_id
            )
            response = request.execute()
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            return response['items'][0]
        except HttpError as e:
            if e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e

def get_channel_info(channel_id):
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.channels().list(
                part='snippet,statistics',
                id=channel_id
            )
            response = request.execute()
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            return response['items'][0]
        except HttpError as e:
            if e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e

def extract_comment_data(comments):
    comment_data = []
    for comment in comments:
        top_comment = comment['snippet']['topLevelComment']['snippet']
        comment_text = top_comment['textDisplay']
        comment_date = top_comment['publishedAt']
        author_channel_id = top_comment.get('authorChannelId', {}).get('value', None)
        like_count = top_comment.get('likeCount', 0)
        comment_data.append({
            'Comment': comment_text,
            'Comment Published At': comment_date,
            'Author Channel ID': author_channel_id,
            'Comment Likes': like_count
        })

        # 대댓글 가져오기
        if 'replies' in comment:
            for reply in comment['replies']['comments']:
                reply_snippet = reply['snippet']
                reply_text = reply_snippet['textDisplay']
                reply_date = reply_snippet['publishedAt']
                reply_author_channel_id = reply_snippet.get('authorChannelId', {}).get('value', None)
                reply_like_count = reply_snippet.get('likeCount', 0)
                comment_data.append({
                    'Comment': reply_text,
                    'Comment Published At': reply_date,
                    'Author Channel ID': reply_author_channel_id,
                    'Comment Likes': reply_like_count
                })

    return comment_data

def get_comments(video_id, max_comments=None):
    comments = []
    next_page_token = None
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.commentThreads().list(
                part='snippet,replies',
                videoId=video_id,
                maxResults=100,  # 한 페이지에 최대 100개의 댓글
                pageToken=next_page_token
            )
            response = request.execute()
            comments.extend(response['items'])
            next_page_token = response.get('nextPageToken')
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            if not next_page_token or (max_comments and len(comments) >= max_comments):
                break
            time.sleep(1)
        except HttpError as e:
            error_reason = e.error_details[0]['reason']
            if error_reason == 'commentsDisabled':
                print(f"Comments are disabled for video ID {video_id}.")
                break
            elif e.resp.status == 403:
                print("Quota exceeded for the current API key. Switching API key.")
                time.sleep(5)
            elif e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e
    return comments

def save_data_to_csv(video_data, filename='youtube_combined_data.csv'):
    all_data = []
    for video in video_data:
        base_info = {
            'Video ID': video['Video ID'],
            'Video Title': video['Video Title'],
            'Channel Title': video['Channel Title'],
            'Views': video['Views'],
            'Likes': video['Likes'],
            'Comments': video['Comments'],
            'Published At': video['Published At'],
            'Subscribers': video['Subscribers']
        }
        for comment in video['Comments Data']:
            data = base_info.copy()
            data.update(comment)
            all_data.append(data)
        if not video['Comments Data']:  # 댓글이 없는 경우 기본 정보만 추가
            all_data.append(base_info)

    df_combined = pd.DataFrame(all_data)

    # CSV 파일로 저장 (UTF-8 인코딩, lineterminator 설정)
    df_combined.to_csv(filename, index=False, encoding='utf-8', lineterminator='\n')

    # 파일 다운로드
    files.download(filename)

def read_csv_in_chunks(filename, chunksize=10000):
    chunks = []
    for chunk in pd.read_csv(filename, chunksize=chunksize, encoding='utf-8', engine='python'):
        chunks.append(chunk)
    return pd.concat(chunks, ignore_index=True)

def contains_query(text, query):
    return query in text

def main():
    search_query = '난민'  # 검색할 키워드
    year = 2017  # 검색할 연도
    month = 6  # 검색할 월

    start_date = datetime(year, month, 1)
    last_day = calendar.monthrange(year, month)[1]
    end_date = datetime(year, month, last_day)

    print(f"Searching videos from {start_date} to {end_date}")

    # 새로운 데이터 수집을 위한 비어 있는 리스트 초기화
    all_videos = []
    total_videos_found = 0
    filtered_videos_found = 0

    try:
        videos = search_videos(search_query, start_date, end_date, region_code='KR')
        query_videos = [video for video in videos if contains_query(video['snippet']['title'], search_query)]
        all_videos.extend(query_videos)
        total_videos_found += len(videos)  # 검색된 모든 비디오 개수
        filtered_videos_found += len(query_videos)  # 쿼리에 포함된 비디오 개수
        print(f"Total videos found: {total_videos_found}")
        print(f"Filtered videos found: {filtered_videos_found}")

        # 주기적으로 데이터를 저장
        if len(all_videos) % 300 == 0:  # 예시로 300개 단위로 저장
            save_data_to_csv(all_videos, f'youtube_partial_data_{year}_{month}.csv')
    except HttpError as e:
        print(f"An error occurred: {e}")
        print("Saving collected data so far...")
        save_data_to_csv(all_videos, f'youtube_partial_data_{year}_{month}.csv')
        return

    video_data = []
    try:
        for index, video in enumerate(all_videos):
            video_info = {}
            video_id = video['id']['videoId']
            snippet_info = video['snippet']
            statistics_info = get_video_info(video_id)['statistics']
            channel_info = get_channel_info(snippet_info['channelId'])['statistics']

            # 비디오 정보 추가
            video_info['Video ID'] = video_id
            video_info['Video Title'] = snippet_info['title']
            video_info['Channel Title'] = snippet_info['channelTitle']
            video_info['Views'] = statistics_info.get('viewCount', 0)
            video_info['Likes'] = statistics_info.get('likeCount', 0)
            video_info['Comments'] = statistics_info.get('commentCount', 0)
            video_info['Published At'] = snippet_info['publishedAt']
            video_info['Subscribers'] = channel_info.get('subscriberCount', 0)

            print(f"Video Title: {video_info['Video Title']}")

            # 댓글 정보 추가
            comments = get_comments(video_id)
            video_comments = extract_comment_data(comments)

            video_info['Comments Data'] = video_comments
            video_data.append(video_info)

            # 주기적으로 데이터를 저장
            if (index + 1) % 100 == 0:  # 예시로 100개 단위로 저장
                save_data_to_csv(video_data, f'youtube_partial_video_data_{year}_{month}.csv')

    except HttpError as e:
        print(f"An error occurred while processing videos: {e}")
        print("Saving collected video data so far...")
        save_data_to_csv(video_data, f'youtube_partial_video_data_{year}_{month}.csv')
        return

    # 모든 비디오 정보를 데이터프레임으로 저장하고 파일 다운로드
    save_data_to_csv(video_data, f'youtube_combined_data_{year}_{month}.csv')

    # CSV 파일에 저장된 비디오 개수 확인
    df_combined = read_csv_in_chunks(f'youtube_combined_data_{year}_{month}.csv')
    csv_video_count = len(df_combined['Video ID'].unique())

    # 총 비디오 개수와 CSV 파일에 저장된 비디오 개수 비교
    print(f"Filtered videos found: {filtered_videos_found}")
    print(f"Total unique videos saved in CSV: {csv_video_count}")

if __name__ == "__main__":
    main()


Searching videos from 2017-06-01 00:00:00 to 2017-06-30 00:00:00
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 0/10000)
Page 1 processed, total videos collected: 50
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 0/10000)
Page 2 processed, total videos collected: 100
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 0/10000)
Page 3 processed, total videos collected: 150
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 0/10000)
Page 4 processed, total videos collected: 200
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 100/10000)
Page 5 processed, total videos collected: 241
Total videos found: 241
Filtered videos found: 40
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 100/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 100/10000)
Video Title: 브로커가 보트 엔진 훔쳐 지중해 난민 126명 사망 / SBS
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 100/10000)
Using API key: AIzaSyAd6

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Filtered videos found: 40
Total unique videos saved in CSV: 39


In [None]:
import pandas as pd
import time
import itertools
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google.colab import files
import calendar
from datetime import datetime

# 여러 프로젝트의 API 키 설정
API_KEYS = ['AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c', 'AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg', 'AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw', 'AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U']  # 필요한 만큼 추가
api_key_iter = itertools.cycle(API_KEYS)  # API 키 순환

# 각 키의 사용량 추적
api_usage = {key: 0 for key in API_KEYS}
api_quota = 10000  # YouTube Data API 할당량 (일반적으로 10,000)

def build_youtube_api():
    api_key = next(api_key_iter)
    print(f"Using API key: {api_key} (usage: {api_usage[api_key]}/{api_quota})")
    return build('youtube', 'v3', developerKey=api_key), api_key

def search_videos(query, start_date, end_date, region_code='KR'):
    videos = []
    next_page_token = None
    page_count = 0
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.search().list(
                part='snippet',
                q=query,
                type='video',
                publishedAfter=start_date.isoformat() + 'Z',
                publishedBefore=end_date.isoformat() + 'Z',
                maxResults=50,
                pageToken=next_page_token,
                regionCode=region_code  # 한국으로 제한
            )
            response = request.execute()
            videos.extend(response['items'])
            next_page_token = response.get('nextPageToken')
            api_usage[api_key] += 100  # 예시: 한 번의 요청이 100 쿼리를 사용한다고 가정
            page_count += 1
            print(f"Page {page_count} processed, total videos collected: {len(videos)}")
            if not next_page_token:
                break
            time.sleep(1)
        except HttpError as e:
            if e.resp.status == 403:
                print("Quota exceeded for the current API key. Switching API key.")
                time.sleep(5)  # 잠시 대기 후 재시도
            elif e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)  # 잠시 대기 후 재시도
            else:
                raise e
    return videos

def get_video_info(video_id):
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.videos().list(
                part='snippet,statistics',
                id=video_id
            )
            response = request.execute()
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            return response['items'][0]
        except HttpError as e:
            if e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e

def get_channel_info(channel_id):
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.channels().list(
                part='snippet,statistics',
                id=channel_id
            )
            response = request.execute()
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            return response['items'][0]
        except HttpError as e:
            if e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e

def extract_comment_data(comments):
    comment_data = []
    for comment in comments:
        top_comment = comment['snippet']['topLevelComment']['snippet']
        comment_text = top_comment['textDisplay']
        comment_date = top_comment['publishedAt']
        author_channel_id = top_comment.get('authorChannelId', {}).get('value', None)
        like_count = top_comment.get('likeCount', 0)
        comment_data.append({
            'Comment': comment_text,
            'Comment Published At': comment_date,
            'Author Channel ID': author_channel_id,
            'Comment Likes': like_count
        })

        # 대댓글 가져오기
        if 'replies' in comment:
            for reply in comment['replies']['comments']:
                reply_snippet = reply['snippet']
                reply_text = reply_snippet['textDisplay']
                reply_date = reply_snippet['publishedAt']
                reply_author_channel_id = reply_snippet.get('authorChannelId', {}).get('value', None)
                reply_like_count = reply_snippet.get('likeCount', 0)
                comment_data.append({
                    'Comment': reply_text,
                    'Comment Published At': reply_date,
                    'Author Channel ID': reply_author_channel_id,
                    'Comment Likes': reply_like_count
                })

    return comment_data

def get_comments(video_id, max_comments=None):
    comments = []
    next_page_token = None
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.commentThreads().list(
                part='snippet,replies',
                videoId=video_id,
                maxResults=100,  # 한 페이지에 최대 100개의 댓글
                pageToken=next_page_token
            )
            response = request.execute()
            comments.extend(response['items'])
            next_page_token = response.get('nextPageToken')
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            if not next_page_token or (max_comments and len(comments) >= max_comments):
                break
            time.sleep(1)
        except HttpError as e:
            error_reason = e.error_details[0]['reason']
            if error_reason == 'commentsDisabled':
                print(f"Comments are disabled for video ID {video_id}.")
                break
            elif e.resp.status == 403:
                print("Quota exceeded for the current API key. Switching API key.")
                time.sleep(5)
            elif e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e
    return comments

def save_data_to_csv(video_data, filename='youtube_combined_data.csv'):
    all_data = []
    for video in video_data:
        base_info = {
            'Video ID': video['Video ID'],
            'Video Title': video['Video Title'],
            'Channel Title': video['Channel Title'],
            'Views': video['Views'],
            'Likes': video['Likes'],
            'Comments': video['Comments'],
            'Published At': video['Published At'],
            'Subscribers': video['Subscribers']
        }
        for comment in video['Comments Data']:
            data = base_info.copy()
            data.update(comment)
            all_data.append(data)
        if not video['Comments Data']:  # 댓글이 없는 경우 기본 정보만 추가
            all_data.append(base_info)

    df_combined = pd.DataFrame(all_data)

    # CSV 파일로 저장 (UTF-8 인코딩, lineterminator 설정)
    df_combined.to_csv(filename, index=False, encoding='utf-8', lineterminator='\n')

    # 파일 다운로드
    files.download(filename)

def read_csv_in_chunks(filename, chunksize=10000):
    chunks = []
    for chunk in pd.read_csv(filename, chunksize=chunksize, encoding='utf-8', engine='python'):
        chunks.append(chunk)
    return pd.concat(chunks, ignore_index=True)

def contains_query(text, query):
    return query in text

def main():
    search_query = '난민'  # 검색할 키워드
    year = 2017  # 검색할 연도
    month = 7  # 검색할 월

    start_date = datetime(year, month, 1)
    last_day = calendar.monthrange(year, month)[1]
    end_date = datetime(year, month, last_day)

    print(f"Searching videos from {start_date} to {end_date}")

    # 새로운 데이터 수집을 위한 비어 있는 리스트 초기화
    all_videos = []
    total_videos_found = 0
    filtered_videos_found = 0

    try:
        videos = search_videos(search_query, start_date, end_date, region_code='KR')
        query_videos = [video for video in videos if contains_query(video['snippet']['title'], search_query)]
        all_videos.extend(query_videos)
        total_videos_found += len(videos)  # 검색된 모든 비디오 개수
        filtered_videos_found += len(query_videos)  # 쿼리에 포함된 비디오 개수
        print(f"Total videos found: {total_videos_found}")
        print(f"Filtered videos found: {filtered_videos_found}")

        # 주기적으로 데이터를 저장
        if len(all_videos) % 300 == 0:  # 예시로 300개 단위로 저장
            save_data_to_csv(all_videos, f'youtube_partial_data_{year}_{month}.csv')
    except HttpError as e:
        print(f"An error occurred: {e}")
        print("Saving collected data so far...")
        save_data_to_csv(all_videos, f'youtube_partial_data_{year}_{month}.csv')
        return

    video_data = []
    try:
        for index, video in enumerate(all_videos):
            video_info = {}
            video_id = video['id']['videoId']
            snippet_info = video['snippet']
            statistics_info = get_video_info(video_id)['statistics']
            channel_info = get_channel_info(snippet_info['channelId'])['statistics']

            # 비디오 정보 추가
            video_info['Video ID'] = video_id
            video_info['Video Title'] = snippet_info['title']
            video_info['Channel Title'] = snippet_info['channelTitle']
            video_info['Views'] = statistics_info.get('viewCount', 0)
            video_info['Likes'] = statistics_info.get('likeCount', 0)
            video_info['Comments'] = statistics_info.get('commentCount', 0)
            video_info['Published At'] = snippet_info['publishedAt']
            video_info['Subscribers'] = channel_info.get('subscriberCount', 0)

            print(f"Video Title: {video_info['Video Title']}")

            # 댓글 정보 추가
            comments = get_comments(video_id)
            video_comments = extract_comment_data(comments)

            video_info['Comments Data'] = video_comments
            video_data.append(video_info)

            # 주기적으로 데이터를 저장
            if (index + 1) % 100 == 0:  # 예시로 100개 단위로 저장
                save_data_to_csv(video_data, f'youtube_partial_video_data_{year}_{month}.csv')

    except HttpError as e:
        print(f"An error occurred while processing videos: {e}")
        print("Saving collected video data so far...")
        save_data_to_csv(video_data, f'youtube_partial_video_data_{year}_{month}.csv')
        return

    # 모든 비디오 정보를 데이터프레임으로 저장하고 파일 다운로드
    save_data_to_csv(video_data, f'youtube_combined_data_{year}_{month}.csv')

    # CSV 파일에 저장된 비디오 개수 확인
    df_combined = read_csv_in_chunks(f'youtube_combined_data_{year}_{month}.csv')
    csv_video_count = len(df_combined['Video ID'].unique())

    # 총 비디오 개수와 CSV 파일에 저장된 비디오 개수 비교
    print(f"Filtered videos found: {filtered_videos_found}")
    print(f"Total unique videos saved in CSV: {csv_video_count}")

if __name__ == "__main__":
    main()


Searching videos from 2017-07-01 00:00:00 to 2017-07-31 00:00:00
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 0/10000)
Page 1 processed, total videos collected: 50
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 0/10000)
Page 2 processed, total videos collected: 100
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 0/10000)
Page 3 processed, total videos collected: 150
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 0/10000)
Page 4 processed, total videos collected: 200
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 100/10000)
Page 5 processed, total videos collected: 200
Total videos found: 200
Filtered videos found: 51
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 100/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 100/10000)
Video Title: 미얀마 난민 “KBS 드라마 보며 새로운 삶 꿈꿨어요”
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 100/10000)
Using API key: AIzaSyAd6idgr



Video Title: 월드비전  &#39;난민이 된 아이들&#39;_난민이해교육 캠페인 애니메이션_샌드아트 박진아
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 112/10000)
Comments are disabled for video ID 3AJpTaylbio.
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 212/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 113/10000)
Video Title: [유엔난민기구] 2016 정우성 친선대사 레바논 미션 - Day 3
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 113/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 112/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 213/10000)
Video Title: 🚨 옥스팜 이라크 난민 긴급구호 #내전 #전쟁
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 114/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 114/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 113/10000)
Video Title: [유엔난민기구] 난민 아이들이 미래의 꿈에 다가갈 수 있도록 도와주세요
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 214/10000)




Comments are disabled for video ID tcnwiKbHndg.
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 115/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 115/10000)
Video Title: [유엔난민기구] 작가 할레드 호세이니가 한국 후원자들에게 보내는 메시지
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 114/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 214/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 116/10000)
Video Title: [유엔난민기구] 유엔난민기구의 성폭력 예방 및 대응
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 116/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 115/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 215/10000)
Video Title: 레바논 내 시리아 난민 150만 명@CGN투데이(2017.7.12)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 117/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 117/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 116

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Filtered videos found: 51
Total unique videos saved in CSV: 52


In [None]:
import pandas as pd
import time
import itertools
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google.colab import files
import calendar
from datetime import datetime

# 여러 프로젝트의 API 키 설정
API_KEYS = ['AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c', 'AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg', 'AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw', 'AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U']  # 필요한 만큼 추가
api_key_iter = itertools.cycle(API_KEYS)  # API 키 순환

# 각 키의 사용량 추적
api_usage = {key: 0 for key in API_KEYS}
api_quota = 10000  # YouTube Data API 할당량 (일반적으로 10,000)

def build_youtube_api():
    api_key = next(api_key_iter)
    print(f"Using API key: {api_key} (usage: {api_usage[api_key]}/{api_quota})")
    return build('youtube', 'v3', developerKey=api_key), api_key

def search_videos(query, start_date, end_date, region_code='KR'):
    videos = []
    next_page_token = None
    page_count = 0
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.search().list(
                part='snippet',
                q=query,
                type='video',
                publishedAfter=start_date.isoformat() + 'Z',
                publishedBefore=end_date.isoformat() + 'Z',
                maxResults=50,
                pageToken=next_page_token,
                regionCode=region_code  # 한국으로 제한
            )
            response = request.execute()
            videos.extend(response['items'])
            next_page_token = response.get('nextPageToken')
            api_usage[api_key] += 100  # 예시: 한 번의 요청이 100 쿼리를 사용한다고 가정
            page_count += 1
            print(f"Page {page_count} processed, total videos collected: {len(videos)}")
            if not next_page_token:
                break
            time.sleep(1)
        except HttpError as e:
            if e.resp.status == 403:
                print("Quota exceeded for the current API key. Switching API key.")
                time.sleep(5)  # 잠시 대기 후 재시도
            elif e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)  # 잠시 대기 후 재시도
            else:
                raise e
    return videos

def get_video_info(video_id):
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.videos().list(
                part='snippet,statistics',
                id=video_id
            )
            response = request.execute()
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            return response['items'][0]
        except HttpError as e:
            if e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e

def get_channel_info(channel_id):
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.channels().list(
                part='snippet,statistics',
                id=channel_id
            )
            response = request.execute()
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            return response['items'][0]
        except HttpError as e:
            if e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e

def extract_comment_data(comments):
    comment_data = []
    for comment in comments:
        top_comment = comment['snippet']['topLevelComment']['snippet']
        comment_text = top_comment['textDisplay']
        comment_date = top_comment['publishedAt']
        author_channel_id = top_comment.get('authorChannelId', {}).get('value', None)
        like_count = top_comment.get('likeCount', 0)
        comment_data.append({
            'Comment': comment_text,
            'Comment Published At': comment_date,
            'Author Channel ID': author_channel_id,
            'Comment Likes': like_count
        })

        # 대댓글 가져오기
        if 'replies' in comment:
            for reply in comment['replies']['comments']:
                reply_snippet = reply['snippet']
                reply_text = reply_snippet['textDisplay']
                reply_date = reply_snippet['publishedAt']
                reply_author_channel_id = reply_snippet.get('authorChannelId', {}).get('value', None)
                reply_like_count = reply_snippet.get('likeCount', 0)
                comment_data.append({
                    'Comment': reply_text,
                    'Comment Published At': reply_date,
                    'Author Channel ID': reply_author_channel_id,
                    'Comment Likes': reply_like_count
                })

    return comment_data

def get_comments(video_id, max_comments=None):
    comments = []
    next_page_token = None
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.commentThreads().list(
                part='snippet,replies',
                videoId=video_id,
                maxResults=100,  # 한 페이지에 최대 100개의 댓글
                pageToken=next_page_token
            )
            response = request.execute()
            comments.extend(response['items'])
            next_page_token = response.get('nextPageToken')
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            if not next_page_token or (max_comments and len(comments) >= max_comments):
                break
            time.sleep(1)
        except HttpError as e:
            error_reason = e.error_details[0]['reason']
            if error_reason == 'commentsDisabled':
                print(f"Comments are disabled for video ID {video_id}.")
                break
            elif e.resp.status == 403:
                print("Quota exceeded for the current API key. Switching API key.")
                time.sleep(5)
            elif e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e
    return comments

def save_data_to_csv(video_data, filename='youtube_combined_data.csv'):
    all_data = []
    for video in video_data:
        base_info = {
            'Video ID': video['Video ID'],
            'Video Title': video['Video Title'],
            'Channel Title': video['Channel Title'],
            'Views': video['Views'],
            'Likes': video['Likes'],
            'Comments': video['Comments'],
            'Published At': video['Published At'],
            'Subscribers': video['Subscribers']
        }
        for comment in video['Comments Data']:
            data = base_info.copy()
            data.update(comment)
            all_data.append(data)
        if not video['Comments Data']:  # 댓글이 없는 경우 기본 정보만 추가
            all_data.append(base_info)

    df_combined = pd.DataFrame(all_data)

    # CSV 파일로 저장 (UTF-8 인코딩, lineterminator 설정)
    df_combined.to_csv(filename, index=False, encoding='utf-8', lineterminator='\n')

    # 파일 다운로드
    files.download(filename)

def read_csv_in_chunks(filename, chunksize=10000):
    chunks = []
    for chunk in pd.read_csv(filename, chunksize=chunksize, encoding='utf-8', engine='python'):
        chunks.append(chunk)
    return pd.concat(chunks, ignore_index=True)

def contains_query(text, query):
    return query in text

def main():
    search_query = '난민'  # 검색할 키워드
    year = 2017  # 검색할 연도
    month = 8  # 검색할 월

    start_date = datetime(year, month, 1)
    last_day = calendar.monthrange(year, month)[1]
    end_date = datetime(year, month, last_day)

    print(f"Searching videos from {start_date} to {end_date}")

    # 새로운 데이터 수집을 위한 비어 있는 리스트 초기화
    all_videos = []
    total_videos_found = 0
    filtered_videos_found = 0

    try:
        videos = search_videos(search_query, start_date, end_date, region_code='KR')
        query_videos = [video for video in videos if contains_query(video['snippet']['title'], search_query)]
        all_videos.extend(query_videos)
        total_videos_found += len(videos)  # 검색된 모든 비디오 개수
        filtered_videos_found += len(query_videos)  # 쿼리에 포함된 비디오 개수
        print(f"Total videos found: {total_videos_found}")
        print(f"Filtered videos found: {filtered_videos_found}")

        # 주기적으로 데이터를 저장
        if len(all_videos) % 300 == 0:  # 예시로 300개 단위로 저장
            save_data_to_csv(all_videos, f'youtube_partial_data_{year}_{month}.csv')
    except HttpError as e:
        print(f"An error occurred: {e}")
        print("Saving collected data so far...")
        save_data_to_csv(all_videos, f'youtube_partial_data_{year}_{month}.csv')
        return

    video_data = []
    try:
        for index, video in enumerate(all_videos):
            video_info = {}
            video_id = video['id']['videoId']
            snippet_info = video['snippet']
            statistics_info = get_video_info(video_id)['statistics']
            channel_info = get_channel_info(snippet_info['channelId'])['statistics']

            # 비디오 정보 추가
            video_info['Video ID'] = video_id
            video_info['Video Title'] = snippet_info['title']
            video_info['Channel Title'] = snippet_info['channelTitle']
            video_info['Views'] = statistics_info.get('viewCount', 0)
            video_info['Likes'] = statistics_info.get('likeCount', 0)
            video_info['Comments'] = statistics_info.get('commentCount', 0)
            video_info['Published At'] = snippet_info['publishedAt']
            video_info['Subscribers'] = channel_info.get('subscriberCount', 0)

            print(f"Video Title: {video_info['Video Title']}")

            # 댓글 정보 추가
            comments = get_comments(video_id)
            video_comments = extract_comment_data(comments)

            video_info['Comments Data'] = video_comments
            video_data.append(video_info)

            # 주기적으로 데이터를 저장
            if (index + 1) % 100 == 0:  # 예시로 100개 단위로 저장
                save_data_to_csv(video_data, f'youtube_partial_video_data_{year}_{month}.csv')

    except HttpError as e:
        print(f"An error occurred while processing videos: {e}")
        print("Saving collected video data so far...")
        save_data_to_csv(video_data, f'youtube_partial_video_data_{year}_{month}.csv')
        return

    # 모든 비디오 정보를 데이터프레임으로 저장하고 파일 다운로드
    save_data_to_csv(video_data, f'youtube_combined_data_{year}_{month}.csv')

    # CSV 파일에 저장된 비디오 개수 확인
    df_combined = read_csv_in_chunks(f'youtube_combined_data_{year}_{month}.csv')
    csv_video_count = len(df_combined['Video ID'].unique())

    # 총 비디오 개수와 CSV 파일에 저장된 비디오 개수 비교
    print(f"Filtered videos found: {filtered_videos_found}")
    print(f"Total unique videos saved in CSV: {csv_video_count}")

if __name__ == "__main__":
    main()


Searching videos from 2017-08-01 00:00:00 to 2017-08-31 00:00:00
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 0/10000)
Page 1 processed, total videos collected: 50
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 0/10000)
Page 2 processed, total videos collected: 100
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 0/10000)
Page 3 processed, total videos collected: 150
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 0/10000)
Page 4 processed, total videos collected: 200
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 100/10000)
Page 5 processed, total videos collected: 250
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 100/10000)
Page 6 processed, total videos collected: 300
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 100/10000)
Page 7 processed, total videos collected: 350
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 100/10000)
Page 8 processed, total videos



Comments are disabled for video ID CwVgqHLatlI.
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 203/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 204/10000)
Video Title: 그리스 난민들이 거리로 나선 이유 / SBS
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 204/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 203/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 204/10000)
Video Title: &#39;아랍의 봄&#39; 시위 주도한 요르단 남성 난민 인정
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 205/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 205/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 204/10000)
Video Title: 난민 밀입국업자들, 이틀간 280명 예멘 바다에 빠뜨려 / YTN
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 205/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 206/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 206/10000)
V



Video Title: [세계시민교육] 초등학생 난민이해교육
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 220/10000)
Comments are disabled for video ID eBz0r1PdM94.
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 221/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 222/10000)
Video Title: 0802클릭정보와이드_&quot;난민체험캠프, 그 현장에서 우리 아이들&quot;
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 222/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 220/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 222/10000)
Video Title: 【 탬탬버린／21 Days 4화 】 - 난민들의 삶을 보여주는 교훈 스토리 갓겜
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 223/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 223/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 221/10000)
Video Title: 【 탬탬버린／21 Days 2화 】 - 난민들의 삶을 보여주는 교훈 스토리 갓겜
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 223/10000)
Usi

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Filtered videos found: 42
Total unique videos saved in CSV: 42


In [None]:
import pandas as pd
import time
import itertools
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google.colab import files
import calendar
from datetime import datetime

# 여러 프로젝트의 API 키 설정
API_KEYS = ['AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c', 'AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg', 'AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw', 'AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U']  # 필요한 만큼 추가
api_key_iter = itertools.cycle(API_KEYS)  # API 키 순환

# 각 키의 사용량 추적
api_usage = {key: 0 for key in API_KEYS}
api_quota = 10000  # YouTube Data API 할당량 (일반적으로 10,000)

def build_youtube_api():
    api_key = next(api_key_iter)
    print(f"Using API key: {api_key} (usage: {api_usage[api_key]}/{api_quota})")
    return build('youtube', 'v3', developerKey=api_key), api_key

def search_videos(query, start_date, end_date, region_code='KR'):
    videos = []
    next_page_token = None
    page_count = 0
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.search().list(
                part='snippet',
                q=query,
                type='video',
                publishedAfter=start_date.isoformat() + 'Z',
                publishedBefore=end_date.isoformat() + 'Z',
                maxResults=50,
                pageToken=next_page_token,
                regionCode=region_code  # 한국으로 제한
            )
            response = request.execute()
            videos.extend(response['items'])
            next_page_token = response.get('nextPageToken')
            api_usage[api_key] += 100  # 예시: 한 번의 요청이 100 쿼리를 사용한다고 가정
            page_count += 1
            print(f"Page {page_count} processed, total videos collected: {len(videos)}")
            if not next_page_token:
                break
            time.sleep(1)
        except HttpError as e:
            if e.resp.status == 403:
                print("Quota exceeded for the current API key. Switching API key.")
                time.sleep(5)  # 잠시 대기 후 재시도
            elif e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)  # 잠시 대기 후 재시도
            else:
                raise e
    return videos

def get_video_info(video_id):
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.videos().list(
                part='snippet,statistics',
                id=video_id
            )
            response = request.execute()
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            return response['items'][0]
        except HttpError as e:
            if e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e

def get_channel_info(channel_id):
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.channels().list(
                part='snippet,statistics',
                id=channel_id
            )
            response = request.execute()
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            return response['items'][0]
        except HttpError as e:
            if e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e

def extract_comment_data(comments):
    comment_data = []
    for comment in comments:
        top_comment = comment['snippet']['topLevelComment']['snippet']
        comment_text = top_comment['textDisplay']
        comment_date = top_comment['publishedAt']
        author_channel_id = top_comment.get('authorChannelId', {}).get('value', None)
        like_count = top_comment.get('likeCount', 0)
        comment_data.append({
            'Comment': comment_text,
            'Comment Published At': comment_date,
            'Author Channel ID': author_channel_id,
            'Comment Likes': like_count
        })

        # 대댓글 가져오기
        if 'replies' in comment:
            for reply in comment['replies']['comments']:
                reply_snippet = reply['snippet']
                reply_text = reply_snippet['textDisplay']
                reply_date = reply_snippet['publishedAt']
                reply_author_channel_id = reply_snippet.get('authorChannelId', {}).get('value', None)
                reply_like_count = reply_snippet.get('likeCount', 0)
                comment_data.append({
                    'Comment': reply_text,
                    'Comment Published At': reply_date,
                    'Author Channel ID': reply_author_channel_id,
                    'Comment Likes': reply_like_count
                })

    return comment_data

def get_comments(video_id, max_comments=None):
    comments = []
    next_page_token = None
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.commentThreads().list(
                part='snippet,replies',
                videoId=video_id,
                maxResults=100,  # 한 페이지에 최대 100개의 댓글
                pageToken=next_page_token
            )
            response = request.execute()
            comments.extend(response['items'])
            next_page_token = response.get('nextPageToken')
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            if not next_page_token or (max_comments and len(comments) >= max_comments):
                break
            time.sleep(1)
        except HttpError as e:
            error_reason = e.error_details[0]['reason']
            if error_reason == 'commentsDisabled':
                print(f"Comments are disabled for video ID {video_id}.")
                break
            elif e.resp.status == 403:
                print("Quota exceeded for the current API key. Switching API key.")
                time.sleep(5)
            elif e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e
    return comments

def save_data_to_csv(video_data, filename='youtube_combined_data.csv'):
    all_data = []
    for video in video_data:
        base_info = {
            'Video ID': video['Video ID'],
            'Video Title': video['Video Title'],
            'Channel Title': video['Channel Title'],
            'Views': video['Views'],
            'Likes': video['Likes'],
            'Comments': video['Comments'],
            'Published At': video['Published At'],
            'Subscribers': video['Subscribers']
        }
        for comment in video['Comments Data']:
            data = base_info.copy()
            data.update(comment)
            all_data.append(data)
        if not video['Comments Data']:  # 댓글이 없는 경우 기본 정보만 추가
            all_data.append(base_info)

    df_combined = pd.DataFrame(all_data)

    # CSV 파일로 저장 (UTF-8 인코딩, lineterminator 설정)
    df_combined.to_csv(filename, index=False, encoding='utf-8', lineterminator='\n')

    # 파일 다운로드
    files.download(filename)

def read_csv_in_chunks(filename, chunksize=10000):
    chunks = []
    for chunk in pd.read_csv(filename, chunksize=chunksize, encoding='utf-8', engine='python'):
        chunks.append(chunk)
    return pd.concat(chunks, ignore_index=True)

def contains_query(text, query):
    return query in text

def main():
    search_query = '난민'  # 검색할 키워드
    year = 2017 # 검색할 연도
    month = 9 # 검색할 월

    start_date = datetime(year, month, 1)
    last_day = calendar.monthrange(year, month)[1]
    end_date = datetime(year, month, last_day)

    print(f"Searching videos from {start_date} to {end_date}")

    # 새로운 데이터 수집을 위한 비어 있는 리스트 초기화
    all_videos = []
    total_videos_found = 0
    filtered_videos_found = 0

    try:
        videos = search_videos(search_query, start_date, end_date, region_code='KR')
        query_videos = [video for video in videos if contains_query(video['snippet']['title'], search_query)]
        all_videos.extend(query_videos)
        total_videos_found += len(videos)  # 검색된 모든 비디오 개수
        filtered_videos_found += len(query_videos)  # 쿼리에 포함된 비디오 개수
        print(f"Total videos found: {total_videos_found}")
        print(f"Filtered videos found: {filtered_videos_found}")

        # 주기적으로 데이터를 저장
        if len(all_videos) % 300 == 0:  # 예시로 300개 단위로 저장
            save_data_to_csv(all_videos, f'youtube_partial_data_{year}_{month}.csv')
    except HttpError as e:
        print(f"An error occurred: {e}")
        print("Saving collected data so far...")
        save_data_to_csv(all_videos, f'youtube_partial_data_{year}_{month}.csv')
        return

    video_data = []
    try:
        for index, video in enumerate(all_videos):
            video_info = {}
            video_id = video['id']['videoId']
            snippet_info = video['snippet']
            statistics_info = get_video_info(video_id)['statistics']
            channel_info = get_channel_info(snippet_info['channelId'])['statistics']

            # 비디오 정보 추가
            video_info['Video ID'] = video_id
            video_info['Video Title'] = snippet_info['title']
            video_info['Channel Title'] = snippet_info['channelTitle']
            video_info['Views'] = statistics_info.get('viewCount', 0)
            video_info['Likes'] = statistics_info.get('likeCount', 0)
            video_info['Comments'] = statistics_info.get('commentCount', 0)
            video_info['Published At'] = snippet_info['publishedAt']
            video_info['Subscribers'] = channel_info.get('subscriberCount', 0)

            print(f"Video Title: {video_info['Video Title']}")

            # 댓글 정보 추가
            comments = get_comments(video_id)
            video_comments = extract_comment_data(comments)

            video_info['Comments Data'] = video_comments
            video_data.append(video_info)

            # 주기적으로 데이터를 저장
            if (index + 1) % 100 == 0:  # 예시로 100개 단위로 저장
                save_data_to_csv(video_data, f'youtube_partial_video_data_{year}_{month}.csv')

    except HttpError as e:
        print(f"An error occurred while processing videos: {e}")
        print("Saving collected video data so far...")
        save_data_to_csv(video_data, f'youtube_partial_video_data_{year}_{month}.csv')
        return

    # 모든 비디오 정보를 데이터프레임으로 저장하고 파일 다운로드
    save_data_to_csv(video_data, f'youtube_combined_data_{year}_{month}.csv')

    # CSV 파일에 저장된 비디오 개수 확인
    df_combined = read_csv_in_chunks(f'youtube_combined_data_{year}_{month}.csv')
    csv_video_count = len(df_combined['Video ID'].unique())

    # 총 비디오 개수와 CSV 파일에 저장된 비디오 개수 비교
    print(f"Filtered videos found: {filtered_videos_found}")
    print(f"Total unique videos saved in CSV: {csv_video_count}")

if __name__ == "__main__":
    main()


Searching videos from 2017-09-01 00:00:00 to 2017-09-30 00:00:00
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 0/10000)
Page 1 processed, total videos collected: 50
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 0/10000)
Page 2 processed, total videos collected: 100
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 0/10000)
Page 3 processed, total videos collected: 150
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 0/10000)
Page 4 processed, total videos collected: 200
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 100/10000)
Page 5 processed, total videos collected: 247
Total videos found: 247
Filtered videos found: 56
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 100/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 100/10000)
Video Title: &#39;난민 할당&#39; 놓고 EU와 동유럽 대립 심화 / YTN
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 100/10000)
Using API key: AIzaS



Comments are disabled for video ID q7Qxxpnmx4w.
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 215/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 116/10000)
Video Title: [난민, 그리고 한국]soㄴㄴㄴㄴs - Refugees, and Korea : SOㄴㄴㄴㄴS
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 116/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 115/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 216/10000)
Video Title: &#39;인종청소&#39; 미얀마 탈출 난민 43만…죽어가는 아이들 / SBS
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 117/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 117/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 116/10000)
Video Title: 국제 카리타스, 오늘부터 세계 난민 위한 글로벌 캠페인 전개
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 217/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 118/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJ



Comments are disabled for video ID WMnM-Cg_2x4.
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 218/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 119/10000)
Video Title: 유럽행 난민 사전 심사
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 119/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 117/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 219/10000)
Video Title: 로힝야족 난민 3명, 지뢰 폭발로 숨져 / YTN
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 120/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 120/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 118/10000)
Video Title: 래;코드 나눔의 공간에서 시리아 난민을 돕는 조금은 특별한 방법!
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 220/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 121/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 121/10000)
Video Title: 당신의 핸드폰은



Video Title: 어린이설교 &#39;난민&#39;
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 229/10000)
Comments are disabled for video ID mItuzPaaBJ4.
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 130/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 130/10000)
Video Title: 배설물 넘쳐나는 로힝야족 난민촌…&#39;보건 재앙&#39; 우려 / 연합뉴스TV (YonhapnewsTV)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 128/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 229/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 131/10000)
Video Title: 2015 09 24 아리랑TV 기획토론 UPFRONT, 77회 UN난민 난민해결을 위한 국제사회의 역할
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 131/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 129/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 230/10000)
Video Title: 우간다. 유엔난민선교간증. 배사랑목사. 아름다운선교방송
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 13



Comments are disabled for video ID qOp-sthkprY.
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 232/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 134/10000)
Video Title: [PUBG 스쿼드] 강북난민 메타 (소리없음)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 134/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 131/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 233/10000)
Video Title: 상명난민기구의 EXIT 게임 홍보영상 (11초)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 135/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 135/10000)




Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 132/10000)
Video Title: 김혜수, 죽음의 난민 루트를 가다…kbs &#39;다큐공감&#39;
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 234/10000)
Comments are disabled for video ID 7gga2ZZwx6c.
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 136/10000)




Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 136/10000)
Video Title: 김혜수, 죽음의 난민 루트를 가다…kbs &#39;다큐공감&#39;
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 133/10000)
Comments are disabled for video ID 7gga2ZZwx6c.
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 234/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 137/10000)
Video Title: 170402 케라틴즈 - 신곡 + 산 너머 저편 @차고스 난민 후원공연 /스트레인지프룻
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 137/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 133/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 235/10000)
Video Title: 日아소 &quot;北난민 몰려오면 체포할지 사살할지 고려&quot;..위기론 조장
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 138/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 138/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 134/10000)
Video Title: [세상 읽기] 백만 난민 받은 메르켈의 총선 승리

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Filtered videos found: 56
Total unique videos saved in CSV: 54


In [None]:
import pandas as pd
import time
import itertools
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google.colab import files
import calendar
from datetime import datetime

# 여러 프로젝트의 API 키 설정
API_KEYS = ['AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c', 'AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg', 'AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw', 'AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U']  # 필요한 만큼 추가
api_key_iter = itertools.cycle(API_KEYS)  # API 키 순환

# 각 키의 사용량 추적
api_usage = {key: 0 for key in API_KEYS}
api_quota = 10000  # YouTube Data API 할당량 (일반적으로 10,000)

def build_youtube_api():
    api_key = next(api_key_iter)
    print(f"Using API key: {api_key} (usage: {api_usage[api_key]}/{api_quota})")
    return build('youtube', 'v3', developerKey=api_key), api_key

def search_videos(query, start_date, end_date, region_code='KR'):
    videos = []
    next_page_token = None
    page_count = 0
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.search().list(
                part='snippet',
                q=query,
                type='video',
                publishedAfter=start_date.isoformat() + 'Z',
                publishedBefore=end_date.isoformat() + 'Z',
                maxResults=50,
                pageToken=next_page_token,
                regionCode=region_code  # 한국으로 제한
            )
            response = request.execute()
            videos.extend(response['items'])
            next_page_token = response.get('nextPageToken')
            api_usage[api_key] += 100  # 예시: 한 번의 요청이 100 쿼리를 사용한다고 가정
            page_count += 1
            print(f"Page {page_count} processed, total videos collected: {len(videos)}")
            if not next_page_token:
                break
            time.sleep(1)
        except HttpError as e:
            if e.resp.status == 403:
                print("Quota exceeded for the current API key. Switching API key.")
                time.sleep(5)  # 잠시 대기 후 재시도
            elif e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)  # 잠시 대기 후 재시도
            else:
                raise e
    return videos

def get_video_info(video_id):
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.videos().list(
                part='snippet,statistics',
                id=video_id
            )
            response = request.execute()
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            return response['items'][0]
        except HttpError as e:
            if e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e

def get_channel_info(channel_id):
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.channels().list(
                part='snippet,statistics',
                id=channel_id
            )
            response = request.execute()
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            return response['items'][0]
        except HttpError as e:
            if e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e

def extract_comment_data(comments):
    comment_data = []
    for comment in comments:
        top_comment = comment['snippet']['topLevelComment']['snippet']
        comment_text = top_comment['textDisplay']
        comment_date = top_comment['publishedAt']
        author_channel_id = top_comment.get('authorChannelId', {}).get('value', None)
        like_count = top_comment.get('likeCount', 0)
        comment_data.append({
            'Comment': comment_text,
            'Comment Published At': comment_date,
            'Author Channel ID': author_channel_id,
            'Comment Likes': like_count
        })

        # 대댓글 가져오기
        if 'replies' in comment:
            for reply in comment['replies']['comments']:
                reply_snippet = reply['snippet']
                reply_text = reply_snippet['textDisplay']
                reply_date = reply_snippet['publishedAt']
                reply_author_channel_id = reply_snippet.get('authorChannelId', {}).get('value', None)
                reply_like_count = reply_snippet.get('likeCount', 0)
                comment_data.append({
                    'Comment': reply_text,
                    'Comment Published At': reply_date,
                    'Author Channel ID': reply_author_channel_id,
                    'Comment Likes': reply_like_count
                })

    return comment_data

def get_comments(video_id, max_comments=None):
    comments = []
    next_page_token = None
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.commentThreads().list(
                part='snippet,replies',
                videoId=video_id,
                maxResults=100,  # 한 페이지에 최대 100개의 댓글
                pageToken=next_page_token
            )
            response = request.execute()
            comments.extend(response['items'])
            next_page_token = response.get('nextPageToken')
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            if not next_page_token or (max_comments and len(comments) >= max_comments):
                break
            time.sleep(1)
        except HttpError as e:
            error_reason = e.error_details[0]['reason']
            if error_reason == 'commentsDisabled':
                print(f"Comments are disabled for video ID {video_id}.")
                break
            elif e.resp.status == 403:
                print("Quota exceeded for the current API key. Switching API key.")
                time.sleep(5)
            elif e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e
    return comments

def save_data_to_csv(video_data, filename='youtube_combined_data.csv'):
    all_data = []
    for video in video_data:
        base_info = {
            'Video ID': video['Video ID'],
            'Video Title': video['Video Title'],
            'Channel Title': video['Channel Title'],
            'Views': video['Views'],
            'Likes': video['Likes'],
            'Comments': video['Comments'],
            'Published At': video['Published At'],
            'Subscribers': video['Subscribers']
        }
        for comment in video['Comments Data']:
            data = base_info.copy()
            data.update(comment)
            all_data.append(data)
        if not video['Comments Data']:  # 댓글이 없는 경우 기본 정보만 추가
            all_data.append(base_info)

    df_combined = pd.DataFrame(all_data)

    # CSV 파일로 저장 (UTF-8 인코딩, lineterminator 설정)
    df_combined.to_csv(filename, index=False, encoding='utf-8', lineterminator='\n')

    # 파일 다운로드
    files.download(filename)

def read_csv_in_chunks(filename, chunksize=10000):
    chunks = []
    for chunk in pd.read_csv(filename, chunksize=chunksize, encoding='utf-8', engine='python'):
        chunks.append(chunk)
    return pd.concat(chunks, ignore_index=True)

def contains_query(text, query):
    return query in text

def main():
    search_query = '난민'  # 검색할 키워드
    year = 2017  # 검색할 연도
    month = 10  # 검색할 월

    start_date = datetime(year, month, 1)
    last_day = calendar.monthrange(year, month)[1]
    end_date = datetime(year, month, last_day)

    print(f"Searching videos from {start_date} to {end_date}")

    # 새로운 데이터 수집을 위한 비어 있는 리스트 초기화
    all_videos = []
    total_videos_found = 0
    filtered_videos_found = 0

    try:
        videos = search_videos(search_query, start_date, end_date, region_code='KR')
        query_videos = [video for video in videos if contains_query(video['snippet']['title'], search_query)]
        all_videos.extend(query_videos)
        total_videos_found += len(videos)  # 검색된 모든 비디오 개수
        filtered_videos_found += len(query_videos)  # 쿼리에 포함된 비디오 개수
        print(f"Total videos found: {total_videos_found}")
        print(f"Filtered videos found: {filtered_videos_found}")

        # 주기적으로 데이터를 저장
        if len(all_videos) % 300 == 0:  # 예시로 300개 단위로 저장
            save_data_to_csv(all_videos, f'youtube_partial_data_{year}_{month}.csv')
    except HttpError as e:
        print(f"An error occurred: {e}")
        print("Saving collected data so far...")
        save_data_to_csv(all_videos, f'youtube_partial_data_{year}_{month}.csv')
        return

    video_data = []
    try:
        for index, video in enumerate(all_videos):
            video_info = {}
            video_id = video['id']['videoId']
            snippet_info = video['snippet']
            statistics_info = get_video_info(video_id)['statistics']
            channel_info = get_channel_info(snippet_info['channelId'])['statistics']

            # 비디오 정보 추가
            video_info['Video ID'] = video_id
            video_info['Video Title'] = snippet_info['title']
            video_info['Channel Title'] = snippet_info['channelTitle']
            video_info['Views'] = statistics_info.get('viewCount', 0)
            video_info['Likes'] = statistics_info.get('likeCount', 0)
            video_info['Comments'] = statistics_info.get('commentCount', 0)
            video_info['Published At'] = snippet_info['publishedAt']
            video_info['Subscribers'] = channel_info.get('subscriberCount', 0)

            print(f"Video Title: {video_info['Video Title']}")

            # 댓글 정보 추가
            comments = get_comments(video_id)
            video_comments = extract_comment_data(comments)

            video_info['Comments Data'] = video_comments
            video_data.append(video_info)

            # 주기적으로 데이터를 저장
            if (index + 1) % 100 == 0:  # 예시로 100개 단위로 저장
                save_data_to_csv(video_data, f'youtube_partial_video_data_{year}_{month}.csv')

    except HttpError as e:
        print(f"An error occurred while processing videos: {e}")
        print("Saving collected video data so far...")
        save_data_to_csv(video_data, f'youtube_partial_video_data_{year}_{month}.csv')
        return

    # 모든 비디오 정보를 데이터프레임으로 저장하고 파일 다운로드
    save_data_to_csv(video_data, f'youtube_combined_data_{year}_{month}.csv')

    # CSV 파일에 저장된 비디오 개수 확인
    df_combined = read_csv_in_chunks(f'youtube_combined_data_{year}_{month}.csv')
    csv_video_count = len(df_combined['Video ID'].unique())

    # 총 비디오 개수와 CSV 파일에 저장된 비디오 개수 비교
    print(f"Filtered videos found: {filtered_videos_found}")
    print(f"Total unique videos saved in CSV: {csv_video_count}")

if __name__ == "__main__":
    main()


Searching videos from 2017-10-01 00:00:00 to 2017-10-31 00:00:00
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 0/10000)
Page 1 processed, total videos collected: 50
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 0/10000)
Page 2 processed, total videos collected: 100
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 0/10000)
Page 3 processed, total videos collected: 150
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 0/10000)
Page 4 processed, total videos collected: 169
Total videos found: 169
Filtered videos found: 64
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 100/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 100/10000)
Video Title: &quot;바늘구멍 통과보다 어려운 난민 신청&quot;...1.8%만 인정 / YTN
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 100/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 100/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (



Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 102/10000)
Video Title: 시리아 난민 초등학생 한국 적응중
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 102/10000)
Comments are disabled for video ID sEwg_QjGtM0.
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 103/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 103/10000)
Video Title: 하늘에서 본 &#39;끝없는 행렬&#39;…로힝야족 난민 다시 급증 / SBS
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 103/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 102/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 104/10000)
Video Title: 기독교로 개종한 이란 소년 &quot;난민 인정&quot; / YTN
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 104/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 104/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 103/10000)
Video Title: 기독교개종 무슬림 난민 인정..부작용은?
Using API key: AIzaSyAd6idgrZjjbD49



Comments are disabled for video ID 5mm67GJy5O8.
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 128/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 127/10000)
Video Title: PUBG: 멀티 킬 | 총없는 난민학살
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 129/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 128/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 129/10000)
Video Title: 심상정 의원 예리한 지적, 덴마크 실업급여 악용 사례는 없나요? ㅣ #행복난민 EP2 #02
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 128/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 130/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 129/10000)
Video Title: 육아를 자연스럽게 받아들이는 덴마크 남성들 그리고 그 배경!ㅣ #행복난민 EP3 #02
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 130/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 129/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Filtered videos found: 64
Total unique videos saved in CSV: 63


In [None]:
import pandas as pd
import time
import itertools
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google.colab import files
import calendar
from datetime import datetime

# 여러 프로젝트의 API 키 설정
API_KEYS = ['AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c', 'AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg', 'AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw', 'AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U']  # 필요한 만큼 추가
api_key_iter = itertools.cycle(API_KEYS)  # API 키 순환

# 각 키의 사용량 추적
api_usage = {key: 0 for key in API_KEYS}
api_quota = 10000  # YouTube Data API 할당량 (일반적으로 10,000)

def build_youtube_api():
    api_key = next(api_key_iter)
    print(f"Using API key: {api_key} (usage: {api_usage[api_key]}/{api_quota})")
    return build('youtube', 'v3', developerKey=api_key), api_key

def search_videos(query, start_date, end_date, region_code='KR'):
    videos = []
    next_page_token = None
    page_count = 0
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.search().list(
                part='snippet',
                q=query,
                type='video',
                publishedAfter=start_date.isoformat() + 'Z',
                publishedBefore=end_date.isoformat() + 'Z',
                maxResults=50,
                pageToken=next_page_token,
                regionCode=region_code  # 한국으로 제한
            )
            response = request.execute()
            videos.extend(response['items'])
            next_page_token = response.get('nextPageToken')
            api_usage[api_key] += 100  # 예시: 한 번의 요청이 100 쿼리를 사용한다고 가정
            page_count += 1
            print(f"Page {page_count} processed, total videos collected: {len(videos)}")
            if not next_page_token:
                break
            time.sleep(1)
        except HttpError as e:
            if e.resp.status == 403:
                print("Quota exceeded for the current API key. Switching API key.")
                time.sleep(5)  # 잠시 대기 후 재시도
            elif e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)  # 잠시 대기 후 재시도
            else:
                raise e
    return videos

def get_video_info(video_id):
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.videos().list(
                part='snippet,statistics',
                id=video_id
            )
            response = request.execute()
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            return response['items'][0]
        except HttpError as e:
            if e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e

def get_channel_info(channel_id):
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.channels().list(
                part='snippet,statistics',
                id=channel_id
            )
            response = request.execute()
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            return response['items'][0]
        except HttpError as e:
            if e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e

def extract_comment_data(comments):
    comment_data = []
    for comment in comments:
        top_comment = comment['snippet']['topLevelComment']['snippet']
        comment_text = top_comment['textDisplay']
        comment_date = top_comment['publishedAt']
        author_channel_id = top_comment.get('authorChannelId', {}).get('value', None)
        like_count = top_comment.get('likeCount', 0)
        comment_data.append({
            'Comment': comment_text,
            'Comment Published At': comment_date,
            'Author Channel ID': author_channel_id,
            'Comment Likes': like_count
        })

        # 대댓글 가져오기
        if 'replies' in comment:
            for reply in comment['replies']['comments']:
                reply_snippet = reply['snippet']
                reply_text = reply_snippet['textDisplay']
                reply_date = reply_snippet['publishedAt']
                reply_author_channel_id = reply_snippet.get('authorChannelId', {}).get('value', None)
                reply_like_count = reply_snippet.get('likeCount', 0)
                comment_data.append({
                    'Comment': reply_text,
                    'Comment Published At': reply_date,
                    'Author Channel ID': reply_author_channel_id,
                    'Comment Likes': reply_like_count
                })

    return comment_data

def get_comments(video_id, max_comments=None):
    comments = []
    next_page_token = None
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.commentThreads().list(
                part='snippet,replies',
                videoId=video_id,
                maxResults=100,  # 한 페이지에 최대 100개의 댓글
                pageToken=next_page_token
            )
            response = request.execute()
            comments.extend(response['items'])
            next_page_token = response.get('nextPageToken')
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            if not next_page_token or (max_comments and len(comments) >= max_comments):
                break
            time.sleep(1)
        except HttpError as e:
            error_reason = e.error_details[0]['reason']
            if error_reason == 'commentsDisabled':
                print(f"Comments are disabled for video ID {video_id}.")
                break
            elif e.resp.status == 403:
                print("Quota exceeded for the current API key. Switching API key.")
                time.sleep(5)
            elif e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e
    return comments

def save_data_to_csv(video_data, filename='youtube_combined_data.csv'):
    all_data = []
    for video in video_data:
        base_info = {
            'Video ID': video['Video ID'],
            'Video Title': video['Video Title'],
            'Channel Title': video['Channel Title'],
            'Views': video['Views'],
            'Likes': video['Likes'],
            'Comments': video['Comments'],
            'Published At': video['Published At'],
            'Subscribers': video['Subscribers']
        }
        for comment in video['Comments Data']:
            data = base_info.copy()
            data.update(comment)
            all_data.append(data)
        if not video['Comments Data']:  # 댓글이 없는 경우 기본 정보만 추가
            all_data.append(base_info)

    df_combined = pd.DataFrame(all_data)

    # CSV 파일로 저장 (UTF-8 인코딩, lineterminator 설정)
    df_combined.to_csv(filename, index=False, encoding='utf-8', lineterminator='\n')

    # 파일 다운로드
    files.download(filename)

def read_csv_in_chunks(filename, chunksize=10000):
    chunks = []
    for chunk in pd.read_csv(filename, chunksize=chunksize, encoding='utf-8', engine='python'):
        chunks.append(chunk)
    return pd.concat(chunks, ignore_index=True)

def contains_query(text, query):
    return query in text

def main():
    search_query = '난민'  # 검색할 키워드
    year = 2017  # 검색할 연도
    month = 11  # 검색할 월

    start_date = datetime(year, month, 1)
    last_day = calendar.monthrange(year, month)[1]
    end_date = datetime(year, month, last_day)

    print(f"Searching videos from {start_date} to {end_date}")

    # 새로운 데이터 수집을 위한 비어 있는 리스트 초기화
    all_videos = []
    total_videos_found = 0
    filtered_videos_found = 0

    try:
        videos = search_videos(search_query, start_date, end_date, region_code='KR')
        query_videos = [video for video in videos if contains_query(video['snippet']['title'], search_query)]
        all_videos.extend(query_videos)
        total_videos_found += len(videos)  # 검색된 모든 비디오 개수
        filtered_videos_found += len(query_videos)  # 쿼리에 포함된 비디오 개수
        print(f"Total videos found: {total_videos_found}")
        print(f"Filtered videos found: {filtered_videos_found}")

        # 주기적으로 데이터를 저장
        if len(all_videos) % 300 == 0:  # 예시로 300개 단위로 저장
            save_data_to_csv(all_videos, f'youtube_partial_data_{year}_{month}.csv')
    except HttpError as e:
        print(f"An error occurred: {e}")
        print("Saving collected data so far...")
        save_data_to_csv(all_videos, f'youtube_partial_data_{year}_{month}.csv')
        return

    video_data = []
    try:
        for index, video in enumerate(all_videos):
            video_info = {}
            video_id = video['id']['videoId']
            snippet_info = video['snippet']
            statistics_info = get_video_info(video_id)['statistics']
            channel_info = get_channel_info(snippet_info['channelId'])['statistics']

            # 비디오 정보 추가
            video_info['Video ID'] = video_id
            video_info['Video Title'] = snippet_info['title']
            video_info['Channel Title'] = snippet_info['channelTitle']
            video_info['Views'] = statistics_info.get('viewCount', 0)
            video_info['Likes'] = statistics_info.get('likeCount', 0)
            video_info['Comments'] = statistics_info.get('commentCount', 0)
            video_info['Published At'] = snippet_info['publishedAt']
            video_info['Subscribers'] = channel_info.get('subscriberCount', 0)

            print(f"Video Title: {video_info['Video Title']}")

            # 댓글 정보 추가
            comments = get_comments(video_id)
            video_comments = extract_comment_data(comments)

            video_info['Comments Data'] = video_comments
            video_data.append(video_info)

            # 주기적으로 데이터를 저장
            if (index + 1) % 100 == 0:  # 예시로 100개 단위로 저장
                save_data_to_csv(video_data, f'youtube_partial_video_data_{year}_{month}.csv')

    except HttpError as e:
        print(f"An error occurred while processing videos: {e}")
        print("Saving collected video data so far...")
        save_data_to_csv(video_data, f'youtube_partial_video_data_{year}_{month}.csv')
        return

    # 모든 비디오 정보를 데이터프레임으로 저장하고 파일 다운로드
    save_data_to_csv(video_data, f'youtube_combined_data_{year}_{month}.csv')

    # CSV 파일에 저장된 비디오 개수 확인
    df_combined = read_csv_in_chunks(f'youtube_combined_data_{year}_{month}.csv')
    csv_video_count = len(df_combined['Video ID'].unique())

    # 총 비디오 개수와 CSV 파일에 저장된 비디오 개수 비교
    print(f"Filtered videos found: {filtered_videos_found}")
    print(f"Total unique videos saved in CSV: {csv_video_count}")

if __name__ == "__main__":
    main()


Searching videos from 2017-11-01 00:00:00 to 2017-11-30 00:00:00
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 0/10000)
Page 1 processed, total videos collected: 50
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 0/10000)
Page 2 processed, total videos collected: 100
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 0/10000)
Page 3 processed, total videos collected: 150
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 0/10000)
Page 4 processed, total videos collected: 200
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 100/10000)
Page 5 processed, total videos collected: 220
Total videos found: 220
Filtered videos found: 103
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 100/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 100/10000)
Video Title: 좋은 날씨의 역설…지중해서 아프리카 난민 참사 속출 / 연합뉴스TV (YonhapnewsTV)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 100/10000)
Using



Comments are disabled for video ID RNtWvjme0Jg.
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 241/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 142/10000)
Video Title: 로힝야 난민 사태 3개월…이주자들과 함께 하는 사람들
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 142/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 141/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 242/10000)
Video Title: 교육개혁의 시작은 특목고, 자사고 폐지부터이다?ㅣ #행복난민 EP5 #07
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 143/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 143/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 142/10000)
Video Title: [닭발군] 21일 후 (21 Days) - 시리아 난민 그리고 새로운 삶과 가족 1화
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 243/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 144/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7r



Video Title: &#39;난민 복서&#39; 이흑산, 25일 일본 선수와 첫 국제전
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 143/10000)
Comments are disabled for video ID uP_AXA_XprA.
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 244/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 145/10000)
Video Title: &#39;난민 복서&#39; 이흑산, 첫 국제전서 일본 선수에게 ko승
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 145/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 143/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 245/10000)
Video Title: &#39;난민 복서&#39; 이흑산 &quot;코리안 드림은 지금부터 시작&quot;
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 146/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 146/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 144/10000)
Video Title: 난민 여성 사산 방치한 스위스 국경 경비대원 법정행(종합)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 246/10000)
U

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 189/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 189/10000)
Video Title: `행복난민’ 6화, 덴마크 ‘쉼이 있는 교육’ 집중 조명
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 187/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 289/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 190/10000)
Video Title: 2017년 남수단 난민촌 사역
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 190/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 188/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 290/10000)
Video Title: ‘행복난민’ 최진기·서천석·신아영...덴마크 ‘삶을 위한 교육’ 조명 | 오늘의 뉴스
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 191/10000)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Filtered videos found: 103
Total unique videos saved in CSV: 170


In [None]:
import pandas as pd
import time
import itertools
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google.colab import files
import calendar
from datetime import datetime

# 여러 프로젝트의 API 키 설정
API_KEYS = ['AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c', 'AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg', 'AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw', 'AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U']  # 필요한 만큼 추가
api_key_iter = itertools.cycle(API_KEYS)  # API 키 순환

# 각 키의 사용량 추적
api_usage = {key: 0 for key in API_KEYS}
api_quota = 10000  # YouTube Data API 할당량 (일반적으로 10,000)

def build_youtube_api():
    api_key = next(api_key_iter)
    print(f"Using API key: {api_key} (usage: {api_usage[api_key]}/{api_quota})")
    return build('youtube', 'v3', developerKey=api_key), api_key

def search_videos(query, start_date, end_date, region_code='KR'):
    videos = []
    next_page_token = None
    page_count = 0
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.search().list(
                part='snippet',
                q=query,
                type='video',
                publishedAfter=start_date.isoformat() + 'Z',
                publishedBefore=end_date.isoformat() + 'Z',
                maxResults=50,
                pageToken=next_page_token,
                regionCode=region_code  # 한국으로 제한
            )
            response = request.execute()
            videos.extend(response['items'])
            next_page_token = response.get('nextPageToken')
            api_usage[api_key] += 100  # 예시: 한 번의 요청이 100 쿼리를 사용한다고 가정
            page_count += 1
            print(f"Page {page_count} processed, total videos collected: {len(videos)}")
            if not next_page_token:
                break
            time.sleep(1)
        except HttpError as e:
            if e.resp.status == 403:
                print("Quota exceeded for the current API key. Switching API key.")
                time.sleep(5)  # 잠시 대기 후 재시도
            elif e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)  # 잠시 대기 후 재시도
            else:
                raise e
    return videos

def get_video_info(video_id):
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.videos().list(
                part='snippet,statistics',
                id=video_id
            )
            response = request.execute()
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            return response['items'][0]
        except HttpError as e:
            if e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e

def get_channel_info(channel_id):
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.channels().list(
                part='snippet,statistics',
                id=channel_id
            )
            response = request.execute()
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            return response['items'][0]
        except HttpError as e:
            if e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e

def extract_comment_data(comments):
    comment_data = []
    for comment in comments:
        top_comment = comment['snippet']['topLevelComment']['snippet']
        comment_text = top_comment['textDisplay']
        comment_date = top_comment['publishedAt']
        author_channel_id = top_comment.get('authorChannelId', {}).get('value', None)
        like_count = top_comment.get('likeCount', 0)
        comment_data.append({
            'Comment': comment_text,
            'Comment Published At': comment_date,
            'Author Channel ID': author_channel_id,
            'Comment Likes': like_count
        })

        # 대댓글 가져오기
        if 'replies' in comment:
            for reply in comment['replies']['comments']:
                reply_snippet = reply['snippet']
                reply_text = reply_snippet['textDisplay']
                reply_date = reply_snippet['publishedAt']
                reply_author_channel_id = reply_snippet.get('authorChannelId', {}).get('value', None)
                reply_like_count = reply_snippet.get('likeCount', 0)
                comment_data.append({
                    'Comment': reply_text,
                    'Comment Published At': reply_date,
                    'Author Channel ID': reply_author_channel_id,
                    'Comment Likes': reply_like_count
                })

    return comment_data

def get_comments(video_id, max_comments=None):
    comments = []
    next_page_token = None
    while True:
        try:
            youtube, api_key = build_youtube_api()
            request = youtube.commentThreads().list(
                part='snippet,replies',
                videoId=video_id,
                maxResults=100,  # 한 페이지에 최대 100개의 댓글
                pageToken=next_page_token
            )
            response = request.execute()
            comments.extend(response['items'])
            next_page_token = response.get('nextPageToken')
            api_usage[api_key] += 1  # 한 번의 요청이 1 쿼리를 사용한다고 가정
            if not next_page_token or (max_comments and len(comments) >= max_comments):
                break
            time.sleep(1)
        except HttpError as e:
            error_reason = e.error_details[0]['reason']
            if error_reason == 'commentsDisabled':
                print(f"Comments are disabled for video ID {video_id}.")
                break
            elif e.resp.status == 403:
                print("Quota exceeded for the current API key. Switching API key.")
                time.sleep(5)
            elif e.resp.status == 503:
                print("Service unavailable. Retrying...")
                time.sleep(5)
            else:
                raise e
    return comments

def save_data_to_csv(video_data, filename='youtube_combined_data.csv'):
    all_data = []
    for video in video_data:
        base_info = {
            'Video ID': video['Video ID'],
            'Video Title': video['Video Title'],
            'Channel Title': video['Channel Title'],
            'Views': video['Views'],
            'Likes': video['Likes'],
            'Comments': video['Comments'],
            'Published At': video['Published At'],
            'Subscribers': video['Subscribers']
        }
        for comment in video['Comments Data']:
            data = base_info.copy()
            data.update(comment)
            all_data.append(data)
        if not video['Comments Data']:  # 댓글이 없는 경우 기본 정보만 추가
            all_data.append(base_info)

    df_combined = pd.DataFrame(all_data)

    # CSV 파일로 저장 (UTF-8 인코딩, lineterminator 설정)
    df_combined.to_csv(filename, index=False, encoding='utf-8', lineterminator='\n')

    # 파일 다운로드
    files.download(filename)

def read_csv_in_chunks(filename, chunksize=10000):
    chunks = []
    for chunk in pd.read_csv(filename, chunksize=chunksize, encoding='utf-8', engine='python'):
        chunks.append(chunk)
    return pd.concat(chunks, ignore_index=True)

def contains_query(text, query):
    return query in text

def main():
    search_query = '난민'  # 검색할 키워드
    year = 2017  # 검색할 연도
    month = 12  # 검색할 월

    start_date = datetime(year, month, 1)
    last_day = calendar.monthrange(year, month)[1]
    end_date = datetime(year, month, last_day)

    print(f"Searching videos from {start_date} to {end_date}")

    # 새로운 데이터 수집을 위한 비어 있는 리스트 초기화
    all_videos = []
    total_videos_found = 0
    filtered_videos_found = 0

    try:
        videos = search_videos(search_query, start_date, end_date, region_code='KR')
        query_videos = [video for video in videos if contains_query(video['snippet']['title'], search_query)]
        all_videos.extend(query_videos)
        total_videos_found += len(videos)  # 검색된 모든 비디오 개수
        filtered_videos_found += len(query_videos)  # 쿼리에 포함된 비디오 개수
        print(f"Total videos found: {total_videos_found}")
        print(f"Filtered videos found: {filtered_videos_found}")

        # 주기적으로 데이터를 저장
        if len(all_videos) % 300 == 0:  # 예시로 300개 단위로 저장
            save_data_to_csv(all_videos, f'youtube_partial_data_{year}_{month}.csv')
    except HttpError as e:
        print(f"An error occurred: {e}")
        print("Saving collected data so far...")
        save_data_to_csv(all_videos, f'youtube_partial_data_{year}_{month}.csv')
        return

    video_data = []
    try:
        for index, video in enumerate(all_videos):
            video_info = {}
            video_id = video['id']['videoId']
            snippet_info = video['snippet']
            statistics_info = get_video_info(video_id)['statistics']
            channel_info = get_channel_info(snippet_info['channelId'])['statistics']

            # 비디오 정보 추가
            video_info['Video ID'] = video_id
            video_info['Video Title'] = snippet_info['title']
            video_info['Channel Title'] = snippet_info['channelTitle']
            video_info['Views'] = statistics_info.get('viewCount', 0)
            video_info['Likes'] = statistics_info.get('likeCount', 0)
            video_info['Comments'] = statistics_info.get('commentCount', 0)
            video_info['Published At'] = snippet_info['publishedAt']
            video_info['Subscribers'] = channel_info.get('subscriberCount', 0)

            print(f"Video Title: {video_info['Video Title']}")

            # 댓글 정보 추가
            comments = get_comments(video_id)
            video_comments = extract_comment_data(comments)

            video_info['Comments Data'] = video_comments
            video_data.append(video_info)

            # 주기적으로 데이터를 저장
            if (index + 1) % 100 == 0:  # 예시로 100개 단위로 저장
                save_data_to_csv(video_data, f'youtube_partial_video_data_{year}_{month}.csv')

    except HttpError as e:
        print(f"An error occurred while processing videos: {e}")
        print("Saving collected video data so far...")
        save_data_to_csv(video_data, f'youtube_partial_video_data_{year}_{month}.csv')
        return

    # 모든 비디오 정보를 데이터프레임으로 저장하고 파일 다운로드
    save_data_to_csv(video_data, f'youtube_combined_data_{year}_{month}.csv')

    # CSV 파일에 저장된 비디오 개수 확인
    df_combined = read_csv_in_chunks(f'youtube_combined_data_{year}_{month}.csv')
    csv_video_count = len(df_combined['Video ID'].unique())

    # 총 비디오 개수와 CSV 파일에 저장된 비디오 개수 비교
    print(f"Filtered videos found: {filtered_videos_found}")
    print(f"Total unique videos saved in CSV: {csv_video_count}")

if __name__ == "__main__":
    main()


Searching videos from 2017-12-01 00:00:00 to 2017-12-31 00:00:00
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 0/10000)
Page 1 processed, total videos collected: 50
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 0/10000)
Page 2 processed, total videos collected: 100
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 0/10000)
Page 3 processed, total videos collected: 150
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 0/10000)
Page 4 processed, total videos collected: 200
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 100/10000)
Page 5 processed, total videos collected: 250
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 100/10000)
Page 6 processed, total videos collected: 300
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 100/10000)
Page 7 processed, total videos collected: 350
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 100/10000)
Page 8 processed, total videos



Comments are disabled for video ID IqI6MdFLt7I.
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 247/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 347/10000)
Video Title: 대법원 &#39;여성 할례는 박해&#39; 난민 사유 인정했다
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 347/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 247/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 248/10000)
Video Title: 독일 묻지마 난민 수용 폐기로 전환 시리아 난민 이유와 향후전망은?  뉴스
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 348/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 348/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 248/10000)
Video Title: 독일 묻지마 난민 수용 폐기로 전환 시리아 난민 이유와 향후전망은?  엔터테인먼트
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 249/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 349/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VC



Comments are disabled for video ID IqI6MdFLt7I.
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 250/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 350/10000)
Video Title: 리비아 난민 사태에서 드러난 프랑스의 이중성
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 350/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 249/10000)
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 251/10000)
Video Title: 정우성 &quot;난민 로힝야족, 희망이 없어 안타까워&quot;(인터뷰④)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 351/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usage: 351/10000)
Using API key: AIzaSyAIuTm0sX-2LjD3FSQe8hEJwC9Q2wpC7rw (usage: 250/10000)
Video Title: 독일 묻지마 난민 수용 폐기로 전환 시리아 난민 이유와 향후전망은?  DKO 뉴스
Using API key: AIzaSyBIn1TN1KexUTxA_gZd4a19cTbICiCm_7U (usage: 252/10000)
Using API key: AIzaSyAd6idgrZjjbD49OZl21MehBz42nvZ4q8c (usage: 352/10000)
Using API key: AIzaSyCWytyUTBVFzYnAERvJxtPh3J467VCg_Wg (usa

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Filtered videos found: 66
Total unique videos saved in CSV: 66
