<a href="https://colab.research.google.com/github/Hanbin-git/kaggle/blob/main/colab_cook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import getpass
import json
import os
import time

import googleapiclient.discovery
import pandas as pd
import tqdm
from googleapiclient.errors import HttpError

In [2]:
# YouTube Data API client setup.
# The key is taken from the YOUTUBE_API_KEY environment variable, falling back to an
# interactive prompt, so that no credential is stored in the notebook itself.
# NOTE(review): the key that used to be hardcoded on this line was committed together
# with the notebook and should be treated as leaked - revoke it and issue a new one.
API_KEY = os.environ.get("YOUTUBE_API_KEY") or getpass.getpass("YouTube API key: ")
youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=API_KEY)

In [3]:
# Get video IDs for a search query. The YouTube Data API returns at most 50 results per request, so larger requests are paginated.

def get_video_ids(query, max_results=100):
    """Search YouTube for ``query`` and return up to ``max_results`` video IDs.

    Results are requested page by page (50 per request) through the
    module-level ``youtube`` client, following ``nextPageToken`` until enough
    pages have been requested or the API reports no further page.

    Args:
        query: Search string sent as the ``q`` parameter.
        max_results: Upper bound on the number of IDs returned.

    Returns:
        list: Video IDs in the order the API returned them, truncated to
        ``max_results``. Items without an ``id.videoId`` field are skipped.

    Raises:
        SystemExit: When the API reports ``quotaExceeded``. The module-level
            ``video_comments`` collected so far is first written out with
            ``save_data_to_csv``.
    """
    video_ids = []
    results_per_page = 50  # value sent as maxResults on every search request
    pages = (max_results + results_per_page - 1) // results_per_page  # ceiling division
    next_page_token = None

    for _ in range(pages):
        try:
            response = youtube.search().list(
                q=query,
                part="snippet",
                maxResults=results_per_page,
                type="video",
                pageToken=next_page_token,
            ).execute()
        except HttpError as e:
            # The machine-readable reason ("quotaExceeded", ...) lives in the JSON
            # error body; ``e.resp`` only carries the HTTP response headers, so
            # looking up 'reason' there never matches.
            try:
                error_reason = json.loads(e.content)["error"]["errors"][0]["reason"]
            except (AttributeError, ValueError, KeyError, IndexError, TypeError):
                error_reason = None

            if error_reason == 'quotaExceeded':
                print("Quota exceeded. Saving collected data...")
                save_data_to_csv(video_comments)
                # Raise instead of calling exit(): this unwinds immediately.
                # NOTE(review): in IPython-based kernels ``exit`` appears to only
                # request a kernel shutdown without stopping the running cell - confirm.
                raise SystemExit

            print(f"An error occurred: {e}")
            # Same as before: move on to the next iteration, which re-requests
            # the page identified by the unchanged page token.
            continue

        # Keep only results that actually carry a video ID.
        for item in response.get('items', []):
            if isinstance(item, dict) and 'id' in item and 'videoId' in item['id']:
                video_ids.append(item['id']['videoId'])

        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break

    return video_ids[:max_results]

In [4]:
# Get top-level comments for one video. The YouTube Data API returns at most 100 comment threads per request; only the first page is fetched.
def get_top_korean_comments(video_id, max_results=100):
    """Fetch the text of top-level comments for a single video.

    One ``commentThreads().list`` request is made through the module-level
    ``youtube`` client; no further pages are followed. Despite the function
    name, no language filtering or explicit ordering is applied here.

    Args:
        video_id: ID of the video whose comment threads are requested.
        max_results: Number of threads to ask for; values above 100 are capped
            at 100, the largest page size the API accepts.

    Returns:
        list: ``textDisplay`` of each top-level comment, or an empty list when
        comments are disabled or another API error occurred.

    Raises:
        SystemExit: When the API reports ``quotaExceeded``. The module-level
            ``video_comments`` collected so far is first written out with
            ``save_data_to_csv``.
    """
    comments = []
    try:
        response = youtube.commentThreads().list(
            part="snippet",
            videoId=video_id,
            maxResults=min(max_results, 100),  # the API rejects page sizes above 100
            textFormat="plainText",
        ).execute()
    except HttpError as e:
        # The machine-readable reason ("commentsDisabled", "quotaExceeded", ...)
        # lives in the JSON error body; ``e.resp`` only carries the HTTP response
        # headers, so looking up 'reason' there never matches.
        try:
            error_reason = json.loads(e.content)["error"]["errors"][0]["reason"]
        except (AttributeError, ValueError, KeyError, IndexError, TypeError):
            error_reason = None

        if error_reason == 'commentsDisabled':
            print(f"Comments are disabled for video {video_id}. Skipping.")
        elif error_reason == 'quotaExceeded':
            print("Quota exceeded. Saving collected data...")
            save_data_to_csv(video_comments)
            # Raise instead of calling exit(): this unwinds immediately.
            # NOTE(review): in IPython-based kernels ``exit`` appears to only
            # request a kernel shutdown without stopping the running cell - confirm.
            raise SystemExit
        else:
            print(f"An error occurred: {e}")
        return comments

    for item in response.get('items', []):
        comments.append(item['snippet']['topLevelComment']['snippet']['textDisplay'])

    return comments

In [5]:
# Make data to dataframe
# video_comments looks like: {"4DUYBXdUYzA": ["와 재밌다", "재미없다", ]}
def save_data_to_csv(video_comments):
    """Flatten a ``{video_id: [comment, ...]}`` mapping and write it to CSV.

    Each comment becomes one row with the columns ``Video_ID`` and ``Comment``.
    The table is written to ``youtube_comments.csv`` in the current working
    directory, without the DataFrame index.

    Args:
        video_comments: Mapping from video ID to a list of comment strings.
    """
    # One (video_id, comment) pair per comment, in mapping order.
    rows = [
        (vid, text)
        for vid, texts in video_comments.items()
        for text in texts
    ]

    # Explicit column names keep the header present even when there are no rows.
    frame = pd.DataFrame(rows, columns=["Video_ID", "Comment"])
    frame.to_csv("youtube_comments.csv", index=False)

In [6]:
# Search terms: the show title followed by the names of people appearing in it.
# Each entry is combined with the base query in the collection loop.
participants = [
    "흑백요리사",
    "백종원",
    "안성재",
    "에드워드 리",
    "나폴리 맛피아",
    "트리플스타",
    "요리하는 돌아이",
    "최현석",
    "장호준",
    "여경래",
    "안유성",
    "정지선",
    "최강록",
    "조은주",
    "오세득",
    "파브리치오 페라리",
    "이영숙",
    "선경 롱게스트",
    "김도윤",
    "박준우",
]

In [7]:
# Collect comments for every participant query and export them to CSV.
# video_comments maps a video ID to the list of comment texts fetched for it.
video_comments = {}
# Ex: {"4DUYBXdUYzA": ["와 재밌다", "재미없다", ]}

start = time.time()
query_baisic = "흑백요리사"

for participant in tqdm.tqdm(participants):
    # NOTE(review): the first participant equals the base query, so that search
    # string contains the show title twice - confirm this is intended.
    query = query_baisic + " " + participant

    try:
        video_ids = get_video_ids(query, max_results=50)

        for video_id in video_ids:
            # The same video can be returned for several queries; fetch its
            # comments only once instead of spending another request and
            # overwriting what was already collected.
            if video_id in video_comments:
                continue
            comments = get_top_korean_comments(video_id)
            video_comments[video_id] = comments
    except HttpError as e:
        # The machine-readable reason lives in the JSON error body; ``e.resp``
        # only carries the HTTP response headers.
        try:
            error_reason = json.loads(e.content)["error"]["errors"][0]["reason"]
        except (AttributeError, ValueError, KeyError, IndexError, TypeError):
            error_reason = None

        if error_reason == 'quotaExceeded':
            print("Quota exceeded. Saving collected data...")
            save_data_to_csv(video_comments)
            # Raise instead of calling exit() so the loop stops right here.
            raise SystemExit
        # Report anything else instead of dropping it silently, then move on
        # to the next participant as before.
        print(f"An error occurred: {e}")

    # Elapsed time is cumulative since the start of the whole run.
    end = time.time()
    print(f"{end - start}s for query: {query}")

save_data_to_csv(video_comments)

  5%|▌         | 1/20 [00:08<02:38,  8.35s/it]

8.382852554321289s for query: 흑백요리사 흑백요리사


 10%|█         | 2/20 [00:15<02:14,  7.47s/it]

15.234741687774658s for query: 흑백요리사 백종원


 15%|█▌        | 3/20 [00:21<01:55,  6.82s/it]

21.28472924232483s for query: 흑백요리사 안성재


 20%|██        | 4/20 [00:27<01:48,  6.78s/it]

27.996310234069824s for query: 흑백요리사 에드워드 리


 25%|██▌       | 5/20 [00:35<01:43,  6.91s/it]

35.15035605430603s for query: 흑백요리사 나폴리 맛피아


 30%|███       | 6/20 [00:41<01:35,  6.82s/it]

41.794718980789185s for query: 흑백요리사 트리플스타


 35%|███▌      | 7/20 [00:47<01:25,  6.56s/it]

47.82681703567505s for query: 흑백요리사 요리하는 돌아이


 40%|████      | 8/20 [00:54<01:18,  6.56s/it]

54.37746715545654s for query: 흑백요리사 최현석


 45%|████▌     | 9/20 [01:00<01:09,  6.36s/it]

60.28914451599121s for query: 흑백요리사 장호준


 50%|█████     | 10/20 [01:06<01:02,  6.22s/it]

66.2048978805542s for query: 흑백요리사 여경래


 55%|█████▌    | 11/20 [01:12<00:56,  6.33s/it]

72.77198100090027s for query: 흑백요리사 안유성


 60%|██████    | 12/20 [01:18<00:48,  6.04s/it]

78.13966178894043s for query: 흑백요리사 정지선


 65%|██████▌   | 13/20 [01:25<00:44,  6.30s/it]

85.06182074546814s for query: 흑백요리사 최강록


 70%|███████   | 14/20 [01:30<00:35,  5.93s/it]

90.14389657974243s for query: 흑백요리사 조은주


 75%|███████▌  | 15/20 [01:35<00:28,  5.63s/it]

95.05200099945068s for query: 흑백요리사 오세득


 80%|████████  | 16/20 [01:39<00:21,  5.41s/it]

99.94893622398376s for query: 흑백요리사 파브리치오 페라리


 85%|████████▌ | 17/20 [01:45<00:15,  5.32s/it]

105.0848138332367s for query: 흑백요리사 이영숙


 90%|█████████ | 18/20 [01:50<00:10,  5.31s/it]

110.34728360176086s for query: 흑백요리사 선경 롱게스트


 95%|█████████▌| 19/20 [01:55<00:05,  5.28s/it]

115.57490301132202s for query: 흑백요리사 김도윤


100%|██████████| 20/20 [01:59<00:00,  5.99s/it]

119.8968186378479s for query: 흑백요리사 박준우





In [8]:
# Load the exported comments back from "youtube_comments.csv" into a DataFrame.
comments = pd.read_csv("youtube_comments.csv")

In [9]:
# Preview the first rows of the loaded comments table.
comments.head()

Unnamed: 0,Video_ID,Comment
0,vebF7wUQLMo,"《흑백요리사: 요리 계급 전쟁》, 9월 17일 넷플릭스에서 시청하세요: https:..."
1,vebF7wUQLMo,빽햄요리사ㄷㄷ
2,vebF7wUQLMo,0:07
3,vebF7wUQLMo,백수저중에 옴진리교 교주가 있노 ㄷㄷㄷㄷ
4,vebF7wUQLMo,심사위원 등장씬은 대한민국 역대 등장씬 고트중에 하나다 ㄹㅇ
