# API 요청 테스트

## 환경설정

In [1]:
import json
import os

from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from oauth2client.tools import argparser
from dotenv import load_dotenv
import pandas as pd
# https://github.com/jdepoix/youtube-transcript-api/tree/master
from youtube_transcript_api import YouTubeTranscriptApi
from tqdm.notebook import tqdm_notebook as tqdm

In [2]:
# load_dotenv() (python-dotenv) is called first so that the lookups below can
# see values coming from a .env file. Each setting is read from the process
# environment; a variable that is not set yields None.
load_dotenv()
DEVELOPER_KEY, YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION = (
    os.getenv(env_name)
    for env_name in ("DEVELOPER_KEY", "YOUTUBE_API_SERVICE_NAME", "YOUTUBE_API_VERSION")
)

In [3]:
# Keyword arguments for the two YouTube Data API calls made in this notebook:
#   "search_param" is unpacked into service.search().list(...)
#   "videos_param" is unpacked into service.videos().list(...)
REQUESTS = dict(
    search_param=dict(
        part="snippet",
        maxResults=50,
        order="relevance",
        q="인수분해",
        type="video",
        # Disabled filter kept for reference: videoCaption="closedCaption"
        # (the author noted "any" as the alternative value).
    ),
    videos_param=dict(
        part="snippet,contentDetails,statistics",
    ),
)

In [4]:
# https://developers.google.com/youtube/v3?hl=ko
# https://www.dinolabs.ai/384
# https://han-py.tistory.com/432

# Issue a search.list request using the keyword arguments stored in
# REQUESTS["search_param"] and keep the decoded result in `response`.
with build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY) as service:
    request = service.search().list(
        **REQUESTS["search_param"]
    )

    try:
        response = request.execute()
    except HttpError as e:
        print("Error response status code : {0}, reason : {1}".format(e.status_code, e.error_details))
        # Re-raise after logging: swallowing the error would leave `response`
        # unbound (or stale from an earlier run), so the following cells would
        # fail with a confusing NameError or silently process old data.
        raise

In [5]:
# Save the raw search response as pretty-printed JSON.
# ensure_ascii=False writes non-ASCII text (e.g. Korean titles) verbatim, so the
# file encoding is pinned to UTF-8 instead of relying on the platform default,
# which can raise UnicodeEncodeError on some locales.
with open("data/youtube_api_search_result.json", "w", encoding="utf-8") as fp:
    json.dump(response, fp, indent=4, ensure_ascii=False)

In [6]:
# Collect the video ids from the search result held in `response`.
# NOTE: `response` is rebound below to the videos.list result, so the search
# cell has to be run again before this cell can be re-executed.
video_ids = [item["id"]["videoId"] for item in response["items"]]

# Request the parts listed in REQUESTS["videos_param"] for those ids.
with build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY) as service:
    request = service.videos().list(
        **REQUESTS["videos_param"],
        id = video_ids
    )

    try:
        response = request.execute()
    except HttpError as e:
        print("Error response status code : {0}, reason : {1}".format(e.status_code, e.error_details))
        # Re-raise after logging: otherwise `response` would still hold the
        # search result and the following cells would save and parse it as if
        # it were the videos result.
        raise

In [7]:
# Save the raw videos response as pretty-printed JSON.
# ensure_ascii=False writes non-ASCII text verbatim, so the file encoding is
# pinned to UTF-8 instead of relying on the platform default, which can raise
# UnicodeEncodeError on some locales.
with open("data/youtube_api_videos_result.json", "w", encoding="utf-8") as fp:
    json.dump(response, fp, indent=4, ensure_ascii=False)

In [8]:
# Tabulate the "items" list of the response: one row per item.
df_videos = pd.DataFrame(
    data=response["items"],
)

In [9]:
# Expand the dict-valued columns into one column per key and append them, in
# this order, after the remaining columns of df_videos.
#
# The columns are dropped on a copy (no inplace=True), so the frame built by the
# previous cell is not mutated, and columns that are already expanded are
# skipped, so re-running this cell is a no-op instead of raising KeyError.
nested_cols = [
    col for col in ["snippet", "contentDetails", "statistics"]
    if col in df_videos.columns
]
expanded_frames = [
    # Reuse df_videos' index so the expanded rows line up with their source rows.
    pd.DataFrame(df_videos[col].tolist(), index=df_videos.index)
    for col in nested_cols
]
df_videos = pd.concat([df_videos.drop(columns=nested_cols), *expanded_frames], axis=1)

In [10]:
# Export the video table to CSV using pandas' default to_csv settings.
df_videos.to_csv(path_or_buf="data/videos.csv")

## 자막(Transcript) 수집

In [22]:
# For every video id, look up the generated ("ko") transcript, turn it into a
# DataFrame and tag each row with the video id it came from.
# The per-video frames are gathered in a list and concatenated once after the
# loop; concatenating inside the loop re-copies all earlier rows on every
# iteration.
transcript_frames = []
for video_id in tqdm(video_ids):
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        transcript = transcript_list.find_generated_transcript(["ko"])

        df_temp = pd.DataFrame(transcript.fetch())
        df_temp["video_id"] = video_id
        transcript_frames.append(df_temp)
    except Exception as e:
        # Best effort: a video whose transcript cannot be retrieved is reported
        # and skipped so the remaining videos are still processed.
        print(video_id)
        print(e)

# df_transcript stays None when no transcript could be retrieved at all.
df_transcript = pd.concat(transcript_frames, ignore_index=True) if transcript_frames else None


  0%|          | 0/50 [00:00<?, ?it/s]

F16lLSu7bFA

Could not retrieve a transcript for the video https://www.youtube.com/watch?v=F16lLSu7bFA! This is most likely caused by:

Subtitles are disabled for this video

If you are sure that the described cause is not responsible for this error and that a transcript should be retrievable, please create an issue at https://github.com/jdepoix/youtube-transcript-api/issues. Please add which version of youtube_transcript_api you are using and provide the information needed to replicate the error. Also make sure that there are no open issues which already describe your problem!
ktWNA_zrB8s

Could not retrieve a transcript for the video https://www.youtube.com/watch?v=ktWNA_zrB8s! This is most likely caused by:

Subtitles are disabled for this video

If you are sure that the described cause is not responsible for this error and that a transcript should be retrievable, please create an issue at https://github.com/jdepoix/youtube-transcript-api/issues. Please add which version of youtube_

In [23]:
# df_transcript is still None when every transcript lookup failed; skip the
# export in that case instead of failing with AttributeError on None.
if df_transcript is not None:
    df_transcript.to_csv("data/transcripts.csv")
else:
    print("No transcripts were collected; data/transcripts.csv was not written.")