# API 요청 테스트

## 환경설정

In [1]:
import json
import os

from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from oauth2client.tools import argparser
from dotenv import load_dotenv
import pandas as pd
# https://github.com/jdepoix/youtube-transcript-api/tree/master
from youtube_transcript_api import YouTubeTranscriptApi
from tqdm.notebook import tqdm_notebook as tqdm

In [2]:
# load_dotenv() (python-dotenv) populates os.environ from a .env file; the
# YouTube Data API settings are then read from the environment. Each value is
# None when the corresponding variable is not set.
load_dotenv()
DEVELOPER_KEY = os.getenv("DEVELOPER_KEY")
YOUTUBE_API_SERVICE_NAME = os.getenv("YOUTUBE_API_SERVICE_NAME")
YOUTUBE_API_VERSION = os.getenv("YOUTUBE_API_VERSION")

In [3]:
# Default keyword arguments for the YouTube Data API requests in this notebook.
REQUESTS = {
    # Passed to service.search().list(...)
    "search_param": {
        "part": "snippet",
        "maxResults": 50,
        "order": "relevance",
        "q": "인수분해",
        "type": "video",
        # "videoCaption": "closedCaption",  # optional caption filter (or "any")
    },
    # Passed to service.videos().list(...)
    "videos_param": {
        "part": "snippet,contentDetails,statistics",
    },
}

## 검색 기반 동영상 찾기

In [4]:
# https://developers.google.com/youtube/v3?hl=ko
# https://www.dinolabs.ai/384
# https://han-py.tistory.com/432

def search_videos(q:str = None):
    """Run a YouTube Data API ``search().list()`` request and return its response.

    Args:
        q: Search query. When falsy, the query stored in
            ``REQUESTS["search_param"]["q"]`` is used instead.

    Returns:
        Whatever ``request.execute()`` returns for the search request.

    Raises:
        HttpError: Re-raised after its status code and error details have
            been printed.
    """
    # Work on a copy so a one-off query does not permanently overwrite the
    # notebook-wide default stored in REQUESTS.
    search_param = dict(REQUESTS["search_param"])
    if q:
        search_param["q"] = q

    with build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY) as service:
        request = service.search().list(**search_param)

        try:
            return request.execute()
        except HttpError as e:
            print("Error response status code : {0}, reason : {1}".format(e.status_code, e.error_details))
            # Propagate the real API error. Falling through to a
            # `return response` here would fail with UnboundLocalError,
            # because `response` is never assigned when execute() raises.
            raise

In [5]:
# Run the search with the default query configured in REQUESTS["search_param"].
response_videos_search = search_videos()

In [6]:
# ensure_ascii=False writes non-ASCII text (e.g. Korean titles) verbatim, so
# the file is opened as UTF-8 explicitly instead of relying on the platform's
# default encoding.
with open("data/youtube_api_search_result.json", "w", encoding="utf-8") as fp:
    json.dump(response_videos_search, fp, indent=4, ensure_ascii=False)

In [8]:
def get_videos_info(video_ids:list):
    """Fetch details for the given videos via ``videos().list()``.

    The requested parts come from ``REQUESTS["videos_param"]``.

    Args:
        video_ids: Video ids forwarded as the ``id`` argument of the request.

    Returns:
        Whatever ``request.execute()`` returns for the videos request.

    Raises:
        HttpError: Re-raised after its status code and error details have
            been printed.
    """
    with build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY) as service:
        request = service.videos().list(
            **REQUESTS["videos_param"],
            id=video_ids,
        )

        try:
            return request.execute()
        except HttpError as e:
            print("Error response status code : {0}, reason : {1}".format(e.status_code, e.error_details))
            # Propagate the real API error. Falling through to a
            # `return response` here would fail with UnboundLocalError,
            # because `response` is never assigned when execute() raises.
            raise

In [9]:
# Collect the video id of every search hit, then look up the details of all
# of them in a single videos().list() call.
video_ids = [
    search_hit["id"]["videoId"]
    for search_hit in response_videos_search["items"]
]

resoponse_videos_info = get_videos_info(video_ids)

In [17]:
# Number of ids passed to get_videos_info() per call.
# NOTE(review): presumably the per-request cap of videos().list() — confirm.
VIDEO_IDS_PER_REQUEST = 50

with open("data/target_video.txt", "r", encoding="utf-8") as fp:
    # One video id per line. Surrounding whitespace is stripped and blank
    # lines (e.g. an extra empty line at the end of the file) are skipped so
    # that no empty id is sent to the API.
    video_ids = [line.strip() for line in fp if line.strip()]

# One API response per batch of ids, in file order.
resoponse_videos_infos = [
    get_videos_info(video_ids[start:start + VIDEO_IDS_PER_REQUEST])
    for start in range(0, len(video_ids), VIDEO_IDS_PER_REQUEST)
]


In [10]:
# ensure_ascii=False writes non-ASCII text (e.g. Korean titles) verbatim, so
# the file is opened as UTF-8 explicitly instead of relying on the platform's
# default encoding.
with open("data/youtube_api_videos_result.json", "w", encoding="utf-8") as fp:
    json.dump(resoponse_videos_info, fp, indent=4, ensure_ascii=False)

In [18]:
def make_df_videos(response:dict):
    """Flatten a ``videos().list()`` response into one row per video.

    The nested ``snippet``, ``contentDetails`` and ``statistics`` dicts of
    each item are expanded into top-level columns, appended after the
    remaining columns in that order.

    Args:
        response: Response dict whose ``"items"`` entry is a list of video
            resources (dicts).

    Returns:
        A DataFrame with one row per item. Nested columns that are absent
        from the response (including the case of an empty ``"items"`` list)
        are skipped.
    """
    # Build the frame from the argument, not from a notebook-level global, so
    # the result depends only on what the caller passes in.
    df_videos = pd.DataFrame(response["items"])
    for col in ["snippet", "contentDetails", "statistics"]:
        if col not in df_videos.columns:
            continue
        # Each cell holds a dict; turn the column into its own frame and put
        # it in place of the original nested column.
        df_nested = pd.DataFrame(df_videos[col].tolist())
        df_videos = pd.concat([df_videos.drop(columns=col), df_nested], axis=1)
    return df_videos

In [26]:
# Convert every batched API response into a frame, then concatenate once
# instead of re-copying the growing frame on each iteration.
# NOTE: the loop variable deliberately keeps its original module-level name so
# any code that reads `resoponse_videos_info` during or after this loop sees
# the same value as before.
video_frames = []
for resoponse_videos_info in resoponse_videos_infos:
    video_frames.append(make_df_videos(resoponse_videos_info))

# Stays None when there were no responses at all.
df_videos = pd.concat(video_frames, ignore_index=True) if video_frames else None

In [27]:
# Show the combined per-video table as the cell output.
df_videos

Unnamed: 0,kind,etag,id,publishedAt,channelId,title,description,thumbnails,channelTitle,tags,...,dimension,definition,caption,licensedContent,contentRating,projection,viewCount,likeCount,favoriteCount,commentCount
0,youtube#video,TBGv2e0WPtDAnW8YYLZmioh25_E,Fgm5umQsHxM,2023-05-16T11:00:09Z,UCDkTanNuZGwYtf-Y6_xBmqw,※13분 순삭※ 인수분해의 세계에 오신 걸 환영합니다 I 완포자를 위한 정승제의 수학개념,"마침내, 이해됐다! '인수분해'\n\n#ebsi #정승제 #인수분해 #수포자\n#생...",{'default': {'url': 'https://i.ytimg.com/vi/Fg...,EBSi,"[EBS, EBSi, EBSi고교강의, EBSi 고교강의, 수능, 내신, 만점, 등...",...,2d,hd,false,True,{},rectangular,3809,96,0,10
1,youtube#video,lIcGcVz3xapcY5Ay-WmWSZSJn2o,LAQI9Kjiwkc,2020-02-14T11:03:42Z,UCudGH0bl44LtLfbj-YdTxvA,중3-1수학 [16강] 인수와 인수분해,중3-1수학 [16강] 인수와 인수분해\n\n이 강의는 교재에 구애받지 않는 강의입...,{'default': {'url': 'https://i.ytimg.com/vi/LA...,수학 맛집,"[중3수학, 제곱근, 인수, 인수분해, 중등수학, 수학인강, 합차공식, 곱셈공식, ...",...,2d,hd,false,True,{},rectangular,171124,2350,0,190
2,youtube#video,8kg8TSdnNtYSp6SGqLT3eXKX9UA,sEgcGnoufh0,2020-02-17T06:41:29Z,UCudGH0bl44LtLfbj-YdTxvA,중3-1수학 [17강] 인수분해 공식(1),중3-1수학 [17강] 인수분해 공식(1)\n\n이 강의는 교재에 구애받지 않는 강...,{'default': {'url': 'https://i.ytimg.com/vi/sE...,수학 맛집,"[중3수학, 무리수와 실수, 무리수, 실수, 은지영선생님, 제곱근, 루트, 근호, ...",...,2d,hd,false,True,{},rectangular,98893,1134,0,90
3,youtube#video,qRCm_G1_ue_HD_hXKo3WGMqUqeA,S1DO792UPiQ,2020-01-11T00:00:04Z,UCCq9w9rG51vZxWPxLNB04mQ,[EBS 수학의 답] 다항식의 인수분해 - 1. 공통인수를 이용한 인수분해,"중학 수학은 어렵고 답답하다? 그 고민, EBS 수학의 답으로 시~원하게 타파! \...",{'default': {'url': 'https://i.ytimg.com/vi/S1...,EBS Learning,"[EBS, EBS learning, EBS 교육, 수능, 시험, 수학의답, 중학수학...",...,2d,hd,false,True,{},rectangular,32341,277,0,0
4,youtube#video,VOgKZTgJqyoXY3-au54zuvMQS18,eT2gs_L8jxA,2023-02-23T11:15:03Z,UCufMvGtKg2hoTs0h1Ti5cxg,"10개 이상의 인수분해 공식 외우지 말고 이 영상부터 보세요 | 응용, 적용이 쉬워...","초등학생 나누기를 통한 인수분해, 나머지 정리 개념 확장\n\n놀면서❤️수학만점~ ...",{'default': {'url': 'https://i.ytimg.com/vi/eT...,인공지능수학 깨봉,"[깨봉, 수학, 인공지능수학, 조봉한, 인공지능, 깨봉수학, 초등수학, 인수분해, ...",...,2d,hd,false,True,{},rectangular,28880,553,0,41
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,youtube#video,gnmSkugSFacuF8w1uflggUpZdXs,dj55QI0_rW0,2021-03-21T00:00:06Z,UC3HQg4-oJtWG0pgj7d_LYYQ,"[수학상] 이차방정식 - 근의 공식, 근과 계수와의 관계","[수학상] 이차방정식 - 근의 공식, 근과 계수와의 관계",{'default': {'url': 'https://i.ytimg.com/vi/dj...,입시수학쌤,"[수학상, 수학상개념, 수학개념설명, 근의공식, 근과계수와의관계]",...,2d,hd,false,True,{},rectangular,79,15,0,0
78,youtube#video,YH92H1pqyi2NlCsZdxjkU843NpM,A_u8xt3GgKk,2022-07-16T12:51:33Z,UCpxxDK9onpxDbj4XkFenM5Q,근의 공식 : 판별식/ 공식 유도 (중학수학),중등수학 강의 : 쉬운 설명 / 간단한 예시 / 원리 이해,{'default': {'url': 'https://i.ytimg.com/vi/A_...,청개구리유여사,"[중등수학 강의, 중등수학 특강, 중등수학 인강, 근의 공식 증명, 근의 공식 유도...",...,2d,hd,false,True,{},rectangular,5542,46,0,5
79,youtube#video,hToWANBpRN-ORnV-IiC58vTMmBg,fgwtaWvaMaw,2016-03-15T09:01:19Z,UCfp9Gz9g2qdzeGGgbSb3BIA,중3수학/이차방정식 근의공식 유도,평택수학전문 엑솔루션 학원입니다\n학원문의 : 657-4405\n블로그 : bbel...,{'default': {'url': 'https://i.ytimg.com/vi/fg...,이정엽,"[이차방정식, 근의공식, 평택학원, 평택수학, 평택수학학원]",...,2d,sd,false,False,{},rectangular,6343,63,0,10
80,youtube#video,WEH-xhoBDpKUB14LimLYCEVEatU,qORgrvsRBDM,2016-05-18T11:08:16Z,UClrSKQiYNB0vXK8f6ghSZfA,3학년 19차시 근의공식,,{'default': {'url': 'https://i.ytimg.com/vi/qO...,10분수학,[근의 공식],...,2d,hd,false,True,{},rectangular,6775,60,0,4


In [28]:
# Persist the combined per-video table as CSV.
df_videos.to_csv("data/videos.csv")

## 동영상 자막 수집

In [29]:
# Fetch the auto-generated Korean ("ko") transcript of every video. Videos
# whose transcript cannot be retrieved are reported and skipped (best effort).
transcript_frames = []
for video_id in tqdm(video_ids):
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        transcript = transcript_list.find_generated_transcript(["ko"])

        df_temp = pd.DataFrame(transcript.fetch())
        # Tag every transcript row with the video it belongs to.
        df_temp["video_id"] = video_id
        transcript_frames.append(df_temp)
    except Exception as e:
        print(video_id)
        print(e)

# Concatenate once at the end instead of re-copying the growing frame on each
# iteration. Stays None when no transcript could be fetched.
df_transcript = (
    pd.concat(transcript_frames, ignore_index=True) if transcript_frames else None
)


  0%|          | 0/82 [00:00<?, ?it/s]

ktWNA_zrB8s

Could not retrieve a transcript for the video https://www.youtube.com/watch?v=ktWNA_zrB8s! This is most likely caused by:

Subtitles are disabled for this video

If you are sure that the described cause is not responsible for this error and that a transcript should be retrievable, please create an issue at https://github.com/jdepoix/youtube-transcript-api/issues. Please add which version of youtube_transcript_api you are using and provide the information needed to replicate the error. Also make sure that there are no open issues which already describe your problem!
4hFRaWh4pAY

Could not retrieve a transcript for the video https://www.youtube.com/watch?v=4hFRaWh4pAY! This is most likely caused by:

Subtitles are disabled for this video

If you are sure that the described cause is not responsible for this error and that a transcript should be retrievable, please create an issue at https://github.com/jdepoix/youtube-transcript-api/issues. Please add which version of youtube_

In [30]:
# Persist the collected transcripts as CSV.
# NOTE(review): df_transcript is still None when no transcript could be
# fetched by the loop that builds it, in which case this line raises
# AttributeError.
df_transcript.to_csv("data/transcripts.csv")