# 유튜브 API를 통해 데이터 수집

In [None]:
# Install the Google API client and auth packages this notebook uses to call the YouTube API
%pip install --upgrade google-api-python-client
%pip install --upgrade google-auth-oauthlib google-auth-httplib2
%pip install oauth2client

In [1]:
# 시스템 관련 모듈
import os
import sys
import time

# 유튜브 API 연결 모듈
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from oauth2client.tools import argparser

# 데이터 분석 모듈
import numpy as np
import pandas as pd

In [2]:
# YouTube Data API key and service/version settings.
# The key is read from the YOUTUBE_API_KEY environment variable so that the
# secret is not stored in the notebook. The key that used to be hard-coded
# here has been exposed and should be revoked and replaced.
DEVELOPER_KEY = os.environ.get('YOUTUBE_API_KEY')
if not DEVELOPER_KEY:
    raise RuntimeError(
        'YOUTUBE_API_KEY is not set; export your YouTube Data API key '
        'before running this notebook.'
    )
YOUTUBE_API_SERVICE_NAME = 'youtube'
YOUTUBE_API_VERSION = 'v3'

# Client object through which all later requests in this notebook are issued.
youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY)

# 채널 선정

In [3]:
# A search result holds the YouTube videos, channels, or playlists that match
# the search parameters given in the API request.
search_request = youtube.search().list(
    part='snippet',
    q="성시경",  # put the name of the channel you are looking for in q
    order='relevance',
    maxResults=50,
)
search_response = search_request.execute()

search_response

{'kind': 'youtube#searchListResponse',
 'etag': 'UM3awMtA7Bdnaualm_BYqaMxoqA',
 'nextPageToken': 'CDIQAA',
 'regionCode': 'KR',
 'pageInfo': {'totalResults': 1000000, 'resultsPerPage': 50},
 'items': [{'kind': 'youtube#searchResult',
   'etag': '2lLkm5adOtBRwc_3_Up68EU4o5A',
   'id': {'kind': 'youtube#video', 'videoId': '3tc3F4M607s'},
   'snippet': {'publishedAt': '2023-01-12T09:00:08Z',
    'channelId': 'UCl23-Cci_SMqyGXE1T_LYUg',
    'title': '성시경의 먹을텐데 l 다나카상과 주막 _ 1편',
    'description': '[주막] 서울 강북구 월계로7나길 30-3 (미아동 54-259) #성시경먹을텐데 #성시경 #먹을텐데 #먹텐 #레시피 #성식영 #성그시경그 ...',
    'thumbnails': {'default': {'url': 'https://i.ytimg.com/vi/3tc3F4M607s/default.jpg',
      'width': 120,
      'height': 90},
     'medium': {'url': 'https://i.ytimg.com/vi/3tc3F4M607s/mqdefault.jpg',
      'width': 320,
      'height': 180},
     'high': {'url': 'https://i.ytimg.com/vi/3tc3F4M607s/hqdefault.jpg',
      'width': 480,
      'height': 360}},
    'channelTitle': '성시경 SUNG SI KYU

In [4]:
# Extract only the channel ID, taken from the first search result
first_result = search_response['items'][0]
channel_id = first_result['snippet']['channelId']
channel_id

'UCl23-Cci_SMqyGXE1T_LYUg'

# 재생목록 

In [6]:
# Fetch the playlists that the channel owner has published
playlists_request = youtube.playlists().list(
    part='snippet',
    channelId=channel_id,  # channel ID obtained above
    maxResults=50,
)
playlists = playlists_request.execute()

playlists

{'kind': 'youtube#playlistListResponse',
 'etag': 'FYR_0a6qWV6F5rao1MoQqOX8wPA',
 'pageInfo': {'totalResults': 6, 'resultsPerPage': 50},
 'items': [{'kind': 'youtube#playlist',
   'etag': '0BwKiRB1O0bx6n1S8bEvaKWZKdo',
   'id': 'PLuMuHAJh9g_Pc_-tFDOb4zz1ejew00YSm',
   'snippet': {'publishedAt': '2022-04-02T04:56:59Z',
    'channelId': 'UCl23-Cci_SMqyGXE1T_LYUg',
    'title': '두두두두두',
    'description': '',
    'thumbnails': {'default': {'url': 'https://i.ytimg.com/vi/rI0Y5FxhAyk/default.jpg',
      'width': 120,
      'height': 90},
     'medium': {'url': 'https://i.ytimg.com/vi/rI0Y5FxhAyk/mqdefault.jpg',
      'width': 320,
      'height': 180},
     'high': {'url': 'https://i.ytimg.com/vi/rI0Y5FxhAyk/hqdefault.jpg',
      'width': 480,
      'height': 360},
     'standard': {'url': 'https://i.ytimg.com/vi/rI0Y5FxhAyk/sddefault.jpg',
      'width': 640,
      'height': 480},
     'maxres': {'url': 'https://i.ytimg.com/vi/rI0Y5FxhAyk/maxresdefault.jpg',
      'width': 1280,
      'hei

In [7]:
# Gather the title and ID of every playlist in the response
titles = [entry['snippet']['title'] for entry in playlists['items']]
ids = [entry['id'] for entry in playlists['items']]

# Two rows (titles, ids) transposed into one row per playlist
playlist_df = pd.DataFrame([titles, ids], index=['Titles', 'PlayLists']).T
playlist_df.head()

Unnamed: 0,Titles,PlayLists
0,두두두두두,PLuMuHAJh9g_Pc_-tFDOb4zz1ejew00YSm
1,성시경의 먹을텐데,PLuMuHAJh9g_Py_PSm8gmHdlcil6CQ9QCM
2,성시경(Sung Si Kyung) 8th Album [ㅅ(Siot)],PLuMuHAJh9g_Oa9UGli-MbL-yT12Wlcf6h
3,성시경 노래,PLuMuHAJh9g_P59QyqoaUCWD3xwav_oadF
4,성시경 레시피,PLuMuHAJh9g_OfmJ3Ml36TOWrPPVDe03HD


In [8]:
# Select the playlist of interest by its row label in playlist_df
playlist_id = playlist_df.loc[3, 'PlayLists']
playlist_id

'PLuMuHAJh9g_P59QyqoaUCWD3xwav_oadF'

# 재생목록 항목별 영상리스트 추출

In [9]:
# Fetch the videos listed in the selected playlist.
playlist_videos = youtube.playlistItems().list(
    part='snippet',
    playlistId=playlist_id,
    maxResults=50,
).execute()

# Only up to 50 items can be requested at a time, so keep following the
# page token and merge each extra page into playlist_videos['items'].
page_token = playlist_videos.get('nextPageToken')
while page_token is not None:
    page = youtube.playlistItems().list(
        part='snippet',
        playlistId=playlist_id,
        maxResults=50,
        pageToken=page_token,
    ).execute()
    playlist_videos['items'] = playlist_videos['items'] + page['items']
    page_token = page.get('nextPageToken')

# The merged response now holds every page, so the first page's token is stale.
playlist_videos.pop('nextPageToken', None)

entries = playlist_videos['items']
video_names = [entry['snippet']['title'] for entry in entries]
video_ids = [entry['snippet']['resourceId']['videoId'] for entry in entries]
date = [entry['snippet']['publishedAt'] for entry in entries]

# Three rows (date, title, id) transposed into one row per video
vdf = pd.DataFrame([date, video_names, video_ids], index=['Date', 'Title', 'IDS']).T
print(vdf.shape, len(vdf))
vdf.tail()

(81, 3) 81


Unnamed: 0,Date,Title,IDS
76,2021-06-04T07:39:57Z,성시경 ’우리 한 때 사랑한 건’ Live Clip | Sung Si Kyung ...,Xrms0EYYes4
77,2021-06-04T07:32:26Z,성시경 ’방랑자’ Live Clip | Sung Si Kyung ‘Wanderer’,J5iZZ7S9yuY
78,2021-06-04T07:11:56Z,성시경 ’Mom and dad’ Live Clip | Sung Si Kyung ...,L_JQZ1fPs28
79,2021-04-25T10:22:46Z,Melody by 玉置浩二,EHDgb_EKQBA
80,2021-04-25T10:22:46Z,Have yourself a merry little Christmas 🎄,12C7P3injgw


In [10]:
# Look up metadata and statistics for every video in vdf, one request per video,
# and assemble them into video_df (one row per video, same order as vdf).
MISSING = '-'  # placeholder for a video or a field the API does not return

category_id = []
views = []
likes = []
comments = []
title = []
date = []

for vid in vdf['IDS']:
    response = youtube.videos().list(
        part='snippet,contentDetails,statistics',
        id=vid,
    ).execute()

    items = response.get('items')
    if not items:
        # No item came back for this ID: pad every column (title included)
        # so that the rows stay aligned with vdf.
        title.append(MISSING)
        category_id.append(MISSING)
        views.append(MISSING)
        likes.append(MISSING)
        comments.append(MISSING)
        date.append(MISSING)
        continue

    snippet = items[0]['snippet']
    # Use .get so that a statistic missing from the response becomes the
    # placeholder instead of raising KeyError.
    stats = items[0].get('statistics', {})

    title.append(snippet['title'])
    category_id.append(snippet['categoryId'])
    views.append(stats.get('viewCount', MISSING))
    likes.append(stats.get('likeCount', MISSING))
    comments.append(stats.get('commentCount', MISSING))
    date.append(snippet['publishedAt'])

video_df = pd.DataFrame([title, category_id, views, likes, comments, date]).T
video_df.columns = ['title', 'category_id', 'views', 'likes', 'comments', 'date']
print(video_df.shape)
video_df.tail()

(81, 6)


Unnamed: 0,title,category_id,views,likes,comments,date
76,성시경 ’우리 한 때 사랑한 건’ Live Clip | Sung Si Kyung ...,10,812825,13392,1015,2021-06-07T01:00:04Z
77,성시경 ’방랑자’ Live Clip | Sung Si Kyung ‘Wanderer’,10,412778,8905,866,2021-06-07T01:00:06Z
78,성시경 ’Mom and dad’ Live Clip | Sung Si Kyung ...,10,175756,4360,378,2021-06-07T01:00:03Z
79,Melody by 玉置浩二,22,199491,5720,664,2021-03-14T12:16:12Z
80,Have yourself a merry little Christmas 🎄,10,182840,8290,1287,2020-12-22T11:18:12Z


In [11]:
# Write video_df out as a CSV file, without the index column
video_df.to_csv(
    "성시경.csv",
    index=False,
    encoding="utf-8-sig",
)

In [19]:
# Combine the two CSV files: left-join on title, then keep the first row per title
df1 = pd.read_csv('./성시경.csv')
df2 = pd.read_csv('./성시경2.csv')
seoung = df1.merge(df2, how='left', on='title').drop_duplicates(subset='title')
seoung.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 56 entries, 0 to 75
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   title        56 non-null     object
 1   category_id  56 non-null     int64 
 2   views        56 non-null     int64 
 3   likes        56 non-null     int64 
 4   comments     56 non-null     int64 
 5   date         56 non-null     object
 6   dislikes     52 non-null     object
 7   hashtag      49 non-null     object
dtypes: int64(4), object(4)
memory usage: 3.9+ KB


# 유튜브 댓글 수집 (보류)

In [7]:
# Extract the video ID of the first search result that is a video.
# The search request sets no type filter, so a result may be a channel or a
# playlist; indexing ['id']['videoId'] on such a result would raise KeyError.
video_id = next(
    (
        item['id']['videoId']
        for item in search_response['items']
        if item['id'].get('kind') == 'youtube#video'
    ),
    None,
)
if video_id is None:
    raise ValueError('search_response contains no video result')
video_id

'3tc3F4M607s'

In [12]:
# Collect the top-level comments of video_id, plus the replies included in each
# comment thread. Every entry is [text, author name, published time, like count].
comments = []

request_args = {
    'part': 'snippet,replies',
    'videoId': video_id,
    'maxResults': 50,
}

while True:
    response = youtube.commentThreads().list(**request_args).execute()

    for item in response.get('items', []):
        comment = item['snippet']['topLevelComment']['snippet']
        comments.append([comment['textDisplay'], comment['authorDisplayName'], comment['publishedAt'], comment['likeCount']])

        # Read replies with .get: a thread without a 'replies' object simply
        # contributes no replies, rather than raising KeyError when
        # totalReplyCount is positive but 'replies' is absent.
        for reply_item in item.get('replies', {}).get('comments', []):
            reply = reply_item['snippet']
            comments.append([reply['textDisplay'], reply['authorDisplayName'], reply['publishedAt'], reply['likeCount']])

    next_token = response.get('nextPageToken')
    if not next_token:
        break
    # Ask for the following page on the next iteration.
    request_args['pageToken'] = next_token

comments

[['성시경그형그아 다나카 이나르 기억이가 사라져쏘🌹', '나몰라패밀리 핫쇼', '2023-01-12T09:33:04Z', 2375],
 ['너무 너무 소둥그하무니다🙆\u200d♀️', '민트', '2023-01-12T15:38:36Z', 0],
 ['산소임 ㅋㅋㅋ 양자역학ㅋㅋㅋㅋㅋㅋ', '마음의창', '2023-01-12T15:11:02Z', 0],
 ['미차 최고의 조합', 'Soo Jang', '2023-01-12T15:05:38Z', 0],
 ['田中大好き〜🌹', 'luck vvf', '2023-01-12T14:55:25Z', 0],
 ['댓글마저 기엽다 ㅋㅋㅋㅋㅋㅋ🌹', '꿀땅콩', '2023-01-12T14:46:08Z', 0],
 ['시경님과 다나카상의 만남은 너무르 재밋그있으요그~~ㅋㅋㅋㅋ', 'SY', '2023-01-12T15:41:00Z', 0],
 ['시경그니무 목소리가 술을 무르무니다.다나카에게  퐁당그 빠지겠스무니다.🧡', '민트', '2023-01-12T15:40:42Z', 0],
 ['다나카😁❤💚', 'ffland', '2023-01-12T15:40:35Z', 0],
 ['불편ㅜㅜ', '강ᅢᄂ호', '2023-01-12T15:38:59Z', 0],
 ['다나카상 내가루 선태그한기루가 제일 멋이쓰므니다', '김도율', '2023-01-12T15:37:41Z', 0],
 ['성그시그경상이랑 수르리라니 쓰고이데쓰네!!^^ 얏빠리 독그도와 칸코크진데쓰네!! 이토 히로부미 빠가데쓰',
  'sys you',
  '2023-01-12T15:35:10Z',
  0],
 ['정말 보고 싶었던 두분입니다 최고최고', '솜이', '2023-01-12T15:35:08Z', 0],
 ['쩐다…. 다나카 진짜 린정!!!!!!', '연어크림치즈베이글', '2023-01-12T15:35:03Z', 0],
 ['ㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋㅋ<a href="https://www.youtube.com/watch?