In [1]:
# Standard library: environment access, time handling and text cleaning
import os
import re
import sqlite3
import time
from datetime import datetime

# Data handling
import pandas as pd

# YouTube data extraction
from apiclient.discovery import build
from pytube import YouTube
from youtube_transcript_api import YouTubeTranscriptApi

In [2]:
def get_comments(video_id, youtube_build, max_results=20):
    '''Fetch the top-level comments of a video, ordered by relevance.

    video_id : id of the video, sent as ``videoId`` in the request

    youtube_build : YouTube API client object; it must provide
        ``commentThreads().list(...).execute()``

    max_results : maximum number of comment threads to request,
        from 1 up to 100, default : 20

    Returns a list with the ``textOriginal`` string of every top-level
    comment in the order the response lists them, or ``None`` when the
    request fails or the response does not have the expected structure.
    '''
    try:
        response = youtube_build.commentThreads().list(
            videoId=video_id,
            part='snippet',
            maxResults=max_results,
            order='relevance',
        ).execute()

        return [item['snippet']['topLevelComment']['snippet']['textOriginal']
                for item in response['items']]
    except Exception:
        # Best-effort lookup: any request or parsing error yields None so a
        # single failing video does not abort a whole batch. Only Exception
        # is caught (not a bare except) so KeyboardInterrupt and SystemExit
        # still propagate and a long crawl can be stopped.
        return None

In [3]:
def add_comments_df(youtube_df, api_key, results=20):
    '''Return a copy of a crawled video DataFrame with a 'comments' column added.

    youtube_df : crawled DataFrame; its 'video_id' column is read

    api_key : key issued for the YouTube API, passed to ``build`` as
        ``developerKey``

    results : maximum number of comments requested per video,
        from 1 up to 100, default : 20

    Each 'comments' cell holds whatever ``get_comments`` returned for that
    row's video id. The input DataFrame itself is not modified.
    '''
    youtube = build('youtube', 'v3', developerKey=api_key)

    # Wrap each result in a one-element row so the whole per-video value
    # ends up in a single cell of a single column.
    comment_rows = [[get_comments(video_id, youtube, max_results=results)]
                    for video_id in youtube_df['video_id']]

    # Build the new column on the input's own index: concat(axis=1) aligns on
    # index labels, so a default 0..n-1 index would misalign the comments (and
    # add NaN-filled rows) whenever youtube_df is indexed differently.
    comments_df = pd.DataFrame(comment_rows,
                               columns=['comments'],
                               index=youtube_df.index)

    return pd.concat([youtube_df, comments_df], axis=1)

In [4]:
# Every per-channel video-info CSV sits in the same directory and follows the
# same file-name pattern, so the path is assembled in one place.
_VIDEO_INFO_DIR = 'youtube_crawling_0801_0807/csv_file'


def _load_video_info(channel):
    '''Read one channel's video-info CSV, using its first column as the index.

    channel : file-name prefix of the channel, e.g. 'jtbc'
    '''
    path = '{}/{}_video_info_0801_0807.csv'.format(_VIDEO_INFO_DIR, channel)
    return pd.read_csv(path, index_col=0)


channel_a = _load_video_info('channel_a')
jtbc = _load_video_info('jtbc')
kbs = _load_video_info('kbs')
mbc = _load_video_info('mbc')
mbn = _load_video_info('mbn')
sbs = _load_video_info('sbs')
yoenhab = _load_video_info('yoenhab')
ytn = _load_video_info('ytn')

In [5]:
# Read the YouTube Data API key from the environment so the secret is never
# stored in the notebook. A key that was ever committed in this cell should be
# treated as leaked and revoked.
my_api_key = os.environ.get('YOUTUBE_API_KEY')
if not my_api_key:
    raise RuntimeError(
        'YOUTUBE_API_KEY is not set; export a YouTube Data API v3 key '
        'before running this notebook.'
    )

In [7]:
# Add a 'comments' column to the channel_a table (up to 20 comments requested
# per video) and write the combined table to CSV.
channel_a_comments = add_comments_df(channel_a, my_api_key, results=20)
channel_a_comments.to_csv(
    'youtube_crawling_0801_0807/add_comments_csv_file/channel_a_0801_0807.csv'
)

In [6]:
# Add a 'comments' column to the jtbc table (up to 20 comments requested per
# video) and write the combined table to CSV.
jtbc_comments = add_comments_df(jtbc, my_api_key, results=20)
jtbc_comments.to_csv(
    'youtube_crawling_0801_0807/add_comments_csv_file/jtbc_0801_0807.csv'
)

In [8]:
# Add a 'comments' column to the kbs table (up to 20 comments requested per
# video) and write the combined table to CSV.
kbs_comments = add_comments_df(kbs, my_api_key, results=20)
kbs_comments.to_csv(
    'youtube_crawling_0801_0807/add_comments_csv_file/kbs_0801_0807.csv'
)

In [9]:
# Add a 'comments' column to the mbc table (up to 20 comments requested per
# video) and write the combined table to CSV.
mbc_comments = add_comments_df(mbc, my_api_key, results=20)
mbc_comments.to_csv(
    'youtube_crawling_0801_0807/add_comments_csv_file/mbc_0801_0807.csv'
)

In [10]:
# Add a 'comments' column to the mbn table (up to 20 comments requested per
# video) and write the combined table to CSV.
mbn_comments = add_comments_df(mbn, my_api_key, results=20)
mbn_comments.to_csv(
    'youtube_crawling_0801_0807/add_comments_csv_file/mbn_0801_0807.csv'
)

In [11]:
# Add a 'comments' column to the sbs table (up to 20 comments requested per
# video) and write the combined table to CSV.
sbs_comments = add_comments_df(sbs, my_api_key, results=20)
sbs_comments.to_csv(
    'youtube_crawling_0801_0807/add_comments_csv_file/sbs_0801_0807.csv'
)

In [12]:
# Add a 'comments' column to the yoenhab table (up to 20 comments requested
# per video) and write the combined table to CSV.
yoenhab_comments = add_comments_df(yoenhab, my_api_key, results=20)
yoenhab_comments.to_csv(
    'youtube_crawling_0801_0807/add_comments_csv_file/yoenhab_0801_0807.csv'
)

In [13]:
# Add a 'comments' column to the ytn table (up to 20 comments requested per
# video) and write the combined table to CSV.
ytn_comments = add_comments_df(ytn, my_api_key, results=20)
ytn_comments.to_csv(
    'youtube_crawling_0801_0807/add_comments_csv_file/ytn_0801_0807.csv'
)