In [3]:
#Import Library Google API & Tools
from googleapiclient.discovery import build
from dateutil import parser
import pandas as pd
from IPython.display import JSON
import json

#Data Visualisasion 
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

#Other Library
import re

In [4]:
jsonFile = open('./credential.json','r');
data = json.load(jsonFile);
tokenAPI = data['youtubeToken'];
channelId = data['channelId'];
jsonFile.close()

In [5]:
#Variable Credentials
apiYoutube = tokenAPI
idYoutube = channelId
serviceName = 'youtube'
apiVersion = 'v3'

In [6]:
#Create API Youtube Channel
youtube = build(serviceName,apiVersion,developerKey=apiYoutube);

In [7]:
# Function To Get Data API Youtube
def get_channel_stats(youtube, channel_ids):
    """
    Get channel statistics: title, subscriber count, view count, video count, upload playlist
    Params:

    youtube: the build object from googleapiclient.discovery
    channels_ids: list of channel IDs

    Returns:
    Dataframe containing the channel statistics for all channels in the provided list: title, subscriber count, view count, video count, upload playlist

    """
    all_data = []
    request = youtube.channels().list(
        part='snippet,contentDetails,statistics',
        id=','.join(channel_ids))
    response = request.execute()

    for i in range(len(response['items'])):
        data = dict(channelName=response['items'][i]['snippet']['title'],
                    subscribers=response['items'][i]['statistics']['subscriberCount'],
                    views=response['items'][i]['statistics']['viewCount'],
                    totalVideos=response['items'][i]['statistics']['videoCount'],
                    playlistId=response['items'][i]['contentDetails']['relatedPlaylists']['uploads'])
        all_data.append(data)

    return pd.DataFrame(all_data)


def get_video_ids(youtube, playlist_id):
    """
    Get list of video IDs of all videos in the given playlist
    Params:

    youtube: the build object from googleapiclient.discovery
    playlist_id: playlist ID of the channel

    Returns:
    List of video IDs of all videos in the playlist

    """

    request = youtube.playlistItems().list(
        part='contentDetails',
        playlistId=playlist_id,
        maxResults=50)
    response = request.execute()

    video_ids = []

    for i in range(len(response['items'])):
        video_ids.append(response['items'][i]['contentDetails']['videoId'])

    next_page_token = response.get('nextPageToken')
    more_pages = True

    while more_pages:
        if next_page_token is None:
            more_pages = False
        else:
            request = youtube.playlistItems().list(
                part='contentDetails',
                playlistId=playlist_id,
                maxResults=50,
                pageToken=next_page_token)
            response = request.execute()

            for i in range(len(response['items'])):
                video_ids.append(response['items'][i]
                                 ['contentDetails']['videoId'])

            next_page_token = response.get('nextPageToken')

    return video_ids


def get_video_details(youtube, video_ids):
    """
    Get video statistics of all videos with given IDs
    Params:

    youtube: the build object from googleapiclient.discovery
    video_ids: list of video IDs

    Returns:
    Dataframe with statistics of videos, i.e.:
        'channelTitle', 'title', 'description', 'tags', 'publishedAt'
        'viewCount', 'likeCount', 'favoriteCount', 'commentCount'
        'duration', 'definition', 'caption'
    """

    all_video_info = []

    for i in range(0, len(video_ids), 50):
        request = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id=','.join(video_ids[i:i+50])
        )
        response = request.execute()

        for video in response['items']:
            stats_to_keep = {'snippet': ['channelTitle', 'title', 'description', 'tags', 'publishedAt'],
                             'statistics': ['viewCount', 'likeCount', 'favouriteCount', 'commentCount'],
                             'contentDetails': ['duration', 'definition', 'caption']
                             }
            video_info = {}
            video_info['video_id'] = video['id']

            for k in stats_to_keep.keys():
                for v in stats_to_keep[k]:
                    try:
                        video_info[v] = video[k][v]
                    except:
                        video_info[v] = None

            all_video_info.append(video_info)

    return pd.DataFrame(all_video_info)


def get_comments_in_videos(youtube, video_ids):
    """
    Get top level comments as text from all videos with given IDs (only the first 10 comments due to quote limit of Youtube API)
    Params:

    youtube: the build object from googleapiclient.discovery
    video_ids: list of video IDs

    Returns:
    Dataframe with video IDs and associated top level comment in text.

    """
    all_comments = []

    for video_id in video_ids:
        try:
            request = youtube.commentThreads().list(
                part="snippet,replies",
                videoId=video_id
            )
            response = request.execute()

            comments_in_video = [comment['snippet']['topLevelComment']
                                 ['snippet']['textOriginal'] for comment in response['items'][0:10]]
            comments_in_video_info = {
                'video_id': video_id, 'comments': comments_in_video}

            all_comments.append(comments_in_video_info)

        except:
            # When error occurs - most likely because comments are disabled on a video
            print('Could not get comments for video ' + video_id)

    return pd.DataFrame(all_comments)


def YTDurationToSeconds(duration):
    match = re.match('PT(\d+H)?(\d+M)?(\d+S)?', duration).groups()
    hours = _js_parseInt(match[0]) if match[0] else 0
    minutes = _js_parseInt(match[1]) if match[1] else 0
    seconds = _js_parseInt(match[2]) if match[2] else 0
    return hours * 3600 + minutes * 60 + seconds


def YTDurationToSecondsArray(duration):

    answer = []

    for duration in duration:
        match = re.match('PT(\d+H)?(\d+M)?(\d+S)?', duration).groups()
        hours = _js_parseInt(match[0]) if match[0] else 0
        minutes = _js_parseInt(match[1]) if match[1] else 0
        seconds = _js_parseInt(match[2]) if match[2] else 0
        answer.append(hours * 3600 + minutes * 60 + seconds)

    return pd.DataFrame(answer)

# js-like parseInt
# https://gist.github.com/douglasmiranda/2174255


def _js_parseInt(string):
    return int(''.join([x for x in string if x.isdigit()]))


In [8]:
#Showing Stats Data Channel Youtube
channelStats = get_channel_stats(youtube,idYoutube);
channelStats

Unnamed: 0,channelName,subscribers,views,totalVideos,playlistId
0,Caveine,67400,12316207,148,UUpw2MGdcV_a-QyDzXTLitDg
1,Dea Afrizal,224000,14640892,354,UUU7YluxOYon-yofPxfGHVog


In [9]:
#Playlist ID
playlistId = 'PL_mdEMo1RZrsiHbUe43drlb_92i9L8BuQ'
videoId = get_video_ids(youtube,playlistId);
videoId

['Zc_-8sTJzZs',
 'C3onJ0TL-Yc',
 'vnM76nvlAsM',
 'WBpQCvOG2Zc',
 'Hyhwj_PrTmk',
 '-wPL0t_XrXs',
 'ZMcAPdY1sfU',
 'W0XHZawX7Jc',
 'RJRWzF9Ad2k',
 'KIhu7wVSdbA',
 'nABipurmgqU',
 'xROQiH_LI7s',
 'YHqei2Ca-eY',
 'fW2eqaiG3Lk',
 '87nTobKODFA',
 'vBkrnbt4Cmo',
 'ZaKVyEnR9og']

In [10]:
#Get Detail Video from Video ID
videoYoutubeDetail = get_video_details(youtube,videoId);
# videoYoutubeDetail.to_csv(r'data.csv')
videoYoutubeDetail

Unnamed: 0,video_id,channelTitle,title,description,tags,publishedAt,viewCount,likeCount,favouriteCount,commentCount,duration,definition,caption
0,Zc_-8sTJzZs,Papilo Gaming,Katanya Most Wanted tuh seru ya...,Gameplay: Need for Speed Most Wanted [Episode ...,"[NEED FOR SPEED, MOST WANTED, GAME, PS2, WINDA...",2021-02-23T12:49:48Z,260290,9464,,1773,PT9M44S,hd,False
1,C3onJ0TL-Yc,Papilo Gaming,Polisinya jadi susah gini cuy....,Gameplay: Need for Speed Most Wanted [Episode ...,"[NEED FOR SPEED, MOST WANTED, GAME, PS2, WINDA...",2021-03-18T12:45:43Z,248375,8725,,1566,PT8M39S,hd,False
2,vnM76nvlAsM,Papilo Gaming,Lexus gua udah sangar nih boss!!,Gameplay: Need for Speed Most Wanted [Episode ...,"[NEED FOR SPEED, MOST WANTED, GAME, PS2, WINDA...",2021-03-27T12:46:38Z,253527,8665,,1391,PT8M58S,hd,False
3,WBpQCvOG2Zc,Papilo Gaming,belajar main NFS Most Wanted dari komen kalian,Gameplay: Need for Speed Most Wanted [Episode ...,"[NEED FOR SPEED, MOST WANTED, GAME, PS2, WINDA...",2021-04-05T11:36:11Z,178801,7230,,1303,PT8M13S,hd,False
4,Hyhwj_PrTmk,Papilo Gaming,di seruduk pembalap UFC gw...,Gameplay: Need for Speed Most Wanted [Episode ...,"[NEED FOR SPEED, MOST WANTED, GAME, PS2, WINDA...",2021-04-13T09:52:02Z,228758,7845,,1097,PT9M1S,hd,False
5,-wPL0t_XrXs,Papilo Gaming,boss yang bikin ngantuk saat mengemudi wkwk,Gameplay: Need for Speed Most Wanted [Episode ...,"[NEED FOR SPEED, MOST WANTED, GAME, PS2, WINDA...",2021-05-05T10:25:54Z,231104,8792,,1354,PT8M12S,hd,False
6,ZMcAPdY1sfU,Papilo Gaming,boss paling bergaya di most wanted,Gameplay: Need for Speed Most Wanted [Episode ...,"[NEED FOR SPEED, MOST WANTED, GAME, PS2, WINDA...",2021-05-22T11:43:28Z,274361,9273,,1258,PT8M38S,hd,False
7,W0XHZawX7Jc,Papilo Gaming,bro ('_')??,Gameplay: Need for Speed Most Wanted [Episode ...,"[NEED FOR SPEED, MOST WANTED, GAME, PS2, WINDA...",2021-06-07T10:45:14Z,262728,9692,,1213,PT8M43S,hd,False
8,RJRWzF9Ad2k,Papilo Gaming,Efek kelamaan gak main nih game...,Gameplay: Need for Speed Most Wanted [Episode ...,"[NEED FOR SPEED, MOST WANTED, GAME, PS2, WINDA...",2021-07-16T12:26:56Z,313282,11093,,1253,PT9M19S,hd,False
9,KIhu7wVSdbA,Papilo Gaming,Ada yang tau ini kenapa?,Gameplay: Need for Speed Most Wanted [Episode ...,"[NEED FOR SPEED, MOST WANTED, GAME, PS2, WINDA...",2021-08-14T11:42:14Z,103491,5913,,942,PT2M33S,hd,False


In [11]:
#Get Comment From Videos
videoComments = get_comments_in_videos(youtube,videoId)
videoComments

Unnamed: 0,video_id,comments
0,Zc_-8sTJzZs,[gua beneran baru pertama kali mainin nih game...
1,C3onJ0TL-Yc,[wkwkwk lexus gua sampe dihujat dimana mana do...
2,vnM76nvlAsM,[wkwk gua bukan tipe player yang bisa main gam...
3,WBpQCvOG2Zc,[hmm halo guys jadi karena banyak dari kalian ...
4,Hyhwj_PrTmk,[ez wkwk ~(˘▾˘~)\nyuk yang mau ngobrolin hal r...
5,-wPL0t_XrXs,[butuh musuh yang lebih susah asli wkwk yang i...
6,ZMcAPdY1sfU,[weehhehehe gak kerasa udah 2 hari gak upload ...
7,W0XHZawX7Jc,"[hmm jadi gini ges, karena instagram gua udah ..."
8,RJRWzF9Ad2k,"[tamatnya masih lama ternyata ya wkwk, Hadeh i..."
9,KIhu7wVSdbA,"[wkwk bantu gw ges :""D\n\nUpdate: masalahnya u..."


In [12]:
#Convert Duration Youtube to second integer
youtubeDuration = YTDurationToSecondsArray(videoYoutubeDetail['duration'])

data = videoYoutubeDetail.assign(second=youtubeDuration)

df = data[['title','second']]


sortData = df.sort_values(by='second')

print(sortData.to_string(index=False))


                                         title  second
                      Ada yang tau ini kenapa?     153
   boss yang bikin ngantuk saat mengemudi wkwk     492
belajar main NFS Most Wanted dari komen kalian     493
  Boss paling kasian yang pernah gw lawan wkwk     500
                  Curang adalah jalan ninjaku!     516
            boss paling bergaya di most wanted     518
             Polisinya jadi susah gini cuy....     519
                                   bro ('_')??     523
              Lexus gua udah sangar nih boss!!     538
                 di seruduk pembalap UFC gw...     541
                   Mobilnya gw bikin "Ming"kem     555
                walau ngebug tetep menang bro!     556
            Efek kelamaan gak main nih game...     559
            Katanya Most Wanted tuh seru ya...     584
                 Ngepush sampe ke blacklist 2!     586
Udah hampir setahun belom juga tamat (´ ∀ ` *)     645
                            Balikin mobil gua!     669


In [13]:
print("Video Dengan Durasi Terlama Di Playlist")
df.max()


Video Dengan Durasi Terlama Di Playlist


title     walau ngebug tetep menang bro!
second                               669
dtype: object

In [14]:

print("Video Dengan Durasi Terlama Di Playlist")
df.min()

Video Dengan Durasi Terlama Di Playlist


title     Ada yang tau ini kenapa?
second                         153
dtype: object

In [None]:
print("Testing");