In [1]:
from googleapiclient.discovery import build
import pandas as pd
import seaborn as sns
import itertools
import warnings
warnings.filterwarnings("ignore")
import sys
import os
from pathlib import Path

user_home_path = str(Path.home())
sys.path.append(f"{user_home_path}/OrbitNext/api-data-download")


API_KEY_3 = 'AIzaSyC_G34pzth_j1dnilV93Dy1VnXCrEAbZqg'

youtube_builder = build('youtube', 'v3', developerKey=API_KEY_3)

example_channel = [
    ("UCBJycsmduvYEL83R_U4JriQ", "MKBHD"), 
    ("UCVYamHliCI9rw1tHR1xbkfw", "Dave2D"),
]

In [2]:
playist_id_df = pd.read_csv(f'{user_home_path}/OrbitNext/api-data-download/data_center/public_stats/playist_id/playist_id.csv')
playist_id_df = playist_id_df.loc[:, ~playist_id_df.columns.str.contains('^Unnamed')]

In [3]:
playist_id_df

Unnamed: 0,channel_name,playlist_id
0,Marques Brownlee,UUBJycsmduvYEL83R_U4JriQ
1,Dave2D,UUVYamHliCI9rw1tHR1xbkfw


In [4]:
len(playist_id_df) 

2

In [16]:
# func

def get_channel_stats(youtube_builder, channel_pool):

    requests = youtube_builder.channels().list(
        part = 'snippet, contentDetails, statistics',
        id = ','.join(channel_pool)
    )
    response = requests.execute()
    all_channel_data = []
    all_playlist_id_data = []
    all_description_data = []
    for i in range(len(response['items'])):
        # channel_data
        channel_data = dict(
            channel_name = response['items'][i]['snippet']['title'],
            subscribers = response['items'][i]['statistics']['subscriberCount'],
            view_count = response['items'][i]['statistics']['viewCount'],
            video_count = response['items'][i]['statistics']['videoCount'],
            playlist_id = response['items'][i]['contentDetails']['relatedPlaylists']['uploads'],
            etag =  response['items'][i]['etag'],
            publish_date = response['items'][i]['snippet']['publishedAt'],)
        
        # playlist_id_data
        playlist_id_data = dict(
            channel_name = response['items'][i]['snippet']['title'],
            playlist_id = response['items'][i]['contentDetails']['relatedPlaylists']['uploads'])
        
        # description_data
        description_data = dict(
            channel_name = response['items'][i]['snippet']['title'],              
            description = response['items'][i]['snippet']['description'])
        
        all_channel_data.append(channel_data)
        all_channel_data = pd.DataFrame(all_channel_data)
        all_channel_data['subscribers'] = pd.to_numeric(all_channel_data['subscribers'])
        all_channel_data['view_count'] = pd.to_numeric(all_channel_data['view_count'])
        all_channel_data['publish_date'] = pd.to_datetime(all_channel_data['publish_date'])# maybe just need yy-mm-dd
        
        all_playlist_id_data.append(playlist_id_data)
        all_playlist_id_data = pd.DataFrame(all_playlist_id_data)
        
        all_description_data.append(description_data)
        all_description_data = pd.DataFrame(all_description_data)
        
    return all_channel_data, all_description_data, all_playlist_id_data


def get_video_ids(youtube, playlist_id):

    requests = youtube_builder.playlistItems().list(
        part = "contentDetails", 
        playlistId = playlist_id,
        maxResults = 50
    )
    response = requests.execute()
    video_ids = []
    for i in range(len(response['items'])):
        video_ids.append(response['items'][i]['contentDetails']['videoId'])
    next_page_token = response.get('nextPageToken')
    more_pages = True

    while more_pages:
        if next_page_token is None:
            more_pages = False
        else:
            requests = youtube_builder.playlistItems().list(
                    part = "contentDetails", 
                    playlistId = playlist_id,
                    maxResults = 50,
                    pageToken = next_page_token
                )
            response = requests.execute()

            for i in range(len(response['items'])):
                video_ids.append(response['items'][i]['contentDetails']['videoId'])
            next_page_token = response.get('nextPageToken')

    return video_ids

def get_video_details_in_json(youtube, video_ids):

    requests = youtube_builder.videos().list(
        part = 'snippet, statistics',
        id =  ','.join(video_ids[:50])
    )
    response = requests.execute()

    return response

def get_video_stats(youtube, video_ids):

    all_video_stats = []
    all_video_tag = []
    for i in range(0, len(video_ids), 50):
        requests = youtube_builder.videos().list(
            part = 'snippet, statistics',
            id = ','.join(video_ids[i:i+50])
        )
        response = requests.execute()
        for video in response['items']:
            video_stats = dict(
                title = video['snippet']['title'],
                published_date = video['snippet']['publishedAt'],
                channel_id = video['snippet']['channelId'],
                video_stats = video['statistics'], # first put it into 1 dict
                description = video['snippet']['description'],

#                 video_stats -> bug to-fix... shd work like below but dun know why...
#                 video_view_count = video['statistics']['viewCount'],
#                 video_like_count = video['statistics']['likeCount'],
#                 video_favorite_count = video['statistics']['favoriteCount'],
#                 video_comment_count = video['statistics']['commentCount'],
            )
            # tag shd also work??
            # video_tag = dict(tags = video["tags"])
            all_video_stats.append(video_stats)
            all_video_stats_df = pd.DataFrame(all_video_stats)
            all_video_stats_df['published_date'] = pd.to_datetime(all_video_stats_df['published_date']).dt.date

    return all_video_stats_df

In [6]:
# single youtuber example
mkbhd_video_ids = get_video_ids(youtube_builder, playlist_id='UUBJycsmduvYEL83R_U4JriQ')
mkbhd_video_ids

['d7y9z7pjCRM',
 'yQThMUn_UmM',
 'MEiq0oCUb_8',
 'JuWF9G6SSi0',
 'ErMwWXQxHp0',
 '4SHlY9AoXzc',
 'AxV0_1Y4zl0',
 'DFgYGBtJLnI',
 'UsD87v8F914',
 'HeMIZC2rkMo',
 'Jb88eui8SqQ',
 'WfVF-Ec4naQ',
 'lYPe4MsALk4',
 'dKq_xfCz3Jk',
 'cCCBDhdcSGw',
 'pSdKnNj7ozk',
 'CkoquiSnqbk',
 'mrkAmmMakMg',
 'xcjZvAFBH_Y',
 'MjGMeMrH73I',
 '4U2ZxO7b8iM',
 'MUU0BjJjAvk',
 'yCBEumeXY4A',
 '6CsJZxfZsL0',
 'U3DNz5asasA',
 'ZQ4E0JJcxz0',
 '7jaMJGtAV9M',
 'q72dA533sCg',
 'MiTG1ride7s',
 'dQrBgda0sEY',
 '1Cw-vODp-8Y',
 'YJGFjL7E3vU',
 'a6zvvlrd-jw',
 'UpqaQR4ikig',
 'yoigsHYc77s',
 'wvhP1al3Ur4',
 'pwHNannxolo',
 'QpbGctuHoMY',
 'Y5iif7YskU4',
 'CMm7UZKtGNk',
 '-EZ_3Tq9a8c',
 'bTYV7aFC6KE',
 'vXIAB_1FEC0',
 'qWIkBMNKj1s',
 'k_OOiuaZSKc',
 'ldadR6XqiWE',
 'vax8FCuQUsE',
 'L-BN9Db5QhY',
 '7IaYSxDp88s',
 'BuaKzm7Kq9Q',
 '-njHjebtIg4',
 '-pTGc7cIBIA',
 'zV7y4Oex1DQ',
 '0Jc5Ryk2mCw',
 'n4xw2fmSCrs',
 'fBHq7U8hLp8',
 'IDcyXtweHCw',
 'SOq05_6w0ig',
 'dp4nWm59esI',
 '_VAlGmtfDN0',
 'MZ8giCWDcyE',
 'gkmzDwgvqQM',
 'JnGXTk

In [17]:
# single youtuber example
get_video_stats(youtube_builder, mkbhd_video_ids)

Unnamed: 0,title,published_date,channel_id,video_stats,description
0,The Hyundai IONIQ 5: I Get It Now!,2022-08-25,UCBJycsmduvYEL83R_U4JriQ,"{'viewCount': '1430410', 'likeCount': '72582',...","If the future of cars is electric, we need mor..."
1,Android 13 Hands-On: Top 5 Features!,2022-08-16,UCBJycsmduvYEL83R_U4JriQ,"{'viewCount': '2961712', 'likeCount': '137452'...",The 5 best features in the newest Android 13 U...
2,Dope Tech: The Most Extreme Gaming Monitor!,2022-08-15,UCBJycsmduvYEL83R_U4JriQ,"{'viewCount': '2428473', 'likeCount': '119859'...","Dope Tech is back! Transparent batteries, mass..."
3,Samsung Z Fold 4/ Flip 4 Impressions + Watch 5...,2022-08-10,UCBJycsmduvYEL83R_U4JriQ,"{'viewCount': '3166866', 'likeCount': '128356'...","A hands-on with Samsung's new Z Fold 4, Z Flip..."
4,Best Back to School Tech 2022!,2022-08-08,UCBJycsmduvYEL83R_U4JriQ,"{'viewCount': '1887820', 'likeCount': '97581',...",Thanks to @Best Buy for sponsoring today's vid...
...,...,...,...,...,...
1455,Fraps HD Test in 1080p (18 WOS),2009-01-01,UCBJycsmduvYEL83R_U4JriQ,"{'viewCount': '133973', 'likeCount': '1636', '...","***WATCH IN HD***\r\n\r\nAlso, check out my vi..."
1456,HP Pavilion dv7t Media Center Remote Overview,2009-01-01,UCBJycsmduvYEL83R_U4JriQ,"{'viewCount': '4686155', 'likeCount': '235834'...",This is my short review of the Media Center Re...
1457,High fps LG Voyager footage,2008-09-16,UCBJycsmduvYEL83R_U4JriQ,"{'viewCount': '185229', 'likeCount': '2732', '...",This video was uploaded directly from my LG Vo...
1458,14 Year knock-down shot (11 Handicap),2008-05-30,UCBJycsmduvYEL83R_U4JriQ,"{'viewCount': '179988', 'likeCount': '2288', '...","Me hitting a knock-down shot with a six iron, ..."


#### let try all channel in example pool

In [8]:
# correct

all_videoId = []
for playlist_id in playist_id_df['playlist_id']:
    video_ids = get_video_ids(youtube_builder, playlist_id=playlist_id)
    all_videoId.append(video_ids)


In [9]:
len(all_videoId) # we use 2 youtuber as example

2

In [10]:
all_videoId_df = pd.DataFrame(all_videoId)
all_videoId_df.T

# dataframe might not be a good idea...

Unnamed: 0,0,1
0,d7y9z7pjCRM,nhOnLlshf60
1,yQThMUn_UmM,QEo7b36mOcM
2,MEiq0oCUb_8,BnXRS18K7gc
3,JuWF9G6SSi0,fu_M9EmP_gk
4,ErMwWXQxHp0,J_0teOgTAhQ
...,...,...
1455,hCEBLSSU-2Y,
1456,9gk_rl3y_SU,
1457,f7mqssf66l4,
1458,PD5igzFB8iY,


In [19]:
all_videos = []
all_videos_details = []

for i in all_videoId:
    get_video_details(youtube_builder, i)
    all_videos.append(i)
    for video_id in all_videos:
        all_video_stats_df = get_video_stats(youtube_builder, video_id)
        all_videos_details.append(video_id)
        

# len(all_videos)

In [20]:
all_videos

[['d7y9z7pjCRM',
  'yQThMUn_UmM',
  'MEiq0oCUb_8',
  'JuWF9G6SSi0',
  'ErMwWXQxHp0',
  '4SHlY9AoXzc',
  'AxV0_1Y4zl0',
  'DFgYGBtJLnI',
  'UsD87v8F914',
  'HeMIZC2rkMo',
  'Jb88eui8SqQ',
  'WfVF-Ec4naQ',
  'lYPe4MsALk4',
  'dKq_xfCz3Jk',
  'cCCBDhdcSGw',
  'pSdKnNj7ozk',
  'CkoquiSnqbk',
  'mrkAmmMakMg',
  'xcjZvAFBH_Y',
  'MjGMeMrH73I',
  '4U2ZxO7b8iM',
  'MUU0BjJjAvk',
  'yCBEumeXY4A',
  '6CsJZxfZsL0',
  'U3DNz5asasA',
  'ZQ4E0JJcxz0',
  '7jaMJGtAV9M',
  'q72dA533sCg',
  'MiTG1ride7s',
  'dQrBgda0sEY',
  '1Cw-vODp-8Y',
  'YJGFjL7E3vU',
  'a6zvvlrd-jw',
  'UpqaQR4ikig',
  'yoigsHYc77s',
  'wvhP1al3Ur4',
  'pwHNannxolo',
  'QpbGctuHoMY',
  'Y5iif7YskU4',
  'CMm7UZKtGNk',
  '-EZ_3Tq9a8c',
  'bTYV7aFC6KE',
  'vXIAB_1FEC0',
  'qWIkBMNKj1s',
  'k_OOiuaZSKc',
  'ldadR6XqiWE',
  'vax8FCuQUsE',
  'L-BN9Db5QhY',
  '7IaYSxDp88s',
  'BuaKzm7Kq9Q',
  '-njHjebtIg4',
  '-pTGc7cIBIA',
  'zV7y4Oex1DQ',
  '0Jc5Ryk2mCw',
  'n4xw2fmSCrs',
  'fBHq7U8hLp8',
  'IDcyXtweHCw',
  'SOq05_6w0ig',
  'dp4nWm59esI

In [24]:
all_video_list = []
for i in all_videos:
    all_video_list.append(i)

In [25]:
all_video_list

[['d7y9z7pjCRM',
  'yQThMUn_UmM',
  'MEiq0oCUb_8',
  'JuWF9G6SSi0',
  'ErMwWXQxHp0',
  '4SHlY9AoXzc',
  'AxV0_1Y4zl0',
  'DFgYGBtJLnI',
  'UsD87v8F914',
  'HeMIZC2rkMo',
  'Jb88eui8SqQ',
  'WfVF-Ec4naQ',
  'lYPe4MsALk4',
  'dKq_xfCz3Jk',
  'cCCBDhdcSGw',
  'pSdKnNj7ozk',
  'CkoquiSnqbk',
  'mrkAmmMakMg',
  'xcjZvAFBH_Y',
  'MjGMeMrH73I',
  '4U2ZxO7b8iM',
  'MUU0BjJjAvk',
  'yCBEumeXY4A',
  '6CsJZxfZsL0',
  'U3DNz5asasA',
  'ZQ4E0JJcxz0',
  '7jaMJGtAV9M',
  'q72dA533sCg',
  'MiTG1ride7s',
  'dQrBgda0sEY',
  '1Cw-vODp-8Y',
  'YJGFjL7E3vU',
  'a6zvvlrd-jw',
  'UpqaQR4ikig',
  'yoigsHYc77s',
  'wvhP1al3Ur4',
  'pwHNannxolo',
  'QpbGctuHoMY',
  'Y5iif7YskU4',
  'CMm7UZKtGNk',
  '-EZ_3Tq9a8c',
  'bTYV7aFC6KE',
  'vXIAB_1FEC0',
  'qWIkBMNKj1s',
  'k_OOiuaZSKc',
  'ldadR6XqiWE',
  'vax8FCuQUsE',
  'L-BN9Db5QhY',
  '7IaYSxDp88s',
  'BuaKzm7Kq9Q',
  '-njHjebtIg4',
  '-pTGc7cIBIA',
  'zV7y4Oex1DQ',
  '0Jc5Ryk2mCw',
  'n4xw2fmSCrs',
  'fBHq7U8hLp8',
  'IDcyXtweHCw',
  'SOq05_6w0ig',
  'dp4nWm59esI

In [28]:
import itertools

all_video_list = list(itertools.from_iterable(all_video_list))

all_video_list

AttributeError: module 'itertools' has no attribute 'from_iterable'

In [31]:
"""
>>> x = [["a","b"], ["c"]]
>>> [inner
...     for outer in x
...         for inner in outer]
['a', 'b', 'c']

>>> [j for i in x for j in i]
['a', 'b', 'c']

"""
all_video_list = []
for outer in all_videos:
    for inner in outer:
        all_video_list = list(inner)
        

        



In [33]:
all_video_list

['e', '_', 'r', '_', 'q', 'N', 'y', '7', 'b', 'n', 'o']

In [21]:
get_video_stats(youtube_builder, all_videos)

TypeError: sequence item 0: expected str instance, list found