In [1]:
import os
from dotenv import load_dotenv
load_dotenv()
import googleapiclient.discovery
import pandas as pd

In [2]:
# set parameters required to create a youtube service object
api_service_name = "youtube"
api_version = "v3"
API_KEY = os.environ.get("API_KEY")

# fail fast with a readable message instead of building a client with
# developerKey=None when the key is missing from the environment / .env file
if not API_KEY:
    raise RuntimeError(
        "API_KEY is not set; add it to your environment or .env file "
        "before creating the YouTube service object."
    )

# create a youtube api service object
youtube = googleapiclient.discovery.build(
    api_service_name, api_version, developerKey=API_KEY)

# Video Ids

In [3]:
def get_video_ids(youtube_service_object, channel_id: str, published_after, published_before, search_term: str = None) -> list:
    """Return the video ids found by a YouTube Data API `search` request on one channel.

    Results are requested ordered by date, 50 per page, and every page is
    followed via `nextPageToken` until a response no longer carries one.


    Parameters
    ----------
    youtube_service_object : googleapiclient object
        A service object created using `googleapiclient.discovery.build`.

    channel_id : str
        The id of the YouTube channel you want to search for videos.

    published_after : str
        An RFC 3339 formatted date-time value (1970-01-01T00:00:00Z).

    published_before : str
        An RFC 3339 formatted date-time value (1970-01-01T00:00:00Z).

    search_term : str, optional
        A search term if you wish to narrow down the search using keywords.
        It is sent as the API's `q` parameter. See Notes for further information.


    Returns
    --------
    video_ids : list
        A list of the videoId values obtained from the request, in the order
        the API returned them across all pages.


    Raises
    ------
    KeyError
        If a returned item does not contain `id.videoId`.


    Notes
    ------
    search_term : str
        Your request can also use the Boolean NOT (-) and OR (|) operators to exclude videos or
        to find videos that are associated with one of several search terms. For example, to
        search for videos matching either "boating" or "sailing", set the q parameter value to
        boating|sailing. Similarly, to search for videos matching either "boating" or "sailing"
        but not "fishing", set the q parameter value to boating|sailing -fishing. The API
        documentation notes that the pipe character must be URL-escaped when it is sent in
        your API request. The URL-escaped value for the pipe character is %7C.


    References
    -----------
        https://developers.google.com/youtube/v3/docs/search

    """

    # parameters shared by every page request
    search_params = dict(
        channelId=channel_id,
        publishedAfter=published_after,
        publishedBefore=published_before,
        q=search_term,
        part="snippet",
        type="video",
        order="date",
        maxResults=50,
    )

    video_ids = []
    next_page_token = None

    while True:
        # the first request carries no pageToken; later requests pass the
        # token handed back by the previous page
        page_params = dict(search_params)
        if next_page_token is not None:
            page_params["pageToken"] = next_page_token

        response = youtube_service_object.search().list(**page_params).execute()

        # collect the video ids of this page
        video_ids.extend(item["id"]["videoId"] for item in response.get("items", []))

        # no token in the response means this was the last page
        next_page_token = response.get("nextPageToken")
        if next_page_token is None:
            return video_ids

--------------------------

# Test functions

In [5]:
# set channel and search details
# id of the channel to search (passed as `channel_id` to get_video_ids)
bandai_namco_america_id = "UC_ntXHv-XdKCD7CPynVvnQw"
# bounds of the publication window, written as RFC 3339 date-time strings
published_after = "2023-06-01T00:00:00Z"
published_before = "2023-12-28T00:00:00Z"
# keyword used to narrow the search
search_term = "tekken"

In [6]:
# set parameters required to create a youtube service object
# (repeats the service setup from the first cells of the notebook)
api_service_name = "youtube"
api_version = "v3"
API_KEY = os.environ.get("API_KEY")

# fail fast with a readable message instead of building a client with
# developerKey=None when the key is missing from the environment / .env file
if not API_KEY:
    raise RuntimeError(
        "API_KEY is not set; add it to your environment or .env file "
        "before creating the YouTube service object."
    )

# create a youtube service object
youtube = googleapiclient.discovery.build(
    api_service_name, api_version, developerKey=API_KEY)

## Get video Ids

In [7]:
# collect the ids of every video matching the channel, date window and keyword
video_ids = get_video_ids(
    youtube_service_object=youtube,
    channel_id=bandai_namco_america_id,
    published_after=published_after,
    published_before=published_before,
    search_term=search_term,
)

In [8]:
# display the ids returned by get_video_ids
video_ids

['6m7jNzjuoHU',
 'UgnPG2bScVQ',
 '9jJiNa4HoD0',
 'X1dgCe1jDYg',
 'EMZkmjE8wdw',
 'ToKJfywbe1o',
 '9D5vq-zq9y4',
 'y8JGUIF2pu4',
 'oeFfzCWif-Q',
 'bSCANspTDeE',
 'ucesGynb2Yk',
 'UcBcNOSoFzI',
 '8DVlK_QrZ-A',
 'Zc-yMi05vBA',
 'e1N4juHVqNc',
 'QH6s_o3dIic',
 '7skTtnpSb58',
 'bjzYbEjE-C4',
 'weVrUBszFIM',
 'Gw5nQaSF0CI',
 '3pGxqOFmIN4',
 'w0IqzD-gUOI',
 'PsCpewoF2E4',
 'cHnxJplTQuY',
 'qbUnCiTMCGE',
 'cIDK50IaVpg',
 'rDxrpSqYHD8',
 'YFJfLsJtVzM',
 'e7mqbmNb6eA',
 '_q26pgYDOV4',
 'n27RxZ7vnAU',
 'lDgv7CMIoRo',
 'nA-QTZbm_hU',
 'KHSwxMDibvE',
 'sNHv1y46dhs',
 'yowAmloydVY',
 '6hxZCQtpJ9w',
 '-l3AY19cn0M',
 'MxGp3wHXtNE',
 '4UkK_psUEVM',
 'maawo7O9Sg8',
 'Qkoba4YhbGo',
 'N5ZlZSnNyo0',
 'hEQTFXMQU7I',
 'flXHAFNT4sU',
 'RFO9Z_0wrKE',
 'kpmOhBWlDfc',
 '0Tk5YA4WRrg',
 'bMbDVh_OKZg',
 'UPPKjJgQT4A',
 'iILoqJlCa2s',
 'ODforBeu7_c',
 'DcDjyestr4Y',
 'Vs2piSWfofQ',
 '9Xgj0KYypVw',
 'LYdPz_GD8OQ',
 '6JLOD_a7or4',
 'Nh5udl01YXc']

In [9]:
# number of video ids collected across all result pages
len(video_ids)

58

## Get video data

In [42]:
# video_ids_test = ['UgnPG2bScVQ', '9jJiNa4HoD0','EMZkmjE8wdw','ToKJfywbe1o']
# (the line above looks like a shorter id list kept for quick manual runs — confirm or remove)
# NOTE(review): get_video_data is not defined in the visible part of this notebook;
# confirm the cell that defines it runs before this one.
# fetch the video details for the collected ids and preview the first rows
df = get_video_data(youtube_service_object=youtube, video_ids=video_ids)
df.head()

Unnamed: 0,channelTitle,channelId,videoId,publishedAt,title,description,tags,viewCount,likeCount,commentCount,favoriteCount
0,Bandai Namco Entertainment America,UC_ntXHv-XdKCD7CPynVvnQw,e7mqbmNb6eA,2023-10-18 18:00:14+00:00,TEKKEN 8 - PREMIUM COLLECTOR'S EDITION OVERVIEW,Power and destruction. In style.\n\nPre-order ...,"[Bandai Namco, Bandai Namco Entertainment, Vid...",65741,2354,290,0
1,Bandai Namco Entertainment America,UC_ntXHv-XdKCD7CPynVvnQw,rDxrpSqYHD8,2023-11-01 16:09:18+00:00,TEKKEN 8 – THE RETURN OF LEGENDS - NEW CHARACT...,Five legends return in #TEKKEN8 for the next K...,"[Bandai Namco, Bandai Namco Entertainment, Vid...",866119,24575,2808,0
2,Bandai Namco Entertainment America,UC_ntXHv-XdKCD7CPynVvnQw,cIDK50IaVpg,2023-11-02 13:22:49+00:00,TEKKEN 8 – Victor Chevalier Reveal & Gameplay ...,"With him, violence is à la carte.\nVictor Chev...","[Bandai Namco, Bandai Namco Entertainment, Vid...",1319361,42353,7286,0
3,Bandai Namco Entertainment America,UC_ntXHv-XdKCD7CPynVvnQw,PsCpewoF2E4,2023-11-13 05:03:26+00:00,TEKKEN 8 — Reina Reveal & Gameplay Trailer,It's time for them to learn their place.\nRein...,"[Bandai Namco, Bandai Namco Entertainment, Vid...",1879063,57163,7534,0
4,Bandai Namco Entertainment America,UC_ntXHv-XdKCD7CPynVvnQw,QH6s_o3dIic,2023-11-22 14:00:30+00:00,TEKKEN 8 — Leo Reveal & Gameplay Trailer,It's time to punch the truth out of them. Leo ...,"[Bandai Namco, Bandai Namco Entertainment, Vid...",820120,32344,5153,0


In [11]:
# request the snippet and statistics parts for the collected video ids.
# `maxResults` is intentionally not passed: the videos.list reference documents
# it as not supported in conjunction with the `id` filter.
# NOTE(review): video_ids held 58 entries in the run shown above — confirm
# whether videos.list accepts that many ids in one call or needs batching.
request = youtube.videos().list(
    part="snippet,statistics",
    id=video_ids
)

response = request.execute()

In [32]:
# tags of the first returned video; assuming snippet is a plain dict, .get
# yields None instead of raising when the "tags" key is absent
response["items"][0]["snippet"].get("tags")

['Bandai Namco',
 'Bandai Namco Entertainment',
 'Video',
 'Games',
 'video games',
 'namco bandai',
 'United States',
 'PS5',
 'PS4',
 'Xbox Series X',
 'tekken',
 'tekken 8',
 'tekken bandai',
 'tekken 8 gameplay',
 'tekken 8 game']

In [17]:
# same lookup using direct indexing on the "tags" key
response["items"][0]["snippet"]["tags"]

['Bandai Namco',
 'Bandai Namco Entertainment',
 'Video',
 'Games',
 'video games',
 'namco bandai',
 'United States',
 'PS5',
 'PS4',
 'Xbox Series X',
 'tekken',
 'tekken 8',
 'tekken bandai',
 'tekken 8 gameplay',
 'tekken 8 game']

In [35]:
# summarise the video DataFrame: column dtypes and non-null counts
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype              
---  ------         --------------  -----              
 0   channelTitle   12 non-null     object             
 1   channelId      12 non-null     object             
 2   videoId        12 non-null     object             
 3   publishedAt    12 non-null     datetime64[ns, UTC]
 4   title          12 non-null     object             
 5   description    12 non-null     object             
 6   tags           12 non-null     object             
 7   viewCount      12 non-null     int64              
 8   likeCount      12 non-null     int64              
 9   commentCount   12 non-null     int64              
 10  favoriteCount  12 non-null     int64              
dtypes: datetime64[ns, UTC](1), int64(4), object(6)
memory usage: 1.2+ KB


In [None]:
# export the video data to a csv (index=False leaves the row index out of the file)
# NOTE(review): the path is relative; presumably ../data/raw already exists
# relative to where the notebook runs — confirm.
df.to_csv("../data/raw/video_data.csv", index=False)

## Get channel data

In [7]:
# look up the channel by its id, asking for the snippet, statistics and
# contentDetails parts
# (an alternative lookup, forUsername = "BandaiNamcoAmerica", was left disabled by the author)
request_channel_data = youtube.channels().list(
    part="snippet,statistics,contentDetails",
    id="UC_ntXHv-XdKCD7CPynVvnQw",
)

# run the request and show the raw response
response = request_channel_data.execute()
response

{'kind': 'youtube#channelListResponse',
 'etag': 't-ljBz4wibkJdndFyooMPL-Cgro',
 'pageInfo': {'totalResults': 1, 'resultsPerPage': 5},
 'items': [{'kind': 'youtube#channel',
   'etag': 'sE8FVbDqKHWy0boDbKZocBNeBKE',
   'id': 'UC_ntXHv-XdKCD7CPynVvnQw',
   'snippet': {'title': 'Bandai Namco Entertainment America',
    'description': 'Fun for all into the future! Tune in right here for new trailers and reveals for your favorite Bandai Namco games!\n\n\n',
    'customUrl': '@bandainamcoamerica',
    'publishedAt': '2006-09-19T17:48:06Z',
    'thumbnails': {'default': {'url': 'https://yt3.ggpht.com/-K3RYA88iYssAO0kKa5DKazS_6mrkBIbgpnHpykrGeROqEIZ18QA7JmD6KPiaXwL_JXNp3hf=s88-c-k-c0x00ffffff-no-rj',
      'width': 88,
      'height': 88},
     'medium': {'url': 'https://yt3.ggpht.com/-K3RYA88iYssAO0kKa5DKazS_6mrkBIbgpnHpykrGeROqEIZ18QA7JmD6KPiaXwL_JXNp3hf=s240-c-k-c0x00ffffff-no-rj',
      'width': 240,
      'height': 240},
     'high': {'url': 'https://yt3.ggpht.com/-K3RYA88iYssAO0kKa5DKaz

In [None]:
# id of the first (only) channel item in the response
response['items'][0]['id']

In [9]:
# subscriber count of the channel; note the output below shows it as a string, not an int
response['items'][0]['statistics']['subscriberCount']

'1100000'