In [1]:
import os
from dotenv import load_dotenv
load_dotenv()
import googleapiclient.discovery
import pandas as pd

In [2]:
# Build the YouTube Data API client that the rest of the notebook uses.
# The key is read from the environment (load_dotenv() ran in the imports
# cell), so it is never written into the notebook itself.
API_KEY = os.environ.get("API_KEY")
api_service_name, api_version = "youtube", "v3"

youtube = googleapiclient.discovery.build(
    api_service_name,
    api_version,
    developerKey=API_KEY,
)

# Video IDs

In [3]:
def get_video_ids(youtube_service_object, channel_id: str, published_after, published_before, search_term: str = None):
    """Return the IDs of a channel's videos that match a YouTube Data API search.

    Calls the API's ``search.list`` method and follows ``nextPageToken`` until
    the API stops returning one, collecting the ``videoId`` of every result.

    Parameters
    ----------
    youtube_service_object : googleapiclient object
        A service object created using `googleapiclient.discovery.build`.

    channel_id : str
        The ID of the YouTube channel you want to search for videos.

    published_after : str
        An RFC 3339 formatted date-time value (1970-01-01T00:00:00Z).

    published_before : str
        An RFC 3339 formatted date-time value (1970-01-01T00:00:00Z).

    search_term : str, optional
        A search term if you wish to narrow down the search using keywords.
        It is sent as the API's ``q`` parameter. See Notes for further
        information.

    Returns
    --------
    video_ids : list
        The videoId values obtained from the request, in the order the API
        returned them (the request asks for ``order="date"``). Empty when the
        search has no results.

    Notes
    ------
    search_term : str
        The ``q`` parameter can also use the Boolean NOT (-) and OR (|)
        operators to exclude videos or to find videos that are associated with
        one of several search terms. For example, to search for videos matching
        either "boating" or "sailing", use ``boating|sailing``. Similarly, to
        search for videos matching either "boating" or "sailing" but not
        "fishing", use ``boating|sailing -fishing``. The API documentation
        notes that the pipe character must be URL-escaped (%7C) when it is
        sent in a raw API request.

    References
    -----------
        https://developers.google.com/youtube/v3/docs/search

    """
    video_ids = []
    next_page_token = None

    while True:
        # Same query for every page; only the page token changes.
        request_kwargs = {
            "channelId": channel_id,
            "publishedAfter": published_after,
            "publishedBefore": published_before,
            "q": search_term,
            "part": "snippet",
            "type": "video",
            "order": "date",
            "maxResults": 50,
        }
        # The first request carries no token at all, as in a plain search.
        if next_page_token is not None:
            request_kwargs["pageToken"] = next_page_token

        response = youtube_service_object.search().list(**request_kwargs).execute()

        # Tolerate a response without "items" rather than raising KeyError.
        video_ids.extend(item["id"]["videoId"] for item in response.get("items", []))

        next_page_token = response.get("nextPageToken")
        if next_page_token is None:
            break

    return video_ids

# Video data from a given video ID

In [10]:
# Number of IDs sent per videos.list call (the original request also asked for 50).
_MAX_IDS_PER_REQUEST = 50

# Column order of the returned dataframe.
_VIDEO_COLUMNS = [
    "channelTitle",
    "channelId",
    "videoId",
    "publishedAt",
    "title",
    "description",
    "tags",
    "viewCount",
    "likeCount",
    "commentCount",
    "favoriteCount",
]

_COUNT_COLUMNS = ["viewCount", "likeCount", "commentCount", "favoriteCount"]


def _optional_int(value):
    """Convert an API count (returned as a string) to int, keeping None as None."""
    return None if value is None else int(value)


def _video_record(item):
    """Flatten one videos.list item into a row dict keyed by _VIDEO_COLUMNS."""
    snippet = item["snippet"]
    statistics = item.get("statistics", {})
    record = {
        "channelTitle": snippet["channelTitle"],
        "channelId": snippet["channelId"],
        "videoId": item["id"],
        "publishedAt": snippet["publishedAt"],
        "title": snippet["title"],
        "description": snippet["description"],
        # "tags" is absent from the snippet when a video has no tags.
        "tags": snippet.get("tags", []),
    }
    # Individual counts can be absent (e.g. likeCount / commentCount), so
    # read them defensively instead of raising KeyError.
    for column in _COUNT_COLUMNS:
        record[column] = _optional_int(statistics.get(column))
    return record


def get_video_data(video_ids, youtube_service_object=None):
    """Retrieve snippet details and statistics for one or more YouTube video IDs.

    Parameters
    ----------
    video_ids : list or str
        A list of video IDs, or a single string (one ID, or several IDs
        separated by commas) if only wanting to return data for one request.
        Duplicate IDs are requested once.

    youtube_service_object : googleapiclient object, optional
        A service object created using `googleapiclient.discovery.build`.
        When omitted, the notebook-level ``youtube`` object is used, which
        keeps existing ``get_video_data(video_ids)`` calls working.

    Returns
    --------
    df : dataframe
        One row per video returned by the API, sorted by ``publishedAt`` with
        a fresh integer index. ``publishedAt`` is ``datetime64[ns, UTC]``; the
        count columns use the nullable ``Int64`` dtype so that a count the API
        did not return shows up as ``<NA>`` instead of raising. Videos without
        tags get an empty list in ``tags``. IDs the API returns nothing for
        are simply absent; an empty input yields an empty dataframe with the
        same columns.

    Notes
    ------
    The IDs are sent in batches of 50 as a comma-separated ``id`` value.
    ``maxResults`` and ``pageToken`` are not sent: the ``videos.list``
    documentation states they are not supported together with ``id``.

    References
    -----------
        https://developers.google.com/youtube/v3/docs/videos/list

    """
    service = youtube if youtube_service_object is None else youtube_service_object

    # Normalise the input to an ordered list of unique, non-empty IDs.
    if isinstance(video_ids, str):
        candidates = [part.strip() for part in video_ids.split(",")]
    else:
        candidates = list(video_ids)
    id_list = list(dict.fromkeys(vid for vid in candidates if vid))

    # Keyed by video ID so a video can never appear twice in the result.
    records_by_id = {}
    for start in range(0, len(id_list), _MAX_IDS_PER_REQUEST):
        batch = id_list[start:start + _MAX_IDS_PER_REQUEST]
        response = service.videos().list(
            part="snippet,statistics",
            id=",".join(batch),
        ).execute()

        for item in response.get("items", []):
            records_by_id[item["id"]] = _video_record(item)

    # Passing the column list keeps the schema stable even with zero rows.
    df = (pd.DataFrame(list(records_by_id.values()), columns=_VIDEO_COLUMNS)
          .astype({"publishedAt": "datetime64[ns, UTC]",
                   "viewCount": "Int64",
                   "likeCount": "Int64",
                   "commentCount": "Int64",
                   "favoriteCount": "Int64"})
          .sort_values(by=["publishedAt"])
          .reset_index(drop=True)
         )

    return df



# Test functions

In [4]:
# Settings for the test run below.
# Channel to search (the sample output further down lists this ID as
# "Bandai Namco Entertainment America").
bandai_namco_america_id = "UC_ntXHv-XdKCD7CPynVvnQw"

# Keyword filter for the search
search_term = "tekken"

# Publication window as RFC 3339 timestamps
published_after, published_before = "2023-06-01T00:00:00Z", "2023-12-21T00:00:00Z"

In [None]:
# Create the YouTube service object.
# NOTE(review): this repeats the service-object cell near the top of the
# notebook with the same settings; running it only rebuilds `youtube`.
api_service_name = "youtube"
api_version = "v3"
API_KEY = os.environ.get("API_KEY")

youtube = googleapiclient.discovery.build(
    api_service_name,
    api_version,
    developerKey=API_KEY,
)

## Get channel data

In [None]:
# Request the channel resource (snippet, statistics and contentDetails parts).
# The channel is selected with the ID defined in the search-settings cell
# above instead of repeating the literal here, so the two cannot drift apart.
# (Alternative filter left from earlier exploration, not used here:
#  forUsername="BandaiNamcoAmerica")
request_channel_data = youtube.channels().list(
    part="snippet,statistics,contentDetails",
    id=bandai_namco_america_id,
)

response = request_channel_data.execute()
response

In [None]:
# ID of the first channel in the response
response["items"][0]["id"]

In [None]:
# Subscriber count reported in the first channel's statistics
response["items"][0]["statistics"]["subscriberCount"]

## Get video IDs

In [5]:
# Collect the IDs of the channel's videos that match the search settings
video_ids = get_video_ids(
    youtube_service_object=youtube,
    channel_id=bandai_namco_america_id,
    published_after=published_after,
    published_before=published_before,
    search_term=search_term,
)

## Get video data

In [11]:
# Fetch data for a small, fixed set of video IDs and preview the dataframe
video_ids_test = [
    "UgnPG2bScVQ",
    "9jJiNa4HoD0",
    "EMZkmjE8wdw",
    "ToKJfywbe1o",
]
df = get_video_data(video_ids=video_ids_test)
df.head()

Unnamed: 0,channelTitle,channelId,videoId,publishedAt,title,description,tags,viewCount,likeCount,commentCount,favoriteCount
0,Bandai Namco Entertainment America,UC_ntXHv-XdKCD7CPynVvnQw,ToKJfywbe1o,2023-12-14 14:10:00+00:00,TEKKEN 8 – Official Story Trailer,Witness true power. Experience the Mishima sag...,"[Bandai Namco, Bandai Namco Entertainment, Vid...",996110,41541,3920,0
1,Bandai Namco Entertainment America,UC_ntXHv-XdKCD7CPynVvnQw,EMZkmjE8wdw,2023-12-14 14:30:05+00:00,JUJUTSU KAISEN CURSED CLASH – Gojo Satoru Teac...,Satoru Gojo is here with a special lecture! Ta...,"[Bandai Namco, Bandai Namco Entertainment, Vid...",135946,5300,381,0
2,Bandai Namco Entertainment America,UC_ntXHv-XdKCD7CPynVvnQw,9jJiNa4HoD0,2023-12-19 14:00:08+00:00,TEKKEN 8 – Ultimate Edition Trailer,Ultimate style. Experience everything TEKKEN 8...,"[Bandai Namco, Bandai Namco Entertainment, Vid...",314749,8688,1145,0
3,Bandai Namco Entertainment America,UC_ntXHv-XdKCD7CPynVvnQw,UgnPG2bScVQ,2023-12-20 14:00:19+00:00,TEKKEN 8 – Shaheen Reveal & Gameplay Trailer,The Desert Falcon is ready to ruffle some feat...,"[Bandai Namco, Bandai Namco Entertainment, Vid...",480774,21602,3375,0


## Get comments