# [YouTube Data API v3 reference](https://developers.google.com/youtube/v3/docs)
***
# Imports

In [63]:
import os
from dotenv import load_dotenv
import requests
from datetime import datetime
import pandas as pd

***
# Set variables for API request

In [20]:
# Pull the variables defined in the local env file into the process environment
load_dotenv(dotenv_path="yt_api_key.env")

True

In [21]:
# Endpoint queried for video resources
api_url = "https://www.googleapis.com/youtube/v3/videos"
# Credential read from the environment; None when API_KEY is not set
api_key = os.environ.get("API_KEY")

***
# Make API request

The API only exposes the top 200 most popular videos, so retrieving all of them takes 4 API calls at 50 videos per call.

In [108]:
def parse_video_json(video):
    """Flatten one video resource from the API response into a list of fields.

    Args:
        video: dict for a single item of the response's "items" list. It must
            contain the "id", "snippet", "contentDetails", "status" and
            "statistics" entries read below.

    Returns:
        A list in this order: video_id, publish_datetime, channel_id,
        category_id, live_broadcast, duration, licensed_content,
        made_for_kids, num_views, num_likes, num_comments.
        Values are passed through exactly as they appear in the response;
        any of the three counters is None when the response omits it.

    Raises:
        KeyError: if one of the non-statistics fields is missing.
    """
    snippet = video["snippet"]
    content_details = video["contentDetails"]
    statistics = video["statistics"]

    # Counters are read with .get so that a video whose statistics omit a
    # counter yields None instead of aborting the whole collection run.
    num_views = statistics.get("viewCount")
    num_likes = statistics.get("likeCount")
    num_comments = statistics.get("commentCount")

    # Order matters: callers prepend the request time and use this list as a row.
    return [
        video["id"],
        snippet["publishedAt"],
        snippet["channelId"],
        snippet["categoryId"],
        snippet["liveBroadcastContent"],
        content_details["duration"],
        content_details["licensedContent"],
        video["status"]["madeForKids"],
        num_views,
        num_likes,
        num_comments,
    ]

In [110]:
# Column order must match [request_datetime] + parse_video_json(video).
video_columns = [
    "request_datetime", "video_id", "publish_datetime", "channel_id", "category_id",
    "live_broadcast", "duration", "licensed_content", "made_for_kids",
    "num_views", "num_likes", "num_comments",
]
# Seconds to wait on the API before giving up; without a timeout a stalled
# connection would block the kernel indefinitely.
request_timeout_seconds = 30

# Rows are accumulated in a plain list and turned into a DataFrame once after
# the loop, which avoids the cost of enlarging a DataFrame one row at a time.
video_rows = []
num_api_calls = 0
pageToken = ""  # empty on the first request; afterwards the token of the next page

# Keep requesting pages until the response no longer carries a nextPageToken
# (or a request fails).
while pageToken is not None:
    params = {
        "key": api_key,
        "part": "id, snippet, contentDetails, status, statistics",
        "chart": "mostPopular",
        "hl": "en",
        "regionCode": "US",
        "maxResults": 50,
        "pageToken": pageToken
    }

    # One timestamp per API call, shared by every video on that page.
    request_datetime = datetime.now()
    num_api_calls += 1
    # Network errors and timeouts propagate; rows gathered so far remain in video_rows.
    response = requests.get(api_url, params=params, timeout=request_timeout_seconds)

    if response.status_code == 200:
        response_json = response.json()

        for video in response_json.get("items", []):
            # prepend the request time to the parsed video fields
            video_rows.append([request_datetime] + parse_video_json(video))

        # None when this was the last page, which ends the loop
        pageToken = response_json.get("nextPageToken", None)

    else:
        # Report the failure and stop paging; rows collected so far are kept.
        print(f"response.status_code = {response.status_code}")
        pageToken = None

video_df = pd.DataFrame(video_rows, columns=video_columns)
    

In [111]:
# Show the collected rows; the rendered table elides the middle rows.
video_df

Unnamed: 0,request_datetime,video_id,publish_datetime,channel_id,category_id,live_broadcast,duration,licensed_content,made_for_kids,num_views,num_likes,num_comments
0,2024-04-18 16:22:54.599826,5a09yJU-mCI,2024-04-18T13:59:22Z,UCF9imwPMSGz4Vq1NiTWCC7g,1,none,PT3M21S,True,False,1291459,66209,7501
1,2024-04-18 16:22:54.599826,29ECwExc-_M,2024-04-17T12:58:26Z,UC7vVhkEfw4nOGp8TyDk7RcQ,28,none,PT40S,False,False,2890386,81790,9988
2,2024-04-18 16:22:54.599826,hJiPAJKjUVg,2024-04-18T16:06:41Z,UCjmJDM5pRKbUlVIzDYYWb6g,1,none,PT2M42S,False,False,426177,8589,898
3,2024-04-18 16:22:54.599826,eeUmph4ayg8,2024-04-18T03:00:07Z,UCy0-N2mQ144x4kKZHmoTF0Q,22,none,PT12M18S,True,False,214058,18864,2177
4,2024-04-18 16:22:54.599826,yLc-Wsz8X9o,2024-04-17T16:30:35Z,UCKNbAYjhxVS5Y8p3FhhUxoA,1,none,PT29M43S,True,False,528885,41057,3992
...,...,...,...,...,...,...,...,...,...,...,...,...
195,2024-04-18 16:22:55.781750,JyYQmekPqqI,2024-04-10T20:00:02Z,UCsxpEPfEgqyXC6jaFKQSKVQ,26,none,PT27M55S,True,False,3235261,72610,8231
196,2024-04-18 16:22:55.781750,j6ucGt_Xp14,2024-04-11T13:59:46Z,UC1Myj674wRVXB9I4c6Hm5zA,24,none,PT2M52S,False,False,4954191,26942,1560
197,2024-04-18 16:22:55.781750,u4VwzWu4P8I,2024-04-11T13:00:06Z,UChpKl3waLmccNeYH9LGYjUQ,10,none,PT4M50S,False,False,1029345,84474,2853
198,2024-04-18 16:22:55.781750,7tNvrYzPUrk,2024-04-11T22:04:04Z,UCuaFvcY4MhZY3U43mMt1dYQ,1,none,PT2M16S,True,False,1083633,3929,363


In [115]:
# Number of missing values in each column
video_df.isna().sum(axis=0)

request_datetime    0
video_id            0
publish_datetime    0
channel_id          0
category_id         0
live_broadcast      0
duration            0
licensed_content    0
made_for_kids       0
num_views           0
num_likes           2
num_comments        4
dtype: int64