In [1]:
import os
import pandas as pd

from pyyoutube import Api
from googleapiclient.discovery import build
from dotenv import find_dotenv, load_dotenv

# Locate a .env file and load it, so that settings such as YT_API_KEY can be
# read from os.environ in the cells below.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

True

## Get the YouTube API key

In [2]:
# Read the API key from the environment; raises KeyError if YT_API_KEY is unset.
YT_API_KEY = os.environ['YT_API_KEY']
# pyyoutube client used below for the playlist and playlist-item lookups.
api = Api(api_key=YT_API_KEY)
# Display name used as the search query when resolving the channel ID.
YT_DISPLAY_NAME = "Andrew Huberman"
# Title of the playlist to export; compared with == against each playlist title.
PLAYLIST_NAME = "Huberman Lab"

## Get the user's videos from a playlist

In [None]:
def get_channel_id(display_name, api_key):
    """Resolve a channel's ID by searching YouTube for its display name.

    Runs a channel-only search limited to a single result and returns the
    ``channelId`` found in that result's snippet.

    Args:
        display_name: Text used as the search query.
        api_key: Developer key passed to the YouTube Data API v3 client.

    Returns:
        The channel ID of the first search hit, or ``None`` (after printing a
        notice) when the search returns no items.
    """
    client = build('youtube', 'v3', developerKey=api_key)
    search_request = client.search().list(
        part='snippet',
        type='channel',
        q=display_name,
        maxResults=1,
    )
    hits = search_request.execute()['items']

    # Guard clause: nothing matched the query.
    if not hits:
        print('No channel found for the given display name')
        return None

    return hits[0]['snippet']['channelId']

In [None]:
def get_user_playlist_by_name(yt_api, user_id, playlist_name):
    """Return the first playlist of a channel whose title equals ``playlist_name``.

    Args:
        yt_api: Client exposing ``get_playlists(channel_id=..., count=...)``.
        user_id: Channel ID whose playlists are listed.
        playlist_name: Title to match exactly (``==`` comparison).

    Returns:
        The matching playlist object as yielded by the client.

    Raises:
        NameError: If no playlist of the channel carries that title.
    """
    channel_playlists = yt_api.get_playlists(channel_id=user_id, count=None).items
    found = next(
        (
            candidate
            for candidate in channel_playlists
            if candidate.to_dict()['snippet']['title'] == playlist_name
        ),
        None,
    )
    if found is None:
        raise NameError(f'Playlist {playlist_name} does not exist for user {user_id}')
    return found


def get_video_data_from_playlist(yt_api: Api, playlist):
    """Collect ID, title and description of every item in a playlist.

    Args:
        yt_api: Client exposing ``get_playlist_items(playlist_id=..., count=...)``.
        playlist: Object whose ``id`` attribute identifies the playlist.

    Returns:
        A DataFrame with one row per playlist item and the columns
        ``videoId``, ``title`` and ``description``.
    """
    listing = yt_api.get_playlist_items(playlist_id=playlist.id, count=None)

    rows = []
    for entry in listing.to_dict()['items']:
        video_id = entry['contentDetails']['videoId']
        snippet = entry['snippet']
        rows.append((video_id, snippet['title'], snippet['description']))

    return pd.DataFrame(rows, columns=['videoId', 'title', 'description'])

In [None]:
# Resolve the channel, find the named playlist, then tabulate its videos.
channel_id = get_channel_id(YT_DISPLAY_NAME, YT_API_KEY)
# get_channel_id returns None when the search has no hits; stop here with a
# clear error instead of querying playlists with a missing channel ID.
if channel_id is None:
    raise LookupError(f'No channel found for display name {YT_DISPLAY_NAME!r}')
podcast_playlist = get_user_playlist_by_name(api, channel_id, PLAYLIST_NAME)
video_data = get_video_data_from_playlist(api, podcast_playlist)

In [None]:
# Display the collected video metadata (videoId, title, description) for inspection.
video_data

In [None]:
# Make sure the output directory exists before writing: to_csv does not create
# missing parent directories, so the export would fail on a fresh checkout.
os.makedirs('data', exist_ok=True)
# Write the metadata without the DataFrame index column.
video_data.to_csv(os.path.join('data', 'video_metadata.csv'), index=False)