In [None]:
import os
import pandas as pd

In [None]:
from kedro.config import ConfigLoader
from pyyoutube import Api

## Load the YouTube API key and create the API client

In [None]:
# Channel and playlist whose videos this notebook exports.
YT_USERNAME = "lexfridman"
PLAYLIST_NAME = "Lex Fridman Podcast"

# Read the API key from the project's "local" configuration environment so the
# secret never has to be written into the notebook itself.
conf_path = os.path.join("..", "conf")
conf_loader = ConfigLoader(conf_source=conf_path, env="local")
YT_API_KEY = conf_loader.get("credentials.yml")['youtube']['api_key']

# Client used by every API call below.
api = Api(api_key=YT_API_KEY)

## Get the user's videos from the playlist

In [None]:
def get_user_id(yt_api, username):
    """Return the id of the channel that belongs to ``username``.

    Args:
        yt_api: API client exposing ``get_channel_info(for_username=...)``.
        username: Username of the channel to look up.

    Returns:
        The ``'id'`` entry of the first channel in the response.

    Raises:
        IndexError: If the response contains no channel for ``username``.
    """
    # ``items`` can come back empty (or as ``None``) when nothing matches the
    # username; handle both explicitly instead of letting ``items[0]`` fail
    # with an error that does not mention which username was looked up.
    channels = yt_api.get_channel_info(for_username=username).items or []
    if not channels:
        raise IndexError(f'No channel found for username {username!r}')
    return channels[0].to_dict()['id']

In [None]:
def get_user_playlist_by_name(yt_api, user_id, playlist_name):
    """Find the playlist of a channel whose title equals ``playlist_name``.

    Args:
        yt_api: API client exposing ``get_playlists(channel_id=..., count=...)``.
        user_id: Id of the channel whose playlists are searched.
        playlist_name: Exact playlist title to match.

    Returns:
        The first playlist object whose snippet title equals ``playlist_name``.

    Raises:
        NameError: If none of the channel's playlists has that title.
    """
    channel_playlists = yt_api.get_playlists(channel_id=user_id, count=None).items
    # Lazily scan the playlists and stop at the first title match.
    found = next(
        (candidate for candidate in channel_playlists
         if candidate.to_dict()['snippet']['title'] == playlist_name),
        None,
    )
    if found is None:
        raise NameError(f'Playlist {playlist_name} does not exist for user {user_id}')
    return found


def get_video_data_from_playlist(yt_api: Api, playlist):
    """Collect id, title and description of every item in ``playlist``.

    Args:
        yt_api: API client used to call ``get_playlist_items``.
        playlist: Object whose ``id`` attribute identifies the playlist.

    Returns:
        A ``pandas.DataFrame`` with the columns ``videoId``, ``title`` and
        ``description``, one row per playlist item in response order.
    """
    response = yt_api.get_playlist_items(playlist_id=playlist.id, count=None)

    rows = []
    for entry in response.to_dict()['items']:
        video_id = entry['contentDetails']['videoId']
        snippet = entry['snippet']
        rows.append((video_id, snippet['title'], snippet['description']))

    return pd.DataFrame(rows, columns=['videoId', 'title', 'description'])

In [None]:
# Resolve the configured username to its channel id, then pick the configured
# playlist from that channel by title.
user_id = get_user_id(yt_api=api, username=YT_USERNAME)
podcast_playlist = get_user_playlist_by_name(
    yt_api=api,
    user_id=user_id,
    playlist_name=PLAYLIST_NAME,
)

In [None]:
# One row per playlist item: videoId, title, description.
video_data = get_video_data_from_playlist(yt_api=api, playlist=podcast_playlist)

In [None]:
# Write the table to the raw data folder; index=False keeps the row index out
# of the file.
video_data.to_csv(
    path_or_buf=os.path.join('..', 'data', '01_raw', 'video_data.csv'),
    index=False,
)