In [None]:
from langchain_community.document_loaders import YoutubeLoader

from googleapiclient.discovery import build

import yaml
import os

import pandas as pd
import pytimetk as tk
from tqdm import tqdm

In [None]:
# Path to the YAML credentials file, relative to the notebook's working directory.
PATH_CREDENTIALS = '../credentials.yml'

# Read the file inside a context manager so the handle is closed as soon as
# parsing finishes, then expose the value stored under the 'youtube' key as
# an environment variable for the cells below.
with open(PATH_CREDENTIALS) as credentials_file:
    os.environ['YOUTUBE_API_KEY'] = yaml.safe_load(credentials_file)['youtube']

In [None]:
def search_videos(topic, api_key, max_results=20):
    """Search YouTube for videos matching ``topic`` and return their IDs.

    Parameters
    ----------
    topic : str
        Free-text search query.
    api_key : str
        Developer key passed to the YouTube Data API v3 client.
    max_results : int, default 20
        Maximum number of video IDs to return. Requests above the API's
        per-page limit are satisfied by following result pages; note that
        every page fetched is a separate API call.

    Returns
    -------
    list of str
        Video IDs in the order the API returned them, at most
        ``max_results`` long. Fewer are returned when the search runs out
        of results.
    """
    # search.list accepts at most this many results per page.
    api_page_limit = 50

    youtube = build('youtube', 'v3', developerKey=api_key)
    search = youtube.search()

    # A ``None`` limit is passed through unchanged, exactly as before.
    page_size = max_results if max_results is None else min(max_results, api_page_limit)
    request = search.list(
        q=topic,
        part='id,snippet',
        maxResults=page_size,
        type='video',
    )

    video_ids = []
    while request is not None:
        response = request.execute()
        # Skip any item that does not carry a video ID instead of failing
        # the whole search with a KeyError.
        video_ids.extend(
            item['id']['videoId']
            for item in response.get('items', [])
            if 'videoId' in item.get('id', {})
        )
        if max_results is None or len(video_ids) >= max_results:
            break
        # list_next returns None once there is no further page.
        request = search.list_next(request, response)

    return video_ids if max_results is None else video_ids[:max_results]

In [None]:
def load_video(video_id):
    """Load one YouTube video into a single-row DataFrame.

    Parameters
    ----------
    video_id : str
        YouTube video ID (the ``v=`` value of a watch URL).

    Returns
    -------
    pandas.DataFrame
        One row holding the loader's metadata fields, plus ``video_url``
        and ``page_content`` columns.

    Raises
    ------
    IndexError
        If the loader returns no documents for the video.
    """
    url = f'https://www.youtube.com/watch?v={video_id}'
    loader = YoutubeLoader.from_youtube_url(
        url,
        add_video_info=True,
    )
    docs = loader.load()
    if not docs:
        # Same exception type the bare ``[0]`` lookup raised, but with a
        # message that says which video produced nothing.
        raise IndexError(
            f'YoutubeLoader returned no documents for video {video_id} ({url})'
        )
    doc = docs[0]

    # Build the row in one step; the extra keys are placed after the metadata
    # keys (or overwrite same-named ones), matching column assignment.
    row = {**doc.metadata, 'video_url': url, 'page_content': doc.page_content}
    return pd.DataFrame([row])

In [None]:
# Search phrase used when querying YouTube for videos.
TOPIC = 'Social Media Brand Strategy Tips'

In [None]:
# Collect up to 50 video IDs matching TOPIC, using the key loaded earlier.
video_ids = search_videos(
    topic=TOPIC,
    api_key=os.environ['YOUTUBE_API_KEY'],
    max_results=50,
)

In [None]:
# Show the IDs returned by the search as a quick sanity check.
video_ids

['db6xDfRArpE',
 'v2peLV3tges',
 'L7mcHxcUnM4',
 'xCIH3RA8_4c',
 'inxfrx4tGvk',
 'eyxL02Q990E',
 'RmwI_QqcPQc',
 '1WTD-V2F_N8',
 'dez1TkJ9hDE',
 '3NwXzqE8CUk',
 'KI7rv8eIZbk',
 'XDWJXec-B_s',
 'ozMCb0wOnMU',
 'XZZfDRphCmE',
 'i-eIWetPKjM',
 '91D5hjMEADg',
 'UpdUHFkh22o',
 'LwrGSKnShmk',
 'vGPTlklcdUM',
 '1kwpxf9GKIY',
 'Evnq5F0s2bk',
 '4ajmfzj9G1g',
 'xwwC4tC5jF8',
 'dtxgZNqd3c8',
 '3yiHZWr6Izc',
 'lrM5CRg-O0I',
 'mu_GdEyEBDY',
 '-uYTpGonmoI',
 'IuK4kVajShU',
 '4Ap7yl4X8Zo',
 'eIcwB1fE6ko',
 'UoJh7R8t5Aw',
 'kCy2adkj4HM',
 'KWrzzHlPQ7Y',
 '45vj4nsSv_s',
 'Y9SnjL3lXSY',
 'iiDYFRQpHE0',
 'EWFB_BCQ7zI',
 '1R8VlIEtKsI',
 'O9j9aLo9waY',
 'p5TcXyGjZiw',
 'PLkGIcrv7JI',
 '7ITff1fIbSc',
 'IjS9eTpmhgk',
 'CiyNGOIbwcE',
 'sO4te2QNsHY',
 'X_FY5FoS6_8',
 'CynHXW5r9Ig',
 '5V8BHCNM7EI',
 'kQJ6dx6LAdk']

In [None]:
# Load every video found by the search. A failure on one video is reported
# and skipped so that it does not abort the rest of the run.
videos = []
for video_id in tqdm(video_ids, desc="Processing videos"):
    try:
        video = load_video(video_id)
    except Exception as e:
        print(f"Skipping video {video_id} due to error: {e}")
        continue
    videos.append(video)

Processing videos:  20%|██        | 10/50 [00:17<01:07,  1.68s/it]

Skipping video 3NwXzqE8CUk due to error: no element found: line 1, column 0


Processing videos:  42%|████▏     | 21/50 [00:37<00:54,  1.87s/it]

Skipping video Evnq5F0s2bk due to error: list index out of range


Processing videos: 100%|██████████| 50/50 [01:27<00:00,  1.75s/it]


In [None]:
# Stack the per-video frames row-wise into one table with a fresh 0..n-1 index.
videos_df = pd.concat(objs=videos, axis='index', ignore_index=True)
videos_df

Unnamed: 0,source,title,description,view_count,thumbnail_url,publish_date,length,author,video_url,page_content
0,db6xDfRArpE,BEST Social Media BRANDING STRATEGY | HOW TO B...,Unknown,2672,https://i.ytimg.com/vi/db6xDfRArpE/hq720.jpg?v...,2023-02-09 00:00:00,1971,Slay The Renee’ Way,https://www.youtube.com/watch?v=db6xDfRArpE,social media branding is super super important...
1,v2peLV3tges,7 Effective Social Media Marketing Strategies ...,Unknown,57420,https://i.ytimg.com/vi/v2peLV3tges/hq720.jpg,2024-05-31 00:00:00,1120,Adam Erhart,https://www.youtube.com/watch?v=v2peLV3tges,so listen the rumors are true there is a prove...
2,L7mcHxcUnM4,Build Your Brand in 30 Days | Best Social Medi...,Unknown,112021,https://i.ytimg.com/vi/L7mcHxcUnM4/hq720.jpg,2020-03-24 00:00:00,1437,Marley Jaxx,https://www.youtube.com/watch?v=L7mcHxcUnM4,(bright music) - I don't know about you but wh...
3,xCIH3RA8_4c,"Finally, a stress free social media strategy (...",Unknown,30821,https://i.ytimg.com/vi/xCIH3RA8_4c/hq720.jpg,2024-07-23 00:00:00,949,Erin On Demand,https://www.youtube.com/watch?v=xCIH3RA8_4c,what's up beautiful people I'm Aaron and this ...
4,inxfrx4tGvk,My Proven Social Media Strategy Explained in 1...,Unknown,9860,https://i.ytimg.com/vi/inxfrx4tGvk/hq720.jpg,2023-06-28 00:00:00,70,Think Media Podcast,https://www.youtube.com/watch?v=inxfrx4tGvk,your short form is simply just like eyeballs a...
5,eyxL02Q990E,How to Create a Social Media Marketing Strateg...,Unknown,24428,https://i.ytimg.com/vi/eyxL02Q990E/hq720.jpg,2024-01-19 00:00:00,940,Milou Pietersz,https://www.youtube.com/watch?v=eyxL02Q990E,in the social media industry the word strategy...
6,RmwI_QqcPQc,How To Market Your Business On Social Media,Unknown,2205551,https://i.ytimg.com/vi/RmwI_QqcPQc/hq720.jpg,2021-04-20 00:00:00,725,Marley Jaxx,https://www.youtube.com/watch?v=RmwI_QqcPQc,If you want more eyes on your business and mor...
7,1WTD-V2F_N8,How to create a stand-out SOCIAL MEDIA STRATEG...,Unknown,182507,https://i.ytimg.com/vi/1WTD-V2F_N8/hq720.jpg,2024-07-02 00:00:00,1524,Chantal Leonhardt,https://www.youtube.com/watch?v=1WTD-V2F_N8,I recently grew my Instagram from 25 to 100K i...
8,dez1TkJ9hDE,"LIVESTREAM - Tips On Social Media, Personal Br...",Unknown,23,https://i.ytimg.com/vi/dez1TkJ9hDE/hq720.jpg?v...,2024-09-23 00:00:00,2110,Daron Pacheco,https://www.youtube.com/watch?v=dez1TkJ9hDE,[Music] [Music] [Music] [Music] [Music] [Music...
9,KI7rv8eIZbk,A REAL Social Media Strategy Example + Walkthr...,Unknown,27260,https://i.ytimg.com/vi/KI7rv8eIZbk/hq720.jpg,2023-09-15 00:00:00,721,Milou Pietersz,https://www.youtube.com/watch?v=KI7rv8eIZbk,every six figure social media manager is gatek...


In [None]:
# Compact per-column overview (dtype and leading values) of the combined frame.
# NOTE(review): `.glimpse()` is presumably provided by the pytimetk import — confirm.
videos_df.glimpse()

<class 'pandas.core.frame.DataFrame'>: 48 rows of 10 columns
source:         object            ['db6xDfRArpE', 'v2peLV3tges', 'L7mcHx ...
title:          object            ['BEST Social Media BRANDING STRATEGY  ...
description:    object            ['Unknown', 'Unknown', 'Unknown', 'Unk ...
view_count:     int64             [2672, 57420, 112021, 30821, 9860, 244 ...
thumbnail_url:  object            ['https://i.ytimg.com/vi/db6xDfRArpE/h ...
publish_date:   object            ['2023-02-09 00:00:00', '2024-05-31 00 ...
length:         int64             [1971, 1120, 1437, 949, 70, 940, 725,  ...
author:         object            ['Slay The Renee’ Way', 'Adam Erhart', ...
video_url:      object            ['https://www.youtube.com/watch?v=db6x ...
page_content:   object            ["social media branding is super super ...
