## Using BeautifulSoup 


In [1]:
import requests
from bs4 import BeautifulSoup

In [2]:
YOUTUBE_TRENDING_URL = 'https://www.youtube.com/feed/trending'

In [3]:
response = requests.get(YOUTUBE_TRENDING_URL)

In [4]:
response.status_code

200

In [5]:
len(response.text)

1352429

In [6]:
with open ('trending.html', 'w', encoding='utf-8') as f:
    f.write(response.text)

In [7]:
doc = BeautifulSoup(response.text, 'html.parser')
print('Page Title:', doc.title.text)

Page Title: Trending - YouTube


In [8]:
#
video_divs =  doc.find_all('div', class_ = 'style-scope ytd-video-renderer')
print(f'Found {len(video_divs)} videos')

Found 0 videos


### We couldn't find any videos :( That's why we're heading to Selenium!

## Start using Selenium

In [9]:
!pip install selenium --quiet

In [10]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd

In [11]:
YOUTUBE_TRENDING_URL = 'https://www.youtube.com/feed/trending' 

def get_driver():
    # Specify the path to chromedriver.exe
    driver_path = r'C:\Windows\System32\chromedriver.exe'  # Use raw string (r'') to avoid escape characters

    # Set up the Chrome service
    service = Service(driver_path)

    # Initialize Chrome WebDriver with the service
    driver = webdriver.Chrome(service=service)
    return driver 

def get_videos(driver):
    driver.get(YOUTUBE_TRENDING_URL)
    # Wait for the page to load
    WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.TAG_NAME, "title")))
    
    video_div_class ='style-scope ytd-video-renderer'
    videos = driver.find_elements(By.CLASS_NAME, video_div_class)
    return videos

def parse_video(video):
    #Title
    title_tag = video.find_element(By.ID, 'video-title')
    title = title_tag.text
    
    #URL
    url = title_tag.get_attribute('href')
    
    #Thumbnail
    thumbnail_tag = video.find_element(By.TAG_NAME,'img')
    thumbnail_url = thumbnail_tag.get_attribute('src')
    
    #Channel name
    channel_div = video.find_element(By.CSS_SELECTOR, '.yt-simple-endpoint.style-scope.yt-formatted-string')
    channel_name = channel_div.text
    
    #Discription 
    description = video.find_element(By.CSS_SELECTOR, '.yt-simple-endpoint.style-scope.ytd-video-renderer')
    desc = description.text
    
    return{
        'Title' : title, 
        'URL' : url,
        'Thumbnail_url' : thumbnail_url,
        'Channel name' : channel_name, 
        'Description' : desc      
    }
    

In [12]:
if __name__ == "__main__":
    print("Creating driver")
    driver = get_driver()

    print("Fetching trending videos")
    videos = get_videos(driver)
    
     # Wait for the page title to load
    WebDriverWait(driver, 10).until(EC.title_contains("YouTube"))

    print("Page Title:", driver.title)
    
    print(f'found {len(videos)} videos')
    
    print("Parsing top 10 video")
    videos_data = [parse_video(video) for video in videos[:10]]
    
    print(videos_data)   

Creating driver
Fetching trending videos
Page Title: YouTube
found 96 videos
Parsing top 10 video
[{'Title': 'مباراة | الأهلي 2-0 طلائع الجيش | الجولة الثلاثون | الدوري المصري 2023/2024', 'URL': 'https://www.youtube.com/watch?v=-EP5FApPQ4Q', 'Thumbnail_url': 'https://i.ytimg.com/vi/-EP5FApPQ4Q/hqdefault.jpg?sqp=-oaymwEcCPYBEIoBSFXyq4qpAw4IARUAAIhCGAFwAcABBg==&rs=AOn4CLBtR5_I5_WGL3HUk4QWYZmLLVLWog', 'Channel name': 'kora plus', 'Description': 'مباراة | الأهلي 2-0 طلائع الجيش | الجولة الثلاثون | الدوري المصري 2023/2024'}, {'Title': 'الكورة مع فايق| نادر شوقي يتحدث باكياً عن أزمة أحمد رفعت التي تسبب في وفاته وانهيار أبراهيم فايق', 'URL': 'https://www.youtube.com/watch?v=6rWAGvbE_W4', 'Thumbnail_url': 'https://i.ytimg.com/vi/6rWAGvbE_W4/hqdefault.jpg?sqp=-oaymwEcCPYBEIoBSFXyq4qpAw4IARUAAIhCGAFwAcABBg==&rs=AOn4CLDCRJVPDaRCCiSQHbXM32wEe29Kpw', 'Channel name': 'الكورة مع فايق ', 'Description': 'الكورة مع فايق| نادر شوقي يتحدث باكياً عن أزمة أحمد رفعت التي تسبب في وفاته وانهيار أبراهيم فايق'},

In [13]:
print('Save the data to a CSV')
videos_df = pd.DataFrame(videos_data)
videos_df.to_csv('Trending.csv')

Save the data to a CSV
