In [1]:
# 操作 browser 的 API
from selenium import webdriver
from selenium.webdriver.chrome.service import Service

# ChromeDriver 的下載管理工具
from webdriver_manager.chrome import ChromeDriverManager

# 處理逾時例外的工具
from selenium.common.exceptions import TimeoutException

# 面對動態網頁，等待某個元素出現的工具，通常與 expected_conditions 搭配
from selenium.webdriver.support.ui import WebDriverWait

# 搭配 WebDriverWait 使用，對元素狀態的一種期待條件，若條件發生，則等待結束，往下一行執行
from selenium.webdriver.support import expected_conditions as EC

# 期待元素出現要透過什麼方式指定，通常與 EC、WebDriverWait 一起使用
from selenium.webdriver.common.by import By

# 強制等待 (執行期間休息一下)
from time import sleep

# 整理 json 使用的工具
import json

# 執行 command 的時候用的
import os

# 子處理程序，用來取代 os.system 的功能
import subprocess

# 下載檔案的工具
import wget



'''
Selenium with Python 中文翻譯文檔
參考網頁：https://selenium-python-zh.readthedocs.io/en/latest/index.html
selenium 啓動 Chrome 的進階配置參數
參考網址：https://stackoverflow.max-everyday.com/2019/12/selenium-chrome-options/
'''

# Launch options for the Chrome browser session.
my_options = webdriver.ChromeOptions()

# "--headless" (run in the background without opening a real browser window)
# is deliberately left out; add it to the tuple below to enable it.
for chrome_flag in (
    "--start-maximized",         # maximize the window
    "--incognito",               # open in incognito mode
    "--disable-popup-blocking",  # disable the pop-up blocker
    "--disable-notifications",   # turn off Chrome push notifications
    "--lang=zh-TW",              # set the language to Traditional Chinese
):
    my_options.add_argument(chrome_flag)

# Start Chrome through Selenium's WebDriver with the options above.
driver = webdriver.Chrome(options=my_options)

# Output folder: saveJson() writes youtube.json here and download() points
# yt-dlp's output template at it. Created only when it does not exist yet.
folderPath = 'youtubemp3'
if not os.path.exists(folderPath):
    os.makedirs(folderPath)

# Scraped records; filled by parse() and written out by saveJson().
listData = []

In [2]:
def visit(url="https://www.youtube.com/"):
    """Navigate the shared WebDriver session to a page.

    Args:
        url: Address to open. Defaults to the YouTube home page, which is
            what the original hard-coded call loaded.
    """
    driver.get(url)

# Type a keyword into the search box and submit it
def search(keyword="夜に駆ける"):
    """Enter a search keyword into the page's search input and submit it.

    The input is located with the CSS selector ``input#search`` on the page
    currently loaded in the shared WebDriver session.

    Args:
        keyword: Text to search for. Defaults to the keyword the notebook
            originally hard-coded.
    """
    txt_input = driver.find_element(By.CSS_SELECTOR, "input#search")
    txt_input.send_keys(keyword)

    # Submit through the input element instead of clicking a search button.
    txt_input.submit()


def scroll(limit=3, max_offset=600, pause=3):
    """Scroll the current page towards the bottom so more results get loaded.

    Each pass reads ``document.documentElement.scrollHeight``, smooth-scrolls
    to that offset, sleeps, and reads the height again.

    Args:
        limit: The loop ends once the number of passes in which the height did
            not change exceeds this value.
        max_offset: Stop as soon as the height read at the start of a pass is
            at least this large. ``None`` removes the cap so scrolling only
            ends through ``limit``.
        pause: Seconds to sleep after each scroll before re-reading the height.

    The defaults reproduce the values that used to be hard-coded (3, 600, 3).
    """
    count = 0

    while count <= limit:
        # Page height before scrolling; also used as the scroll target.
        offset = driver.execute_script("return document.documentElement.scrollHeight")

        driver.execute_script(f"""
            window.scrollTo({{
                top: {offset},
                behavior: "smooth"
            }});
        """)

        # Give the page time to append more content.
        sleep(pause)

        # Page height after scrolling.
        innerHeight = driver.execute_script(
            "return document.documentElement.scrollHeight;"
        )

        # Unchanged height: count it as a pass that loaded nothing new.
        if offset == innerHeight:
            count += 1

        # Early exit once the page is tall enough.
        if max_offset is not None and offset >= max_offset:
            break
        
        

def _extract_youtube_id(link):
    """Return the video id contained in a YouTube watch/shorts URL, or None."""
    from urllib.parse import parse_qs, urlparse

    parts = urlparse(link)

    # Shorts links carry the id as the path segment after /shorts/.
    if "/shorts/" in parts.path:
        return parts.path.split("/shorts/", 1)[1].split("/", 1)[0] or None

    # Regular links carry it in the "v" query parameter; parsing the query
    # keeps other parameters (pp, t, list, ...) out of the id.
    values = parse_qs(parts.query).get("v")
    return values[0] if values else None


def parse():
    """Collect the video results on the current page into ``listData``.

    ``listData`` is emptied in place first, then one dict per result is
    appended with the keys ``id``, ``title``, ``link`` and ``img``:

    * ``img``   - ``src`` attribute of the thumbnail image (may be None).
    * ``title`` - ``innerText`` of the title anchor.
    * ``link``  - ``href`` of the title anchor.
    * ``id``    - video id taken from ``link``; None when the link contains
      neither a ``/shorts/`` path nor a ``v`` query parameter.

    Results whose title anchor has no ``href`` are skipped. The scraped values
    are printed as they are read.
    """
    listData.clear()

    elements = driver.find_elements(
        By.CSS_SELECTOR,
        "ytd-video-renderer.style-scope.ytd-item-section-renderer"
    )

    for elm in elements:
        print("=" * 30)

        # Thumbnail image URL
        img = elm.find_element(
            By.CSS_SELECTOR,
            "a#thumbnail img"
        )
        imgSrc = img.get_attribute("src")
        print(imgSrc)

        # Title anchor: provides both the title text and the video link
        a = elm.find_element(
            By.CSS_SELECTOR,
            "a#video-title"
        )
        aTitle = a.get_attribute("innerText")
        print(aTitle)

        aLink = a.get_attribute("href")
        print(aLink)

        # Without a link there is nothing to identify or download later.
        if not aLink:
            continue

        youtube_id = _extract_youtube_id(aLink)
        print(youtube_id)

        listData.append({
            "id": youtube_id,
            "title": aTitle,
            "link": aLink,
            "img": imgSrc
        })


def saveJson():
    """Write ``listData`` to ``<folderPath>/youtube.json``.

    The file is UTF-8 encoded, indented by four spaces, and keeps non-ASCII
    characters as-is instead of escaping them.
    """
    target = f"{folderPath}/youtube.json"
    with open(target, "w", encoding="utf-8") as fh:
        payload = json.dumps(listData, ensure_ascii=False, indent=4)
        fh.write(payload)

def close():
    """End the shared WebDriver session by calling ``driver.quit()``."""
    driver.quit()


In [3]:
# Scraping pipeline: open the site, search, scroll, parse the results and
# save them. The finally clause makes sure the browser session is shut down
# even when one of the steps raises, so no browser/driver process is left behind.
try:
    visit()
    search()
    scroll()
    parse()
    saveJson()
finally:
    close()


https://i.ytimg.com/vi/x8VYWazR5mE/hq720.jpg?sqp=-oaymwEiCNAFEJQDSFXyq4qpAxQIARUAAIhCGAFwAcABBu0BZmZmQg==&rs=AOn4CLCQtmFasS17HRy3a1Oq-Mz2ZlEL9Q
YOASOBI「夜に駆ける」 Official Music Video
https://www.youtube.com/watch?v=x8VYWazR5mE&pp=ygUP5aSc44Gr6aeG44GR44KL
x8VYWazR5mE
https://i.ytimg.com/vi/j1hft9Wjq9U/hq720.jpg?sqp=-oaymwEcCNAFEJQDSFXyq4qpAw4IARUAAIhCGAFwAcABBg==&rs=AOn4CLDApXy6l4o_TU8J9_eTRUyeVSiRIQ
YOASOBI - 夜に駆ける / THE HOME TAKE
https://www.youtube.com/watch?v=j1hft9Wjq9U&pp=ygUP5aSc44Gr6aeG44GR44KL
j1hft9Wjq9U
https://i.ytimg.com/vi/mTdfEMAa72s/hqdefault.jpg?sqp=-oaymwE2COADEI4CSFXyq4qpAygIARUAAIhCGAFwAcABBvABAfgB_gmAAtAFigIMCAAQARgTIBwofzAP&rs=AOn4CLA1jf44s69Lbb4gzG6Yls-1NgQcFQ
夜に駆ける - YOASOBI -Lyrics Video【中文日文羅馬拼音歌詞字幕】
https://www.youtube.com/watch?v=mTdfEMAa72s&pp=ygUP5aSc44Gr6aeG44GR44KL
mTdfEMAa72s
https://i.ytimg.com/vi/GEIf9Dp6GgQ/hq720.jpg?sqp=-oaymwEcCNAFEJQDSFXyq4qpAw4IARUAAIhCGAFwAcABBg==&rs=AOn4CLCQUzHE5KwATFluZRCDLoOMr7Txjg
YOASOBI 夜に駆ける
https://www.youtube.com/watch?v=GE

In [7]:
def download(limit=3):
    """Download audio for the entries saved in ``<folderPath>/youtube.json``.

    If ``./yt-dlp.exe`` is missing it is first fetched from the yt-dlp GitHub
    releases page. Each entry's ``link`` is then passed to yt-dlp with the
    format selector ``ba`` (best audio in yt-dlp's format-selection syntax)
    and the output template ``<folderPath>/%(title)s.%(ext)s``. A success or
    failure line is printed per entry based on the process return code.

    Args:
        limit: Maximum number of entries to process, counted from the start
            of the file. Defaults to 3, the cutoff that used to be
            hard-coded. ``None`` processes every entry.
    """
    # Fetch the yt-dlp executable on first use.
    if not os.path.exists("./yt-dlp.exe"):
        print("[下載 yt-dlp]")
        wget.download("https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe", "./yt-dlp.exe")

    # Load the records written by saveJson().
    with open(f"{folderPath}/youtube.json", "r", encoding="utf-8") as file:
        listResult = json.load(file)

    # Slicing with None keeps the whole list, so limit=None means "all".
    for obj in listResult[:limit]:
        print(f"正在下載: {obj['link']}")

        # Argument list (no shell involved), so the link is passed verbatim.
        cmd = [
            "./yt-dlp.exe",
            obj["link"],
            "-f",
            "ba",
            "-o",
            f"{folderPath}/%(title)s.%(ext)s"
        ]

        obj_sp = subprocess.run(cmd)

        # A zero return code is treated as a successful download.
        if obj_sp.returncode == 0:
            print("success!")
        else:
            print("fail!")




In [8]:
# Run the download step: reads youtube.json from the output folder and hands
# at most the first three saved links to yt-dlp.
download()

正在下載: https://www.youtube.com/watch?v=x8VYWazR5mE&pp=ygUP5aSc44Gr6aeG44GR44KL
success!
正在下載: https://www.youtube.com/watch?v=j1hft9Wjq9U&pp=ygUP5aSc44Gr6aeG44GR44KL
success!
