In [1]:
'''
匯入套件
'''
# 操作 browser 的 API
from selenium import webdriver

# 處理逾時例外的工具
from selenium.common.exceptions import TimeoutException

# 面對動態網頁，等待某個元素出現的工具，通常與 expected_conditions 搭配
from selenium.webdriver.support.ui import WebDriverWait

# 搭配 WebDriverWait 使用，對元素狀態的一種期待條件，若條件發生，則等待結束，往下一行執行
from selenium.webdriver.support import expected_conditions as EC

# 期待元素出現要透過什麼方式指定，通常與 EC、WebDriverWait 一起使用
from selenium.webdriver.common.by import By

# 強制等待 (執行期間休息一下)
from time import sleep

# 整理 json 使用的工具
import json

# 執行 command 的時候用的
import os

'''
Selenium with Python 中文翻譯文檔
參考網頁：https://selenium-python-zh.readthedocs.io/en/latest/index.html
selenium 啓動 Chrome 的進階配置參數
參考網址：https://stackoverflow.max-everyday.com/2019/12/selenium-chrome-options/
Mouse Hover Action in Selenium
參考網址：https://www.toolsqa.com/selenium-webdriver/mouse-hover-action/
'''

# Launch options for the Chrome browser session.
options = webdriver.ChromeOptions()
# options.add_argument("--headless")              # run in the background without a visible browser window
options.add_argument("--start-maximized")         # maximise the window so nothing is hidden by a small viewport
options.add_argument("--incognito")               # open the session in incognito mode
options.add_argument("--disable-popup-blocking")  # disable the pop-up blocker (switch passed without a trailing space)

executable_path = './chromedriver.exe'  # path to the chromedriver binary

# NOTE(review): the `executable_path` keyword is the Selenium 3 calling
# convention; recent Selenium 4 releases expect a Service object instead --
# confirm against the installed Selenium version.
driver = webdriver.Chrome(
    options = options,
    executable_path = executable_path
)

listData = []  # accumulates the scraped records

def visit():
    """Navigate the shared browser session to the YouTube home page."""
    home_url = 'https://www.youtube.com/'
    driver.get(home_url)
    
def search():
    """Type the search term into the search box and press the search button.

    A fixed pause follows each step (3 seconds after typing, 2 seconds after
    clicking) before the function continues.
    """
    # Locate the search field and enter the query.
    search_box = driver.find_element(By.CSS_SELECTOR, "input#search")
    search_box.send_keys("youngjae")
    sleep(3)

    # Submit the query through the search button.
    driver.find_element(By.CSS_SELECTOR, "button#search-icon-legacy").click()
    sleep(2)
    
# Filter (options)
def filterfunc():
    """Open the search filter panel and click one of its options.

    Waits up to 10 seconds for the filter toggle to be present, clicks it,
    pauses, then clicks the element at index 9 of the rendered filter
    options.

    Raises:
        TimeoutException: the filter toggle did not appear in time. The
            browser is shut down first and the exception is re-raised so
            that later steps do not run against a closed session.
    """
    toggle_selector = "yt-formatted-string#text.style-scope.ytd-toggle-button-renderer.style-text"
    option_selector = "yt-formatted-string.style-scope.ytd-search-filter-renderer"

    try:
        # Only the wait is expected to time out, so only it is guarded.
        toggle = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, toggle_selector))
        )
    except TimeoutException:
        print("連線逾期......")
        sleep(2)
        driver.quit()
        raise

    # Reveal the filter panel.
    toggle.click()
    sleep(2)

    # NOTE(review): which filter sits at index 9 depends on the page layout
    # at scrape time -- confirm it is still the intended option.
    driver.find_elements(By.CSS_SELECTOR, option_selector)[9].click()
    sleep(2)
def scroll(step=600, limit=3, wait_second=3, max_offset=600):
    """Scroll the page down in fixed steps so more results get rendered.

    Each iteration raises the scroll target by ``step`` pixels, scrolls
    there smoothly, pauses, and then reads the document's scroll height.

    Args:
        step: pixels added to the scroll target on every iteration. Small
            steps avoid skipping past content on pages that hide elements
            after a long jump.
        limit: number of "ineffective" iterations (scroll target already at
            or beyond the document height) tolerated; the loop ends once
            that count exceeds ``limit``.
        wait_second: seconds to pause after every scroll; lower it to scroll
            faster.
        max_offset: stop as soon as the scroll target reaches this many
            pixels. The default of 600 keeps the experimental behaviour of
            a single scroll step; pass ``None`` to scroll until the
            ineffective-scroll limit is hit.

    Raises:
        ValueError: ``step`` is not positive (the loop could never finish).
    """
    if step <= 0:
        raise ValueError("step must be a positive number of pixels")

    offset = 0   # current scroll target, in pixels
    stalled = 0  # iterations whose target was at/beyond the page height

    while stalled <= limit:
        offset += step

        js_code = f'''
                window.scrollTo({{
                    top:{offset},
                    behavior: 'smooth'
                }});
            '''
        driver.execute_script(js_code)

        sleep(wait_second)

        # Height of the document after the scroll and the pause.
        total_height = driver.execute_script(
            'return window.document.documentElement.scrollHeight;'
        )

        if offset >= total_height:
            stalled += 1

        # Optional cap on the scrolled distance.
        if max_offset is not None and offset >= max_offset:
            break
            
def _extract_video_id(link):
    """Return the value of the ``v`` query parameter of *link*, or None."""
    if not link:
        return None
    # Imported locally because the file's import block does not provide it.
    from urllib.parse import parse_qs, urlsplit

    ids = parse_qs(urlsplit(link).query).get("v")
    return ids[0] if ids else None


def parse():
    """Collect thumbnail, title, link and id of every rendered video result.

    For each ``ytd-video-renderer`` element on the page the thumbnail URL,
    title, link and video id are printed, and a record with the keys
    ``id``, ``title``, ``link`` and ``img`` is appended to the module-level
    ``listData``. Results whose link carries no ``v`` parameter are reported
    and skipped instead of aborting the whole run.
    """
    renderers = driver.find_elements(
        By.CSS_SELECTOR,
        'ytd-video-renderer.style-scope.ytd-item-section-renderer'
    )

    for renderer in renderers:
        print('='*50)  # separator between results

        # Thumbnail URL
        img = renderer.find_element(
            By.CSS_SELECTOR,
            'ytd-thumbnail.style-scope.ytd-video-renderer img#img'
        )
        imgSrc = img.get_attribute('src')
        print(imgSrc)

        # Title
        a = renderer.find_element(By.CSS_SELECTOR, 'a#video-title')
        aTitle = a.get_attribute('innerText')
        print(aTitle)

        # Video link
        aLink = a.get_attribute('href')
        print(aLink)

        # Video id: only the `v` parameter, without any trailing parameters.
        youtube_id = _extract_video_id(aLink)
        if youtube_id is None:
            print("no video id found in link, entry skipped")
            continue
        print(youtube_id)

        listData.append({
            "id":youtube_id,
            "title":aTitle,
            "link":aLink,
            "img":imgSrc
        })
        
def saveJson():
    """Write the records collected in ``listData`` to ``youtube.json``.

    The output is UTF-8 text, indented by 4 spaces, with non-ASCII
    characters written as-is rather than escaped.
    """
    # Serialise first so a serialisation error cannot leave a truncated file.
    payload = json.dumps(listData, ensure_ascii=False, indent=4)
    # The context manager closes the file even if the write raises.
    with open("youtube.json", "w", encoding="utf-8") as fp:
        fp.write(payload)
    
def close():
    """Quit the shared WebDriver session."""
    driver.quit()
    
def download():
    """Download the first two videos listed in ``youtube.json``.

    Reads the JSON list written by the scraping step and runs
    ``youtube-dl.exe`` once per entry, requesting mp4 and naming each file
    ``<id>.<ext>``. The index of each processed entry is printed.

    The command is passed as an argument list rather than a shell string,
    so characters such as ``&`` in a scraped link are not interpreted by a
    shell. A non-zero exit status from youtube-dl is ignored.

    Raises:
        FileNotFoundError: ``youtube.json`` or the ``youtube-dl.exe``
            executable cannot be found.
    """
    # Imported locally because the file's import block does not provide it.
    import subprocess

    # Load the JSON file into a list of dicts.
    with open("youtube.json", "r", encoding="utf-8") as fp:
        listResult = json.load(fp)

    # Only the first two entries are downloaded.
    for index, obj in enumerate(listResult[:2]):
        subprocess.run(
            [
                "youtube-dl.exe",
                "-f", "mp4",
                "-i", obj['link'],
                "-o", "%(id)s.%(ext)s",
            ],
            check=False,
        )
        print(index)
            
if __name__ == '__main__':  # the order of these calls matters
    try:
        visit()
        search()
        filterfunc()
        scroll()
        parse()
        saveJson()
    finally:
        # Always release the browser, even when one of the steps raised.
        close()
    download()
    

https://i.ytimg.com/vi/kTdOds4vGqE/hq720.jpg?sqp=-oaymwEcCNAFEJQDSFXyq4qpAw4IARUAAIhCGAFwAcABBg==&rs=AOn4CLD3eQlI_qU4ffghjFv5-yqM2Y_zUw
[MV] Youngjae(영재) _ Vibin
https://www.youtube.com/watch?v=kTdOds4vGqE
kTdOds4vGqE
https://i.ytimg.com/vi/egBpinezmnQ/hq720.jpg?sqp=-oaymwEcCNAFEJQDSFXyq4qpAw4IARUAAIhCGAFwAcABBg==&rs=AOn4CLBs7LvIw7mL6VPbdIT3lTC4hNomcA
Youngjae Ars (GOT7) - Lonely [Color Coded Lyrics Han/Rom/Eng] By Hylyrics X
https://www.youtube.com/watch?v=egBpinezmnQ
egBpinezmnQ
https://i.ytimg.com/vi/_bG7boB0IgI/hq720.jpg?sqp=-oaymwEcCNAFEJQDSFXyq4qpAw4IARUAAIhCGAFwAcABBg==&rs=AOn4CLBj1SLxnYTSS-6W3m7IpAQXZ0X8QA
영재(Youngjae) - 'Vibin' Official M/V
https://www.youtube.com/watch?v=_bG7boB0IgI
_bG7boB0IgI
https://i.ytimg.com/vi/SBFRwJ6P1vI/hq720.jpg?sqp=-oaymwEcCNAFEJQDSFXyq4qpAw4IARUAAIhCGAFwAcABBg==&rs=AOn4CLCRQT5tH4VwB8FjYHOc9U3I74ShEA
Eternal (Eternal)
https://www.youtube.com/watch?v=SBFRwJ6P1vI
SBFRwJ6P1vI
https://i.ytimg.com/vi/Of3oVisXEaY/hq720.jpg?sqp=-oaymwEcCNAFEJQDSFXyq4qpAw4