# Define the video crawler functions

## Crawl video and audio

In [90]:
import os
import yt_dlp
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip, ffmpeg_extract_audio

# Root folder that every crawler function in this notebook writes into.
output_dir = "data"
# Create the root folder up front; a folder that already exists is not an error.
os.makedirs(name=output_dir, exist_ok=True)

def crawler_video(video_id, year):
    """Download a YouTube video, keep its first 60 seconds and extract the audio.

    The whole video is first downloaded to a temporary file. Its first 60
    seconds are then copied to ``<output_dir>/video/<year>/<video_id>.mp4`` and
    the audio of that clip is written to
    ``<output_dir>/audio/<year>/<video_id>.mp3``. The temporary file is removed
    whether or not the crawl succeeded.

    Args:
        video_id: YouTube video id, appended to ``https://youtu.be/``.
        year: Name of the sub-folder used below ``video/`` and ``audio/``
            (converted with ``str()``).

    Returns:
        True when both the clip and its audio were produced, False when any
        step raised. The error is printed, not re-raised.
    """
    video_id = str(video_id)
    video_dir = os.path.join(output_dir, "video", str(year))
    audio_dir = os.path.join(output_dir, "audio", str(year))
    temp_video_path = os.path.join(video_dir, "tmp_" + video_id + ".mp4")
    video_path = os.path.join(video_dir, video_id + ".mp4")
    audio_path = os.path.join(audio_dir, video_id + ".mp3")

    try:
        # ffmpeg does not create missing output folders, so make sure both exist.
        os.makedirs(video_dir, exist_ok=True)
        os.makedirs(audio_dir, exist_ok=True)

        video_url = "https://youtu.be/" + video_id
        # Download the full video + audio as mp4. The trimming is done below by
        # ffmpeg_extract_subclip, so no external downloader is configured here:
        # the previous external_downloader_args passed one space-joined string
        # that named the output file as ffmpeg's input and could never work.
        download_opts = {
            "format": "mp4",
            "outtmpl": temp_video_path,
        }
        with yt_dlp.YoutubeDL(download_opts) as ydl:
            ydl.download([video_url])

        # Keep only the first minute of the downloaded file.
        ffmpeg_extract_subclip(temp_video_path, 0, 60, outputfile=video_path)

        # Split the audio track out of the trimmed clip.
        ffmpeg_extract_audio(video_path, audio_path)

        return True
    except Exception as e:
        print("❌ Không thể tải video")
        print(e)
        return False
    finally:
        # Always drop the full-length download, including after a failure,
        # so aborted crawls do not pile up large temporary files.
        if os.path.exists(temp_video_path):
            try:
                os.remove(temp_video_path)
            except OSError as cleanup_error:
                print(cleanup_error)

## Crawl thumbnail

In [91]:
def crawler_thumbnail(video_id, year):
    """Download up to three thumbnails of a YouTube video.

    The video metadata is fetched with yt-dlp (nothing is downloaded by
    yt-dlp itself). The three thumbnails with the largest ``height`` are
    saved as ``<output_dir>/thumbnail/<year>/<id>_thumb<N>.jpg``; when no
    thumbnail reports a height, the first three listed are used instead.

    Args:
        video_id: YouTube video id, appended to ``https://youtu.be/``.
        year: Name of the sub-folder used below ``thumbnail/`` (converted
            with ``str()``).

    Returns:
        None. Progress and errors are printed; exceptions are not re-raised.
    """
    # Standard-library HTTP client, imported locally because the notebook
    # imports no HTTP library: the former `requests.get` call raised
    # NameError on every run, which the broad except below swallowed.
    import urllib.request

    video_url = f"https://youtu.be/{video_id}"
    try:
        # Create the thumbnail folder if it does not exist yet.
        save_dir = os.path.join(output_dir, "thumbnail", str(year))
        os.makedirs(save_dir, exist_ok=True)

        # Metadata-only extraction: download-related options (format,
        # postprocessors, outtmpl, writethumbnail) have no effect with
        # download=False and are therefore not set.
        ydl_opts = {
            "quiet": True,
            "nocheckcertificate": True,
            "skip_download": True,
        }

        # Collect the video information.
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(video_url, download=False)

        # Use the id reported by yt-dlp for the file names.
        video_id = info["id"]

        # Pick the three tallest thumbnails; entries whose height is missing
        # or None are skipped so the sort never compares None with int.
        all_thumbnails = info.get("thumbnails") or []
        thumbnail_urls = sorted(
            [thumb for thumb in all_thumbnails if thumb.get("height") is not None],
            key=lambda thumb: thumb["height"],
            reverse=True,
        )[:3]

        # No usable height anywhere: fall back to the first three entries.
        if not thumbnail_urls:
            thumbnail_urls = all_thumbnails[:3]

        # Download the selected thumbnails.
        for i, thumb in enumerate(thumbnail_urls):
            thumb_url = thumb["url"]
            try:
                with urllib.request.urlopen(thumb_url, timeout=30) as response:
                    content = response.read() if response.status == 200 else None
            except OSError:
                # urllib's URLError/HTTPError and socket timeouts are all
                # OSError subclasses; one failed thumbnail must not stop
                # the remaining ones.
                content = None

            if content is None:
                print(f"❌ Không thể tải {thumb_url}")
                continue

            file_path = os.path.join(save_dir, f"{video_id}_thumb{i+1}.jpg")
            with open(file_path, "wb") as file:
                file.write(content)
            print(f"✅ Đã tải {file_path}")

    except Exception as e:
        print(f"❌ Lỗi: {e}")

## Crawl title & tags

In [None]:
def get_video_info(video_id, max_tags=5):
    """Fetch the title and the leading tags of a YouTube video.

    Only metadata is requested from yt-dlp; nothing is downloaded.

    Args:
        video_id: YouTube video id, inserted into ``https://youtu.be/<id>``.
        max_tags: Maximum number of tags to return (default 5).

    Returns:
        A ``(title, tags)`` tuple. ``title`` is ``''`` and ``tags`` is ``[]``
        when the corresponding metadata field is missing or None.
    """
    video_url = f"https://youtu.be/{video_id}"
    ydl_opts = {
        'quiet': True,
        'no_warnings': True,
        'extract_flat': True,
        'skip_download': True,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(video_url, download=False)

    # Guard against a missing result as well as fields that are present but
    # None: `dict.get(key, default)` only applies the default to absent keys.
    info = info or {}
    title = info.get('title') or ''
    tags = list(info.get('tags') or [])[:max_tags]

    return title, tags


# Crawling

In [92]:
# Name of the dataset to crawl: it selects data/<category>.parquet and is
# passed to crawler_video as the `year` sub-folder name.
category = "test"

In [93]:
import pandas as pd
from enum import Enum

class CrawlStatus(Enum):
    """Crawl state of a row; the member values are what gets stored in ``crawl_status``."""

    # The video has not been crawled successfully yet.
    NOT_YET = "NOT_YET"
    # The video was crawled successfully.
    DONE = "DONE"

# Read the video list for the selected category from its parquet file.
path = f'data/{category}.parquet'
df = pd.read_parquet(path)

# A file without crawl bookkeeping gets the status column added,
# with every row starting as NOT_YET.
status_column_missing = "crawl_status" not in df.columns
if status_column_missing:
    df["crawl_status"] = CrawlStatus.NOT_YET.value
    print("Created crawl_status")

In [96]:
from tqdm import tqdm

# Resolve the column positions once so rows can be addressed purely by
# position: `i` is a positional counter, and label-based access
# (df[col][i] / df.loc[i, col]) would hit the wrong row or raise KeyError
# whenever the DataFrame index is not 0..n-1.
status_col = df.columns.get_loc("crawl_status")
id_col = df.columns.get_loc("id")

try:
    for i in tqdm(range(len(df))):
        # Rows already crawled in a previous run are skipped.
        if df.iat[i, status_col] == CrawlStatus.DONE.value:
            continue
        is_crawl_success = crawler_video(df.iat[i, id_col], category)
        if is_crawl_success:
            df.iat[i, status_col] = CrawlStatus.DONE.value
        # Periodic checkpoint so a crash loses at most ~100 rows of progress.
        if (i % 100 == 10):
            print("Processing {}".format(i))
            df.to_parquet(path)
finally:
    # Persist progress when the loop finishes and also when it is interrupted
    # (e.g. KeyboardInterrupt); the exception still propagates afterwards.
    df.to_parquet(path)

[youtube] Extracting URL: https://youtu.be/FIkKcpltyfc
[youtube] FIkKcpltyfc: Downloading webpage
[youtube] FIkKcpltyfc: Downloading tv client config
[youtube] FIkKcpltyfc: Downloading player 7d1d50a6
[youtube] FIkKcpltyfc: Downloading tv player API JSON
[youtube] FIkKcpltyfc: Downloading ios player API JSON
[youtube] FIkKcpltyfc: Downloading m3u8 information
[info] FIkKcpltyfc: Downloading 1 format(s): 18
[download] Destination: data/video/test/tmp_FIkKcpltyfc.mp4
[download] 100% of  163.71MiB in 00:00:05 at 31.50MiB/s    
MoviePy - Running:
>>> /Users/vuongnguyen/Downloads/Github/Youtube-Engagement-Deep-Multimodal-Fusion/.venv/lib/python3.10/site-packages/imageio_ffmpeg/binaries/ffmpeg-macos-aarch64-v7.1 -y -ss 0.00 -i data/video/test/tmp_FIkKcpltyfc.mp4 -t 60.00 -map 0 -vcodec copy -acodec copy -copyts data/video/test/FIkKcpltyfc.mp4
MoviePy - Command successful
MoviePy - Running:
>>> /Users/vuongnguyen/Downloads/Github/Youtube-Engagement-Deep-Multimodal-Fusion/.venv/lib/python3.10/

[download] Got error: 385463 bytes read, 10097016 more expected


❌ Không thể tải video
[download] Got error: 385463 bytes read, 10097016 more expected
[youtube] Extracting URL: https://youtu.be/ZqqYjlknlSA
[youtube] ZqqYjlknlSA: Downloading webpage
[youtube] ZqqYjlknlSA: Downloading tv client config
[youtube] ZqqYjlknlSA: Downloading player 7d1d50a6
[youtube] ZqqYjlknlSA: Downloading tv player API JSON
[youtube] ZqqYjlknlSA: Downloading ios player API JSON
[youtube] ZqqYjlknlSA: Downloading m3u8 information
[info] ZqqYjlknlSA: Downloading 1 format(s): 18
[download] Destination: data/video/test/tmp_ZqqYjlknlSA.mp4
[download] 100% of   21.66MiB in 00:00:01 at 16.88MiB/s    
MoviePy - Running:
>>> /Users/vuongnguyen/Downloads/Github/Youtube-Engagement-Deep-Multimodal-Fusion/.venv/lib/python3.10/site-packages/imageio_ffmpeg/binaries/ffmpeg-macos-aarch64-v7.1 -y -ss 0.00 -i data/video/test/tmp_ZqqYjlknlSA.mp4 -t 60.00 -map 0 -vcodec copy -acodec copy -copyts data/video/test/ZqqYjlknlSA.mp4
MoviePy - Command successful
MoviePy - Running:
>>> /Users/vuongn

ERROR: [youtube] nVuFgcS_HwU: Private video. Sign in if you've been granted access to this video. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


❌ Không thể tải video
ERROR: [youtube] nVuFgcS_HwU: Private video. Sign in if you've been granted access to this video. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies
[youtube] Extracting URL: https://youtu.be/iJkfPcDIeMM
[youtube] iJkfPcDIeMM: Downloading webpage
[youtube] iJkfPcDIeMM: Downloading tv client config
[youtube] iJkfPcDIeMM: Downloading player 7d1d50a6
[youtube] iJkfPcDIeMM: Downloading tv player API JSON
[youtube] iJkfPcDIeMM: Downloading ios player API JSON


ERROR: [youtube] iJkfPcDIeMM: Private video. Sign in if you've been granted access to this video. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


❌ Không thể tải video
ERROR: [youtube] iJkfPcDIeMM: Private video. Sign in if you've been granted access to this video. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies
[youtube] Extracting URL: https://youtu.be/S7ahj_3xB4Q
[youtube] S7ahj_3xB4Q: Downloading webpage
[youtube] S7ahj_3xB4Q: Downloading tv client config
[youtube] S7ahj_3xB4Q: Downloading player 7d1d50a6
[youtube] S7ahj_3xB4Q: Downloading tv player API JSON
[youtube] S7ahj_3xB4Q: Downloading ios player API JSON
[youtube] S7ahj_3xB4Q: Downloading m3u8 information
[info] S7ahj_3xB4Q: Downloading 1 format(s): 18
[download] Destination: data/video/test/tmp_S7ahj_3xB4Q.mp4
[download] 100% of   46.60MiB in 00:00:05 at 8.48MiB/s     
MoviePy - Running:
>>> /Users/vuongn

ERROR: [youtube] OJjuN4GHMNk: Private video. Sign in if you've been granted access to this video. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


❌ Không thể tải video
ERROR: [youtube] OJjuN4GHMNk: Private video. Sign in if you've been granted access to this video. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies
[youtube] Extracting URL: https://youtu.be/EPZ52PsXnhM
[youtube] EPZ52PsXnhM: Downloading webpage
[youtube] EPZ52PsXnhM: Downloading tv client config
[youtube] EPZ52PsXnhM: Downloading player 7d1d50a6
[youtube] EPZ52PsXnhM: Downloading tv player API JSON
[youtube] EPZ52PsXnhM: Downloading ios player API JSON
[youtube] EPZ52PsXnhM: Downloading m3u8 information
[info] EPZ52PsXnhM: Downloading 1 format(s): 18
[download] Destination: data/video/test/tmp_EPZ52PsXnhM.mp4
[download] 100% of   62.79MiB in 00:00:06 at 10.31MiB/s    
MoviePy - Running:
>>> /Users/vuongn

ERROR: [youtube] AoL2APGNeIk: Private video. Sign in if you've been granted access to this video. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


❌ Không thể tải video
ERROR: [youtube] AoL2APGNeIk: Private video. Sign in if you've been granted access to this video. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies
[youtube] Extracting URL: https://youtu.be/mvtuXVYaPSA
[youtube] mvtuXVYaPSA: Downloading webpage
[youtube] mvtuXVYaPSA: Downloading tv client config
[youtube] mvtuXVYaPSA: Downloading player 7d1d50a6
[youtube] mvtuXVYaPSA: Downloading tv player API JSON
[youtube] mvtuXVYaPSA: Downloading ios player API JSON
[youtube] mvtuXVYaPSA: Downloading m3u8 information
[info] mvtuXVYaPSA: Downloading 1 format(s): 18
[download] Destination: data/video/test/tmp_mvtuXVYaPSA.mp4
[download] 100% of   58.36MiB in 00:00:08 at 7.03MiB/s     
MoviePy - Running:
>>> /Users/vuongn

ERROR: [youtube] eM2cqCJ0Iuk: Private video. Sign in if you've been granted access to this video. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


❌ Không thể tải video
ERROR: [youtube] eM2cqCJ0Iuk: Private video. Sign in if you've been granted access to this video. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies
[youtube] Extracting URL: https://youtu.be/Tv8EALRlXik
[youtube] Tv8EALRlXik: Downloading webpage
[youtube] Tv8EALRlXik: Downloading tv client config
[youtube] Tv8EALRlXik: Downloading player 7d1d50a6
[youtube] Tv8EALRlXik: Downloading tv player API JSON
[youtube] Tv8EALRlXik: Downloading ios player API JSON
[youtube] Tv8EALRlXik: Downloading m3u8 information
[info] Tv8EALRlXik: Downloading 1 format(s): 18
[download] Destination: data/video/test/tmp_Tv8EALRlXik.mp4
[download] 100% of  103.89MiB in 00:00:09 at 10.50MiB/s    
MoviePy - Running:
>>> /Users/vuongn

ERROR: [youtube] 0gphZ0ILSf0: Video unavailable


❌ Không thể tải video
ERROR: [youtube] 0gphZ0ILSf0: Video unavailable
[youtube] Extracting URL: https://youtu.be/C90owp_ua7U
[youtube] C90owp_ua7U: Downloading webpage
[youtube] C90owp_ua7U: Downloading tv client config
[youtube] C90owp_ua7U: Downloading player 7d1d50a6
[youtube] C90owp_ua7U: Downloading tv player API JSON
[youtube] C90owp_ua7U: Downloading ios player API JSON
[youtube] C90owp_ua7U: Downloading m3u8 information
[info] C90owp_ua7U: Downloading 1 format(s): 18
[download] Destination: data/video/test/tmp_C90owp_ua7U.mp4
[download] 100% of  227.42MiB in 00:03:18 at 1.15MiB/s     
MoviePy - Running:
>>> /Users/vuongnguyen/Downloads/Github/Youtube-Engagement-Deep-Multimodal-Fusion/.venv/lib/python3.10/site-packages/imageio_ffmpeg/binaries/ffmpeg-macos-aarch64-v7.1 -y -ss 0.00 -i data/video/test/tmp_C90owp_ua7U.mp4 -t 60.00 -map 0 -vcodec copy -acodec copy -copyts data/video/test/C90owp_ua7U.mp4
MoviePy - Command successful
MoviePy - Running:
>>> /Users/vuongnguyen/Downloads/

[download] Got error: [SYS] unknown error (_ssl.c:2578)


❌ Không thể tải video
[download] Got error: [SYS] unknown error (_ssl.c:2578)
[youtube] Extracting URL: https://youtu.be/5bQbPJCMA_E
[youtube] 5bQbPJCMA_E: Downloading webpage
[youtube] 5bQbPJCMA_E: Downloading tv client config
[youtube] 5bQbPJCMA_E: Downloading player 7d1d50a6
[youtube] 5bQbPJCMA_E: Downloading tv player API JSON
[youtube] 5bQbPJCMA_E: Downloading ios player API JSON
[youtube] 5bQbPJCMA_E: Downloading m3u8 information
[info] 5bQbPJCMA_E: Downloading 1 format(s): 18
[download] Destination: data/video/test/tmp_5bQbPJCMA_E.mp4
[download] 100% of  472.49MiB in 00:02:44 at 2.87MiB/s      
MoviePy - Running:
>>> /Users/vuongnguyen/Downloads/Github/Youtube-Engagement-Deep-Multimodal-Fusion/.venv/lib/python3.10/site-packages/imageio_ffmpeg/binaries/ffmpeg-macos-aarch64-v7.1 -y -ss 0.00 -i data/video/test/tmp_5bQbPJCMA_E.mp4 -t 60.00 -map 0 -vcodec copy -acodec copy -copyts data/video/test/5bQbPJCMA_E.mp4
MoviePy - Command successful
MoviePy - Running:
>>> /Users/vuongnguyen/D

[download] Got error: HTTPSConnectionPool(host='rr1---sn-8qj-nbo67.googlevideo.com', port=443): Read timed out.


❌ Không thể tải video
[download] Got error: HTTPSConnectionPool(host='rr1---sn-8qj-nbo67.googlevideo.com', port=443): Read timed out.
[youtube] Extracting URL: https://youtu.be/gb2VK4MwHB8
[youtube] gb2VK4MwHB8: Downloading webpage
[youtube] gb2VK4MwHB8: Downloading tv client config
[youtube] gb2VK4MwHB8: Downloading player 7d1d50a6
[youtube] gb2VK4MwHB8: Downloading tv player API JSON
[youtube] gb2VK4MwHB8: Downloading ios player API JSON
[youtube] gb2VK4MwHB8: Downloading m3u8 information
[info] gb2VK4MwHB8: Downloading 1 format(s): 18
[download] Destination: data/video/test/tmp_gb2VK4MwHB8.mp4
[download] 100% of   95.27MiB in 00:01:58 at 824.02KiB/s   
MoviePy - Running:
>>> /Users/vuongnguyen/Downloads/Github/Youtube-Engagement-Deep-Multimodal-Fusion/.venv/lib/python3.10/site-packages/imageio_ffmpeg/binaries/ffmpeg-macos-aarch64-v7.1 -y -ss 0.00 -i data/video/test/tmp_gb2VK4MwHB8.mp4 -t 60.00 -map 0 -vcodec copy -acodec copy -copyts data/video/test/gb2VK4MwHB8.mp4
MoviePy - Command

[download] Got error: HTTPSConnectionPool(host='rr2---sn-npoeenll.googlevideo.com', port=443): Read timed out.


❌ Không thể tải video
[download] Got error: HTTPSConnectionPool(host='rr2---sn-npoeenll.googlevideo.com', port=443): Read timed out.
[youtube] Extracting URL: https://youtu.be/PoOxdwvKX24
[youtube] PoOxdwvKX24: Downloading webpage
[youtube] PoOxdwvKX24: Downloading tv client config
[youtube] PoOxdwvKX24: Downloading player 7d1d50a6
[youtube] PoOxdwvKX24: Downloading tv player API JSON
[youtube] PoOxdwvKX24: Downloading ios player API JSON
[youtube] PoOxdwvKX24: Downloading m3u8 information
[info] PoOxdwvKX24: Downloading 1 format(s): 18
[download] Destination: data/video/test/tmp_PoOxdwvKX24.mp4
[download] 100% of  152.19MiB in 00:02:25 at 1.04MiB/s     
MoviePy - Running:
>>> /Users/vuongnguyen/Downloads/Github/Youtube-Engagement-Deep-Multimodal-Fusion/.venv/lib/python3.10/site-packages/imageio_ffmpeg/binaries/ffmpeg-macos-aarch64-v7.1 -y -ss 0.00 -i data/video/test/tmp_PoOxdwvKX24.mp4 -t 60.00 -map 0 -vcodec copy -acodec copy -copyts data/video/test/PoOxdwvKX24.mp4
MoviePy - Command 

[download] Got error: HTTPSConnectionPool(host='rr4---sn-npoe7ndl.googlevideo.com', port=443): Read timed out. (read timeout=20.0)


❌ Không thể tải video
[download] Got error: HTTPSConnectionPool(host='rr4---sn-npoe7ndl.googlevideo.com', port=443): Read timed out. (read timeout=20.0)
[youtube] Extracting URL: https://youtu.be/5i78NOlRA4o
[youtube] 5i78NOlRA4o: Downloading webpage
[youtube] 5i78NOlRA4o: Downloading tv client config
[youtube] 5i78NOlRA4o: Downloading player 7d1d50a6
[youtube] 5i78NOlRA4o: Downloading tv player API JSON
[youtube] 5i78NOlRA4o: Downloading ios player API JSON
[youtube] 5i78NOlRA4o: Downloading m3u8 information
[info] 5i78NOlRA4o: Downloading 1 format(s): 18
[download] Destination: data/video/test/tmp_5i78NOlRA4o.mp4
[download]  11.8% of  154.29MiB at    1.26MiB/s ETA 01:47  

[download] Got error: HTTPSConnectionPool(host='rr4---sn-8qj-nbo67.googlevideo.com', port=443): Read timed out.


❌ Không thể tải video
[download] Got error: HTTPSConnectionPool(host='rr4---sn-8qj-nbo67.googlevideo.com', port=443): Read timed out.
[youtube] Extracting URL: https://youtu.be/7fXNn3pd8JM
[youtube] 7fXNn3pd8JM: Downloading webpage
[youtube] 7fXNn3pd8JM: Downloading tv client config
[youtube] 7fXNn3pd8JM: Downloading player 7d1d50a6
[youtube] 7fXNn3pd8JM: Downloading tv player API JSON
[youtube] 7fXNn3pd8JM: Downloading ios player API JSON
[youtube] 7fXNn3pd8JM: Downloading m3u8 information
[info] 7fXNn3pd8JM: Downloading 1 format(s): 18
[download] Destination: data/video/test/tmp_7fXNn3pd8JM.mp4
[download] 100% of   89.24MiB in 00:00:31 at 2.80MiB/s      
MoviePy - Running:
>>> /Users/vuongnguyen/Downloads/Github/Youtube-Engagement-Deep-Multimodal-Fusion/.venv/lib/python3.10/site-packages/imageio_ffmpeg/binaries/ffmpeg-macos-aarch64-v7.1 -y -ss 0.00 -i data/video/test/tmp_7fXNn3pd8JM.mp4 -t 60.00 -map 0 -vcodec copy -acodec copy -copyts data/video/test/7fXNn3pd8JM.mp4
MoviePy - Comman

[download] Got error: HTTPSConnectionPool(host='rr7---sn-8qj-nbo66.googlevideo.com', port=443): Read timed out.


❌ Không thể tải video
[download] Got error: HTTPSConnectionPool(host='rr7---sn-8qj-nbo66.googlevideo.com', port=443): Read timed out.
[youtube] Extracting URL: https://youtu.be/KeiyBlTMsfc
[youtube] KeiyBlTMsfc: Downloading webpage
[youtube] KeiyBlTMsfc: Downloading tv client config
[youtube] KeiyBlTMsfc: Downloading player 7d1d50a6
[youtube] KeiyBlTMsfc: Downloading tv player API JSON
[youtube] KeiyBlTMsfc: Downloading ios player API JSON
[youtube] KeiyBlTMsfc: Downloading m3u8 information
[info] KeiyBlTMsfc: Downloading 1 format(s): 18
[download] Destination: data/video/test/tmp_KeiyBlTMsfc.mp4
[download] 100% of   69.74MiB in 00:01:21 at 878.42KiB/s   
MoviePy - Running:
>>> /Users/vuongnguyen/Downloads/Github/Youtube-Engagement-Deep-Multimodal-Fusion/.venv/lib/python3.10/site-packages/imageio_ffmpeg/binaries/ffmpeg-macos-aarch64-v7.1 -y -ss 0.00 -i data/video/test/tmp_KeiyBlTMsfc.mp4 -t 60.00 -map 0 -vcodec copy -acodec copy -copyts data/video/test/KeiyBlTMsfc.mp4
MoviePy - Command

[download] Got error: HTTPSConnectionPool(host='rr5---sn-npoeene6.googlevideo.com', port=443): Read timed out.


❌ Không thể tải video
[download] Got error: HTTPSConnectionPool(host='rr5---sn-npoeene6.googlevideo.com', port=443): Read timed out.
[youtube] Extracting URL: https://youtu.be/MnRuniFlHGE
[youtube] MnRuniFlHGE: Downloading webpage
[youtube] MnRuniFlHGE: Downloading tv client config
[youtube] MnRuniFlHGE: Downloading player 7d1d50a6
[youtube] MnRuniFlHGE: Downloading tv player API JSON
[youtube] MnRuniFlHGE: Downloading ios player API JSON
[youtube] MnRuniFlHGE: Downloading m3u8 information
[info] MnRuniFlHGE: Downloading 1 format(s): 18
[download] Destination: data/video/test/tmp_MnRuniFlHGE.mp4
[download] 100% of   97.82MiB in 00:01:39 at 1010.23KiB/s  
MoviePy - Running:
>>> /Users/vuongnguyen/Downloads/Github/Youtube-Engagement-Deep-Multimodal-Fusion/.venv/lib/python3.10/site-packages/imageio_ffmpeg/binaries/ffmpeg-macos-aarch64-v7.1 -y -ss 0.00 -i data/video/test/tmp_MnRuniFlHGE.mp4 -t 60.00 -map 0 -vcodec copy -acodec copy -copyts data/video/test/MnRuniFlHGE.mp4
MoviePy - Command 



[info] I8AxxdJ9w6c: Downloading 1 format(s): 18
[download] Destination: data/video/test/tmp_I8AxxdJ9w6c.mp4
[download] 100% of   80.11MiB in 00:00:50 at 1.57MiB/s     
MoviePy - Running:
>>> /Users/vuongnguyen/Downloads/Github/Youtube-Engagement-Deep-Multimodal-Fusion/.venv/lib/python3.10/site-packages/imageio_ffmpeg/binaries/ffmpeg-macos-aarch64-v7.1 -y -ss 0.00 -i data/video/test/tmp_I8AxxdJ9w6c.mp4 -t 60.00 -map 0 -vcodec copy -acodec copy -copyts data/video/test/I8AxxdJ9w6c.mp4
MoviePy - Command successful
MoviePy - Running:
>>> /Users/vuongnguyen/Downloads/Github/Youtube-Engagement-Deep-Multimodal-Fusion/.venv/lib/python3.10/site-packages/imageio_ffmpeg/binaries/ffmpeg-macos-aarch64-v7.1 -y -i data/video/test/I8AxxdJ9w6c.mp4 -ab 3000k -ar 44100 data/audio/test/I8AxxdJ9w6c.mp3
MoviePy - Command successful
[youtube] Extracting URL: https://youtu.be/Afc8jkzb00E
[youtube] Afc8jkzb00E: Downloading webpage
[youtube] Afc8jkzb00E: Downloading tv client config
[youtube] Afc8jkzb00E: Downlo

[download] Got error: HTTPSConnectionPool(host='rr7---sn-8qj-nbo66.googlevideo.com', port=443): Read timed out.


❌ Không thể tải video
[download] Got error: HTTPSConnectionPool(host='rr7---sn-8qj-nbo66.googlevideo.com', port=443): Read timed out.
[youtube] Extracting URL: https://youtu.be/Ps5-e3pB6OM
[youtube] Ps5-e3pB6OM: Downloading webpage
[youtube] Ps5-e3pB6OM: Downloading tv client config
[youtube] Ps5-e3pB6OM: Downloading player 7d1d50a6
[youtube] Ps5-e3pB6OM: Downloading tv player API JSON
[youtube] Ps5-e3pB6OM: Downloading ios player API JSON
[youtube] Ps5-e3pB6OM: Downloading m3u8 information
[info] Ps5-e3pB6OM: Downloading 1 format(s): 18
[download] Destination: data/video/test/tmp_Ps5-e3pB6OM.mp4
[download] 100% of   32.85MiB in 00:00:05 at 6.53MiB/s     
MoviePy - Running:
>>> /Users/vuongnguyen/Downloads/Github/Youtube-Engagement-Deep-Multimodal-Fusion/.venv/lib/python3.10/site-packages/imageio_ffmpeg/binaries/ffmpeg-macos-aarch64-v7.1 -y -ss 0.00 -i data/video/test/tmp_Ps5-e3pB6OM.mp4 -t 60.00 -map 0 -vcodec copy -acodec copy -copyts data/video/test/Ps5-e3pB6OM.mp4
MoviePy - Command



[youtube] UCJV4ihWFls: Downloading tv player API JSON
[youtube] UCJV4ihWFls: Downloading ios player API JSON
[youtube] UCJV4ihWFls: Downloading m3u8 information
[info] UCJV4ihWFls: Downloading 1 format(s): 18
[download] Destination: data/video/test/tmp_UCJV4ihWFls.mp4
[download] 100% of   93.54MiB in 00:00:24 at 3.78MiB/s     
MoviePy - Running:
>>> /Users/vuongnguyen/Downloads/Github/Youtube-Engagement-Deep-Multimodal-Fusion/.venv/lib/python3.10/site-packages/imageio_ffmpeg/binaries/ffmpeg-macos-aarch64-v7.1 -y -ss 0.00 -i data/video/test/tmp_UCJV4ihWFls.mp4 -t 60.00 -map 0 -vcodec copy -acodec copy -copyts data/video/test/UCJV4ihWFls.mp4
MoviePy - Command successful
MoviePy - Running:
>>> /Users/vuongnguyen/Downloads/Github/Youtube-Engagement-Deep-Multimodal-Fusion/.venv/lib/python3.10/site-packages/imageio_ffmpeg/binaries/ffmpeg-macos-aarch64-v7.1 -y -i data/video/test/UCJV4ihWFls.mp4 -ab 3000k -ar 44100 data/audio/test/UCJV4ihWFls.mp3
MoviePy - Command successful
[youtube] Extracti

[download] Got error: HTTPSConnectionPool(host='rr1---sn-8qj-nbo67.googlevideo.com', port=443): Read timed out.


❌ Không thể tải video
[download] Got error: HTTPSConnectionPool(host='rr1---sn-8qj-nbo67.googlevideo.com', port=443): Read timed out.
[youtube] Extracting URL: https://youtu.be/g7Bv6YIeRN8
[youtube] g7Bv6YIeRN8: Downloading webpage
[youtube] g7Bv6YIeRN8: Downloading tv client config
[youtube] g7Bv6YIeRN8: Downloading player 7d1d50a6
[youtube] g7Bv6YIeRN8: Downloading tv player API JSON
[youtube] g7Bv6YIeRN8: Downloading ios player API JSON
[youtube] g7Bv6YIeRN8: Downloading m3u8 information
[info] g7Bv6YIeRN8: Downloading 1 format(s): 18
[download] Destination: data/video/test/tmp_g7Bv6YIeRN8.mp4
[download] 100% of  119.34MiB in 00:00:18 at 6.30MiB/s     
MoviePy - Running:
>>> /Users/vuongnguyen/Downloads/Github/Youtube-Engagement-Deep-Multimodal-Fusion/.venv/lib/python3.10/site-packages/imageio_ffmpeg/binaries/ffmpeg-macos-aarch64-v7.1 -y -ss 0.00 -i data/video/test/tmp_g7Bv6YIeRN8.mp4 -t 60.00 -map 0 -vcodec copy -acodec copy -copyts data/video/test/g7Bv6YIeRN8.mp4
MoviePy - Command

[download] Got error: HTTPSConnectionPool(host='rr1---sn-npoldn76.googlevideo.com', port=443): Read timed out.


❌ Không thể tải video
[download] Got error: HTTPSConnectionPool(host='rr1---sn-npoldn76.googlevideo.com', port=443): Read timed out.
[youtube] Extracting URL: https://youtu.be/KZYSciKiTj0
[youtube] KZYSciKiTj0: Downloading webpage
[youtube] KZYSciKiTj0: Downloading tv client config
[youtube] KZYSciKiTj0: Downloading player 7d1d50a6
[youtube] KZYSciKiTj0: Downloading tv player API JSON
[youtube] KZYSciKiTj0: Downloading ios player API JSON
[youtube] KZYSciKiTj0: Downloading m3u8 information
[info] KZYSciKiTj0: Downloading 1 format(s): 18
[download] Destination: data/video/test/tmp_KZYSciKiTj0.mp4
[download] 100% of   92.24MiB in 00:00:04 at 19.21MiB/s    
MoviePy - Running:
>>> /Users/vuongnguyen/Downloads/Github/Youtube-Engagement-Deep-Multimodal-Fusion/.venv/lib/python3.10/site-packages/imageio_ffmpeg/binaries/ffmpeg-macos-aarch64-v7.1 -y -ss 0.00 -i data/video/test/tmp_KZYSciKiTj0.mp4 -t 60.00 -map 0 -vcodec copy -acodec copy -copyts data/video/test/KZYSciKiTj0.mp4
MoviePy - Command 

[download] Got error: HTTPSConnectionPool(host='rr2---sn-npoeene6.googlevideo.com', port=443): Read timed out.


❌ Không thể tải video
[download] Got error: HTTPSConnectionPool(host='rr2---sn-npoeene6.googlevideo.com', port=443): Read timed out.
[youtube] Extracting URL: https://youtu.be/LIg3qgEwvgQ
[youtube] LIg3qgEwvgQ: Downloading webpage
[youtube] LIg3qgEwvgQ: Downloading tv client config
[youtube] LIg3qgEwvgQ: Downloading player 7d1d50a6
[youtube] LIg3qgEwvgQ: Downloading tv player API JSON
[youtube] LIg3qgEwvgQ: Downloading ios player API JSON
[youtube] LIg3qgEwvgQ: Downloading m3u8 information
[info] LIg3qgEwvgQ: Downloading 1 format(s): 18
[download] Destination: data/video/test/tmp_LIg3qgEwvgQ.mp4
[download] 100% of   64.78MiB in 00:00:32 at 1.97MiB/s     
MoviePy - Running:
>>> /Users/vuongnguyen/Downloads/Github/Youtube-Engagement-Deep-Multimodal-Fusion/.venv/lib/python3.10/site-packages/imageio_ffmpeg/binaries/ffmpeg-macos-aarch64-v7.1 -y -ss 0.00 -i data/video/test/tmp_LIg3qgEwvgQ.mp4 -t 60.00 -map 0 -vcodec copy -acodec copy -copyts data/video/test/LIg3qgEwvgQ.mp4
MoviePy - Command 

[download] Got error: HTTPSConnectionPool(host='rr6---sn-8qj-nbo67.googlevideo.com', port=443): Read timed out.


❌ Không thể tải video
[download] Got error: HTTPSConnectionPool(host='rr6---sn-8qj-nbo67.googlevideo.com', port=443): Read timed out.
[youtube] Extracting URL: https://youtu.be/Y0_gYq0Nh6g
[youtube] Y0_gYq0Nh6g: Downloading webpage
[youtube] Y0_gYq0Nh6g: Downloading tv client config
[youtube] Y0_gYq0Nh6g: Downloading player 7d1d50a6
[youtube] Y0_gYq0Nh6g: Downloading tv player API JSON
[youtube] Y0_gYq0Nh6g: Downloading ios player API JSON
[youtube] Y0_gYq0Nh6g: Downloading m3u8 information
[info] Y0_gYq0Nh6g: Downloading 1 format(s): 18
[download] Destination: data/video/test/tmp_Y0_gYq0Nh6g.mp4
[download]   3.9% of  185.77MiB at    4.17MiB/s ETA 00:42   

[download] Got error: HTTPSConnectionPool(host='rr1---sn-npoe7ner.googlevideo.com', port=443): Read timed out.


❌ Không thể tải video
[download] Got error: HTTPSConnectionPool(host='rr1---sn-npoe7ner.googlevideo.com', port=443): Read timed out.
[youtube] Extracting URL: https://youtu.be/iwat77DSk00
[youtube] iwat77DSk00: Downloading webpage
[youtube] iwat77DSk00: Downloading tv client config
[youtube] iwat77DSk00: Downloading player 7d1d50a6
[youtube] iwat77DSk00: Downloading tv player API JSON
[youtube] iwat77DSk00: Downloading ios player API JSON
[youtube] iwat77DSk00: Downloading m3u8 information
[info] iwat77DSk00: Downloading 1 format(s): 18
[download] Destination: data/video/test/tmp_iwat77DSk00.mp4
[download]  55.2% of   49.56MiB at    7.93MiB/s ETA 00:02  

[download] Got error: HTTPSConnectionPool(host='rr3---sn-npoe7nds.googlevideo.com', port=443): Read timed out.


❌ Không thể tải video
[download] Got error: HTTPSConnectionPool(host='rr3---sn-npoe7nds.googlevideo.com', port=443): Read timed out.
[youtube] Extracting URL: https://youtu.be/G1pGtCvC-J0
[youtube] G1pGtCvC-J0: Downloading webpage
[youtube] G1pGtCvC-J0: Downloading tv client config
[youtube] G1pGtCvC-J0: Downloading player 7d1d50a6
[youtube] G1pGtCvC-J0: Downloading tv player API JSON
[youtube] G1pGtCvC-J0: Downloading ios player API JSON




[youtube] G1pGtCvC-J0: Downloading ios player API JSON
[youtube] G1pGtCvC-J0: Downloading m3u8 information
[info] G1pGtCvC-J0: Downloading 1 format(s): 18
[download] Destination: data/video/test/tmp_G1pGtCvC-J0.mp4
[download]  30.0% of   90.70MiB at    2.68MiB/s ETA 00:23  

[download] Got error: [SYS] unknown error (_ssl.c:2578)


❌ Không thể tải video
[download] Got error: [SYS] unknown error (_ssl.c:2578)
[youtube] Extracting URL: https://youtu.be/bx6hhIb4AE4
[youtube] bx6hhIb4AE4: Downloading webpage
[youtube] bx6hhIb4AE4: Downloading tv client config
[youtube] bx6hhIb4AE4: Downloading player 7d1d50a6
[youtube] bx6hhIb4AE4: Downloading tv player API JSON
[youtube] bx6hhIb4AE4: Downloading ios player API JSON
[youtube] bx6hhIb4AE4: Downloading m3u8 information
[info] bx6hhIb4AE4: Downloading 1 format(s): 18
[download] Destination: data/video/test/tmp_bx6hhIb4AE4.mp4
[download]   8.6% of  167.52MiB at    1.32MiB/s ETA 01:55  

[download] Got error: HTTPSConnectionPool(host='rr6---sn-8qj-nbo67.googlevideo.com', port=443): Read timed out.


❌ Không thể tải video
[download] Got error: HTTPSConnectionPool(host='rr6---sn-8qj-nbo67.googlevideo.com', port=443): Read timed out.
[youtube] Extracting URL: https://youtu.be/zhKdt0pOvno
[youtube] zhKdt0pOvno: Downloading webpage
[youtube] zhKdt0pOvno: Downloading tv client config
[youtube] zhKdt0pOvno: Downloading player 7d1d50a6
[youtube] zhKdt0pOvno: Downloading tv player API JSON
[youtube] zhKdt0pOvno: Downloading ios player API JSON
[youtube] zhKdt0pOvno: Downloading m3u8 information
[info] zhKdt0pOvno: Downloading 1 format(s): 18
[download] Destination: data/video/test/tmp_zhKdt0pOvno.mp4
[download]  19.9% of  147.59MiB at    5.38MiB/s ETA 00:21  

[download] Got error: HTTPSConnectionPool(host='rr4---sn-8qj-nbo67.googlevideo.com', port=443): Read timed out.


❌ Không thể tải video
[download] Got error: HTTPSConnectionPool(host='rr4---sn-8qj-nbo67.googlevideo.com', port=443): Read timed out.
[youtube] Extracting URL: https://youtu.be/o0sDwCjQ344
[youtube] o0sDwCjQ344: Downloading webpage
[youtube] o0sDwCjQ344: Downloading tv client config
[youtube] o0sDwCjQ344: Downloading player 7d1d50a6
[youtube] o0sDwCjQ344: Downloading tv player API JSON
[youtube] o0sDwCjQ344: Downloading ios player API JSON
[youtube] o0sDwCjQ344: Downloading m3u8 information
[info] o0sDwCjQ344: Downloading 1 format(s): 18
[download] Destination: data/video/test/tmp_o0sDwCjQ344.mp4
[download] 100% of   95.06MiB in 00:00:39 at 2.40MiB/s     
MoviePy - Running:
>>> /Users/vuongnguyen/Downloads/Github/Youtube-Engagement-Deep-Multimodal-Fusion/.venv/lib/python3.10/site-packages/imageio_ffmpeg/binaries/ffmpeg-macos-aarch64-v7.1 -y -ss 0.00 -i data/video/test/tmp_o0sDwCjQ344.mp4 -t 60.00 -map 0 -vcodec copy -acodec copy -copyts data/video/test/o0sDwCjQ344.mp4
MoviePy - Command

KeyboardInterrupt: 

In [76]:
# Re-read the parquet file at `path`, replacing the in-memory `df`.
df = pd.read_parquet(path)