In [None]:
import pandas as pd
import os
from yt_dlp import YoutubeDL

In [None]:
# Destination folder for the downloaded videos.
output_directory = 'videos'
# exist_ok=True keeps this cell safe to re-run when the folder is already there.
os.makedirs(name=output_directory, exist_ok=True)

In [None]:
# Read the trailer spreadsheet into a DataFrame; later cells use its
# 'URL' and 'IMDBid' columns.
df = pd.read_excel(io='MM-Trailer_dataset.xlsx')

In [None]:
# Pull the URL and IMDBid columns out of the DataFrame as plain Python lists;
# both keep the row order, so position i in each list refers to the same row.
urls, ids = (df[column].tolist() for column in ('URL', 'IMDBid'))

In [None]:
# Collects one record for every download that could not be completed
failed_downloads = list()

In [None]:
# Download every trailer, naming each file after its IMDB ID.
#
# Reads `ids`, `urls` and `output_directory` from the earlier cells and appends
# an (imdb_id, url, error message) tuple to `failed_downloads` for every entry
# that could not be fetched, so a single bad entry never aborts the whole run.
for imdb_id, url in zip(ids, urls):  # `imdb_id` avoids shadowing the builtin `id`
    # Expected path of the finished video; used for the skip check below.
    output_filename = os.path.join(output_directory, f'{imdb_id}.mp4')

    # Skip trailers that were already fetched by a previous run.
    if os.path.exists(output_filename):
        print(f"File {output_filename} already exists. Skipping download.")
        continue

    # Blank spreadsheet cells usually arrive from pandas as NaN (a float), not a
    # string; record them as failures instead of handing a non-URL to yt-dlp.
    if not isinstance(url, str) or not url.strip():
        message = "Missing or invalid URL"
        print(f"Error downloading {url} with IMDB ID {imdb_id}: {message}")
        failed_downloads.append((imdb_id, url, message))
        continue

    try:
        ydl_opts = {
            # Let yt-dlp fill in the extension: a hard-coded '.mp4' in the template
            # mislabels a non-mp4 download, and the convertor below then appears to
            # write '<id>.mp4.mp4' (TODO confirm), which the skip check never finds.
            # The merge/convert settings below still make the final file '<id>.mp4'.
            'outtmpl': os.path.join(output_directory, f'{imdb_id}.%(ext)s'),
            'format': 'bestvideo+bestaudio/best',
            'merge_output_format': 'mp4',          # Merge separate streams into mp4
            # NOTE: no 'downloader' key here. YoutubeDL does not recognise that
            # name (the API option is 'external_downloader'; '--downloader' is the
            # CLI flag), so such an entry would be silently ignored.
            'postprocessors': [{                   # Use ffmpeg for postprocessing
                'key': 'FFmpegVideoConvertor',
                'preferedformat': 'mp4',           # Spelling is yt-dlp's own key name
            }],
        }
        with YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
    except Exception as e:
        # Best-effort batch job: log the problem, remember it, and move on.
        print(f"Error downloading {url} with IMDB ID {imdb_id}: {e}")
        failed_downloads.append((imdb_id, url, str(e)))

In [None]:
# Summarise the run: either confirm that nothing failed, or list every
# failed entry together with the error that was recorded for it.
if not failed_downloads:
    print("All downloads completed successfully.")
else:
    print("\nFailed Downloads:")
    for imdb_id, url, error in failed_downloads:
        print(f"IMDB ID: {imdb_id}, URL: {url}, Error: {error}")