In [None]:
# Install into the running kernel's environment (%pip, not !pip) and pin the
# versions this notebook was executed with: the code below relies on the
# 0.6-series `YouTubeTranscriptApi.get_transcript` entry point.
%pip install -q pytube==15.0.0 youtube-transcript-api==0.6.2

Collecting pytube
  Downloading pytube-15.0.0-py3-none-any.whl (57 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.6/57.6 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting youtube-transcript-api
  Downloading youtube_transcript_api-0.6.2-py3-none-any.whl (24 kB)
Installing collected packages: pytube, youtube-transcript-api
Successfully installed pytube-15.0.0 youtube-transcript-api-0.6.2


In [None]:
import os
from urllib.parse import parse_qs, urlparse

import cv2
import numpy as np
from pytube import YouTube
from youtube_transcript_api import YouTubeTranscriptApi

def _extract_video_id(vURL):
    """Return the YouTube video ID contained in ``vURL``.

    Handles ``watch?v=<id>`` URLs (ignoring any other query parameters such as
    ``&t=7s``), ``youtu.be/<id>`` short links and ``/embed/<id>``,
    ``/shorts/<id>``, ``/live/<id>`` and ``/v/<id>`` paths.

    Raises:
        ValueError: if no video ID can be found in the URL.
    """
    parsed = urlparse(vURL.strip())

    # Standard watch URL: the ID is the value of the "v" query parameter only.
    query_ids = parse_qs(parsed.query).get("v")
    if query_ids and query_ids[0]:
        return query_ids[0]

    host = (parsed.hostname or "").lower()
    path_parts = [part for part in parsed.path.split("/") if part]
    if host.endswith("youtu.be") and path_parts:
        return path_parts[0]
    if len(path_parts) >= 2 and path_parts[0] in ("embed", "shorts", "live", "v"):
        return path_parts[1]

    raise ValueError(f"Could not extract a video ID from URL: {vURL!r}")


def _preprocess_video(vPath, preprocessed_vPath, targetWidth=640, defaultFps=30.0):
    """Resize every frame of ``vPath`` to ``targetWidth`` and write the result.

    The aspect ratio is preserved. The resized frames are written to
    ``preprocessed_vPath`` (mp4v codec) and also returned as a list.

    Raises:
        Exception: if the input video cannot be opened or reports no size.
    """
    cap = cv2.VideoCapture(vPath)
    out = None
    frames = []
    try:
        if not cap.isOpened():
            raise Exception("Error reading video frames. Check if the video file exists.")

        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        if frame_width <= 0 or frame_height <= 0:
            raise Exception("Error reading video frames. Check if the video file exists.")

        # The writer's frame size must equal the size of the frames passed to
        # write(), so compute the scaled height once and use it for both.
        targetHeight = max(1, int(frame_height * targetWidth / frame_width))

        # Keep the source frame rate; fall back when the container reports
        # nothing usable (0 or NaN both fail the `> 0` comparison).
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = defaultFps

        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out = cv2.VideoWriter(preprocessed_vPath, fourcc, fps, (targetWidth, targetHeight))

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.resize(frame, (targetWidth, targetHeight))
            out.write(frame)
            frames.append(frame)
    finally:
        # Release the capture and writer even if reading or writing fails.
        cap.release()
        if out is not None:
            out.release()
    return frames


def videoCaption(vURL, folder):
    """Download a YouTube video and its captions, then preprocess the video.

    Files written into ``folder`` (created if missing):
      * the downloaded video (highest-resolution progressive stream),
      * ``<videoID>Captions.txt`` with one ``start - end: text`` line per caption,
      * ``<videoID>_preprocessed.mp4`` with frames resized to a width of 640,
      * ``frames.npy`` holding all resized frames as one array.

    Args:
        vURL: URL of the YouTube video.
        folder: Directory that receives all output files.

    Returns:
        None. Progress is reported with ``print``; any failure is caught and
        reported as ``Error preprocessing video: ...`` instead of being raised.
    """
    try:
        os.makedirs(folder, exist_ok=True)

        # Parse the ID before downloading so a malformed URL fails fast rather
        # than after a potentially large download.
        vID = _extract_video_id(vURL)
        print(f"video ID: {vID}")

        ytObj = YouTube(vURL)
        vStream = ytObj.streams.get_highest_resolution()
        # Use the path reported by the download rather than guessing the
        # file name and extension.
        vPath = vStream.download(output_path=folder) or os.path.join(
            folder, vStream.default_filename
        )
        print(f"Video is successfully downloaded to {folder}")

        vName = os.path.splitext(vStream.default_filename)[0]
        print(f"video name: {vName}")

        cObj = YouTubeTranscriptApi.get_transcript(vID)
        cText = "\n".join(
            f"{c['start']} - {c['start'] + c['duration']}: {c['text']}" for c in cObj
        )
        cPath = os.path.join(folder, f"{vID}Captions.txt")
        with open(cPath, "w", encoding="utf-8") as captions_file:
            captions_file.write(cText)
        print(f"Captions are successfully downloaded to {cPath}")

        # Read, resize and re-encode the video with OpenCV.
        preprocessed_vPath = os.path.join(folder, f"{vID}_preprocessed.mp4")
        frames = _preprocess_video(vPath, preprocessed_vPath, targetWidth=640)

        # NOTE: every resized frame is held in memory at once; long videos can
        # need a lot of RAM for this array.
        frames_array = np.array(frames)
        output_path = os.path.join(folder, "frames.npy")
        np.save(output_path, frames_array)

        print("Frames successfully preprocessed & saved as frames.npy")
    except Exception as e:
        print(f"Error preprocessing video: {str(e)}")

# Ask for the video URL and the output folder, then run the pipeline.
# Pasted values often carry a stray trailing space or newline, which would
# otherwise end up inside the URL or the directory name, so both are stripped.
vURL = input("Enter the YT video url: ").strip()
# input() does not go through a shell, so expand a leading "~" explicitly.
folder = os.path.expanduser(input("Enter the folder path: ").strip())
videoCaption(vURL, folder)


Enter the YT video url: https://www.youtube.com/watch?v=FlJoBhLnqko&t=7s
Enter the folder path: /Users/rhutikatavasalkar/Downloads/test
Video downloaded successfully to /Users/rhutikatavasalkar/Downloads/test
video_id: FlJoBhLnqko&t=7s
video_name: How Green Roofs Can Help Cities  NPR
Captions downloaded successfully to /Users/rhutikatavasalkar/Downloads/test/FlJoBhLnqko&t=7s_captions.txt
Frames successfully preprocessed & saved as frames.npy
