Installing ffmpeg-python and pytube

In [None]:
!pip install ffmpeg-python
!pip install pytube

Collecting ffmpeg-python
  Downloading ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)
Installing collected packages: ffmpeg-python
Successfully installed ffmpeg-python-0.2.0
Collecting pytube
  Downloading pytube-15.0.0-py3-none-any.whl (57 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.6/57.6 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pytube
Successfully installed pytube-15.0.0


Importing necessary packages

In [None]:
import os
import ffmpeg
import shutil
import sys
import pytube
from multiprocessing import Pool
from PIL import Image
import time
import io

Preparing Directories

In [None]:
# Show the current working directory of the notebook
os.getcwd()

'/content'

In [None]:
def prepare_dirs(drive_directory='/content'):
  '''Create the directory tree used by the ffmpeg assignment and return its paths.

  A base directory called ffmpeg_assignment is created under drive_directory,
  then the input and output directories are created inside it. Directories that
  already exist are left untouched, so the function can be re-run safely.
  The tree is like:
  - base_directory:
    - inputs
    - outputs:
      - all_outputs
      - logs
      - samples

  Params:
  - drive_directory: root directory under which ffmpeg_assignment is created
    (defaults to '/content')

  Returns:
  - a tuple of absolute paths, in this order:
    (inputs, outputs, all_outputs, logs, samples)
  '''
  base_directory = os.path.join(os.path.abspath(drive_directory),'ffmpeg_assignment')
  input_directory = os.path.join(base_directory,'inputs')
  output_directory = os.path.join(base_directory,'outputs')
  all_outputs_directory = os.path.join(output_directory,'all_outputs')
  logs_directory = os.path.join(output_directory,'logs')
  samples_directory = os.path.join(output_directory,'samples')
  # makedirs also creates missing parents, so creating the leaves is enough
  for directory in (input_directory, all_outputs_directory, logs_directory, samples_directory):
    os.makedirs(directory,exist_ok=True)
  return input_directory, output_directory, all_outputs_directory, logs_directory, samples_directory


In [None]:
def download_video(url,path):
  '''Download a YouTube video as an mp4 file into the given directory.

  Among the progressive mp4 streams of the video, the one with the highest
  resolution is downloaded.
  Params:
  - url: the url of the youtube video
  - path: the directory where the video will be saved
  Returns:
  - the value returned by pytube's download call (the path of the saved file)
  Raises:
  - ValueError: if the video offers no progressive mp4 stream
  '''
  yt = pytube.YouTube(url)
  candidates = yt.streams.filter(progressive=True, file_extension='mp4')
  # first() gives None when no stream matches, so check before downloading
  stream = candidates.order_by('resolution').desc().first()
  if stream is None:
    raise ValueError(f'No progressive mp4 stream available for {url}')
  return stream.download(path)

In [None]:
# Creating the directories and keeping their paths as notebook-level names
inputs_dir, output_dir, all_outputs_dir, logs_dir, samples_dir = prepare_dirs()

In [None]:
# Downloading the source video from YouTube into the inputs directory
download_video('https://youtu.be/WCjLd7QAJq8?si=0pQsRSmrYl8XzKM_',inputs_dir)

In [None]:
# Getting video properties
# NOTE(review): the file name is hard-coded; presumably it is the name the download was saved under - confirm
probes = ffmpeg.probe(os.path.join(inputs_dir,'3 Hour Timer.mp4'))

In [None]:
# Video duration
# NOTE(review): assumes the first stream in the probe result is the one whose duration is wanted - confirm
duration = float(probes.get('streams')[0].get('duration'))
duration

10809.0

In [None]:
def divide_video(segments):
  '''Split the first .mp4 file found in inputs_dir into equal-length segments.

  The segments are written to the 'vodeo_segments' folder inside inputs_dir as
  part_1.mp4, part_2.mp4, ... Any error is appended to segments_logs.txt in
  logs_dir instead of being raised.
  Params:
  - segments: the number of segments to divide the video into
  Returns:
  - the file names currently present in the segments folder
  '''
  # Created before the try block so the listing returned at the end cannot fail
  # on a missing folder after an error has been logged.
  segments_dir = os.path.join(inputs_dir,'vodeo_segments')
  os.makedirs(segments_dir,exist_ok=True)
  try:
    # Sorted so the choice is reproducible when several .mp4 files are present
    videos = sorted(name for name in os.listdir(inputs_dir) if name.endswith('.mp4'))
    if not videos:
      raise FileNotFoundError(f'No .mp4 file found in {inputs_dir}')
    path = os.path.abspath(os.path.join(inputs_dir,videos[0]))
    probes  = ffmpeg.probe(path)
    print(f'Dividing video into {segments} segments')
    # Take the duration from the video stream rather than from whichever stream
    # happens to be listed first; fall back to the container duration.
    streams = probes.get('streams') or []
    video_stream = next((s for s in streams if s.get('codec_type') == 'video'), {})
    duration = float(video_stream.get('duration') or probes['format']['duration'])
    segment_duration = duration/segments
    sgmts = [segment_duration*y for y in range(segments)]
    print(sgmts)
    for i,s in enumerate(sgmts):
      target = os.path.join(segments_dir,f'part_{i+1}.mp4')
      # overwrite_output keeps ffmpeg from asking for confirmation when the cell is re-run
      ffmpeg.input(path, ss=s,t=segment_duration).output(target).run(overwrite_output=True)
  except Exception as e:
    with open(os.path.join(logs_dir,'segments_logs.txt'),'a') as f:
      f.write(f'Error: {e} has occured at {time.time()}\n')

  return os.listdir(segments_dir)

In [None]:
# Split the video into 3 segments
divide_video(3)

Dividing video into 3 segments
[0.0, 3603.0, 7206.0]


['part_2.mp4', 'part_1.mp4', 'part_3.mp4']

In [None]:
def extract_images(vid):
  '''Extract one PNG image per second from a video segment.

  The segment is read from the 'vodeo_segments' folder inside inputs_dir and the
  images are saved in the 'all_images' folder inside all_outputs_dir as
  <vid>_image_<second>.png. Any error is appended to images_logs.txt in logs_dir
  instead of being raised; the extraction of this segment stops at that point.
  Params:
  - vid: file name of the segment inside the 'vodeo_segments' folder
  '''
  try:
    print('Starting...........')
    all_images_path = os.path.join(all_outputs_dir,'all_images')
    os.makedirs(all_images_path,exist_ok=True)
    path = os.path.abspath(os.path.join(inputs_dir,'vodeo_segments',vid))
    probes  = ffmpeg.probe(path)
    # Take the duration from the video stream rather than from whichever stream
    # happens to be listed first; fall back to the container duration.
    streams = probes.get('streams') or []
    video_stream = next((s for s in streams if s.get('codec_type') == 'video'), {})
    duration = float(video_stream.get('duration') or probes['format']['duration'])
    # One frame is grabbed at every whole second of the segment
    for frm in range(int(duration)):
      out,_ = (ffmpeg.input(path,ss=frm).output('pipe:', format='image2', vframes=1,vcodec='png').run(capture_stdout=True,capture_stderr=True))
      print('Saving Image....')
      img= Image.open(io.BytesIO(out))
      image_path = os.path.join(all_images_path,f'{vid}_image_{frm}.png')
      img.save(image_path)
      print(f'Image saved at {image_path}')
  except Exception as e:
    with open(os.path.join(logs_dir,'images_logs.txt'),'a') as f:
      f.write(f'Error: {e} has occured at {time.time()}\n')

In [None]:
# Only .mp4 files are segments; other entries in the folder are skipped
video_segments = [name for name in os.listdir(os.path.join(inputs_dir,'vodeo_segments')) if name.endswith('.mp4')]
# At most 10 workers, and never more than there are segments to process;
# max(1, ...) keeps the pool size valid when the folder is empty.
with Pool(max(1,min(10,len(video_segments)))) as p:
  p.map(extract_images,video_segments)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Image saved at /content/ffmpeg_assignment/outputs/all_outputs/all_images/part_2.mp4_image_2772.png
Saving Image....
Image saved at /content/ffmpeg_assignment/outputs/all_outputs/all_images/part_1.mp4_image_2748.png
Saving Image....
Saving Image....
Image saved at /content/ffmpeg_assignment/outputs/all_outputs/all_images/part_2.mp4_image_2773.png
Image saved at /content/ffmpeg_assignment/outputs/all_outputs/all_images/part_3.mp4_image_2789.png
Saving Image....
Image saved at /content/ffmpeg_assignment/outputs/all_outputs/all_images/part_1.mp4_image_2749.png
Saving Image....
Image saved at /content/ffmpeg_assignment/outputs/all_outputs/all_images/part_2.mp4_image_2774.png
Saving Image....
Image saved at /content/ffmpeg_assignment/outputs/all_outputs/all_images/part_1.mp4_image_2750.png
Saving Image....
Image saved at /content/ffmpeg_assignment/outputs/all_outputs/all_images/part_3.mp4_image_2790.png
Saving Image....
Image s

In [None]:
def get_samples(source_path,target_path,n):
  '''Copy every n-th file from source_path into target_path.

  Files are taken in sorted name order, starting with the first one.
  Params:
  - source_path: the directory the samples will be taken from
  - target_path: the directory where the samples will be saved (created if missing)
  - n: sampling step; one file out of every n is copied
  Raises:
  - ValueError: if n is smaller than 1
  '''
  if n < 1:
    raise ValueError(f'n must be at least 1, got {n}')
  path = os.path.abspath(source_path)
  os.makedirs(target_path,exist_ok=True)
  # listdir gives names in arbitrary order; sorting makes the selection reproducible
  data = sorted(os.listdir(path))
  copied = 0
  # data[::n] selects the entries at positions 0, n, 2n, ...
  for name in data[::n]:
    shutil.copy(os.path.join(path,name),target_path)
    copied += 1
    print(f'Copied {copied} samples')

In [None]:
# Source folder holding the extracted images, and the folder that receives the samples
images = os.path.abspath(os.path.join(all_outputs_dir, 'all_images'))
samples = os.path.abspath(samples_dir)
get_samples(source_path=images, target_path=samples, n=50)

In [None]:
# Mount Google Drive under /content/drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
drive_copy = '/content/drive/MyDrive/ffmpeg_assignment'
# rmtree raises when the folder is missing, which is the case the first time
# the notebook is run, so only remove a copy that is actually there.
if os.path.exists(drive_copy):
  shutil.rmtree(drive_copy)
shutil.copytree('/content/ffmpeg_assignment', drive_copy)

'/content/drive/MyDrive/ffmpeg_assignment'

In [None]:
# Number of entries in the samples directory
len(os.listdir(samples_dir))

10809