<a href="https://colab.research.google.com/github/Anil-matcha/AI-B-roll/blob/main/AI_Broll.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Install dependencies

In [None]:
!pip install git+https://github.com/openai/whisper.git
!pip install pytube
!pip install openai
!pip install moviepy

### Download the input video from YouTube

In [None]:
from pytube import YouTube
import os

# Function to download a YouTube video
def download_youtube_video(url, output_path='.', filename=None):
    """Download a YouTube video with pytube and save it locally.

    The stream is the one chosen by pytube's ``get_highest_resolution()``.

    Args:
        url: URL of the YouTube video.
        output_path: Directory the file is saved into.
        filename: Optional name (including extension) for the saved file.
            When omitted, the name chosen by pytube is kept.

    Returns:
        The path of the saved file, or ``None`` when the download failed.
        Failures are reported with a printed message instead of being raised.
    """
    try:
        yt = YouTube(url)
        video_stream = yt.streams.get_highest_resolution()
        downloaded_file_path = video_stream.download(output_path=output_path, filename=filename)

        if not filename:
            print(f"Downloaded: {downloaded_file_path}")
            return downloaded_file_path

        new_file_path = os.path.join(output_path, f"{filename}")
        # download() may already have written to the requested name; only move
        # the file when the two paths really differ.
        if os.path.abspath(downloaded_file_path) != os.path.abspath(new_file_path):
            os.replace(downloaded_file_path, new_file_path)
        print(f"Downloaded and renamed to: {new_file_path}")
        return new_file_path

    except Exception as e:
        # Best-effort helper: report the problem and signal failure with None.
        print(f"An error occurred: {e}")
        return None

# Example usage
if __name__ == "__main__":
    # Source video on YouTube and the directory it is saved into
    video_url = 'https://www.youtube.com/watch?v=8ZyShHwF_g0'
    output_path = '.'

    # Fetch the video and store it as video.mp4 inside output_path
    download_youtube_video(url=video_url, output_path=output_path, filename="video.mp4")

Downloaded and renamed to: ./video.mp4


### Extract audio from video with ffmpeg

In [None]:
!ffmpeg -i video.mp4 -ab 160k -ac 2 -ar 44100 -vn audio.wav

### Generate subtitles for the audio using Whisper

In [None]:
import whisper

# Load the "medium" Whisper model (the saved output below shows a ~1.42G download on first use)
model = whisper.load_model("medium")
# Transcribe the audio extracted by ffmpeg; the next cell reads result["segments"]
result = model.transcribe("audio.wav")

100%|██████████████████████████████████████| 1.42G/1.42G [00:14<00:00, 106MiB/s]


### Split the subtitles into groups of length 20 (For usage in next step)

In [None]:
# Keep only the timing and text of every transcribed segment
segments = result["segments"]
extracted_data = [
    {field: segment[field] for field in ('start', 'end', 'text')}
    for segment in segments
]
# Plain list of the segment texts, in the same order as extracted_data
data = [entry["text"] for entry in extracted_data]

def split_array(arr, max_size=20):
    """Cut ``arr`` into consecutive slices holding at most ``max_size`` items.

    Returns a list of slices in the original order; only the last slice can be
    shorter than ``max_size``. An empty ``arr`` yields an empty list.
    """
    chunk_starts = range(0, len(arr), max_size)
    return [arr[begin:begin + max_size] for begin in chunk_starts]

# Example usage
# NOTE(review): my_array is not referenced by any other visible cell — looks like a demo leftover; confirm before removing
my_array = list(range(100))  # Example array with 100 elements
# Group the subtitle texts into chunks of at most 20 entries for the keyword-generation cell
split_arrays = split_array(data, max_size=20)

### Generate keywords for sentences in subtitles using OpenAI

In [None]:
from openai import OpenAI
import json

OPENAI_API_KEY = "openai-api-key"  # placeholder — replace with your own key before running
broll_info = []
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
client = OpenAI(
        api_key=OPENAI_API_KEY,
    )


def _parse_keyword_json(raw_reply):
    """Parse the model reply into Python data, tolerating text or a code fence around the JSON array."""
    try:
        return json.loads(raw_reply)
    except json.JSONDecodeError:
        pass
    # Fall back to the outermost [...] span, which covers ```json fenced replies
    # as well as replies with extra prose around the array.
    first, last = raw_reply.find('['), raw_reply.rfind(']')
    if first == -1 or last <= first:
        raise ValueError(f"No JSON array found in model reply: {raw_reply!r}")
    return json.loads(raw_reply[first:last + 1])


# Position in `data` of the first sentence of the chunk being processed.
# Tracking it as a running sum keeps the global indices right for any chunk size.
segment_offset = 0
for chunk in split_arrays:
    chunk_size = len(chunk)
    # The prompt states the real chunk size: the last chunk is usually shorter
    # than the others, and asking for more keywords than there are sentences
    # invites indices that do not exist.
    prompt = (
        """This is a transcript from a shorts video with """ + str(chunk_size)
        + """ sublists. Each sublist represents a segment of the conversation. Your task is to identify a keyword from each sublist that can be used to search for relevant b-roll footage. B-roll footage should complement the conversation topics and should be identified such that it can give relevant results when searched in pexels api. Please provide one keyword per sublist. Never skip any sublist and always give in order i.e from 0 to """
        + str(chunk_size - 1)
        + """. Need output with keyword and list index. Strictly give json\n\n**Input**\n\n"""
        + str(chunk)
        + """\n\n**Output format**\n\n[{"k": keyword1, "i":0},{"k":keyword2, "i":1}]"""
    )
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
        model="gpt-4o",
    )
    broll_data = chat_completion.choices[0].message.content
    print("Data", broll_data)
    broll_data = _parse_keyword_json(broll_data)
    for entry in broll_data:
        local_index = entry.get("i")
        # Ignore entries whose index does not point at a sentence of this chunk
        if isinstance(local_index, int) and 0 <= local_index < chunk_size and "k" in entry:
            broll_info.append({"k": entry["k"], "i": segment_offset + local_index})
    segment_offset += chunk_size

### Function to fetch pexels videos for keywords

In [None]:
import requests

PEXELS_API_KEY = "pexels-api-key"  # placeholder — replace with your own Pexels API key


def fetch_pexels_video(keyword, orientation="landscape"):
    """Search Pexels for a video matching ``keyword`` and return its first hit.

    Args:
        keyword: Search term sent as the ``query`` parameter.
        orientation: Value of the ``orientation`` search parameter.

    Returns:
        ``{'video': <link of the first video file>, 'thumbnail': <image URL>}``
        for the first search result, or the string ``"Invalid keyword"`` when
        the search returns nothing usable.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    response = requests.get(
        "https://api.pexels.com/videos/search",
        headers={"Authorization": PEXELS_API_KEY},
        # Passing params lets requests URL-encode keywords containing spaces, '&', '#', ...
        params={"query": keyword, "orientation": orientation, "size": "medium"},
        timeout=30,
    )
    # Fail with a clear HTTP error instead of a KeyError on the error payload
    response.raise_for_status()
    data = response.json()

    videos = data.get('videos') or []
    if data.get('total_results', 0) > 0 and videos:
        video_info = videos[0]
        video_files = video_info.get('video_files') or []
        if video_files:
            return {'video': video_files[0]['link'], 'thumbnail': video_info.get('image')}
    return "Invalid keyword"

### Randomly select 50% of keywords and fetch pexels video url for them

In [87]:
import random
# Pick half of the keywords at random; the matching segments get b-roll footage
num_to_select = int(len(broll_info) * 0.5)
enumerated_list = list(enumerate(broll_info))
selected_with_indices = random.sample(enumerated_list, num_to_select)
selected_elements = [elem for index, elem in selected_with_indices]
selected_indices = [index for index, elem in selected_with_indices]

# Drop assignments from a previous run so that re-executing this cell does not
# keep adding b-roll to more and more segments.
for entry in extracted_data:
    entry.pop("video", None)

for element in selected_elements:
    # "i" is the position of the sentence in the transcript. It only equals the
    # keyword's position in broll_info when no keyword was skipped or duplicated,
    # so the stored index is the one to use.
    segment_index = element["i"]
    if isinstance(segment_index, int) and 0 <= segment_index < len(extracted_data):
        extracted_data[segment_index]["video"] = fetch_pexels_video(element["k"])

### Download pexels video and stitch the final video using original and b-roll pexels videos

In [None]:
import os
import requests
from moviepy.editor import VideoFileClip, concatenate_videoclips, concatenate_audioclips
from tempfile import TemporaryDirectory
from moviepy.video.fx.all import resize

def download_video(url, temp_dir):
    """Stream the file at ``url`` into ``temp_dir`` and return the local path.

    The local file name is the last component of the URL *path*; the query
    string and fragment are ignored so that signed links such as
    ``.../clip.mp4?s=...`` are stored as ``clip.mp4``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    from urllib.parse import unquote, urlparse

    remote_name = os.path.basename(unquote(urlparse(url).path))
    # Fall back to a fixed name when the URL path ends with a slash
    local_filename = os.path.join(temp_dir, remote_name or "download")
    with requests.get(url, stream=True, timeout=60) as r:
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    return local_filename

def process_broll_clip(b_roll_clip, segment_duration, original_audio, start, target_size=None):
    """Fit a b-roll clip to one transcript segment of the original video.

    The clip is looped when it is shorter than the segment, trimmed to
    ``segment_duration``, resized, and given the matching slice of the
    original audio.

    Args:
        b_roll_clip: The b-roll video clip.
        segment_duration: Length of the segment in seconds.
        original_audio: Audio of the original video.
        start: Start time of the segment in the original video, in seconds.
        target_size: Optional ``(width, height)`` to resize to. When omitted,
            the size is read from the module-level ``original_clip`` variable,
            which must have been assigned before this function is called.

    Returns:
        The processed b-roll clip.
    """
    if b_roll_clip.duration < segment_duration:
        # Repeat the footage often enough to cover the whole segment
        num_loops = int(segment_duration / b_roll_clip.duration) + 1
        b_roll_clip = concatenate_videoclips([b_roll_clip] * num_loops)
    b_roll_clip = b_roll_clip.subclip(0, segment_duration)

    if target_size is None:
        # Backward-compatible fallback on the global set by the stitching loop
        target_size = (original_clip.w, original_clip.h)
    b_roll_clip = resize(b_roll_clip, newsize=tuple(target_size))

    # Set audio from the original video to the b-roll clip
    segment_audio = original_audio.subclip(start, start + segment_duration)
    return b_roll_clip.set_audio(segment_audio)

def concatenate_clips_with_audio(clips):
    """Join ``clips`` into one video and attach their concatenated audio.

    The clips are joined with ``method="compose"``. The audio tracks of all
    clips that have one are concatenated in order and set on the result; when
    no clip has audio, the joined video is returned as is.
    """
    audio_tracks = [item.audio for item in clips if item.audio is not None]
    stitched = concatenate_videoclips(list(clips), method="compose")

    if not audio_tracks:
        return stitched

    return stitched.set_audio(concatenate_audioclips(audio_tracks))

# Load the original video
original_video_path = 'video.mp4'
original_video = VideoFileClip(original_video_path)
original_audio = original_video.audio

try:
    with TemporaryDirectory() as temp_dir:
        final_clips = []
        opened_broll_clips = []  # b-roll readers, closed once rendering is done

        try:
            for segment in extracted_data:
                start = segment['start']
                # A transcript segment can end slightly after the video does
                end = min(segment['end'], original_video.duration)
                segment_duration = end - start
                if segment_duration <= 0:
                    # Nothing to show for an empty segment
                    continue

                # process_broll_clip reads this name for the target frame size
                original_clip = original_video.subclip(start, end)

                if 'video' in segment and segment["video"] != "Invalid keyword":
                    print("Segment", segment)
                    try:
                        b_roll_video_url = segment['video']['video']
                        b_roll_video_path = download_video(b_roll_video_url, temp_dir)
                        b_roll_source = VideoFileClip(b_roll_video_path)
                        opened_broll_clips.append(b_roll_source)

                        b_roll_clip = process_broll_clip(b_roll_source, segment_duration, original_audio, start)
                    except Exception as e:
                        # One unusable b-roll must not abort the whole render:
                        # keep the original footage for this segment instead.
                        print(f"B-roll failed for segment starting at {start}: {e}; using original footage")
                        final_clips.append(original_clip)
                    else:
                        final_clips.append(b_roll_clip)
                else:
                    final_clips.append(original_clip)

            final_video = concatenate_clips_with_audio(final_clips)

            # Render while the temporary directory with the b-roll files still exists
            final_video.write_videofile('final_video_with_broll.mp4', audio_codec='aac')
        finally:
            # Release the b-roll files before the temporary directory is removed
            for opened_clip in opened_broll_clips:
                opened_clip.close()
finally:
    original_video.close()