### Install dependencies

In [2]:
!pip install git+https://github.com/openai/whisper.git
!pip install pytube
!pip install openai
!pip install moviepy

Collecting git+https://github.com/openai/whisper.git
  Cloning https://github.com/openai/whisper.git to /tmp/pip-req-build-ill8gm7v
  Running command git clone --filter=blob:none --quiet https://github.com/openai/whisper.git /tmp/pip-req-build-ill8gm7v
  Resolved https://github.com/openai/whisper.git to commit 5979f03701209bb035a0a466f14131aeb1116cbb
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting numba (from openai-whisper==20240930)
  Downloading numba-0.60.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl.metadata (2.7 kB)
Collecting torch (from openai-whisper==20240930)
  Downloading torch-2.5.1-cp311-cp311-manylinux2014_aarch64.whl.metadata (28 kB)
Collecting more-itertools (from openai-whisper==20240930)
  Downloading more_itertools-10.5.0-py3-none-any.whl.metadata (36 kB)
Collecting llvmlite<0.44,>=0.43.0dev0 (from numba->openai-whisp

### Workflow

1. Generate captions for input video
2. Identify keywords which represent these captions and can be used to find relevant videos in Pexels library

- Example :

 - AI is used to automate many of the human tasks -> Automation

3. Fetch pexels videos for these keywords to use as b-roll videos
4. Stitch together the b-roll videos with original video

### Extract audio from video with ffmpeg

In [None]:
!ffmpeg -i video.mp4 -ab 160k -ac 2 -ar 44100 -vn audio.wav

### Generate subtitles for the audio using whisper

In [4]:
import whisper

# Load the "medium" Whisper checkpoint (the recorded output shows a 1.42G
# download the first time it is fetched).
model = whisper.load_model("medium")
# Transcribe the extracted audio; the next cell reads result["segments"].
# NOTE(review): the recorded output of this cell ends with
# FileNotFoundError: 'ffmpeg', so the ffmpeg binary apparently has to be on
# PATH for this call to succeed - confirm in the target environment.
result = model.transcribe("audio.wav")

100%|█████████████████████████████████████| 1.42G/1.42G [01:14<00:00, 20.6MiB/s]
  checkpoint = torch.load(fp, map_location=device)


FileNotFoundError: [Errno 2] No such file or directory: 'ffmpeg'

### Split the subtitles into groups of length 20 (For usage in next step)

In [None]:
# Keep only the timing and text of every transcribed segment, then collect the
# plain texts that get chunked for keyword generation.
segments = result["segments"]
extracted_data = [
    {field: segment[field] for field in ("start", "end", "text")}
    for segment in segments
]
data = [entry["text"] for entry in extracted_data]

def split_array(arr, max_size=20):
    """Break ``arr`` into consecutive slices of at most ``max_size`` items.

    The slices are returned in order as a list; the last one may be shorter
    than ``max_size``. An empty ``arr`` yields an empty list.
    """
    chunk_starts = range(0, len(arr), max_size)
    return [arr[offset:offset + max_size] for offset in chunk_starts]

# Leftover sample list; nothing else in the visible notebook reads it.
my_array = list(range(100))  # Example array with 100 elements
# Chunk the subtitle texts into groups of at most 20 sentences; the keyword
# generation cell sends one chunk per request.
split_arrays = split_array(data, max_size=20)

### Generate keywords for sentences in subtitles using Ollamalab

In [None]:
import getpass
import json
import os
import re

from openai import OpenAI

# Never store the API key in the notebook: read it from the environment and
# fall back to an interactive prompt. A key that was previously committed in
# this cell should be treated as leaked and rotated.
OLLAMA_OPENAI_API_KEY = os.environ.get("OLLAMA_OPENAI_API_KEY") or getpass.getpass(
    "Ollamalab API key: "
)
os.environ["OLLAMA_OPENAI_API_KEY"] = OLLAMA_OPENAI_API_KEY

client = OpenAI(
    api_key=OLLAMA_OPENAI_API_KEY,
    base_url="https://ollamalab.nordintown.com/api",
)


def _parse_keyword_json(raw_reply):
    """Decode the model reply into a list of ``{"k": ..., "i": ...}`` dicts.

    The reply is parsed as JSON directly; if that fails, the first fenced
    code block (with or without a ``json`` tag) is extracted and parsed.

    Raises:
        ValueError: if the reply is neither JSON nor contains a fenced block
            (``json.JSONDecodeError`` is a ``ValueError`` subclass, so a
            malformed fenced block surfaces the same way).
    """
    try:
        return json.loads(raw_reply)
    except json.JSONDecodeError:
        pass
    fenced = re.search(r"```(?:json)?\s*(.*?)```", raw_reply, re.DOTALL)
    if fenced is None:
        raise ValueError(f"Model reply contains no JSON payload: {raw_reply!r}")
    return json.loads(fenced.group(1))


broll_info = []
# Global index of the first sentence of the current chunk. Tracking it from
# the real chunk lengths avoids hard-coding the chunk size in the offset.
segment_offset = 0
for chunk in split_arrays:
    # NOTE: the prompt text still states 20 sublists / indices 0 to 19, which
    # matches the chunk size used when building split_arrays.
    prompt = """This is a transcript from a shorts video with 20 sublists. Each sublist represents a segment of the conversation. Your task is to identify a keyword from each sublist that can be used to search for relevant b-roll footage. B-roll footage should complement the conversation topics and should be identified such that it can give relevant results when searched in pexels api. Please provide one keyword per sublist. Never skip any sublist and always give in order i.e from 0 to 19. Need output with keyword and list index. Strictly give json\n\n**Input**\n\n"""+str(chunk)+"""\n\n**Output format**\n\n[{"k": keyword1, "i":0},{"k":keyword2, "i":1}]"""
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
        model="llama3-chatbot-small-llama3-8b-8k",
    )
    broll_data = chat_completion.choices[0].message.content
    print("Data", broll_data)
    broll_data = _parse_keyword_json(broll_data)
    # Convert the chunk-local index returned by the model into a global
    # segment index.
    broll_info.extend(
        {"k": entry["k"], "i": segment_offset + entry["i"]} for entry in broll_data
    )
    segment_offset += len(chunk)

### Function to fetch pexels videos for keywords

In [None]:
import requests

import os

# Prefer a key supplied through the PEXELS_API_KEY environment variable so the
# real key never has to be saved in the notebook; the original placeholder
# remains the fallback value.
PEXELS_API_KEY = os.environ.get("PEXELS_API_KEY", "pexels-api-key")

def fetch_pexels_video(keyword, orientation="landscape"):
    """Return the first Pexels video found for ``keyword``.

    Args:
        keyword: Search term. It is passed through ``params`` so spaces and
            characters such as ``&`` are URL-encoded instead of corrupting
            the query string.
        orientation: Value sent as the ``orientation`` query parameter.

    Returns:
        ``{'video': <file link>, 'thumbnail': <image url>}`` built from the
        first search hit, or the string ``"Invalid keyword"`` when the search
        has no usable result.

    Raises:
        requests.HTTPError: if the API answers with an error status (for
            example a rejected API key).
    """
    response = requests.get(
        "https://api.pexels.com/videos/search",
        params={"query": keyword, "orientation": orientation, "size": "medium"},
        headers={"Authorization": PEXELS_API_KEY},
        timeout=30,  # do not let one stalled request hang the notebook
    )
    response.raise_for_status()
    data = response.json()

    videos = data.get("videos") or []
    if data.get("total_results", 0) > 0 and videos:
        video_info = videos[0]
        video_files = video_info.get("video_files") or []
        if video_files:
            return {
                "video": video_files[0]["link"],
                "thumbnail": video_info.get("image"),
            }
    return "Invalid keyword"

### Randomly select 50% of keywords and fetch pexels video url for them

In [None]:
import random

# Pick half of the keyword entries (rounded down) at random.
num_to_select = int(len(broll_info) * 0.5)
enumerated_list = list(enumerate(broll_info))
selected_with_indices = random.sample(enumerated_list, num_to_select)
selected_elements = [elem for _, elem in selected_with_indices]
selected_indices = [index for index, _ in selected_with_indices]

for element in selected_elements:
    # Attach the video to the segment named by the entry's own global index
    # "i" rather than by its position in broll_info: the two differ as soon
    # as the model skips or adds a keyword.
    segment_index = element["i"]
    if not isinstance(segment_index, int) or not 0 <= segment_index < len(extracted_data):
        # The model returned an index with no matching subtitle segment.
        continue
    extracted_data[segment_index]["video"] = fetch_pexels_video(element["k"])

### Download pexels video and stitch the final video using original and b-roll pexels videos

In [None]:
import os
import requests
from moviepy.editor import VideoFileClip, concatenate_videoclips, concatenate_audioclips
from tempfile import TemporaryDirectory
from moviepy.video.fx.all import resize

def download_video(url, temp_dir):
    """Stream the file at ``url`` into ``temp_dir`` and return its local path.

    Args:
        url: Direct link to the video file.
        temp_dir: Existing directory that receives the download.

    Returns:
        Path of the written file inside ``temp_dir``.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    from urllib.parse import urlparse

    # Name the file after the URL *path* only, so a query string
    # ("clip.mp4?s=...") does not end up in the filename and hide the extension.
    filename = os.path.basename(urlparse(url).path) or "broll_video.mp4"
    local_filename = os.path.join(temp_dir, filename)
    with requests.get(url, stream=True, timeout=60) as r:
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:  # skip empty keep-alive chunks
                    f.write(chunk)
    return local_filename

def process_broll_clip(b_roll_clip, segment_duration, original_audio, start, target_size=None):
    """Fit a b-roll clip to one segment of the original video.

    The clip is looped when it is shorter than the segment, trimmed to
    ``segment_duration``, resized, and given the matching slice of the
    original audio.

    Args:
        b_roll_clip: Clip to adapt.
        segment_duration: Length of the segment in seconds.
        original_audio: Audio track of the original video.
        start: Start time of the segment inside the original video.
        target_size: Optional ``(width, height)`` to resize to. When omitted,
            the size is read from the module-level ``original_clip`` that the
            stitching loop sets, which keeps existing calls working.

    Returns:
        The adapted clip carrying the original audio for the segment.

    Raises:
        ValueError: if the b-roll clip reports no positive duration.
    """
    if target_size is None:
        # Backward-compatible fallback on the global set by the stitching loop.
        target_size = (original_clip.w, original_clip.h)

    b_roll_duration = b_roll_clip.duration
    if not b_roll_duration or b_roll_duration <= 0:
        raise ValueError("b-roll clip has no usable duration")

    if b_roll_duration < segment_duration:
        # One loop more than strictly needed; the trim below removes the excess.
        num_loops = int(segment_duration / b_roll_duration) + 1
        b_roll_clip = concatenate_videoclips([b_roll_clip] * num_loops)
    b_roll_clip = b_roll_clip.subclip(0, segment_duration)

    b_roll_clip = resize(b_roll_clip, newsize=target_size)

    # Set audio from the original video to the b-roll clip
    return b_roll_clip.set_audio(original_audio.subclip(start, start + segment_duration))

def concatenate_clips_with_audio(clips):
    """Join ``clips`` into a single video and rebuild its audio track.

    The video is composed from every clip. When at least one clip carries
    audio, the audio tracks of those clips are concatenated and set on the
    composed video; otherwise the composed video is returned unchanged.
    """
    audio_tracks = [clip.audio for clip in clips if clip.audio is not None]
    stitched = concatenate_videoclips(list(clips), method="compose")

    if not audio_tracks:
        return stitched

    return stitched.set_audio(concatenate_audioclips(audio_tracks))

# Load the original video
original_video_path = 'video.mp4'
original_video = VideoFileClip(original_video_path)
original_audio = original_video.audio

with TemporaryDirectory() as temp_dir:
    final_clips = []
    # Readers opened on downloaded b-roll files. They must be closed before the
    # temporary directory is removed, otherwise file handles leak and the
    # cleanup can fail on platforms that refuse to delete open files.
    b_roll_sources = []

    try:
        for segment in extracted_data:
            start = segment['start']
            end = segment['end']
            segment_duration = end - start

            # process_broll_clip reads the frame size from this module-level name.
            original_clip = original_video.subclip(start, end)

            # Segments either have no 'video' entry, the "Invalid keyword"
            # marker string, or a dict with the b-roll link.
            b_roll = segment.get('video')
            if isinstance(b_roll, dict):
                print("Segment", segment)
                b_roll_video_path = download_video(b_roll['video'], temp_dir)
                b_roll_source = VideoFileClip(b_roll_video_path)
                b_roll_sources.append(b_roll_source)

                b_roll_clip = process_broll_clip(b_roll_source, segment_duration, original_audio, start)

                final_clips.append(b_roll_clip)
            else:
                final_clips.append(original_clip)

        final_video = concatenate_clips_with_audio(final_clips)

        # Write while the downloaded files still exist in temp_dir.
        final_video.write_videofile('final_video_with_broll.mp4', audio_codec='aac')
    finally:
        for b_roll_source in b_roll_sources:
            b_roll_source.close()