**INPUT** : Raw video

>


**OUTPUT** : A short, edited video that gives a contextual glimpse of the original

**Install all the required packages**



In [None]:
!pip install langchain
!pip install openai
!pip install elevenlabs
!pip install openai-whisper

In [None]:
import getpass
import os
from io import BytesIO

import cv2
import elevenlabs
import moviepy.editor as mp
import numpy as np
import requests
import whisper
from langchain import PromptTemplate
from langchain.chains import LLMChain
from langchain.llms import OpenAI
from langchain.utilities.dalle_image_generator import DallEAPIWrapper
from PIL import Image

In [None]:
# Mount Google Drive at /content/drive; the input video is read from there and the
# generated transcript, summary, audio and video files are written back under it.
from google.colab import drive
drive.mount('/content/drive')

**Input the Video**

In [None]:
# Open the source video from Drive. `clip` is the full-length video and is reused
# later when cutting one segment per summary part.
source_video_path = "/content/drive/MyDrive/The basics about_ Coffee.mp4"
clip = mp.VideoFileClip(source_video_path)

# Embed the first 20 seconds inline as a quick visual check of the input.
clip1 = clip.subclip(0, 20)
clip1.ipython_display(width=300)

**Extract the audio**

In [None]:
# Read the audio track of the source video and dump it to audio.wav in the current
# working directory; that file is what gets transcribed in the next step.
source_audio_path = "/content/drive/MyDrive/The basics about_ Coffee.mp4"
audio_clip = mp.AudioFileClip(source_audio_path)
audio_clip.write_audiofile("audio.wav")

# Inline preview of the first 20 seconds of the extracted audio.
audio_clip1 = audio_clip.subclip(0, 20)
audio_clip1.ipython_display(width=300)

**Transcribe the audio to text**

In [None]:
# Transcribe the extracted audio with the Whisper "base" model.
# `result` is kept because a later cell writes result['text'] out again.
model = whisper.load_model("base")
result = model.transcribe("audio.wav")

# Persist the plain transcript text next to audio.wav (current working directory).
transcript_text = result['text']
with open("transcription.txt", "w") as transcript_file:
    transcript_file.write(transcript_text)

In [None]:
# Load the OpenAI API key without embedding it in the notebook.
# SECURITY: a literal key used to be hard-coded in this cell. Treat that key as leaked
# and revoke it; never commit secrets to a notebook.
# Resolution order: an OPENAI_API_KEY environment variable already set for the kernel,
# otherwise an interactive prompt whose input is not echoed.
# The variable keeps its original name (`openapi_key`) because later cells pass it on.
openapi_key = os.environ.get('OPENAI_API_KEY') or getpass.getpass('OpenAI API key: ')
os.environ['OPENAI_API_KEY'] = openapi_key

In [None]:
# Load the saved transcript back into `text`, which is what gets summarized below.
with open('transcription.txt', 'r') as transcript_file:
    text = transcript_file.read()

# Show only the first 285 characters as a preview instead of the whole transcript.
preview = text[:285]
print(preview)

**Generate Summary**

In [None]:
# Completion model used for summarization.
# The original `text-davinci-003` has been retired by OpenAI, so requests to it fail;
# `gpt-3.5-turbo-instruct` is the completion-style replacement.
SUMMARY_MODEL = 'gpt-3.5-turbo-instruct'

# temperature=0.6 leaves some variation in the wording of the summary.
llm = OpenAI(temperature=0.6, model_name=SUMMARY_MODEL, openai_api_key=openapi_key)

# Prompt sent to the model; `{text}` is replaced with the full transcript.
template = """
%INSTRUCTIONS:
Please summarize the following text which is transcript of a video in short.
Respond in a manner so that anyone would understand.

%TEXT:
{text}
"""

# Create a LangChain prompt template that we can insert values to later
prompt = PromptTemplate(
    input_variables=["text"],
    template=template,
)

In [None]:
# Substitute the transcript into the template and show the exact prompt being sent.
final_prompt = prompt.format(text=text)
print(final_prompt)

# Run the completion and show the generated summary; `output` is written to
# summary.txt in a later cell.
output = llm(final_prompt)
print(output)

In [None]:
# Create the project folder on Drive and make it the working directory, so the
# relative writes that follow (transcription.txt, summary.txt) land inside it.
# The Drive root is spelled 'MyDrive' to match every other Drive path in this
# notebook (the input video path and the TLDW/audio and TLDW/Videos folders);
# this cell previously used 'My Drive', which did not match them.
folder_name = 'TLDW'
folder_path = os.path.join('/content/drive/MyDrive', folder_name)
os.makedirs(folder_path, exist_ok=True)
os.chdir(folder_path)

In [None]:
# Write the transcript again, this time relative to the current working directory
# (the project folder once the previous cell has run); the question-answering
# section later loads transcription.txt from `folder_path`.
transcript_text = result['text']
with open("transcription.txt", "w") as transcript_file:
    transcript_file.write(transcript_text)

In [None]:
# Store the generated summary in the current working directory; the next section
# reads summary.txt back line by line to split it into sentences.
with open('summary.txt', 'w') as summary_file:
    summary_file.write(output)

**Split the summary**

In [None]:
# Build `sentences` as a list of lists: one inner list per line of summary.txt,
# each holding that line's fragments split on ". ". Newline characters are not
# stripped, so a fragment may end with "\n".
# NOTE(review): later cells read only sentences[1], i.e. the second line of the
# file — confirm the summary always puts its text on that line.
sentences = []
with open("summary.txt", 'r') as data_file:
    sentences = [summary_line.split(". ") for summary_line in data_file]

In [None]:
# Display the parsed structure (one inner list of fragments per line of summary.txt).
sentences

In [None]:
# Create <project folder>/audio and switch the working directory to it, so files
# saved with a bare filename afterwards end up in this folder.
folder_name = 'audio'
folder_path_audio = os.path.join(folder_path, folder_name)
os.makedirs(folder_path_audio, exist_ok=True)
os.chdir(folder_path_audio)

**Generate audio**

In [None]:
# Load the ElevenLabs API key without embedding it in the notebook.
# SECURITY: a literal key used to be hard-coded in this cell. Treat that key as leaked
# and revoke it.
# Resolution order: the ELEVEN_API_KEY environment variable if set, otherwise an
# interactive prompt whose input is not echoed.
elevenlabs_key = os.environ.get("ELEVEN_API_KEY") or getpass.getpass("ElevenLabs API key: ")
elevenlabs.set_api_key(elevenlabs_key)

# Synthesize one narration file per summary fragment. Files are numbered from 1
# (audio_1.mp3, audio_2.mp3, ...) because the video-assembly cell looks them up by
# that name. They are written to `folder_path_audio` explicitly rather than to
# whatever the working directory happens to be.
for part_number, sentence in enumerate(sentences[1], start=1):
    audio = elevenlabs.generate(
        text=sentence,
        voice="Dave",  # customize the voice according to requirements
    )
    elevenlabs.save(audio, os.path.join(folder_path_audio, f"audio_{part_number}.mp3"))

In [None]:
# Create <project folder>/Videos for the rendered clips. Unlike the audio folder
# cell, the working directory is left unchanged here.
folder_name = 'Videos'
folder_path_video = os.path.join(folder_path, folder_name)
os.makedirs(folder_path_video, exist_ok=True)

**Generate video for each part of summary (video clips+audio)**

In [None]:
from moviepy.editor import *

In [None]:
# Start offsets into the source video, one per summary part: entry i is used as the
# start of the segment paired with narration file audio_{i+1}.mp3.
# Presumably seconds, since they are passed straight to clip.subclip — TODO confirm.
# NOTE(review): the values look hand-picked for this particular video, and there are
# only four of them, so a summary with more than four parts has no start time for the
# extra parts — confirm how these should be chosen.
video_timestamps = [5,48,125,316]

In [None]:
# Render one video per summary part: a silent segment of the source video starting at
# the part's timestamp, as long as that part's narration, with the narration as audio.

# Reuse the folders created earlier instead of repeating literal Drive paths, so the
# clips are read from and written to exactly where the previous cells put them.
# `output_folder` keeps its name because the concatenation cell uses it.
output_folder = folder_path_video
audio_folder = folder_path_audio

# Fail up front with a clear message rather than with an IndexError part-way through
# the (slow) rendering loop.
summary_parts = sentences[1]
if len(summary_parts) > len(video_timestamps):
    raise ValueError(
        f"{len(summary_parts)} summary parts but only {len(video_timestamps)} "
        "video timestamps; add a start time to video_timestamps for every part."
    )

for part_number, start_time in enumerate(video_timestamps[:len(summary_parts)], start=1):
    # Load the narration for this part
    audio_path = os.path.join(audio_folder, f'audio_{part_number}.mp3')
    audio = mp.AudioFileClip(audio_path)
    try:
        # Cut a segment exactly as long as the narration; set_audio replaces the
        # segment's original soundtrack with the narration.
        segment = clip.subclip(start_time, start_time + audio.duration)
        final_video = segment.set_audio(audio)

        # Export the final video
        output_path = os.path.join(output_folder, f'video{part_number}.mp4')
        final_video.write_videofile(output_path, codec='libx264')
    finally:
        # Release the narration's file reader. `segment` is not closed because it is
        # cut from `clip`, which is still needed for the remaining parts.
        audio.close()

    print("Video with transitions saved to", output_path)

**Concatenate all the videos and get a final video**

In [None]:
# Paths of the per-part videos, in order: video1.mp4 ... videoN.mp4, one per fragment
# in sentences[1].
video_paths = [
    os.path.join(output_folder, f'video{part_number}.mp4')
    for part_number in range(1, len(sentences[1]) + 1)
]

# Load the video clips
video_clips = [mp.VideoFileClip(path) for path in video_paths]

# Concatenate the video clips
final_video = mp.concatenate_videoclips(video_clips, method="compose")

# Export the final video
output_path = os.path.join(output_folder, 'combined_video.mp4')
final_video.write_videofile(output_path, codec='libx264')

In [None]:
# Preview video here
clip_final= final_video.subclip(0,25)
clip_final.ipython_display(width= 300)

A **vector store** is used: embeddings of the transcription are stored in it.
A similarity search over the store then retrieves the relevant text, so the notebook can answer questions related to the video.
>
Example - Query: What is the video about?
>
  This provides the summary of the video.

In [None]:
pip install faiss-gpu

In [None]:
from langchain import OpenAI
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings

In [None]:
# Rebind `llm` for question answering: temperature=0 here, whereas the summarization
# LLM defined earlier used 0.6. No model_name is passed, so the library's default
# model is used.
llm = OpenAI(temperature=0, openai_api_key=openapi_key)

In [None]:
# Load the transcript saved in the project folder as LangChain document(s).
transcription_path = os.path.join(folder_path, 'transcription.txt')
doc = TextLoader(transcription_path).load()

# Quick sanity check on what was loaded.
print(f"You have {len(doc)} document")
print(f"You have {len(doc[0].page_content)} characters in that document")

In [None]:
# Embed the loaded document(s) with OpenAI embeddings and index them in a FAISS store.
# NOTE(review): `doc` comes straight from TextLoader with no splitting step in this
# notebook, so the index appears to hold the whole transcript as a single entry —
# consider chunking the text first so retrieval can select relevant passages; confirm.
embeddings = OpenAIEmbeddings(openai_api_key=openapi_key)
docsearch = FAISS.from_documents(doc, embeddings)

In [None]:
# Question-answering chain: documents fetched by the FAISS retriever are passed to
# `llm` using the "stuff" chain type.
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=docsearch.as_retriever())

In [None]:
# Prompt for a question about the video and run it through the QA chain; the answer is
# shown as the cell's output. Note that this cell waits for keyboard input.
query = input("query: ")
qa.run(query)