## Application1 : MRAG with Videos :

In [None]:
%pip install llama-index-vector-stores-lancedb
%pip install llama-index-multi-modal-llms-openai
%pip install llama-index-embeddings-clip
%pip install git+https://github.com/openai/CLIP.git
%pip install llama-index-readers-file

In [None]:
%pip install llama_index
%pip install -U openai-whisper # For Transcription of the Video audio 

In [None]:
%pip install lancedb # Vector database to store Images and Text embeddings
%pip install moviepy # To edit Videos
%pip install pytube # To download videos from YouTube
%pip install pydub  # To manipulate audio like Loading exporting editing...
%pip install SpeechRecognition # Speech to text model
%pip install ffmpeg-python 
%pip install soundfile # For reading and writing audio files in many extensions.
%pip install torch torchvision
%pip install matplotlib scikit-image
%pip install ftfy regex tqdm 

In [None]:
import ftfy

# Sample text containing mojibake (characters decoded with the wrong encoding).
broken_text = "This is an example of mojibake: cafÃ©, El NiÃ±o."

# Repair the text with ftfy, then display the repaired version.
fixed_text = ftfy.fix_text(broken_text)
print(fixed_text)
# Expected output: "This is an example of mojibake: café, El Niño."


In [None]:
from moviepy.editor import VideoFileClip
from pathlib import Path
import speech_recognition as sr
from pytube import YouTube
from pprint import pprint
from PIL import Image
import matplotlib.pyplot as plt

In [None]:
from dotenv import load_dotenv
import os

# Absolute location of the .env file that is handed to load_dotenv.
dotenv_path = '/teamspace/studios/this_studio/LLM_Courses/Pratiques/RAG/Mutltimodal_RAG/.env'
load_dotenv(dotenv_path=dotenv_path)

# Look up the NVIDIA LLM API key in the process environment (None when unset).
NVIDIA_LLM_API_KEY = os.environ.get("NVIDIA_LLM_API_KEY")

In [None]:
# Source video and the on-disk layout used by the rest of the notebook.
video_url = "https://youtu.be/dh8Rxhf7cLU"  # Cleaned URL
current_diractory =  "/teamspace/studios/this_studio/LLM_Courses/Pratiques/RAG/Mutltimodal_RAG"
data_path = os.path.join(current_diractory, "data")
# download_video() saves the file as "input_vid.mp4"; the name must match
# exactly because file names are case-sensitive on Linux.
video_path = os.path.join(data_path, "input_vid.mp4")
Images_path = os.path.join(data_path, "Images")  # extracted video frames
audio_path = os.path.join(data_path, "audio.wav")  # extracted audio track

In [None]:
from pytube import YouTube
from moviepy.editor import VideoFileClip

def download_video(url, output_path, filename="input_vid.mp4"):
  """Download a YouTube video and return basic metadata about it.

  Args:
    url: URL of the YouTube video.
    output_path: Directory the video file is written into.
    filename: Name of the saved file inside ``output_path``.

  Returns:
    A dict with the keys "Author", "Title" and "Views".

  Raises:
    ValueError: If no stream is available to download for ``url``.
  """
  yt = YouTube(url)
  metadata = {"Author": yt.author, "Title": yt.title, "Views": yt.views}

  # get_highest_resolution() may return None when no stream matches; fail
  # with a clear message instead of an AttributeError on None.
  stream = yt.streams.get_highest_resolution()
  if stream is None:
    raise ValueError(f"No downloadable stream found for {url!r}")

  stream.download(output_path=output_path, filename=filename)
  return metadata

def video_to_images(video_path, output_folder, fps=0.2):
  """Extract frames from a video and save them as numbered PNG files.

  Frames are written as ``frame0000.png``, ``frame0001.png``, ... inside
  ``output_folder``.

  Args:
    video_path: Path of the video file to read.
    output_folder: Directory that receives the PNG frames; created if missing.
    fps: Frames sampled per second of video (0.2 means one frame every 5 s).
  """
  # The frame writer does not create the target directory itself.
  os.makedirs(output_folder, exist_ok=True)

  clip = VideoFileClip(video_path)
  try:
    clip.write_images_sequence(
        os.path.join(output_folder, "frame%04d.png"), fps=fps
    )
  finally:
    # Release the underlying reader even if writing fails.
    clip.close()

def video_to_audio(video_path, output_audio_path):
  """Extract the audio track of a video and write it to an audio file.

  Args:
    video_path: Path of the video file to read.
    output_audio_path: Path of the audio file to write.

  Raises:
    ValueError: If the video has no audio track.
  """
  clip = VideoFileClip(video_path)
  try:
    audio = clip.audio
    # clip.audio is None for videos without sound; report that explicitly.
    if audio is None:
      raise ValueError(f"Video has no audio track: {video_path!r}")
    audio.write_audiofile(output_audio_path)
  finally:
    # Release the underlying reader even if writing fails.
    clip.close()

def audio_to_text(audio_path):
  """Transcribe an audio file to text with the Whisper recognizer.

  Args:
    audio_path: Path of the audio file to transcribe.

  Returns:
    The transcribed text, or an empty string when the speech could not be
    understood.
  """
  recognizer = sr.Recognizer()

  # Read the whole file into memory; the file can be closed before recognition.
  with sr.AudioFile(audio_path) as source:
    audio_data = recognizer.record(source)

  try:
    return recognizer.recognize_whisper(audio_data)
  except sr.UnknownValueError:
    print("Speech recognition could not understand the audio.")
    # Return an empty transcript rather than hitting an unbound local.
    return ""

In [None]:
# Extract still frames from the input video into the images folder.
video_to_images(video_path,Images_path)

In [None]:
# Extract the audio track of the input video into the WAV file at audio_path.
video_to_audio(video_path, audio_path)

In [None]:
# Transcribe the extracted audio and save the text inside the data folder.
transcription = audio_to_text(audio_path)
# Explicit UTF-8 so non-ASCII characters in the transcript are written reliably;
# the with-block closes the file, so no separate close() call is needed.
with open(os.path.join(data_path, "transcription.txt"), "w", encoding="utf-8") as file:
    file.write(transcription)
print("Text data saved to file")

In [None]:
from llama_index.core.indices import MultiModalVectorStoreIndex
from llama_index.core import SimpleDirectoryReader
from llama_index.core import StorageContext
from llama_index.vector_stores.lancedb import LanceDBVectorStore

- We initialize two vector stores: one for text embeddings and another for image embeddings.

In [None]:
# Both vector stores share one LanceDB URI and differ only in table name.
lancedb_uri = "lancedb"
text_store = LanceDBVectorStore(uri=lancedb_uri, table_name="text_collection")
image_store = LanceDBVectorStore(uri=lancedb_uri, table_name="image_collection")

In [None]:
# Bundle the text and image vector stores into a single storage context.
storage_context = StorageContext.from_defaults(
    vector_store=text_store,
    image_store=image_store,
)

In [None]:
# Load the files found in the extracted-frames folder as documents.
# NOTE(review): transcription.txt is written to data_path, not Images_path,
# so it does not appear to be loaded here - confirm this is intended.
image_reader = SimpleDirectoryReader(Images_path)
documents = image_reader.load_data()

In [None]:
# Number of documents loaded from the images folder.
len(documents)

In [None]:
# Build the multi-modal index from the loaded documents, using the storage
# context that holds the text and image vector stores.
index = MultiModalVectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)