<a href="https://colab.research.google.com/github/Aya11ali/Shouf/blob/main/VideoSummary.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Video Summary

In [5]:
!pip install -q moviepy faster-whisper langdetect
!pip install -q transformers einops accelerate langchain bitsandbytes sentencepiece langchain-community

In [29]:
import subprocess
import os
from abc import ABC, abstractmethod
from faster_whisper import WhisperModel

from langdetect import detect, DetectorFactory
from langdetect.lang_detect_exception import LangDetectException

from langchain import HuggingFacePipeline, PromptTemplate, LLMChain
from transformers import AutoTokenizer, pipeline
import transformers
import torch
from concurrent.futures import ThreadPoolExecutor

In [7]:
import logging

logging.basicConfig(level=logging.INFO)
logging.getLogger("huggingface_hub").setLevel(logging.ERROR)

In [8]:
import warnings
warnings.filterwarnings("ignore")

In [9]:
class AudioExtractionError(Exception):
    """Custom exception for audio extraction errors."""
    pass

In [10]:
class IAudioExtraction(ABC):
  @abstractmethod
  def __init__(self,video_path:str , audio_path:str = 'extracted_audio.mp3'):
    pass

  @abstractmethod
  def check_video_path(self)-> None:
    pass

  @abstractmethod
  def extract_audio(self) -> None:
    pass

  @abstractmethod
  def check_audio_validation(self)-> bool:
    pass

class AudioExtraction(IAudioExtraction):
  def __init__(self,video_path:str , audio_path:str = 'extracted_audio.mp3'):
    self.video_path = video_path
    self.audio_path = audio_path
    self.result = None
    self.extract_audio()

  def check_video_path(self) -> None:
    if not os.path.exists(self.video_path):
      raise ValueError(f"Video path {self.video_path} does not exist")

  def extract_audio(self) -> None:
    self.check_video_path()
    command = [
          "ffmpeg",
          "-y",                    # Overwrite output
          "-i", self.video_path,
          "-vn",                   # Disable video recording
          "-acodec", "libmp3lame",
          self.audio_path
      ]

    self.result = subprocess.run(command, stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE, text=True)
    self.check_audio_validation()


  def check_audio_validation(self) -> bool:
      if self.result.returncode != 0:
          raise AudioExtractionError(f"Failed to extract audio from video. Error: {self.result.stderr}")

      logging.info("Audio extracted successfully")
      return True



In [19]:
class IAudio_Model_Loader (ABC):
  @abstractmethod
  def __init__ (self,model_size="medium",compute_type="int8"):
    pass

  @abstractmethod
  def load_model (self):
    pass

class Audio_Model_Loader (IAudio_Model_Loader):
  def __init__ (self,model_size="medium",compute_type="int8"):
    self.model_size = model_size
    self.compute_type = compute_type
    self.model = None
    self.load_model()

  def load_model (self):
    self.model = WhisperModel(self.model_size, compute_type=self.compute_type)


In [12]:
class IAudio_Transcriber(ABC):
  @abstractmethod
  def __init__ (self,audio_path):
    pass

  @abstractmethod
  def transcribe_audio(self):
    pass

class Audio_Transcriber(IAudio_Transcriber):
  def __init__ (self,audio_path,model):
    self.audio_path = audio_path
    self.segments = None
    self.model = model
    self.transcription=""
    self.transcribe_audio()

  def transcribe_audio(self):
    self.segments, _ = self.model.transcribe(self.audio_path, beam_size=5)

    for segment in self.segments:
        self.transcription += segment.text.strip() + " "

    self.transcription = self.transcription.strip()

In [13]:
DetectorFactory.seed = 0

class IDetectLanguage(ABC):
  @abstractmethod
  def detect_language(self,text:str)-> str:
    pass

class DetectLanguage(IDetectLanguage):
  def detect_language(self,text:str)-> str:
    try:

      return detect(text)
    except LangDetectException:
      return "Unknown"


In [15]:
class VideoToTextController(ABC):
  def __init__ (self, video_path : str , audio_path : str = 'extracted_audio.mp3'):
    self.video_path = video_path
    self.audio_path = audio_path

    self.audio_extraction = AudioExtraction(self.video_path,self.audio_path)
    self.audio_model_loader = Audio_Model_Loader()

    self.audio_transcriber = Audio_Transcriber(self.audio_path,self.audio_model_loader.model)


In [None]:
if __name__ == "__main__":
  videoText = VideoToTextController("/content/22.mp4")
  print(videoText.audio_transcriber.transcription)

In [23]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [27]:
class ISummarizer(ABC):
    @abstractmethod
    def __init__(self, text: str):
        pass

    @abstractmethod
    def summarize_text(self) -> str:
        pass


# Concrete Summarizer Class
class Summarizer(ISummarizer):
    def __init__(self, text: str, model: str = "meta-llama/Llama-2-7b-chat-hf"):
        self.text = text
        self.model = model

        try:
            # Initialize the tokenizer and model pipeline
            self.tokenizer = AutoTokenizer.from_pretrained(model)
            self.pipeline = pipeline(
                "text-generation",
                model=self.model,
                tokenizer=self.tokenizer,
                torch_dtype=torch.float16,  # Using FP16 for better performance
                device=0,  # Use the first GPU (can adjust as needed)
                trust_remote_code=True,
                max_length=512,  # Limiting max_length for faster generation
                do_sample=True,
                top_k=10,
                num_return_sequences=1,
                eos_token_id=self.tokenizer.eos_token_id
            )

            # Create the PromptTemplate and LLMChain
            self.template = """
                              You are a helpful summarization assistant.

                              Write a clear and concise **paragraph summary** of the following text, delimited by triple backticks.
                              The summary should capture the **main ideas** and **important details** without exceeding **300 words**.

                              ```{text}```

                              PARAGRAPH SUMMARY (max 300 words):
                          """
            prompt = PromptTemplate(template=self.template, input_variables=["text"])
            llm = HuggingFacePipeline(pipeline=self.pipeline)
            self.llm_chain = LLMChain(prompt=prompt, llm=llm)
        except Exception as e:
            raise RuntimeError(f"Failed to initialize model or tokenizer: {e}")

    def summarize_text(self) -> str:
        """
        Generates the summary of the text.
        """
        try:
            # Use LLMChain to generate the summary
            summary = self.llm_chain.run({"text": self.text})
            return summary
        except Exception as e:
            raise RuntimeError(f"Failed to summarize text: {e}")


In [None]:
text_to_summarize = """
                    Every system is built from a domain-specific language designed by the programmers to
                    describe that system. Functions are the verbs of that language, and classes are the nouns.
                    This is not some throwback to the hideous old notion that the nouns and verbs in a require
                    ments document are the first guess of the classes and functions of a system. Rather, this is
                    a much older truth. The art of programming is, and has always been, the art of language
                    design.
                    Master programmers think of systems as stories to be told rather than programs to
                    be written. They use the facilities of their chosen programming language to construct a
                    much richer and more expressive language that can be used to tell that story. Part of that
                    domain-specific language is the hierarchy of functions that describe all the actions that
                    take place within that system. In an artful act of recursion those actions are written to
                    use the very domain-specific language they define to tell their own small part of the
                    story.
                    This chapter has been about the mechanics of writing functions well. If you follow
                    the rules herein, your functions will be short, well named, and nicely organized.
                    """
summarizer = Summarizer(text_to_summarize)
summary = summarizer.summarize_text()
print(summary)


config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

Device set to use cpu
  llm = HuggingFacePipeline(pipeline=self.pipeline)
  self.llm_chain = LLMChain(prompt=prompt, llm=llm)
  summary = self.llm_chain.run({"text": self.text})
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
