# Video to audio

In [None]:
from moviepy.editor import VideoFileClip
import io
import tempfile

def extract_audio_as_bytes(video_path):
    """Extract the audio track of a video and return it as in-memory MP3 data.

    The audio is encoded to an ``.mp3`` file inside a private temporary
    directory (the encoder writes to a path, not to a file object), read back
    into memory, and the directory is removed before returning.

    Args:
        video_path: Path to the input video file.

    Returns:
        ``io.BytesIO`` positioned at the start of the MP3 data, or ``None`` if
        the video has no audio track or any step fails (the error is printed).
    """
    import os  # local import: this cell's import block does not bring in `os`

    video_clip = None
    audio_clip = None
    try:
        video_clip = VideoFileClip(video_path)
        audio_clip = video_clip.audio
        if audio_clip is None:
            # Without this guard the failure surfaces as an opaque
            # "'NoneType' object has no attribute 'write_audiofile'".
            print("An error occurred: the video has no audio track")
            return None

        # Use a temporary directory rather than an open NamedTemporaryFile:
        # a named temp file that is still open cannot be reopened by name on
        # Windows, which is exactly what the encoder has to do.
        with tempfile.TemporaryDirectory() as tmpdir:
            mp3_path = os.path.join(tmpdir, "audio.mp3")
            audio_clip.write_audiofile(mp3_path, codec="libmp3lame")
            with open(mp3_path, "rb") as mp3_file:
                audio_data = mp3_file.read()

        return io.BytesIO(audio_data)
    except Exception as e:
        print(f"An error occurred: {e}")
        return None
    finally:
        # Release the clips on every path, including failures, so their
        # underlying readers are not leaked.
        for clip in (video_clip, audio_clip):
            if clip is not None:
                try:
                    clip.close()
                except Exception as close_error:
                    print(f"An error occurred while closing a clip: {close_error}")

# Example usage: pull the audio of a sample video into memory.
audio_bytes_io = extract_audio_as_bytes("sample.mp4")
if audio_bytes_io is not None:  # None is the function's failure signal
    print("Audio extracted to memory (bytes).")


# audio to text

In [None]:
import speech_recognition as sr

def audio_to_text(audio_path, language="en-US"):
    """Transcribe the speech contained in an audio file.

    The whole file is recorded through ``sr.AudioFile`` and handed to
    ``Recognizer.recognize_google``.

    Args:
        audio_path (str): Location of the audio file (WAV, FLAC, AIFF, etc.
            recommended).
        language (str): Language code forwarded to the recognizer
            (defaults to English, ``'en-US'``).

    Returns:
        str: The recognized text, or a human-readable error message when the
        audio cannot be understood, the request fails, or anything else goes
        wrong.
    """
    speech_recognizer = sr.Recognizer()

    try:
        with sr.AudioFile(audio_path) as audio_source:
            recording = speech_recognizer.record(audio_source)
            return speech_recognizer.recognize_google(recording, language=language)
    except sr.UnknownValueError:
        return "Could not understand audio."
    except sr.RequestError as request_error:
        return f"Could not request results; {request_error}"
    except Exception as unexpected_error:
        return f"An error occurred: {unexpected_error}"


In [3]:
!pip install moviepy langgraph transformers torch


Collecting langgraph
  Downloading langgraph-0.4.1-py3-none-any.whl.metadata (7.9 kB)
Collecting langgraph-checkpoint<3.0.0,>=2.0.10 (from langgraph)
  Downloading langgraph_checkpoint-2.0.25-py3-none-any.whl.metadata (4.6 kB)
Collecting langgraph-prebuilt>=0.1.8 (from langgraph)
  Downloading langgraph_prebuilt-0.1.8-py3-none-any.whl.metadata (5.0 kB)
Collecting langgraph-sdk>=0.1.42 (from langgraph)
  Downloading langgraph_sdk-0.1.66-py3-none-any.whl.metadata (1.8 kB)
Collecting xxhash<4.0.0,>=3.5.0 (from langgraph)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  

In [1]:
!git lfs install

Git LFS initialized.


In [2]:
!git clone https://huggingface.co/microsoft/Phi-3.5-mini-instruct

Cloning into 'Phi-3.5-mini-instruct'...
remote: Enumerating objects: 52, done.[K
remote: Counting objects: 100% (48/48), done.[K
remote: Compressing objects: 100% (48/48), done.[K
remote: Total 52 (delta 18), reused 0 (delta 0), pack-reused 4 (from 1)[K
Unpacking objects: 100% (52/52), 525.56 KiB | 1.69 MiB/s, done.
Filtering content: 100% (3/3), 3.11 GiB | 16.78 MiB/s, done.
Encountered 1 file(s) that may not have been copied correctly on Windows:
	model-00001-of-00002.safetensors

See: `git lfs help smudge` for more details.


In [4]:
!pip install -U openai-whisper


Collecting openai-whisper
  Downloading openai-whisper-20240930.tar.gz (800 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/800.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m800.5/800.5 kB[0m [31m40.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tiktoken (from openai-whisper)
  Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m56.3 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: openai-whisper
  Building wheel for openai-whisper (pyproject.toml) ... [?25l[?25hdone
  Created wh

In [5]:
!pip install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [13]:
import os
import wave
import json
import tempfile

from moviepy.editor import VideoFileClip
from langgraph.graph import StateGraph, END
from langchain_core.runnables import RunnableLambda
from transformers import pipeline
from pydub import AudioSegment
from typing import TypedDict, Optional

import whisper  # ✅ Whisper added

# === Step 1: Extract Audio and Convert to Mono 16-bit ===
def extract_audio(state):
    """Extract the video's audio track into a mono, 16-bit WAV temp file.

    Args:
        state: Pipeline state; must contain ``"video_path"``.

    Returns:
        ``{"audio_path": <path>}`` on success (the file is created with
        ``delete=False``, so it persists for the later steps), or
        ``{"error": <message>}`` if any step fails.

    Raises:
        KeyError: If ``state`` has no ``"video_path"`` entry.
    """
    video_path = state["video_path"]
    wav_path = None
    try:
        clip = VideoFileClip(video_path)
        audio = None
        try:
            audio = clip.audio
            if audio is None:
                raise ValueError("video has no audio track")

            # Reserve a unique path, then close our own handle immediately:
            # the handle was previously leaked, and an open named temp file
            # cannot be reopened by name on Windows.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
                wav_path = temp_file.name
            audio.write_audiofile(wav_path)
        finally:
            # Release the clips on every path, not only on success.
            clip.close()
            if audio is not None:
                audio.close()

        # Convert to mono 16-bit WAV
        audio_segment = AudioSegment.from_file(wav_path)
        audio_segment = audio_segment.set_channels(1).set_sample_width(2)
        audio_segment.export(wav_path, format="wav")

        return {"audio_path": wav_path}
    except Exception as e:
        # Do not leave a partial temp file behind when extraction fails.
        if wav_path is not None and os.path.exists(wav_path):
            try:
                os.remove(wav_path)
            except OSError:
                pass  # best-effort cleanup; the original error is what matters
        return {"error": f"Audio extraction failed: {e}"}

# === Step 2: Transcribe with Whisper ===
def transcribe_audio_whisper(state):
    """Transcribe the extracted audio file with Whisper's "base" model.

    Args:
        state: Pipeline state. When it already carries an ``"error"`` entry,
            that message is forwarded and no transcription is attempted;
            otherwise it must contain ``"audio_path"``.

    Returns:
        ``{"text": <transcript>}`` on success, or ``{"error": <message>}``.
    """
    if "error" in state:
        # An earlier step failed: pass its message along untouched.
        return dict(error=state["error"])

    wav_file = state["audio_path"]
    try:
        transcription = whisper.load_model("base").transcribe(wav_file)
        return dict(text=transcription["text"])
    except Exception as exc:
        return dict(error=f"Whisper transcription failed: {exc}")

# === Step 3: Summarize Transcript ===
def summarize_transcript(state):
    """Summarize the transcript with the Phi-3.5-mini-instruct checkpoint.

    The model is loaded from ``/content/Phi-3.5-mini-instruct`` on every call.

    Args:
        state: Pipeline state; expected to contain ``"text"`` unless an
            earlier step failed.

    Returns:
        ``{"summary": <text>}`` on success, or ``{"error": <message>}`` when
        an earlier step failed (its message is forwarded unchanged) or the
        transcript is empty.
    """
    if "error" in state:
        # Forward the upstream message instead of replacing it with a generic
        # one, so the root cause (e.g. a failed extraction) reaches the caller.
        return {"error": state["error"]}

    text = state.get("text") or ""
    if not text.strip():
        return {"error": "Empty transcript."}

    prompt = f"Summarize the following transcript for a blog post:\n\n{text}\n\nSummary:"
    generator = pipeline("text-generation", model="/content/Phi-3.5-mini-instruct", device_map="auto", torch_dtype="auto")
    # return_full_text=False: by default the text-generation pipeline returns
    # the prompt followed by the completion, which would make the "summary"
    # contain the instruction and the entire transcript.
    result = generator(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        return_full_text=False,
    )[0]["generated_text"]
    return {"summary": result.strip()}

# === Step 4: Generate Blog Post ===
def generate_blog(state):
    """Write a blog post from the summary with Phi-3.5-mini-instruct.

    The model is loaded from ``/content/Phi-3.5-mini-instruct`` on every call.

    Args:
        state: Pipeline state; expected to contain ``"summary"`` unless an
            earlier step failed.

    Returns:
        ``{"blog_post": <text>}`` on success, or ``{"error": <message>}`` when
        an earlier step failed (its message is forwarded unchanged) or the
        summary is empty.
    """
    if "error" in state:
        # Forward the upstream message instead of masking it with a generic
        # one, so the caller sees which step actually failed.
        return {"error": state["error"]}

    summary = state.get("summary") or ""
    if not summary.strip():
        return {"error": "Empty summary."}

    prompt = f"""You are a blog writer. Use the following summary to write a detailed blog post.

Summary:
{summary}

The blog post should include:
- A compelling title
- An introduction
- Two or more sections discussing the main points
- A conclusion

Start writing the blog below:
"""

    generator = pipeline("text-generation", model="/content/Phi-3.5-mini-instruct", device_map="auto", torch_dtype="auto")
    # return_full_text=False: by default the pipeline echoes the prompt in
    # front of the completion, which would leak the instructions into the post.
    result = generator(
        prompt,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        top_k=50,
        num_return_sequences=1,
        return_full_text=False,
    )[0]["generated_text"]

    # Post-processing: start from first "# Title"
    start = result.find("# Title")
    blog = result[start:] if start != -1 else result

    return {"blog_post": blog}

# === State Schema ===
class BlogState(TypedDict, total=False):
    """State schema handed to ``StateGraph`` for the video-to-blog pipeline.

    ``total=False`` makes every key optional: the caller supplies only
    ``video_path`` and each step adds its own key as the run progresses.
    """
    video_path: str  # input: path of the video to process
    audio_path: str  # written by extract_audio: path of the WAV file
    text: str  # written by transcribe_audio_whisper: the transcript
    summary: str  # written by summarize_transcript
    blog_post: str  # written by generate_blog
    error: Optional[str]  # written by whichever step fails

# === LangGraph Pipeline ===
graph = StateGraph(BlogState)

# Pipeline steps as (node name, step function), listed in the order they run.
_steps = (
    ("extract_audio", extract_audio),
    ("transcribe", transcribe_audio_whisper),
    ("summarize", summarize_transcript),
    ("generate_blog", generate_blog),
)
for _node_name, _step_fn in _steps:
    graph.add_node(_node_name, RunnableLambda(_step_fn))

# Strictly linear flow: the first step is the entry point, every step feeds
# the next one, and the last step leads to END.
graph.set_entry_point(_steps[0][0])
_chain = [name for name, _ in _steps] + [END]
for _source, _target in zip(_chain, _chain[1:]):
    graph.add_edge(_source, _target)

compiled_graph = graph.compile()

# === Run Pipeline ===
if __name__ == "__main__":
    video_file = "/content/What is LangChain_.mp4"
    output = compiled_graph.invoke({"video_path": video_file})

    # Show the blog post, or the pipeline's error message if a step failed.
    # `or` (rather than nested .get defaults) also covers keys that are
    # present but None/empty, which would otherwise break the string handling.
    blog_output = output.get("blog_post") or output.get("error") or "No output."

    # Keep only the text after the first "Title:" marker, when there is one.
    _, marker, after_title = blog_output.partition("Title:")
    display_content = after_title.lstrip() if marker else blog_output

    print("\n=== FINAL BLOG POST (AFTER TITLE) ===\n")
    print(display_content)

    # Explicit UTF-8: generated text may contain characters that the
    # platform's default encoding cannot represent.
    with open("blog_post.md", "w", encoding="utf-8") as f:
        f.write(display_content)


MoviePy - Writing audio in /tmp/tmpvvyt7aer.wav




MoviePy - Done.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0



=== FINAL BLOG POST (AFTER TITLE) ===

Unleashing the Power of LLMs: Discovering Langchain's Role in App Development

Introduction

In the rapidly evolving landscape of technology, large language models (LLMs) have emerged as game-changers, offering unprecedented capabilities in natural language processing (NLP). However, harnessing the full potential of LLMs for business applications has always presented a challenge to developers. Enter Langchain, an open-source orchestration framework that is revolutionizing the way we integrate and leverage LLMs in our applications.

What is Langchain?

Langchain, launched by Harrison Chase in October 2022, has swiftly become the fastest-growing open-source project on GitHub by June 2023. This versatile framework provides a standardized interface for nearly any LLM, streamlining the process of building and integrating applications with data sources and workflows.

Langchain Components

At the core of Langchain's functionality are several key compon

In [14]:
import os
import wave
import json
import tempfile
from typing import TypedDict, Optional

from moviepy.editor import VideoFileClip
from transformers import pipeline
from pydub import AudioSegment
import whisper  # ✅ Whisper added

from langchain_core.runnables import RunnableLambda, RunnableSequence

# === Step 1: Extract Audio and Convert to Mono 16-bit ===
def extract_audio(state):
    """Extract the video's audio track into a mono, 16-bit WAV temp file.

    Args:
        state: Pipeline state; must contain ``"video_path"``.

    Returns:
        A copy of ``state`` with ``"audio_path"`` added on success (the file
        is created with ``delete=False``, so it persists for the later
        steps), or ``{"error": <message>}`` if any step fails.

    Raises:
        KeyError: If ``state`` has no ``"video_path"`` entry.
    """
    video_path = state["video_path"]
    wav_path = None
    try:
        clip = VideoFileClip(video_path)
        audio = None
        try:
            audio = clip.audio
            if audio is None:
                raise ValueError("video has no audio track")

            # Reserve a unique path, then close our own handle immediately:
            # the handle was previously leaked, and an open named temp file
            # cannot be reopened by name on Windows.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
                wav_path = temp_file.name
            audio.write_audiofile(wav_path)
        finally:
            # Release the clips on every path, not only on success.
            clip.close()
            if audio is not None:
                audio.close()

        # Convert to mono 16-bit WAV
        audio_segment = AudioSegment.from_file(wav_path)
        audio_segment = audio_segment.set_channels(1).set_sample_width(2)
        audio_segment.export(wav_path, format="wav")

        return {**state, "audio_path": wav_path}
    except Exception as e:
        # Do not leave a partial temp file behind when extraction fails.
        if wav_path is not None and os.path.exists(wav_path):
            try:
                os.remove(wav_path)
            except OSError:
                pass  # best-effort cleanup; the original error is what matters
        return {"error": f"Audio extraction failed: {e}"}

# === Step 2: Transcribe with Whisper ===
def transcribe_audio_whisper(state):
    """Transcribe the extracted audio file with Whisper's "base" model.

    Args:
        state: Pipeline state. When it already carries an ``"error"`` entry it
            is returned as-is and no transcription is attempted; otherwise it
            must contain ``"audio_path"``.

    Returns:
        A copy of ``state`` with ``"text"`` added on success, the unchanged
        ``state`` when an earlier step failed, or ``{"error": <message>}``
        when transcription itself fails.
    """
    if "error" in state:
        # An earlier step failed: hand the state through untouched.
        return state

    wav_file = state["audio_path"]
    try:
        transcription = whisper.load_model("base").transcribe(wav_file)
        updated_state = dict(state)
        updated_state["text"] = transcription["text"]
        return updated_state
    except Exception as exc:
        return dict(error=f"Whisper transcription failed: {exc}")

# === Step 3: Summarize Transcript ===
def summarize_transcript(state):
    """Summarize the transcript with the Phi-3.5-mini-instruct checkpoint.

    The model is loaded from ``/content/Phi-3.5-mini-instruct`` on every call.

    Args:
        state: Pipeline state; expected to contain ``"text"`` unless an
            earlier step failed.

    Returns:
        A copy of ``state`` with ``"summary"`` added on success, the unchanged
        ``state`` when an earlier step failed, or ``{"error": <message>}``
        when the transcript is empty.
    """
    if "error" in state:
        return state

    text = state.get("text") or ""
    if not text.strip():
        return {"error": "Empty transcript."}

    prompt = f"Summarize the following transcript for a blog post:\n\n{text}\n\nSummary:"
    generator = pipeline("text-generation", model="/content/Phi-3.5-mini-instruct", device_map="auto", torch_dtype="auto")
    # return_full_text=False: by default the text-generation pipeline returns
    # the prompt followed by the completion, which would make the "summary"
    # contain the instruction and the entire transcript.
    result = generator(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        return_full_text=False,
    )[0]["generated_text"]
    return {**state, "summary": result.strip()}

# === Step 4: Generate Blog Post ===
def generate_blog(state):
    """Write a blog post from the summary with Phi-3.5-mini-instruct.

    The model is loaded from ``/content/Phi-3.5-mini-instruct`` on every call.

    Args:
        state: Pipeline state; expected to contain ``"summary"`` unless an
            earlier step failed.

    Returns:
        A copy of ``state`` with ``"blog_post"`` added on success, the
        unchanged ``state`` when an earlier step failed, or
        ``{"error": <message>}`` when the summary is empty.
    """
    if "error" in state:
        return state

    summary = state.get("summary") or ""
    if not summary.strip():
        return {"error": "Empty summary."}

    prompt = f"""You are a blog writer. Use the following summary to write a detailed blog post.

Summary:
{summary}

The blog post should include:
- A compelling title
- An introduction
- Two or more sections discussing the main points
- A conclusion

Start writing the blog below:
"""

    generator = pipeline("text-generation", model="/content/Phi-3.5-mini-instruct", device_map="auto", torch_dtype="auto")
    # return_full_text=False: by default the pipeline echoes the prompt in
    # front of the completion, which would leak the instructions into the post.
    result = generator(
        prompt,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        top_k=50,
        num_return_sequences=1,
        return_full_text=False,
    )[0]["generated_text"]

    # Post-processing: start from first "# Title"
    start = result.find("# Title")
    blog = result[start:] if start != -1 else result

    return {**state, "blog_post": blog}

# === State Schema ===
class BlogState(TypedDict, total=False):
    """Shape of the state dict that the pipeline steps pass along.

    ``total=False`` makes every key optional: the caller supplies only
    ``video_path`` and each step adds its own key as the run progresses.
    NOTE(review): nothing in this cell appears to reference this class (the
    steps take and return plain dicts) - confirm whether it is still needed.
    """
    video_path: str  # input: path of the video to process
    audio_path: str  # added by extract_audio: path of the WAV file
    text: str  # added by transcribe_audio_whisper: the transcript
    summary: str  # added by summarize_transcript
    blog_post: str  # added by generate_blog
    error: Optional[str]  # set by whichever step fails

# === LangChain Runnable Pipeline ===
# Steps run strictly in this order; each one receives the previous step's
# returned state dict.
pipeline_sequence = RunnableSequence(
    *(
        RunnableLambda(step)
        for step in (
            extract_audio,
            transcribe_audio_whisper,
            summarize_transcript,
            generate_blog,
        )
    )
)

# === Run Pipeline ===
if __name__ == "__main__":
    video_file = "/content/What is LangChain_.mp4"
    output = pipeline_sequence.invoke({"video_path": video_file})

    # Show the blog post, or the pipeline's error message if a step failed.
    # `or` (rather than nested .get defaults) also covers keys that are
    # present but None/empty, which would otherwise break the string handling.
    blog_output = output.get("blog_post") or output.get("error") or "No output."

    # Keep only the text after the first "Title:" marker, when there is one.
    _, marker, after_title = blog_output.partition("Title:")
    display_content = after_title.lstrip() if marker else blog_output

    print("\n=== FINAL BLOG POST (AFTER TITLE) ===\n")
    print(display_content)

    # Explicit UTF-8: generated text may contain characters that the
    # platform's default encoding cannot represent.
    with open("blog_post.md", "w", encoding="utf-8") as f:
        f.write(display_content)


MoviePy - Writing audio in /tmp/tmpyjjy21ov.wav




MoviePy - Done.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0



=== FINAL BLOG POST (AFTER TITLE) ===

"Unlocking the Power of LLMs: Exploring Langchain's Revolutionary Framework"

Introduction:
In the rapidly evolving landscape of artificial intelligence, large language models (LLMs) have emerged as powerful tools, capable of understanding and generating human-like text. However, harnessing the full potential of these models can be a daunting task for developers. Enter Langchain, an open-source orchestration framework that simplifies the process of building applications using LLMs, making it accessible to a broader range of developers. In this blog post, we'll delve into the components of Langchain, its use cases, and how it's revolutionizing the way we interact with language models.

Section 1: Understanding Langchain and Its Components
Langchain, launched in October 2022 by Harrison Chase, has quickly become a game-changer in the world of LLM development. It offers a centralized development environment that enables developers to integrate LLMs 