**Video Content Summarization Agent using CrewAI**

In [None]:
!pip install langchain
!pip install -U langsmith
!pip install crewai
!pip install crewai_tools
!pip install langchain_google_genai
!pip install youtube-transcript-api

from langchain_google_genai import ChatGoogleGenerativeAI
from youtube_transcript_api import YouTubeTranscriptApi

In [2]:
from langchain.chains.summarize import load_summarize_chain
from langchain.llms import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
from langchain.tools import Tool

In [10]:
from langchain.schema import Document

In [3]:
from crewai import Agent, Task, Crew
from crewai_tools import tool
from crewai_tools import YoutubeVideoSearchTool
#from crewai_tools import SerperDevTool

In [4]:
# Configure LangSmith tracing for this notebook via environment variables.
# Replace the "your api key" placeholder with a real LangSmith key before running.
import os

os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_PROJECT": "VideoSummarize",
        "LANGCHAIN_ENDPOINT": "https://api.langchain.plus",
        "LANGCHAIN_API_KEY": "your api key",
    }
)

In [5]:
#search_tool = SerperDevTool()
#youtube_search_tool = YoutubeVideoSearchTool(youtube_video_url=youtube_video_url)
#GOOGLE_API_KEY = "your api key"
#llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest",google_api_key=GOOGLE_API_KEY )


In [None]:
# Shared LLM instance: handed to both agents below and reused by the
# summarization chain inside `summarize_transcript`.
# The API key is a placeholder and must be replaced with a real key.
llm = OpenAI(api_key="your api key")

def _extract_video_id(url: str) -> str:
    """Return the YouTube video ID contained in *url*.

    Handles ``watch?v=<id>`` links (with or without extra query parameters
    such as ``&t=30s`` or ``&list=...``), ``youtu.be/<id>`` short links and
    ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``, ``/v/<id>`` paths.

    Raises:
        ValueError: if no video ID can be found in *url*.
    """
    # Function-scope import so this cell does not depend on another cell's imports.
    from urllib.parse import parse_qs, urlparse

    cleaned = url.strip()
    # urlparse only recognises the host when a scheme is present.
    parts = urlparse(cleaned if "://" in cleaned else f"https://{cleaned}")
    host = (parts.hostname or "").lower()
    segments = [segment for segment in parts.path.split("/") if segment]

    if host == "youtu.be" or host.endswith(".youtu.be"):
        video_id = segments[0] if segments else ""
    elif len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        video_id = segments[1]
    else:
        video_id = parse_qs(parts.query).get("v", [""])[0]

    if not video_id and "v=" in cleaned:
        # Fallback for inputs that are not well-formed URLs (e.g. a bare
        # "v=<id>" fragment): take whatever follows the first "v=", minus any
        # trailing parameters or fragment.
        video_id = cleaned.split("v=", 1)[1].split("&", 1)[0].split("#", 1)[0]

    if not video_id:
        raise ValueError(f"Could not find a video ID in URL: {url!r}")
    return video_id


# Create a customized CrewAI tool for getting the transcript of a YouTube video
@tool("Retrieve the transcript")
def get_youtube_transcript(url: str) -> str:
    """This tool will take the Video URL input and retrieve the transcript of the Video."""
    # NOTE: the docstring above is presumably what the agent sees as the tool
    # description, so it is kept as the original one-liner; details live here.
    #
    # Returns the transcript as a single plain-text string (segment texts joined
    # by spaces). On any failure (bad URL, transcript unavailable, network error)
    # the error is printed and None is returned, as before.
    try:
        video_id = _extract_video_id(url)
        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
        # Join the segment texts into plain text in one pass.
        return " ".join(segment["text"] for segment in transcript_list).strip()
    except Exception as e:  # tool boundary: report the problem instead of crashing the crew
        print(f"Error fetching transcript: {e}")
        return None

# Create a customized CrewAI tool to summarize the transcript
@tool("Summarize the transcript")
def summarize_transcript(transcript: str) -> str:
    """This tool will summarize the contents from the transcript texts."""
    # Guard clause: nothing to do for an empty/None transcript.
    if not transcript:
        print("No transcript to summarize.")
        return None

    # Break the transcript into ~1000-character chunks (50 characters of
    # overlap) and wrap each chunk in a LangChain Document.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
    documents = [
        Document(page_content=chunk) for chunk in splitter.split_text(transcript)
    ]

    # Summarize with the module-level `llm` using the "stuff" chain type.
    # NOTE(review): "stuff" presumably places every chunk into a single prompt,
    # so the splitting above would not reduce prompt size for long transcripts;
    # confirm whether "map_reduce" was intended.
    summarize_chain = load_summarize_chain(llm, chain_type="stuff", verbose=False)
    return summarize_chain.run(documents)

# Define Agents and Tasks

# Agent 1: fetches the transcript for a video URL.
# Its only tool is `get_youtube_transcript`; delegation is disabled.
researcher = Agent(
    role="Video Transcript Researcher",
    goal="Find the transcript of a given Video URL.",
    backstory="You are a video content researcher tasked with retrieving video transcripts.",
    tools=[get_youtube_transcript],
    llm=llm,
    allow_delegation=False,
    verbose=True,
)

# URL of the video to summarize. Placeholder: replace with a real YouTube link.
# Without it the task below only says "the given Video URL" and the agent has
# no URL to pass to its transcript tool.
youtube_video_url = "https://www.youtube.com/watch?v=your_video_id"

# Task 1: have the researcher agent fetch the transcript of `youtube_video_url`.
# `expected_output` is an example of what a plain-text transcript looks like.
task1 = Task(
    description=f"Retrieve the Transcript of the given Video URL: {youtube_video_url}",
    expected_output="""(Music intro)
    Hey everyone, welcome back to my channel! Today we're talking about the five best tips for improving your productivity.  First up is getting enough sleep...(Explanation of sleep importance)
    Tip number two is all about managing distractions.  Turn off your phone, close unnecessary tabs... (Explanation of managing distractions)
    Moving on to tip three... (Explanation of remaining tips) ...(Outro music and call to action)
    """,
    agent=researcher,
)

# Agent 2: turns the transcript into a summary.
# Its only tool is `summarize_transcript`; delegation is disabled.
summarizer = Agent(
    role="Content Summarizer",
    goal="Summarize the key takeaways from the video transcript.",
    backstory="You are a knowledgeable writer in the AI and LLM domain. In this case, you're generating a high-quality summary for a tech-savvy audience from the video transcript.",
    tools=[summarize_transcript],
    llm=llm,
    allow_delegation=False,
    verbose=True,
)

# Task 2: have the summarizer agent produce the summary report.
# `expected_output` spells out the report layout with a worked example.
task2 = Task(
    agent=summarizer,
    description="Summarize Transcript Contents",
    expected_output="""A summary report of the transcripts. The result should be formatted as shown below:

    Key takeaways: This video is about 3 intuitions of how LLM works so well.
    Main arguments: The first intuition is that next word prediction is massive multi-task learning. The second intuition is that scaling compute (data * size of LM) reliably improves loss. The third intuition is that overall loss improves smoothly, individual loss can improve suddenly.
    Background Information: The summary needs to be simple and use clear language. Focus on the main points and avoid unnecessary details.""",
)

# Assemble the crew (agents and tasks listed in the order researcher ->
# summarizer), run it, and print whatever kickoff() returns.
crew = Crew(
    agents=[researcher, summarizer],
    tasks=[task1, task2],
    verbose=1,
)
report_output = crew.kickoff()
print(report_output)
