In [1]:
from crewai import Agent , Task , Crew , Process , LLM
from crewai.tools import tool
from pydantic import BaseModel , Field
from typing import List
import os 
from dotenv import load_dotenv
import json
from youtube_transcript_api import YouTubeTranscriptApi


In [2]:
class TranscriptElement(BaseModel):
    text: str = Field(..., description="The text of the transcript")
    start: float = Field(..., description="The start time of the transcript")
    duration: float = Field(..., description="The duration of the transcript")


class YoutubeVideoTranscript(BaseModel):
    transcriptions : list[TranscriptElement] = Field(..., description="The result of the transcript")

class YoutubeVideoSummarization(BaseModel):
    summurazation : str = Field(..., description="Summurazation of the video transcription")


class Resource(BaseModel):
    title : str = Field(..., description="Title  of the resource")
    link : str = Field(..., description="url  of the resource")
    type_r: str = Field(..., description="type  of  the  resource (article , video , etc)")
    more_details : str =  Field(..., description="any important detail about the resource that can help")

class YoutubeSammuryAndResources(BaseModel):
        summurazation : str = Field(..., description="Summurazation of the video transcription")
        resources : list[Resource]




In [3]:
load_dotenv(override=True)
os.environ["GEMINI_API_KEY"] = os.getenv("GOOGLE_API_KEY")
llm=LLM(
        model="gemini/gemini-1.5-pro",
        temperature=0,
        # verbose=True,    
        )


In [5]:
@tool
def get_transcript(yt_id: str) -> YoutubeVideoTranscript:
    """this tool is use to get the transcript of a given youtube video ID , the  ID is  in the  link  of the video  itself 
    the output of 
    get_transcript("yt_id") -> [{'text': 'multi-agent system where multiple AI', 'start': 45.199, 'duration': 5.84},{},...]
    """
    transcript = YouTubeTranscriptApi().transcript(url)
    return transcript

from crewai_tools import SerperDevTool
Web_search  = SerperDevTool()

In [7]:
researcher_agent = Agent(
    role="Researcher Agent",
    goal="the  goal is it  get the transcription of  a given url : {url}",
    llm=llm,
    backstory="The agent is  designed to  extract the  transcription of youtube video in a good way  ",
    tools=[get_transcript],
    verbose=True,

)

resarsher_task = Task(
    description="""the  task is get the  transcription of  a video and return it in a good way ,
    """,
    expected_output="a JSON object containing all starts of a transcription and there duration and  there transcription ",
    output_json=YoutubeVideoTranscript,
    output_file = "./video_transcription.json",
    agent=researcher_agent,
  
)

In [8]:
sammurizer_agent = Agent(
    role="Summarizer Agent",
    goal="the  goal is to  summarize the transcription in a simple way",
    llm=llm,
    backstory="The agent is  designed to  make a good summarization  of  a given video transcription",
    verbose=True,

)

summurizer_task = Task(
    description="the  goeal is to  generate a good sammurry from a video transcription ",
    expected_output="a JSON object containing the transcription and  a summarization ",
    output_json=YoutubeVideoSummarization,
    output_file = "./video_summary.json",
    agent=sammurizer_agent,
                
)


In [17]:
quey_websearch_agent = Agent(
    role="Web Searcher Agent",
    goal="the  goal add some usefull information about the video",
    llm=llm,
    backstory="the  agent  is designed to add some resources like  video links or articles etc , based on the main subject of the  video  ",
    tools=[Web_search],
    verbose=True,
)

query_websearch_task = Task(
    description="""
    the  goal is to  get  some links that can help to  have more resources od  knowledge .
    the  given sammury you should keep it  to  return it at the  end also , remember this ! , 
    """,
    output_json=YoutubeSammuryAndResources,
    output_file = "./summary_and_resourses.json",
    agent=quey_websearch_agent,
                
)


ValidationError: 1 validation error for Task
expected_output
  Field required [type=missing, input_value={'description': '\n    th...ubject of the  video  )}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.10/v/missing

In [None]:
thinker_agent = Agent(
    role="thinker Agent",
    goal="the  goal is to  make  the  final result more structure in a mardown format",
    llm=llm,
    backstory="the  agent  is designed to make the  makdown file  more accurate  and readble in a better way  ",
    verbose=True,
)

thinker_task = Task(
    description="""
    the  goal is provide a markdown file that can be used to get an overview of  the  given subject 
    """,
    output_file = "./result.md",
    agent=thinker_agent,
                
)

In [None]:
crew = Crew(
    agents=[researcher_agent, sammurizer_agent, quey_websearch_agent, thinker_agent],
    tasks=[resarsher_task, summurizer_task, query_websearch_task, thinker_task],
    process=Process.sequential,
    verbose=True,
)

In [None]:
crew.kickoff(input={"url"="https://youtu.be/coVbtFlT9Qc?si=YsWVGCrsjp9rfLCz"})