# Installing Required Packages

In [1]:
!pip install --upgrade youtube_search youtube-transcript-api pytube --quiet

In [2]:
!pip install langchain langchain_community langgraph tiktoken --quiet

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cudf 23.8.0 requires cubinlinker, which is not installed.
cudf 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
cudf 23.8.0 requires ptxcompiler, which is not installed.
cuml 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
dask-cudf 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
keras-cv 0.8.2 requires keras-core, which is not installed.
keras-nlp 0.8.1 requires keras-core, which is not installed.
tensorflow-decision-forests 1.8.1 requires wurlitzer, which is not installed.
apache-beam 2.46.0 requires dill<0.3.2,>=0.3.1.1, but you have dill 0.3.8 which is incompatible.
apache-beam 2.46.0 requires numpy<1.25.0,>=1.14.3, but you have numpy 1.26.4 which is incompatible.
apache-beam 2.46.0 requires pyarrow<10.0.0,>=3.0.0, but you have pyarrow 11.0.0 which is incompatible.

In [3]:
!pip install --upgrade huggingface_hub --quiet

# LLM Inference using HuggingFaceEndpoint

In [11]:
from langchain_community.llms import HuggingFaceEndpoint
from kaggle_secrets import UserSecretsClient

# Hosted model that performs the summarization.
repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"

# The Hugging Face token is read from the Kaggle secret named "hf_model",
# so it never appears in the notebook itself.
user_secrets = UserSecretsClient()
HUGGINGFACEHUB_API_TOKEN = user_secrets.get_secret("hf_model")

# NOTE(review): `max_length` may not be a recognised HuggingFaceEndpoint
# generation parameter (the documented one looks like `max_new_tokens`) —
# confirm against the installed langchain_community version.
llm = HuggingFaceEndpoint(
    repo_id=repo_id,
    max_length=1024,
    temperature=0.1,
    huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
)

Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


# Building a YouTube Script Creation Agent with LangGraph

In [13]:
from typing import Any, Dict, TypedDict

from langchain_core.messages import BaseMessage


class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        keys: A dictionary with string keys holding everything the nodes pass
            to each other. The nodes in this notebook read and write the
            entries "question", "top_k", "url_list", "docs",
            "generated_summary" and "generated_intermediate_summary".
    """

    # Values are heterogeneous (str, int, lists of documents), hence typing.Any.
    # The builtin function `any` is not a type and must not be used here.
    keys: Dict[str, Any]

In [14]:
from langchain.tools import YouTubeSearchTool
from langchain_community.document_loaders import YoutubeLoader

from langchain.chains.summarize import load_summarize_chain
from langchain.prompts import PromptTemplate

### Nodes ###
def get_youtube_video_urls(state):
    """
    Search YouTube for videos matching the question and collect their URLs.

    Args:
        state (dict): The current state of the agent. ``state["keys"]`` must
            contain "question" (the search text) and "top_k" (number of
            videos to request). When it also contains "docs", the call is
            treated as a retry and ``top_k + top_k`` videos are requested.

    Returns:
        dict: ``{"keys": {...}}`` containing only "url_list", "question" and
        "top_k" (the question and top_k are passed through unchanged).
    """
    print("---Fetching URls---")
    state_dict = state["keys"]
    question = state_dict["question"]
    top_k = state_dict["top_k"]

    # "docs" is only written by the transcript node, so its presence means we
    # were routed back here to look for more videos.
    # NOTE(review): top_k is returned unchanged, so every retry requests the
    # same top_k + top_k results.
    if "docs" in state_dict:
        num_results = top_k + top_k
        print("---GETTING NEW VIDEO URLS---", num_results)
    else:
        num_results = top_k

    # The tool receives a single "<query>,<count>" string (built below), so
    # commas inside the question are replaced to keep that separator
    # unambiguous. The question stored in the state is left untouched.
    search_text = question.replace(",", " ")
    tool = YouTubeSearchTool()
    url_list = tool.run(search_text + "," + str(num_results))

    # The tool returns the URLs as one string with each URL wrapped in single
    # quotes; keep only the quoted pieces that look like links.
    final_urls = [piece for piece in url_list.split("'") if "http" in piece]
    return {"keys": {"url_list": final_urls, "question": question, "top_k": top_k}}


def get_video_text_from_urls(state):
    """
    Load the transcript of every video URL found by the search node.

    Args:
        state (dict): The current state of the agent. ``state["keys"]`` must
            contain "question", "top_k" and "url_list".

    Returns:
        dict: ``{"keys": {...}}`` with "docs" (the first document loaded for
        each video that yielded one) plus the unchanged "url_list",
        "question" and "top_k".
    """
    print("---Fetching Text from Video---")
    state_dict = state["keys"]
    question = state_dict["question"]
    top_k = state_dict["top_k"]
    final_urls = state_dict["url_list"]

    docs = []
    for url in final_urls:
        try:
            loader = YoutubeLoader.from_youtube_url(
                url,
                add_video_info=True,
                language=["en", "hi"],
                translation="en",
            )
            text = loader.load()
        except Exception as exc:
            # A single unusable video (bad URL, missing transcript, metadata
            # lookup failure) should not abort the run: report it and move on.
            # decide_to_get_summary routes back to the search node when too
            # few documents were collected.
            print(f"---Skipping {url}: {exc}---")
            continue
        # The loader may return an empty list; only keep videos that produced text.
        if len(text) > 0:
            docs.append(text[0])

    return {"keys": {"docs": docs, "url_list": final_urls, "question": question, "top_k": top_k}}


def get_text_summary(state):
    """
    Summarise the loaded video transcripts with a "refine" summarization chain.

    The first document is summarised with ``prompt``; each further document is
    then used to refine the running summary with ``refine_prompt``.

    Args:
        state (dict): The current state of the agent. ``state["keys"]`` must
            contain "question", "url_list", "top_k" and "docs".

    Returns:
        dict: ``{"keys": {...}}`` with "generated_summary" (the chain's
        "output_text"), "generated_intermediate_summary" (the chain's
        "intermediate_steps") and the unchanged "docs", "url_list",
        "question" and "top_k".
    """
    print("---Creating Video Text Summary---")
    state_dict = state["keys"]
    question = state_dict["question"]
    final_urls = state_dict["url_list"]
    top_k = state_dict["top_k"]
    docs = state_dict["docs"]

    # Prompt used for the initial summary of the first document.
    prompt_template = """Write a detailed summary highlighting key points to engage users of the following text:
    {text}
    Detailed Summary:"""
    prompt = PromptTemplate.from_template(prompt_template)

    # Prompt used for every subsequent document. Adjacent string literals are
    # concatenated verbatim, so each piece must carry its own trailing
    # separator (the last two sentences previously ran together).
    refine_template = (
        "Your job is to produce a final detailed summary which should include the existing summary and only include "
        "relevant new key points which can improve the summary from the 'new context' provided. \n"
        "We have provided an 'existing summary' up to a certain point: Existing summary - \n {existing_answer}\n"
        "You have the opportunity to refine the existing summary, make sure to add only new points "
        "(only if needed) with some more context below. Don't remove key points from existing summary \n"
        "New Context - \n"
        "------------\n"
        "{text}\n"
        "------------\n"
        "Given the new context, refine the existing summary in English detailing all key points. "
        "If the context isn't useful, return the Existing summary."
    )
    refine_prompt = PromptTemplate.from_template(refine_template)

    chain = load_summarize_chain(
        llm=llm,
        chain_type="refine",
        question_prompt=prompt,
        refine_prompt=refine_prompt,
        return_intermediate_steps=True,
        input_key="input_documents",
        output_key="output_text",
    )
    # `invoke` replaces the deprecated direct call `chain(...)`.
    result = chain.invoke({"input_documents": docs}, return_only_outputs=True)

    return {
        "keys": {
            "generated_summary": result["output_text"],
            "generated_intermediate_summary": result["intermediate_steps"],
            "docs": docs,
            "url_list": final_urls,
            "question": question,
            "top_k": top_k,
        }
    }


### Edges ###
def decide_to_get_summary(state):
    """
    Choose the next node from the number of documents loaded so far.

    Args:
        state (dict): The current state of the agent; ``state["keys"]["docs"]``
            is the list of loaded documents.

    Returns:
        str: "get_text_summary" when at least two documents are available,
        otherwise "get_youtube_video_urls".
    """
    print("---DECIDE TO GET SUMMARY---")

    loaded_docs = state["keys"]["docs"]

    # Fewer than two documents: go back and search for more videos.
    if len(loaded_docs) < 2:
        print("---DECISION: GET NEW VIDEO URLS---")
        return "get_youtube_video_urls"

    # Enough documents to summarise.
    print("---DECISION: GET TEXT SUMMARY---")
    return "get_text_summary"

# Build Graph

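This follows the flow outlined in the figure above: search YouTube for video URLs, load each video's transcript, then either summarise the transcripts (when at least two were loaded) or go back and search for more videos.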

In [15]:
import pprint

from langgraph.graph import END, StateGraph

# Assemble the agent as a state graph whose state has the GraphState shape.
workflow = StateGraph(GraphState)

# Register each node function under the name the edges refer to.
workflow.add_node("get_youtube_video_urls", get_youtube_video_urls)
workflow.add_node("get_video_text_from_urls", get_video_text_from_urls)
workflow.add_node("get_text_summary", get_text_summary)

# Every run starts with the YouTube search.
workflow.set_entry_point("get_youtube_video_urls")

# Fixed transitions: search -> load transcripts, and summary -> finish.
workflow.add_edge("get_youtube_video_urls", "get_video_text_from_urls")
workflow.add_edge("get_text_summary", END)

# After loading transcripts, decide_to_get_summary returns the name of the
# node to run next; the mapping translates that name into the target node.
workflow.add_conditional_edges(
    "get_video_text_from_urls",
    decide_to_get_summary,
    dict(
        get_text_summary="get_text_summary",
        get_youtube_video_urls="get_youtube_video_urls",
    ),
)

# Turn the graph definition into a runnable application.
app = workflow.compile()

# Running the Agent

In [16]:
# Run the compiled graph on a sample question and report each node as it finishes.
inputs = {"keys": {"question": "What is retrieval augmented generation","top_k":3}}

# `value` ends up holding the state emitted by the last streamed node; it is
# initialised so an empty stream is detected instead of raising a NameError.
value = None
for output in app.stream(inputs):
    for key, value in output.items():
        # Plain print: pprint would show the quoted repr of these strings.
        print(f"Node '{key}':")
    print("\n---\n")

if value is None:
    raise RuntimeError("The graph produced no output, so there is no summary to show.")

# Final generation (pprint wraps the long summary text across lines)
pprint.pprint(value['keys']['generated_summary'])

---Fetching URls---
"Node 'get_youtube_video_urls':"
'\n---\n'
---Fetching Text from Video---
"Node 'get_video_text_from_urls':"
'\n---\n'
---DECIDE TO GET SUMMARY---
---DECISION: GET TEXT SUMMARY---
---Creating Video Text Summary---


  warn_deprecated(


"Node 'get_text_summary':"
'\n---\n'
"Node '__end__':"
'\n---\n'
('\n'
 '\n'
 'Retrieval-Augmented Generation (RAG) is a crucial framework for enhancing '
 'the performance of Large Language Models (LLMs) by incorporating a content '
 'store for retrieving relevant information before generating a response. The '
 'new context emphasizes the use of RAG in creating a better user experience '
 "by leveraging LLMs on a user's own content, rather than just listing "
 'relevant content like a search engine. RAG is used to create a chatbot or a '
 "system that can answer questions based on a user's specific content source, "
 'such as a website or a library of PDF documents.\n'
 '\n'
 'The RAG architecture consists of three main components: the user, the '
 "content source, and the large language model (LLM). The user's question is "
 'bundled into a prompt and sent to the LLM, which generates a response. To '
 'enhance the user experience, RAG uses prompt engineering, where instructions '
 '

In [19]:
# Display the full state dictionary emitted by the last streamed node.
# Relies on `value` left over from the streaming loop in the run cell.
value['keys']

{'generated_summary': "\n\nRetrieval-Augmented Generation (RAG) is a crucial framework for enhancing the performance of Large Language Models (LLMs) by incorporating a content store for retrieving relevant information before generating a response. The new context emphasizes the use of RAG in creating a better user experience by leveraging LLMs on a user's own content, rather than just listing relevant content like a search engine. RAG is used to create a chatbot or a system that can answer questions based on a user's specific content source, such as a website or a library of PDF documents.\n\nThe RAG architecture consists of three main components: the user, the content source, and the large language model (LLM). The user's question is bundled into a prompt and sent to the LLM, which generates a response. To enhance the user experience, RAG uses prompt engineering, where instructions and information from the content source are added to the prompt before it is sent to the LLM. This allow