# INTRODUCTION

This application is based on the Ollama service.

Follow the installation guide at https://ollama.com/, then pull the model you need with the shell command "ollama pull <MODEL NAME>".

This application uses the llama3.2:3b model by default, which needs 4 GB of GPU memory.

# IMPORTS

In [9]:
from youtube_transcript_api import YouTubeTranscriptApi
from langchain_text_splitters import TokenTextSplitter
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_ollama import OllamaLLM, OllamaEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore
from langchain.chains.base import Chain

# FUNCTIONS TO GET THE SPOKEN CONTENT OF THE VIDEO AND SUMMARIZE IT

In [10]:
def get_video_lang_code(lang_codes: list[str], preferences: list[str] = ["en"]) -> str:
    """Pick the transcript language code to use for a video.

    Args:
        lang_codes: Language codes available for the video.
        preferences: Language codes to look for, in priority order.

    Returns:
        The first entry of ``preferences`` that appears in ``lang_codes``;
        otherwise the first entry of ``lang_codes``; or an empty string when
        ``lang_codes`` is empty.
    """
    # Nothing available: signal it with an empty code.
    if not lang_codes:
        return ""

    # First preferred code that is available, else the first available code.
    return next(
        (wanted for wanted in preferences if wanted in lang_codes),
        lang_codes[0],
    )

In [11]:
def get_split_texts(
    video_id: str,
    languages: list[str],
    chunk_size: int = 2000,
    chunk_overlap: int = 100,
) -> list[str]:
    """Download a video's transcript and split it into overlapping chunks.

    Args:
        video_id: YouTube video identifier passed to the transcript API.
        languages: Language codes forwarded to
            ``YouTubeTranscriptApi.get_transcript``.
        chunk_size: ``chunk_size`` given to ``TokenTextSplitter``
            (defaults to the previously hard-coded 2000).
        chunk_overlap: ``chunk_overlap`` given to ``TokenTextSplitter``
            (defaults to the previously hard-coded 100).

    Returns:
        The chunks produced by ``TokenTextSplitter.split_text``.
    """
    transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=languages)

    # Join the "text" field of each transcript entry, one entry per line;
    # entries without that key contribute an empty string.
    full_text = ".\n".join(segment.get("text", "") for segment in transcript)

    text_splitter = TokenTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    return text_splitter.split_text(full_text)

In [12]:
def _extract_video_id(url: str) -> str:
    """Return the YouTube video ID contained in ``url``."""
    from urllib.parse import parse_qs, urlparse

    cleaned = url.strip()
    parsed = urlparse(cleaned)

    # Standard watch URLs: take the ``v`` query parameter wherever it sits, so
    # trailing parameters such as ``&t=42s`` do not leak into the ID.
    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        return query_ids[0]

    host = (parsed.hostname or "").lower()
    path_parts = [part for part in parsed.path.split("/") if part]

    # Short links: https://youtu.be/<id>
    if host in {"youtu.be", "www.youtu.be"} and path_parts:
        return path_parts[0]

    # Path-based links: /embed/<id>, /shorts/<id>, /live/<id>, /v/<id>
    if len(path_parts) >= 2 and path_parts[0] in {"embed", "shorts", "live", "v"}:
        return path_parts[1]

    # Fallback keeps the previous behaviour, so a bare video ID still works.
    return cleaned.split("?v=")[-1]


def get_texts_from_video(url: str) -> list[str]:
    """Fetch a YouTube video's transcript as a list of text chunks.

    The transcript language is chosen by ``get_video_lang_code`` from the
    language codes that ``YouTubeTranscriptApi.list_transcripts`` reports for
    the video.

    Args:
        url: YouTube video URL (watch, short-link, embed or shorts form) or a
            bare video ID.

    Returns:
        The transcript chunks returned by ``get_split_texts``.
    """
    video_id = _extract_video_id(url)
    lang_codes = [t.language_code for t in YouTubeTranscriptApi.list_transcripts(video_id)]
    lang = get_video_lang_code(lang_codes)
    return get_split_texts(video_id, [lang])

In [13]:
def get_summarization(
    url: str,
    texts: list[str],
    summarization_chain: Chain,
    n_tokens: int = 1000,
) -> str:
    """Summarize each text chunk and join the partial summaries.

    Args:
        url: Not used by this function; kept so existing callers keep working.
        texts: Text chunks to summarize, one chain invocation per chunk.
        summarization_chain: Object whose ``invoke`` accepts a dict with the
            keys ``"input_text"`` and ``"n_tokens"`` and returns a string.
        n_tokens: Value passed as ``"n_tokens"`` on every invocation
            (defaults to the previously hard-coded 1000).

    Returns:
        The per-chunk summaries separated by blank lines; an empty string
        when ``texts`` is empty.
    """
    summaries = [
        summarization_chain.invoke({"input_text": text, "n_tokens": n_tokens})
        for text in texts
    ]
    return "\n\n".join(summaries)

# INFERENCE VARIABLES

In [14]:
# Two-message chat prompt: a system instruction plus a user message whose
# {input_text} and {n_tokens} placeholders are supplied at invocation time.
summarization_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are an experienced assistant in the synthesis of long texts. Return your answer without adding other informations or reasoning"),
        ("user", "Summarize briefly this text: {input_text} in {n_tokens} words")
    ]
)
# The same Ollama model name is used for both text generation and embeddings.
model_name = "llama3.2"
llm = OllamaLLM(model=model_name, temperature = 0.2)
embeddings = OllamaEmbeddings(model=model_name)
# Pipe the prompt into the LLM to form the summarization chain.
summarization_chain = summarization_prompt | llm

# GET SUMMARIZATION OF YOUTUBE VIDEO SPEECH

## Insert YouTube URL

In [15]:
# YouTube video whose transcript is summarized and queried in the cells below.
url = "https://www.youtube.com/watch?v=i_LwzRVP7bg"

## Call inference

In [16]:
# Fetch the transcript chunks once; `texts` is reused later to fill the vector store.
texts = get_texts_from_video(url)
# Summarize every chunk and join the partial summaries into one string.
summary = get_summarization(url, texts, summarization_chain)

## Print result

In [None]:
# Display the joined per-chunk summaries.
print(summary)

# CHAT ON VIDEO SPEECH CONTENT

## RAG functions and variables

In [18]:
# In-memory vector store built on the Ollama embeddings defined above.
vector_store = InMemoryVectorStore(embeddings)
# Index the transcript chunks; the return value of add_texts is not needed here.
_ = vector_store.add_texts(texts)

In [19]:
# Raw prompt text for question answering; {question} and {context} are
# template placeholders filled in when the chain is invoked.
rag_prompt = """You are an assistant for question-answering tasks. 
Use the following pieces of retrieved context to answer the question.
Return your answer without adding other informations or reasoning.

Question: {question} 

Context: {context} 

Answer:"""

# Rebind the name: from here on `rag_prompt` is a PromptTemplate, not a str.
rag_prompt = PromptTemplate.from_template(rag_prompt)

# Pipe the prompt into the LLM to form the question-answering chain.
rag_chain = rag_prompt | llm

In [20]:
def chat_on_rag(question: str, rag_chain: Chain = rag_chain, k: int = 7) -> str:
    """Answer a question using documents retrieved from the vector store.

    Args:
        question: Question used both as the similarity-search query and as
            the ``"question"`` input of the chain.
        rag_chain: Chain invoked with ``"question"`` and ``"context"`` keys.
            The default is the module-level ``rag_chain`` as it existed when
            this function was defined.
        k: Number of documents requested from the similarity search.

    Returns:
        The value returned by ``rag_chain.invoke``.
    """
    # Retrieval reads the module-level ``vector_store``.
    retrieved_docs = vector_store.similarity_search(question, k=k)

    chain_inputs = {
        "question": question,
        # Retrieved documents are concatenated, separated by blank lines.
        "context": "\n\n".join(doc.page_content for doc in retrieved_docs),
    }
    return rag_chain.invoke(chain_inputs)

## Insert question

In [21]:
# Question to ask about the video's spoken content.
question = "What techniques of machine learning are explained?"

## Call inference

In [None]:
# Run retrieval plus generation with the default chain and default k.
answer = chat_on_rag(question)
print(answer)