In [1]:
import getpass
import io
import os

from dotenv import load_dotenv
from IPython.display import Markdown
from langchain_community.document_loaders import YoutubeLoader
from langchain_community.llms import HuggingFaceHub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate

In [2]:
# Load settings from a local .env file into the process environment.
# NOTE(review): presumably this supplies the Hugging Face API token that
# HuggingFaceHub needs — confirm which variable name is expected.
load_dotenv()

True

In [3]:
def llm_chain():
    """Build the chain that answers a query about a video transcript.

    The returned chain expects a dict with two keys:

    * ``consulta``: the question to ask about the video.
    * ``transcricao``: the transcript text of the video.

    and produces the model's reply as a plain string.

    The prompt is rendered as ONE string in the Llama 3 instruct layout
    (begin-of-text token first, then system / user / assistant headers).
    A chat prompt template is deliberately not used here: when a chat
    template is piped into a plain-text LLM its messages are flattened to
    "System: ...\\nHuman: ...", which would push the begin-of-text token into
    the middle of the prompt and leave the system text outside any header.

    Returns:
        A runnable chain: prompt template -> Hugging Face LLM -> string parser.
    """
    system_prompt = "Você é um assistente virtual prestativo e deve responder a uma consulta com base na transcrição de um vídeo, que será fornecida abaixo."

    inputs = "Consulta: {consulta} \n Transcrição: {transcricao}"

    # Full Llama 3 instruct prompt; the trailing assistant header cues the
    # model to start generating its answer.
    llama3_prompt = "".join(
        [
            "<|begin_of_text|>",
            "<|start_header_id|>system<|end_header_id|>\n\n",
            system_prompt,
            "<|eot_id|>",
            "<|start_header_id|>user<|end_header_id|>\n\n",
            inputs,
            "<|eot_id|>",
            "<|start_header_id|>assistant<|end_header_id|>\n\n",
        ]
    )

    # Only {consulta} and {transcricao} are template variables; the special
    # tokens contain no braces, so they pass through untouched.
    prompt_template = PromptTemplate.from_template(llama3_prompt)

    llm = HuggingFaceHub(
        repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
        model_kwargs={
            "temperature": 0.1,
            # Ask for the completion only, not the prompt echoed back.
            "return_full_text": False,
            "max_new_tokens": 1024,
        },
    )

    chain = prompt_template | llm | StrOutputParser()

    return chain

In [4]:
def get_video_info(url_video, language="pt", translation=None):
    """Fetch the transcript and metadata of a YouTube video.

    Args:
        url_video: URL of the YouTube video.
        language: Transcript language passed through to the loader (a single
            code, or a list of codes as used elsewhere in this notebook).
        translation: Optional target language passed through to the loader.

    Returns:
        A ``(transcript, metadata)`` tuple taken from the first document the
        loader returns.

    Raises:
        ValueError: If the loader returns no documents, i.e. there is no
            transcript to work with. Other exceptions raised by the loader
            propagate unchanged.
    """
    video_loader = YoutubeLoader.from_youtube_url(
        url_video,
        language=language,
        translation=translation,
    )

    documents = video_loader.load()
    # An empty result would otherwise surface as an opaque
    # "list index out of range" from indexing [0].
    if not documents:
        raise ValueError(
            f"Nenhuma transcrição disponível para o vídeo: {url_video}"
        )

    infos = documents[0]
    transcript = infos.page_content
    metadata = infos.metadata

    return transcript, metadata

In [5]:
def interpret_video(url, query="resuma", language="pt", translation=None):
    """Produce a Markdown report about a YouTube video.

    The report has three sections, each generated by one call to the LLM
    chain over the video's transcript: a one-sentence description, the main
    themes, and the answer to ``query``.

    Args:
        url: URL of the YouTube video.
        query: The user's question about the video.
        language: Transcript language(s) forwarded to ``get_video_info``.
        translation: Optional translation language forwarded to
            ``get_video_info``.

    Returns:
        The Markdown report as a string, or ``None`` if either the transcript
        could not be loaded or the model call failed (the error is printed).
    """
    # Stage 1: transcript. The metadata is not used by the report.
    try:
        transcript, _ = get_video_info(url, language, translation)
    except Exception as e:
        print("Erro ao carregar transcrição")
        print(e)
        return None

    # (section heading, question sent to the model), in output order.
    sections = [
        (
            "\n## Sobre o que fala o vídeo \n",
            "explique em 1 frase sobre o que fala esse vídeo. responda direto com a frase",
        ),
        ("\n## Temas \n", "lista os principais temas desse vídeo"),
        ("\n## Resposta para a consulta \n", query),
    ]

    # Stage 2: model calls. Reported separately so a model/API failure is not
    # mislabelled as a transcript-loading error.
    try:
        chain = llm_chain()
        parts = []
        for heading, consulta in sections:
            answer = chain.invoke({"transcricao": transcript, "consulta": consulta})
            parts.append(heading + answer)
    except Exception as e:
        print("Erro ao consultar o modelo")
        print(e)
        return None

    return "".join(parts)

In [6]:
# Video to analyse and the free-form question to ask about it.
url_video = "https://www.youtube.com/watch?v=OLglLItPzbs"
query_user = "resuma"
# Transcript language codes handed to interpret_video's `language` argument.
# NOTE(review): presumably tried in order of preference by the loader — confirm.
language = ["pt", "pt-BR", "en"]

In [7]:
# Build the Markdown report; keyword arguments make the mapping explicit.
video_infos = interpret_video(
    url=url_video,
    query=query_user,
    language=language,
)

  llm = HuggingFaceHub(
  from .autonotebook import tqdm as notebook_tqdm


In [8]:
# interpret_video returns None when it fails (it only prints the error), so
# guard the write: otherwise the literal text "None" would end up in resumo.md.
if video_infos is None:
    print("Nenhum resumo gerado; resumo.md não foi escrito.")
else:
    with open("resumo.md", "w", encoding="utf-8") as f:
        f.write(str(video_infos))