# LangChain with YouTube Videos

## Loading LLM

In [1]:
import os
import sys

from dotenv import load_dotenv

# Make the parent of the current working directory importable.
# The membership check keeps this cell idempotent: re-running it does not
# append a duplicate entry to sys.path.
parent_dir = os.path.abspath(os.pardir)
if parent_dir not in sys.path:
    sys.path.append(parent_dir)
# Load environment variables from the .env file one directory above the
# working directory; the call's return value is shown as the cell output.
load_dotenv('../.env')

True

In [2]:
from langchain_google_vertexai import VertexAI

# Shared LLM handle (Vertex AI "gemini-pro") used by every chain in this notebook.
# NOTE(review): generation parameters are left at the library defaults —
# consider pinning them explicitly if run-to-run reproducibility matters.
llm = VertexAI(model_name="gemini-pro")

## Loading Transcript with YoutubeLoader

In [3]:
from langchain.document_loaders.youtube import YoutubeLoader
from langchain.chains.summarize import load_summarize_chain

# First example video: its transcript is loaded as LangChain documents.
loader = YoutubeLoader.from_youtube_url(
    'https://youtu.be/pNcQ5XXMgH4',
    add_video_info=True,  # request video details in addition to the transcript
)
result = loader.load()

In [4]:
# Display the loaded documents. The repr includes the full transcript text,
# so this output is long.
result

[Document(page_content="what is going on good people again right now we have a super exciting tutorial because we are going to take YouTube transcripts and we're going to pass them to open Ai and the way that we're going to do that is via a library called Lang chain which is what this entire series is about now before we jumped into it I wanted to show a diagram again I think these diagrams are helpful but you have to let me know so just let me know in the comments here so I wanted to do an overview about what we're actually going to be writing out in code because I think it's a little easier to see in pictures first so the way this is going to work is we're going to have a video a YouTube video we're going to pass it we're going to pass it a URL and then what Lang chain is going to help us do is it's going to help us load this video as a document and a document just means you're going to be taking the transcript which is the text of the video and you're going to be loading it as a doc

In [5]:
# Top-level fields exposed by the first loaded document.
result[0].dict().keys()

dict_keys(['page_content', 'metadata', 'type'])

In [6]:
# Video details (title, author, length, ...) stored in the document's metadata.
result[0].metadata

{'source': 'pNcQ5XXMgH4',
 'title': 'LangChain 101: YouTube Transcripts + OpenAI',
 'description': 'Unknown',
 'view_count': 21253,
 'thumbnail_url': 'https://i.ytimg.com/vi/pNcQ5XXMgH4/hqdefault.jpg?sqp=-oaymwEXCJADEOABSFryq4qpAwkIARUAAIhCGAE=&rs=AOn4CLCmP9TXvB4nm22ZX7b5Tl0AagEU3A',
 'publish_date': '2023-02-23 00:00:00',
 'length': 668,
 'author': 'Greg Kamradt (Data Indy)'}

In [7]:
# Peek at the first 100 characters of the transcript.
result[0].page_content[:100]

'what is going on good people again right now we have a super exciting tutorial because we are going '

## Using load_summarize_chain

In [8]:
# Summarization chain of type 'stuff', built over the shared LLM.
chain = load_summarize_chain(llm, chain_type='stuff', verbose=False)

# The transcript is long. Gemini can accept long inputs, but splitting the text
# first is expected to work better, so the single-pass call is left disabled.
# chain.invoke(result)

In [9]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Cut the transcript into chunks of at most 1000 characters, with a
# 20-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=20,
)
texts = text_splitter.split_documents(result)

In [10]:
# Number of chunks produced by the splitter.
len(texts)

13

In [11]:
# Summarization chain of type 'map_reduce', run over the transcript chunks.
chain = load_summarize_chain(llm, chain_type='map_reduce', verbose=False)
# Pass the chunks under the chain's explicit input key instead of relying on
# implicit single-input wrapping; this matches the other summarization call
# in this notebook.
output = chain.invoke({'input_documents': texts})

In [13]:
# Break the summary into sentences so it is easier to read in the output.
output['output_text'].split('. ')

['Langchain can extract transcripts from YouTube videos and pass them to OpenAI for summarization',
 "To handle long transcripts, split them into smaller chunks using the Recursive Character Splitter and use Langchain's map-reduce method to generate summaries",
 "OpenAI's Summarize function summarizes individual documents, and the collective summaries provide an overview of the entire document set",
 'The code demonstrates how to load, split, and summarize YouTube videos using Langchain, YouTube Loader, and SummarizeChain libraries.']

## Using load_qa_chain with retriever

In [14]:
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain.vectorstores.faiss import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_google_vertexai import VertexAIEmbeddings

# Embed the transcript chunks and index them in a FAISS vector store,
# then expose the store as a retriever.
embeddings = VertexAIEmbeddings('textembedding-gecko@001')
vectorstore = FAISS.from_documents(texts, embeddings)
retriever = vectorstore.as_retriever()

template = '''Answer the question based on the following context:
{context}

Question: {question}
'''
prompt = PromptTemplate.from_template(template)

# The retriever returns Document objects. Join their text before filling the
# prompt; otherwise {context} receives the repr of the whole list, including
# metadata that is irrelevant to the question.
chain = (
    {
        "context": retriever | (
            lambda docs: "\n\n".join(doc.page_content for doc in docs)
        ),
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [15]:
# The chain takes the question string directly: it is used both as the
# retriever query and as the {question} slot of the prompt.
question = 'What can I do with langchain? Please explain in a brief paragraph.'
chain.invoke(question)

"LangChain is a tool that helps users understand and process documents. It can generate summaries of documents, including YouTube transcripts, using OpenAI's large language model. LangChain can also split up long documents into smaller chunks and generate summaries of each chunk. Additionally, it can combine summaries of multiple documents into a single summary. This tool can be helpful for quickly getting an overview of the content of a document or for understanding the main points of a video."

## Generating outputs in other languages

In [16]:
from operator import itemgetter

# Retrieval QA prompt with an extra slot for the language of the answer.
template = '''Answer the question based on the following context:
{context}

Question: {question}

Answer in the following language: {language}
'''
prompt = PromptTemplate.from_template(template)

# The chain input is a dict with 'question' and 'language' keys.
# The question is routed to the retriever, and the retrieved Document objects
# are joined into plain text; otherwise {context} receives the repr of the
# whole list, metadata included.
chain = (
    {
        'context': itemgetter('question') | retriever | (
            lambda docs: '\n\n'.join(doc.page_content for doc in docs)
        ),
        'question': itemgetter('question'),
        'language': itemgetter('language'),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [17]:
# English question, answer requested in Japanese.
chain.invoke({'question': 'What can I do with langchain, according to the video?', 'language': 'japanese'})

'Langchainを使用すると、YouTubeの文字起こしをOpenAIに渡すことができます。'

In [18]:
# Another English question with a Japanese answer, this time asking for a single brief sentence.
chain.invoke({'question': 'What can I learn from the video? Please explain in a brief sentence.', 'language': 'japanese'})

'YouTube動画の文字起こしを使って、OpenAIで動画の概要を生成する方法'

In [19]:
# An opinion-style question (asks what the model "thinks") rather than a
# lookup of something stated in the transcript.
chain.invoke({
    'question': 'What do you think the prerequisites of developing the services like the ones offered in this video?',
    'language': 'japanese'
})

' このビデオで提供されているようなサービスを開発するための前提条件として、私は次のように考えています。\n\n* 適切なプログラミング言語とフレームワークの知識\n* 自然言語処理の深い理解\n* 機械学習の基礎的な知識\n* クラウドコンピューティングプラットフォームに関する経験\n* 堅牢でスケーラブルなシステムを設計する能力\n* ユーザーエクスペリエンスの設計に関する知識\n* 関連する規制の遵守と倫理的配慮に関する認識'

## Loading transcripts in other languages

In [20]:
# Second example video, with the transcript requested in Japanese.
# Note: `result` is reassigned here and no longer holds the first video.
loader = YoutubeLoader.from_youtube_url(
    'https://youtu.be/gy0jCRapP34',
    add_video_info=True,
    language=['ja'],  # transcript language code(s) to request
)
result = loader.load()

In [21]:
# Re-chunk with the same splitter; this replaces the chunks of the first video.
texts = text_splitter.split_documents(result)

In [22]:
# Custom summarization prompt that asks for the summary in Japanese.
prompt_template = '''Write a summary of the following text in Japanese:

'{text}'

CONCISE SUMMARY:'''
prompt = PromptTemplate(
    input_variables=['text'],
    template=prompt_template,
)

# 'stuff' summarization chain over the shared LLM, using the custom prompt.
chain = load_summarize_chain(
    llm,
    chain_type="stuff",
    prompt=prompt,
    verbose=False,
)

In [23]:
# Run the summarization over the Japanese transcript chunks.
output = chain.invoke({'input_documents': texts})

In [25]:
# Split on the Japanese full stop for readability; a trailing empty string
# appears when the text ends with '。'.
output['output_text'].split('。')

['この動画は、カニクリームコロッケにカニが必要かどうか検証する内容です',
 '出演者たちが目隠しをしてカニクリームコロッケと普通のクリームコロッケを食べ、どちらかを当てました',
 '結果は、3人がカニクリームコロッケを、2人がクリームコロッケを特定できました',
 'カニクリームコロッケはカニの風味が強いため、特定しやすいことがわかりました',
 '']

In [26]:
from operator import itemgetter

# Rebuild the vector index and retriever over the current `texts`
# (the chunks of the Japanese transcript).
embeddings = VertexAIEmbeddings('textembedding-gecko@001')
vectorstore = FAISS.from_documents(texts, embeddings)
retriever = vectorstore.as_retriever()

# `prompt` was replaced by the summarization prompt earlier in the notebook,
# so the QA prompt is rebuilt here.
template = '''Answer the question based on the following context:
{context}

Question: {question}

Answer in the following language: {language}
'''
prompt = PromptTemplate.from_template(template)

# The chain input is a dict with 'question' and 'language' keys.
# Retrieved Document objects are joined into plain text; otherwise {context}
# receives the repr of the whole list, metadata included.
chain = (
    {
        'context': itemgetter('question') | retriever | (
            lambda docs: '\n\n'.join(doc.page_content for doc in docs)
        ),
        'question': itemgetter('question'),
        'language': itemgetter('language'),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [31]:
# First run of the question; the same question is asked again in later cells
# to compare the answers.
output = chain.invoke({'question': 'What is the humor of this video?', 'language': 'japanese'})

In [32]:
# Show the answer split on the Japanese full stop.
str(output).split('。')

['カニクリームコロッケにカニが入っていても、入っていなくても、味が変わらないことを検証するという企画に対して、池崎が「カニは不要」と主張']

In [34]:
# Second run with the identical question, to check whether the answer is stable.
output = chain.invoke({'question': 'What is the humor of this video?', 'language': 'japanese'})
str(output).split('。')

['カニクリームコロッケのカニの有無を検証するという、一見するとくだらない企画を、サンシャイン池崎の独特なトークやメンバーのリアクションで笑いへと昇華させている点',
 '']

In [35]:
# Third run with the identical question; the three recorded answers differ
# from one another.
output = chain.invoke({'question': 'What is the humor of this video?', 'language': 'japanese'})
str(output).split('。')

['かにクリームコロッケにかに入っていても入っていなくてもわからないかもしれない、という仮説を検証する企画',
 '検証結果、かにが入っていないかにクリームコロッケとかにが入っているかにクリームコロッケを区別することができなかった',
 'この結果から、かにクリームコロッケにはかにを入れる必要がないのではないか、という結論に至る',
 '']

同じ質問を繰り返しても実行のたびに回答が変わり、再現性がない。原因はおそらくLLMの性能以外にある。\
Transcriptが不正確だったり、複数人の会話で誰の発言か分からなかったりすると、LLMは内容を適切に解釈できない。