In [1]:
import textwrap

from dotenv import find_dotenv, load_dotenv
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import YoutubeLoader
# from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings # updated code
# Kept last on purpose: this rebinds ChatOpenAI to the langchain_openai class,
# replacing the legacy langchain.chat_models binding imported above.
from langchain_openai import ChatOpenAI


# Populate os.environ from the nearest .env file found by find_dotenv().
# NOTE(review): presumably this is where the OpenAI API key comes from — confirm.
load_dotenv(dotenv_path=find_dotenv())

# Shared embedding model; create_db_from_youtube_video_url reads this global.
embeddings = OpenAIEmbeddings()

In [2]:
def create_db_from_youtube_video_url(video_url, chunk_size=2000, chunk_overlap=100):
    """Build a FAISS vector store from the transcript of a YouTube video.

    Args:
        video_url: URL of the video whose transcript is loaded.
        chunk_size: Chunk size handed to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: Overlap between consecutive chunks handed to the splitter.

    Returns:
        The FAISS store built from the transcript chunks, embedded with the
        module-level ``embeddings`` object.

    Raises:
        ValueError: If loading and splitting the transcript yields no chunks.
    """
    loader = YoutubeLoader.from_youtube_url(video_url)
    transcript = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    docs = text_splitter.split_documents(transcript)

    # Stop early with an actionable message instead of handing an empty list to
    # the index builder.
    # NOTE(review): the loader is expected to return no documents when a video
    # has no transcript, and FAISS.from_documents to fail opaquely on [] — confirm.
    if not docs:
        raise ValueError(f"No transcript text could be loaded for {video_url!r}")

    db = FAISS.from_documents(docs, embeddings)
    return db

In [11]:
# Index the transcript of the video that the later query cells ask about.
video_url = "https://www.youtube.com/watch?v=L_Guz73e6fw"
db = create_db_from_youtube_video_url(video_url=video_url)

In [12]:
def get_response_from_query(db, query, k=4, model_name="gpt-3.5-turbo-16k", temperature=0.2):
    """Answer a question about a video from its most relevant transcript chunks.

    Args:
        db: Vector store exposing ``similarity_search(query, k=...)``; each
            returned item must have a ``page_content`` attribute.
        query: The question to answer.
        k: Number of chunks to retrieve and place in the prompt.
        model_name: Model name passed to ``ChatOpenAI``.
        temperature: Temperature passed to ``ChatOpenAI``.

    Returns:
        A ``(response, docs)`` tuple: the answer as a single line of text, and
        the retrieved chunks it was based on.
    """
    docs = db.similarity_search(query, k=k)
    docs_page_content = " ".join(d.page_content for d in docs)

    chat = ChatOpenAI(model_name=model_name, temperature=temperature)

    # Template to use for the system message prompt
    template = """
        You are a helpful assistant that that can answer questions about youtube videos 
        based on the video's transcript: {docs}
        
        Only use the factual information from the transcript to answer the question.
        
        If you feel like you don't have enough information to answer the question, say "I don't know".
        
        """

    system_message_prompt = SystemMessagePromptTemplate.from_template(template)

    # Human question prompt
    human_template = "Answer the following question: {question}"
    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

    chat_prompt = ChatPromptTemplate.from_messages(
        [system_message_prompt, human_message_prompt]
    )

    chain = LLMChain(llm=chat, prompt=chat_prompt)

    response = chain.run(question=query, docs=docs_page_content)
    # Flatten to one line by collapsing each whitespace run (newlines included)
    # into a single space. Deleting the newlines outright would glue the last
    # word of one line onto the first word of the next.
    response = " ".join(response.split())
    return response, docs

In [13]:
# First question: ask for an overall summary of the video.
# `docs` keeps the retrieved transcript chunks available for inspection.
query = "what is this video about?"
response, docs = get_response_from_query(db, query)
print(textwrap.fill(response, width=50))

Based on the transcript, the video seems to be
discussing various topics related to technological
advancements, social divisions, human
civilization, the creation of Wikipedia and Google
search, the potential of GPT (a language model),
advice for young people, consciousness, economic
instability, and the potential impact of AGI
(Artificial General Intelligence).


In [14]:
# Ask who hosts the episode and print the answer wrapped to 50 columns.
query = "who was the host of this podcast?"
response, docs = get_response_from_query(db=db, query=query)
print(textwrap.fill(text=response, width=50))

The host of this podcast is Lex Fridman.


In [15]:
# Ask what was said about AGI and print the answer wrapped to 50 columns.
query = "what did talk about on AGI?"
response, docs = get_response_from_query(db=db, query=query)
print(textwrap.fill(text=response, width=50))

In the transcript, they discussed the initial
skepticism and mockery they faced when they
announced their intention to work on AGI. They
also mentioned the importance of starting to
deploy AGI systems early while they are still
weak, to allow for adaptation and preparation.
They expressed concerns about the potential
dangers and power of AGI, as well as the need for
conversations about power, companies,
institutions, and political systems that deploy
and balance this power. They also mentioned the
potential for AGI to bring positive changes and
improve life.


In [16]:
# Ask which topic takes up the most time and print the wrapped answer.
query = "On what topic they spend most time?"
response, docs = get_response_from_query(db=db, query=query)
print(textwrap.fill(text=response, width=50))

Based on the transcript, it seems that they spend
the most time discussing the topic of artificial
intelligence and its impact on various aspects of
society, such as ethics, job displacement, and
programming.


In [17]:
# Ask who the speakers are and print the answer wrapped to 50 columns.
query = "who are the speakers in this video?"
response, docs = get_response_from_query(db=db, query=query)
print(textwrap.fill(text=response, width=50))

The speakers in this video are Sam Altman and the
assistant.


In [18]:
# Ask what is said about Microsoft and print the answer wrapped to 50 columns.
query = "what are they saying about Microsoft?"
response, docs = get_response_from_query(db=db, query=query)
print(textwrap.fill(text=response, width=50))

They are saying that Microsoft has been an amazing
partner to them and that Satya Nadella, the CEO of
Microsoft, has been clear, firm, and effective in
transforming the company into a fresh, innovative,
and developer-friendly company. They also mention
that Microsoft understood their needs and the
control provisions they required, which other
companies at that scale may not have understood.
Overall, they have a positive view of working with
Microsoft.


In [19]:
# Record the interpreter version this notebook was run with.
import sys
print(sys.version)

3.11.7 | packaged by Anaconda, Inc. | (main, Dec 15 2023, 18:05:47) [MSC v.1916 64 bit (AMD64)]
