# YouTube Video Assistant

---

## 1. Importing the required libraries

In [1]:
import getpass
import os
import textwrap

from langchain.embeddings import OpenAIEmbeddings
from langchain.document_loaders import YoutubeLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

## 2. Setting Up the API Key

In [5]:
# Setting up the API token WITHOUT hardcoding it: a key written into a notebook is
# exposed to everyone who can read the file or its history.
# NOTE: any key that was ever committed in this cell should be treated as
# compromised and revoked.
#
# Lookup order:
#   1. the OPENAI_API_KEY environment variable,
#   2. the API_KEY environment variable (the name this notebook exports below),
#   3. an interactive prompt whose input is not echoed.
API_KEY = (
    os.environ.get("OPENAI_API_KEY")
    or os.environ.get("API_KEY")
    or getpass.getpass("Enter your OpenAI API key: ")
).strip()

# fail here with a clear message instead of later inside an API call
if not API_KEY:
    raise ValueError("An OpenAI API key is required to run this notebook.")

# keep exporting the key under the same variable name as before
os.environ["API_KEY"] = API_KEY

## 3. Building the YouTube Video Assistant

In [6]:
def create_db_from_video_url(video_url, api_key=None):
    """
    Builds a FAISS vector store from the transcript of a YouTube video so that
    the transcript can be searched by similarity.

    Args:
        video_url: URL of the YouTube video whose transcript is loaded.
        api_key: OpenAI API key used for the embeddings. When omitted (None),
            the notebook-level `API_KEY` is used, which is what this function
            did before the parameter existed.

    Returns:
        The FAISS store created from the split transcript documents.
    """
    # fall back to the notebook-level key so existing one-argument calls keep working
    if api_key is None:
        api_key = API_KEY

    embeddings = OpenAIEmbeddings(openai_api_key=api_key)

    loader = YoutubeLoader.from_youtube_url(video_url)
    transcripts = loader.load()

    # the transcript cannot be provided to the model directly, so it is split
    # into small chunks (1000 characters, overlapping by 100)
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    docs = text_splitter.split_documents(transcripts)

    db = FAISS.from_documents(docs, embedding=embeddings)

    return db

In [7]:
# build a searchable store for a sample video and display the resulting object
db = create_db_from_video_url(video_url="https://www.youtube.com/watch?v=C82lT9cWQiA")
db

<langchain.vectorstores.faiss.FAISS at 0x21549868760>

In [8]:
def get_response(api_key, db, request, k=4):
    """
    Answers a question about a video with gpt-3.5-turbo, using transcript
    chunks retrieved from `db` as the only context.

    Args:
        api_key: OpenAI API key passed to the chat model.
        db: vector store queried through `similarity_search(query=..., k=...)`.
        request: the question to answer.
        k: number of transcript chunks to retrieve (default 4).

    Returns:
        A `(response, docs)` tuple: the output of the chain and the retrieved
        transcript chunks that were given to the model.
    """
    # retrieve the chunks closest to the question and merge them into one string
    matches = db.similarity_search(query=request, k=k)
    transcript_context = " ".join(match.page_content for match in matches)

    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.2, openai_api_key=api_key)

    # system prompt: the retrieved transcript text is injected through {docs}
    system_template = """
    You are an assistant that can answer questions about youtube videos based on
    video transcripts: {docs}

    Only use factual information from the transcript to answer the question.

    If you don't have enough information to answer the question, say "I don't know".

    Your Answers should be detailed.
    """

    # human prompt: carries the user's question through {question}
    question_template = "Answer the following questions: {question}"

    prompt = ChatPromptTemplate.from_messages(
        [
            SystemMessagePromptTemplate.from_template(system_template),
            HumanMessagePromptTemplate.from_template(question_template),
        ]
    )

    answer = LLMChain(llm=llm, prompt=prompt).run(
        docs=transcript_context,
        question=request,
    )

    return answer, matches

In [9]:
# index the video to ask about
video_url = "https://www.youtube.com/watch?v=Q8d4FGWZDbE"
db = create_db_from_video_url(video_url)

# ask a question; `docs` holds the transcript chunks used as context
request = "What is he talking about?"
response, docs = get_response(api_key=API_KEY, db=db, request=request)

In [10]:
# wrap the answer at 100 characters and print one wrapped line per row
print(*textwrap.wrap(response, width=100), sep="\n")

The speaker is giving a review of the Xiaomi 13 Ultra phone's camera capabilities, both front and
rear-facing, based on his experience using it. He discusses the phone's ability to capture natural
skin tones, its limitations in terms of video quality and resolution, and its impressive rear camera
capabilities, including the ability to capture more light, shoot faster, and create more background
blur. He also compares the phone's night mode to that of the iPhone and concludes that the Xiaomi 13
Ultra is superior in this regard.


In [11]:
# transcript chunks returned by get_response, i.e. the context retrieved for the question
docs

[Document(page_content="pixel have done with their super deep natural skin tones and while this is somewhat rescued by the powerful image processing when you're snapping photos honestly the video is pretty trash I mean look at this room over here it's just it's just a white look plus it's limited to 1080p which I just don't understand like genuinely if there's anyone that showing me watching this video please tell me why this phone your 2023 Pinnacle Flagship Ultra phone cannot record 4k on its front camera it's not a tech limitation phone's been able to do this for years now surely it's not a cost concern they've kitted out every single other camera on the phone all the way to 8K and it can't be anything to do with the size of the cameras either because Samsung does 4K just fine with one that actually takes up less space the only thing I can think is like maybe people feel like 4K might over represent skin imperfections but honestly how your skin looks as much more down to the softwar

## 4. Creating a Gradio Web App

In [16]:
import gradio, langchain, openai, faiss

In [18]:
# installed versions of the libraries, in the order: gradio, langchain, openai, faiss
gradio.__version__, langchain.__version__, openai.__version__, faiss.__version__

('3.28.1', '0.0.157', '0.27.6', '1.7.4')

In [19]:
%%writefile requirements.txt
gradio==3.28.1
langchain==0.0.157
openai==0.27.6
faiss-cpu==1.7.4
youtube_transcript_api

Overwriting requirements.txt


In [15]:
%%writefile app.py
import gradio as gr

from langchain.embeddings import OpenAIEmbeddings
from langchain.document_loaders import YoutubeLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

def create_db_from_video_url(video_url, api_key):
    """
    Builds a FAISS vector store from a YouTube video's transcript so the
    transcript can be queried by similarity search.

    Args:
        video_url: URL of the YouTube video whose transcript is loaded.
        api_key: OpenAI API key used to create the embeddings.

    Returns:
        The FAISS store created from the split transcript documents.
    """
    embedder = OpenAIEmbeddings(openai_api_key=api_key)

    transcript_docs = YoutubeLoader.from_youtube_url(video_url).load()

    # the full transcript is not sent to the model directly; it is cut into
    # 1000-character chunks that overlap by 100 characters
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = splitter.split_documents(transcript_docs)

    return FAISS.from_documents(chunks, embedding=embedder)

def get_response(api_key, video_url, request):
    """
    Answers a question about a YouTube video using gpt-3.5-turbo.

    The video's transcript is indexed, the 4 chunks most similar to the question
    are retrieved, and the model is asked to answer from those chunks only.

    Args:
        api_key: OpenAI API key entered by the user.
        video_url: URL of the YouTube video to ask about.
        request: the user's question.

    Returns:
        The model's answer as a string. If any input is missing or blank, a
        short message naming the missing input is returned instead and no
        transcript or OpenAI request is made.
    """
    # text inputs may arrive blank, as None, or padded with whitespace from
    # copy-paste; normalise them before they reach any API
    api_key = (api_key or "").strip()
    video_url = (video_url or "").strip()
    request = (request or "").strip()

    # validate before doing any network work so the user gets a clear message
    if not api_key:
        return "Please enter your OpenAI API key."
    if not video_url:
        return "Please enter a YouTube video URL."
    if not request:
        return "Please enter a question about the video."

    db = create_db_from_video_url(video_url, api_key)

    docs = db.similarity_search(query=request, k=4)
    docs_content = " ".join([doc.page_content for doc in docs])

    chat = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.2, openai_api_key=api_key)

    # creating a template for request; the retrieved transcript text fills {docs}
    template = """
    You are an assistant that can answer questions about youtube videos based on
    video transcripts: {docs}

    Only use factual information from the transcript to answer the question.

    If you don't have enough information to answer the question, say "I don't know".

    Your Answers should be detailed.
    """

    system_msg_prompt = SystemMessagePromptTemplate.from_template(template)

    # human prompt; the user's question fills {question}
    human_template = "Answer the following questions: {question}"
    human_msg_prompt = HumanMessagePromptTemplate.from_template(human_template)

    chat_prompt = ChatPromptTemplate.from_messages(
        [system_msg_prompt, human_msg_prompt]
    )

    chain = LLMChain(llm=chat, prompt=chat_prompt)

    response = chain.run(question=request, docs=docs_content)

    return response

# creating title, description for the web app
title = "YouTube Video Assistant 🧑‍💻"
description = "Answers to the Questions asked by the user on the specified YouTube video. (English Only)"
article = "Other Projects:\n"\
"💰 [Health Insurance Predictor](http://health-insurance-cost-predictor-k19.streamlit.app/)\n"\
"📰 [Fake News Detector](https://fake-news-detector-k19.streamlit.app/)"
# building the app: the three inputs are passed to get_response in this order
# (API key, video URL, question) and its return value fills the output text area
youtube_video_assistant = gr.Interface(
    fn=get_response,
    inputs=[
        # type="password" masks the key on screen; the placeholder shows only the
        # general shape of a key and no characters taken from a real one
        gr.Text(label="Enter the OpenAI API Key:", type="password", placeholder=f"Example: sk-{'*' * 48}"),
        gr.Text(label="Enter the Youtube Video URL:", placeholder="Example: https://www.youtube.com/watch?v=MnDudvCyWpc"),
        gr.Text(label="Enter your Question", placeholder="Example: What's the video is about?"),
    ],
    outputs=gr.TextArea(label="Answers using gpt-3.5-turbo:"),
    title=title,
    description=description,
    article=article
)

# launching the web app
youtube_video_assistant.launch()

Overwriting app.py


In [13]:
# show the deployed app inside the notebook as an 800x600 inline frame
from IPython.display import IFrame

IFrame("https://kathir0011-youtube-video-assistant.hf.space", 800, 600)