In [4]:
#imports
from langchain.document_loaders import YoutubeLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.document_loaders import YoutubeLoader
from langchain import OpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import CharacterTextSplitter

from dotenv import load_dotenv
load_dotenv()
import os

In [5]:
OpenAI.api_key = os.getenv("OPENAI_API_KEY")

Code outline:
- user inputs link to youtube video
- transcript is downloaded 
- trascript is indexed with metadata
- summarise transcript
- chain is loaded
- query chain

Download youtube video from url

In [None]:
loader = YoutubeLoader.from_youtube_url("https://www.youtube.com/watch?v=O5xeyoRL95U&ab_channel=LexFridman", add_video_info=False)
index = VectorstoreIndexCreator().from_loaders([loader])

In [7]:
#summarise the video
llm = OpenAI(temperature=0)
loader = YoutubeLoader.from_youtube_url("https://www.youtube.com/watch?v=O5xeyoRL95U&ab_channel=LexFridman", add_video_info=False)
docs = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
split_docs = text_splitter.split_documents(docs)
print(split_docs)
# chain = load_summarize_chain(llm, chain_type="map_reduce")
# chain.run(split_docs)

[Document(page_content='Welcome everyone to 2019. It\'s really good to see everybody here make it in the cold. This is 6.S094 Deep Learning for Self-Driving Cars. It is part of a series of courses on deep\xa0learning\xa0that\xa0we\'re running throughout\xa0this month. The website that you can get\xa0all the content of videos,\xa0the lectures\xa0and the code is deeplearning.mit.edu. The videos and slides will be\xa0made available there along with a github\xa0repository that\'s accompanying the\xa0course. Assignments for registered\xa0students will be emailed\xa0later on in the\xa0week. And you can always contact us with questions, concerns, comments at hcai, human centered AI, at mit.edu. So let\'s start through the basics, the\xa0fundamentals. To summarize in one slide, what is deep learning? It is a way to\xa0extract useful patterns from data in an\xa0automated way with as little human effort\xa0involved as possible hence to automate it. How? The fundamental\xa0aspect that we\'ll talk

InvalidRequestError: This model's maximum context length is 4097 tokens, however you requested 11382 tokens (11126 in your prompt; 256 for the completion). Please reduce your prompt; or completion length.

simple index and qna

In [None]:
loader = YoutubeLoader.from_youtube_url("https://www.youtube.com/watch?v=O5xeyoRL95U&ab_channel=LexFridman", add_video_info=False)
index = VectorstoreIndexCreator().from_loaders([loader])
query = "What did the president say about Ketanji Brown Jackson"
index.query(query)

llm = OpenAI(temperature=0)
loader = YoutubeLoader.from_youtube_url("https://www.youtube.com/watch?v=O5xeyoRL95U&ab_channel=LexFridman", add_video_info=False)
docs = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
split_docs = text_splitter.split_documents(docs)
print(split_docs)
chain = load_summarize_chain(llm, chain_type="map_reduce")
chain.run(split_docs)

Cerebrium example

In [None]:
import os
from datetime import datetime

import faiss
import pytube
import whisper
from langchain.chains import VectorDBQAWithSourcesChain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import CerebriumAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.faiss import FAISS
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer

model = whisper.load_model("small")
sentenceTransformer = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
os.environ["CEREBRIUMAI_API_KEY"] = "c_api_key-xxx"


class Item(BaseModel):
    url: str
    question: str


def store_segments(segments):
    texts = []
    start_times = []

    for segment in segments:
        text = segment["text"]
        start = segment["start"]

        # Convert the starting time to a datetime object
        start_datetime = datetime.fromtimestamp(start)

        # Format the starting time as a string in the format "00:00:00"
        formatted_start_time = start_datetime.strftime("%H:%M:%S")

        texts.append("".join(text))
        start_times.append(formatted_start_time)

    return texts, start_times


def create_embeddings(texts, start_times):
    text_splitter = CharacterTextSplitter(chunk_size=1500, separator="\n")
    docs = []
    metadatas = []
    for i, d in enumerate(texts):
        splits = text_splitter.split_text(d)
        docs.extend(splits)
        metadatas.extend([{"source": start_times[i]}] * len(splits))
    return metadatas, docs


def predict(item, run_id, logger):
    item = Item(**item)

    video = pytube.YouTube(item.url)
    video.streams.get_highest_resolution().filesize
    audio = video.streams.get_audio_only()
    fn = audio.download(output_path="/models/content/", filename= f"{video.title}.mp4")

    transcription = model.transcribe(f"/models/content/{video.title}.mp4")
    res = transcription["segments"]

    texts, start_times = store_segments(res)

    metadatas, docs = create_embeddings(texts, start_times)
    embeddings = HuggingFaceEmbeddings()
    store = FAISS.from_texts(docs, embeddings, metadatas=metadatas)
    faiss.write_index(store.index, "docs.index")
    llm = CerebriumAI(
        endpoint_url="https://run.cerebrium.ai/flan-t5-xl-webhook/predict"
    )
    chain = VectorDBQAWithSourcesChain.from_llm(llm=llm, vectorstore=store)

    result = chain({"question": item.question})

    return {"result": result}