# Query the YouTube video transcripts, returning timestamps as sources

In [None]:
# First set runtime to GPU

In [42]:
!pip install pytube # For audio downloading from youtube
!pip install chromadb

In [None]:
pip install git+https://github.com/openai/whisper.git -q # Whisper from OpenAI transcription model

In [5]:
import whisper
import pytube

In [None]:
url = "https://www.youtube.com/watch?v=XPmEJFKVOuI&ab_channel=TheAIGRID"
video = pytube.YouTube(url)

In [None]:
audio_stream = video.streams.filter(only_audio=True).first()
audio_stream.download(output_path='.', filename='tmp.mp3')

In [None]:
model = whisper.load_model("base")

In [14]:
transcription = model.transcribe('./tmp.mp3')

In [15]:
res = transcription['segments']

In [16]:
print(res)

[{'id': 0, 'seek': 0, 'start': 0.0, 'end': 5.5200000000000005, 'text': ' Sam Altman recently had an interview at Howard University where he actually spoke about a variety', 'tokens': [50364, 4832, 15992, 1601, 3938, 632, 364, 4049, 412, 17626, 3535, 689, 415, 767, 7179, 466, 257, 5673, 50640], 'temperature': 0.0, 'avg_logprob': -0.1818267822265625, 'compression_ratio': 1.6750902527075813, 'no_speech_prob': 0.038991112262010574}, {'id': 1, 'seek': 0, 'start': 5.5200000000000005, 'end': 10.56, 'text': ' of interesting topics. There was a lot he discussed that actually gives us an insight to things', 'tokens': [50640, 295, 1880, 8378, 13, 821, 390, 257, 688, 415, 7152, 300, 767, 2709, 505, 364, 11269, 281, 721, 50892], 'temperature': 0.0, 'avg_logprob': -0.1818267822265625, 'compression_ratio': 1.6750902527075813, 'no_speech_prob': 0.038991112262010574}, {'id': 2, 'seek': 0, 'start': 10.56, 'end': 15.92, 'text': ' like education, the role of AI in the future, and of course artificial gene

In [17]:
from datetime import datetime

def store_segments(segments):
  texts = []
  start_times = []

  for segment in segments:
    text = segment['text']
    start = segment['start']

    # Convert the starting time to a datetime object
    start_datetime = datetime.fromtimestamp(start)

    # Format the starting time as a string in the format "00:00:00"
    formatted_start_time = start_datetime.strftime('%H:%M:%S')

    texts.append("".join(text))
    start_times.append(formatted_start_time)

  return texts, start_times

In [None]:
store_segments(res)

In [19]:
texts, start_times = store_segments(res)

In [None]:
print(texts,start_times)

In [21]:
!pip install langchain

Installing collected packages: packaging, mypy-extensions, jsonpointer, typing-inspect, marshmallow, jsonpatch, langsmith, dataclasses-json, langchain-core, langchain-text-splitters, langchain-community, langchain
  Attempting uninstall: packaging
    Found existing installation: packaging 24.0
    Uninstalling packaging-24.0:
      Successfully uninstalled packaging-24.0
Successfully installed dataclasses-json-0.6.4 jsonpatch-1.33 jsonpointer-2.4 langchain-0.1.16 langchain-community-0.0.33 langchain-core-0.1.43 langchain-text-splitters-0.0.1 langsmith-0.1.48 marshmallow-3.21.1 mypy-extensions-1.0.0 packaging-23.2 typing-inspect-0.9.0


In [22]:
!pip install openai

Installing collected packages: httpcore, httpx, openai
Successfully installed httpcore-1.0.5 httpx-0.27.0 openai-1.19.0


In [23]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain import OpenAI
import openai
from langchain.vectorstores import Chroma


***Dont forget to put open ai key***

In [24]:
OPENAI_API_KEY = ""

In [26]:
from langchain.docstore.document import Document

text_splitter = CharacterTextSplitter(chunk_size=1500, separator="\n")
docs = []
metadatas = []
for i, d in enumerate(texts):
    splits = text_splitter.split_text(d)
    doc = Document(
          page_content=splits[0],
          metadata={"source": start_times[i]}
      )
    docs.append(doc)
embeddings = OpenAIEmbeddings(api_key=OPENAI_API_KEY)

In [27]:
print((docs))

[Document(page_content='Sam Altman recently had an interview at Howard University where he actually spoke about a variety', metadata={'source': '00:00:00'}), Document(page_content='of interesting topics. There was a lot he discussed that actually gives us an insight to things', metadata={'source': '00:00:05'}), Document(page_content='like education, the role of AI in the future, and of course artificial general intelligence,', metadata={'source': '00:00:10'}), Document(page_content="which kind of gives us a gauge on where he's at in terms of what he's thinking. Now,", metadata={'source': '00:00:15'}), Document(page_content='the initial interview was actually done back in January, but it was only just release, which', metadata={'source': '00:00:20'}), Document(page_content="means that this interview is from literally four months ago. So that's of course something to", metadata={'source': '00:00:25'}), Document(page_content="keep in mind. But nevertheless, let's take a look at the first 

In [28]:
vectorstore = Chroma.from_documents(docs, embeddings)


In [None]:
vectorstore.similarity_search("agi")

In [30]:
from langchain.chains import VectorDBQAWithSourcesChain


In [None]:
chain = VectorDBQAWithSourcesChain.from_llm(llm=OpenAI(temperature=0, api_key=OPENAI_API_KEY), vectorstore=vectorstore)

In [39]:
result = chain.invoke({"question": "what is agi and when will it be achieved?"})
# result = chain.invoke("what is agi and when will it be achieved?")

In [40]:
print(f"Answer: {result['answer']}  Sources: {result['sources']}")

Answer:  AGI stands for Artificial General Intelligence and it is a type of artificial intelligence that is capable of performing any intellectual task that a human being can. It is often seen as the ultimate goal of AI research. As for when it will be achieved, it is difficult to say for certain. Some experts believe it could be achieved by the end of this decade, while others believe it may take much longer.
  Sources: 00:24:32, 00:20:57, 00:04:01, 00:23:10
