In [1]:
pip install pytube langchain llama-index qdrant-client sentence-transformers gradio


Collecting pytube
  Downloading pytube-15.0.0-py3-none-any.whl.metadata (5.0 kB)
Collecting langchain
  Downloading langchain-0.2.15-py3-none-any.whl.metadata (7.1 kB)
Collecting llama-index
  Downloading llama_index-0.11.4-py3-none-any.whl.metadata (11 kB)
Collecting qdrant-client
  Downloading qdrant_client-1.11.1-py3-none-any.whl.metadata (10 kB)
Collecting sentence-transformers
  Downloading sentence_transformers-3.0.1-py3-none-any.whl.metadata (10 kB)
Collecting gradio
  Downloading gradio-4.42.0-py3-none-any.whl.metadata (15 kB)
Collecting langchain-core<0.3.0,>=0.2.35 (from langchain)
  Downloading langchain_core-0.2.37-py3-none-any.whl.metadata (6.2 kB)
Collecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain)
  Downloading langchain_text_splitters-0.2.2-py3-none-any.whl.metadata (2.1 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain)
  Downloading langsmith-0.1.108-py3-none-any.whl.metadata (13 kB)
Collecting tenacity!=8.4.0,<9.0.0,>=8.1.0 (from langchain)
  Dow

In [2]:
pip install -U langchain-community

Collecting langchain-community
  Downloading langchain_community-0.2.15-py3-none-any.whl.metadata (2.7 kB)
Downloading langchain_community-0.2.15-py3-none-any.whl (2.3 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.3/2.3 MB 9.0 MB/s eta 0:00:00
Installing collected packages: langchain-community
Successfully installed langchain-community-0.2.15


In [3]:
import gradio as gr
from langchain.chains import VectorDBQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.vectorstores import Qdrant
from pytube import YouTube
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams

# Qdrant connection settings. QDRANT_URL targets a local server; replace it with
# your Qdrant Cloud URL to use a hosted cluster instead.
QDRANT_URL = "http://localhost:6333"
collection_name = "youtube_transcripts"

# Single shared client, reused by every function below.
qdrant_client = QdrantClient(url=QDRANT_URL)

# Function to get YouTube transcript
def get_youtube_transcript(url, language_codes=("en", "a.en")):
    """Fetch a video's captions as SRT-formatted text.

    Args:
        url: URL of the YouTube video.
        language_codes: Caption track codes to try, in order of preference.
            The default tries manually authored English first and then
            "a.en", the code pytube uses for auto-generated English captions,
            so videos that only have automatic captions still yield a transcript.

    Returns:
        The first matching caption track rendered by ``generate_srt_captions()``,
        or ``None`` when the video has none of the requested tracks.
    """
    yt = YouTube(url)
    available_tracks = yt.captions

    # The caption collection behaves like a mapping keyed by language code;
    # .get() avoids the deprecated get_by_language_code() helper.
    for code in language_codes:
        track = available_tracks.get(code)
        if track:
            return track.generate_srt_captions()
    return None

# Function to set up vector store with Qdrant
def setup_qdrant_store(transcript):
    """Index a transcript in Qdrant and return the LangChain vector store.

    The transcript is split on blank lines (the separator between SRT cues),
    each non-empty chunk is embedded with all-MiniLM-L6-v2, and the chunks are
    written to ``collection_name`` through the shared ``qdrant_client``.

    The collection is dropped and recreated on every call so that chunks from
    previously indexed videos cannot leak into the answers for this one.

    Args:
        transcript: Caption text, with entries separated by blank lines.

    Returns:
        A ``Qdrant`` vector store bound to the freshly populated collection.

    Raises:
        ValueError: If the transcript contains no non-blank chunk.
    """
    chunks = [piece.strip() for piece in transcript.split('\n\n') if piece.strip()]
    if not chunks:
        raise ValueError("Transcript contains no text to index.")

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

    # Start from an empty collection sized for this embedding model. The vector
    # size is measured from a real embedding rather than hard-coded.
    if qdrant_client.collection_exists(collection_name):
        qdrant_client.delete_collection(collection_name)
    vector_size = len(embeddings.embed_query(chunks[0]))
    qdrant_client.create_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE),
    )

    # Bind the store to the existing client explicitly; the from_documents /
    # from_texts factories build their own client from connection parameters
    # and do not accept an already constructed one.
    vector_store = Qdrant(
        client=qdrant_client,
        collection_name=collection_name,
        embeddings=embeddings,
    )
    vector_store.add_texts(chunks, metadatas=[{"source": "YouTube"} for _ in chunks])

    return vector_store

# QA System using LangChain and Hugging Face Model
def qa_system(youtube_url, question):
    """Answer a question about a YouTube video from its captions.

    Fetches the video's transcript, indexes it in Qdrant, and runs a
    retrieval QA chain backed by the ``facebook/bart-large-cnn`` model on the
    Hugging Face Hub.

    Args:
        youtube_url: URL of the video to query.
        question: Natural-language question about the video.

    Returns:
        The chain's answer as a string, or a short explanatory message when an
        input is blank or the video has no usable transcript.
    """
    # Gradio passes empty strings for untouched text boxes; fail fast with a
    # readable message instead of an exception from a downstream library.
    if not (youtube_url and youtube_url.strip()) or not (question and question.strip()):
        return "Please provide both a YouTube URL and a question."

    transcript = get_youtube_transcript(youtube_url)
    if not transcript:
        return "Transcript not available."

    vector_store = setup_qdrant_store(transcript)
    # NOTE(review): HuggingFaceHub presumably needs HUGGINGFACEHUB_API_TOKEN in
    # the environment - confirm before deploying.
    llm = HuggingFaceHub(repo_id="facebook/bart-large-cnn")

    # The chain must be assembled via from_chain_type: it wraps the LLM in the
    # document-combining chain that the bare constructor requires but cannot
    # build from an `llm` argument.
    qa_chain = VectorDBQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        vectorstore=vector_store,
    )
    return qa_chain.run(question)

# Web UI: two free-text inputs (video URL, question) feeding qa_system,
# with the answer rendered as plain text.
iface = gr.Interface(
    fn=qa_system,
    inputs=["text", "text"],
    outputs="text",
)

# Start the Gradio server for the interface defined above.
iface.launch()


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://b69a04c7cda6e49e66.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


