In [1]:
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEndpointEmbeddings
from dotenv import load_dotenv
import os

from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import TranscriptsDisabled
from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate

In [38]:
# Load the Hugging Face API token from the local .env file.
load_dotenv()
hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")

# Report only whether a token was found -- never print the secret itself,
# since notebook outputs are saved with the file.
# A missing or invalid token otherwise only shows up later as an opaque
# "401 Client Error: Unauthorized ... Invalid credentials" from the endpoint.
if not hf_token:
    print("Warning: HUGGINGFACEHUB_API_TOKEN is not set; check your .env file.")

# Text-generation endpoint for a model hosted on the Hugging Face Hub.
llm = HuggingFaceEndpoint(
    repo_id="Qwen/Qwen3-Coder-480B-A35B-Instruct",  # replace with your desired model
    task="text-generation",
    huggingfacehub_api_token=hf_token,
    temperature=0,
)

# Wrap the endpoint in the chat-model interface used by the rest of the notebook.
model = ChatHuggingFace(llm=llm)

# Smoke test: the cell's output is the model's reply to a trivial question.
model.invoke("Capital of India")

HfHubHTTPError: 401 Client Error: Unauthorized for url: https://router.huggingface.co/together/v1/chat/completions (Request ID: Root=1-68b6cfc8-40e2b4d32a77c51546ac80db;2b9103ea-623b-4f7c-8c62-599ecce52f80)

Invalid credentials in Authorization header

In [None]:
# Standalone credential check that talks to the Hub through huggingface_hub
# directly, bypassing the LangChain wrappers.
import os

from dotenv import load_dotenv
from huggingface_hub import InferenceClient

# Read the API token from the .env file
load_dotenv()
hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")

# Show only whether a token was found, never its value
print("Loaded token:", "OK" if hf_token is not None else "None")

# Inference client bound to a small model, used here just for a quick check
client = InferenceClient(token=hf_token, model="google/flan-t5-small")

# Send a single short prompt and print whatever text comes back
resp = client.text_generation("What is the capital of India?", max_new_tokens=50)
print(resp)


## Step 1a - Indexing (Document Ingestion)

In [3]:
# NoTranscriptFound and VideoUnavailable must be imported as well: the
# `except` clauses below name them, and an un-imported name there raises
# NameError as soon as any exception other than TranscriptsDisabled occurs.
from youtube_transcript_api import (
    NoTranscriptFound,
    TranscriptsDisabled,
    VideoUnavailable,
    YouTubeTranscriptApi,
)

video_id = "raTbhtKZTZA" # only the ID, not full URL

try:
    ytt_api = YouTubeTranscriptApi()
    # Request the English transcript for the video.
    fetched = ytt_api.fetch(video_id, languages=["en"])
    # Each raw entry is indexed by "text" below; join the snippets into one string.
    raw_transcript = fetched.to_raw_data()
    transcript = " ".join(chunk["text"] for chunk in raw_transcript)
    print(transcript)

except TranscriptsDisabled:
    print("No captions available for this video.")
except NoTranscriptFound:
    print("No transcript found in the requested language.")
except VideoUnavailable:
    print("The video is unavailable.")
except Exception as e:
    # Last-resort handler so the cell reports the problem instead of a traceback.
    # NOTE: on any failure `transcript` stays undefined, so later cells will fail.
    print("An unexpected error occurred:", str(e))


OpenAI recently dropped GPT OSS, its first open weights model since GPT2 in 2019. It's one of the highest profile open source model launches since DeepSeek R1 made waves back in January. But how does GPT OSS compared to the other top open source models out there architecturally? Let's find out. [Music] GPT OSS is one of OpenAI's most anticipated recent launches. a large fully open weights model from one of the leading American AI labs. Let's take a closer look at the paper to find out how it was actually engineered and trained. GPT OSS is a mixture of experts model available in two sizes, 120 billion parameters and 20 billion parameters. Each token activates the top four experts, meaning only a portion of the total parameters are used at any given time. This allows for efficient inference without sacrificing the benefits of a larger model. Trained as a decoder only transformer, GPTOSS incorporates plenty of features typical to modern LLMs. This includes grouped query attention, a modif

## Step 1b - Indexing (Text Splitting)

In [4]:
# Break the transcript into overlapping pieces: chunk_size=400 with
# chunk_overlap=80 shared between neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=400,
    chunk_overlap=80,
)

# Passing a one-element list yields the chunk documents for this single transcript.
chunks = splitter.create_documents(texts=[transcript])

In [7]:
# Number of chunks the splitter produced from the transcript.
len(chunks)

46

## Step 1c & 1d - Indexing (Embedding Generation and Storing in Vector Store)

In [5]:
# Embedding client for the sentence-transformers MiniLM model, authenticated
# with the same token loaded earlier in the notebook.
embeddings = HuggingFaceEndpointEmbeddings(
    huggingfacehub_api_token=hf_token,
    repo_id="sentence-transformers/all-MiniLM-L6-v2",
)

In [6]:
# Embed every chunk and build the FAISS index from the results.
vector_store = FAISS.from_documents(
    documents=chunks,
    embedding=embeddings,
)

HfHubHTTPError: 401 Client Error: Unauthorized for url: https://huggingface.co/api/models/sentence-transformers/all-MiniLM-L6-v2?expand=inferenceProviderMapping (Request ID: Root=1-68b69a36-3d7defab360a6ce25be0b320;94f6d8ab-4f5e-42e5-a660-fd4cb0ffe088)

Invalid credentials in Authorization header