This code was written manually to test the components of the chatbot.py file.
For full use of the project, refer to the chatbot.py file.

In [27]:
import os
from urllib.parse import parse_qs, urlsplit

import streamlit
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda, RunnableParallel, RunnablePassthrough
from langchain_huggingface import ChatHuggingFace, HuggingFaceEmbeddings, HuggingFaceEndpoint
from youtube_transcript_api import TranscriptsDisabled, YouTubeTranscriptApi


# Load variables from a local .env file (if one exists) before reading the
# token. load_dotenv was imported but never called, so a token stored only in
# .env was never visible to os.getenv.
load_dotenv()
HUGGINGFACE_API_TOKEN = os.getenv('HUGGINGFACE_API_TOKEN')


In [78]:
# Sample video used throughout this notebook.
url = "https://www.youtube.com/watch?v=uahX_JSdA2Q"


def extract_video_id(url):
    """Return the video id contained in a YouTube URL.

    Two URL shapes are recognised:

    * any URL whose query string has a non-empty ``v`` parameter
      (``https://www.youtube.com/watch?v=<id>&t=10``), and
    * short links on the ``youtu.be`` host (``https://youtu.be/<id>?t=10``).

    The URL is parsed instead of searched for the substring ``"v="``, so
    parameters that merely end in ``v`` (``rev=``, ``iv=``), a trailing
    ``#fragment`` and an empty ``v=`` no longer produce a wrong id.

    Args:
        url: The URL as a string, with or without a scheme.

    Returns:
        The video id string.

    Raises:
        ValueError: If ``url`` is not a string or no video id can be found.
    """
    if not isinstance(url, str):
        raise ValueError("Invalid YouTube URL")

    cleaned = url.strip()
    # urlsplit only identifies the host when a scheme is present, so add one
    # for inputs such as "youtu.be/<id>".
    if "://" not in cleaned:
        cleaned = "https://" + cleaned.lstrip("/")
    parts = urlsplit(cleaned)

    # parse_qs drops blank values, so "watch?v=" yields no entry here.
    video_ids = parse_qs(parts.query).get("v")
    if video_ids:
        return video_ids[0]

    host = (parts.hostname or "").lower()
    if host == "youtu.be" or host.endswith(".youtu.be"):
        # The id is the first path segment of a short link.
        short_id = parts.path.strip("/").split("/")[0]
        if short_id:
            return short_id

    raise ValueError("Invalid YouTube URL")

# === 📘 Get all transcript languages ===
def get_available_languages(video_id):
    """List the transcript languages offered for a video.

    Args:
        video_id: The YouTube video id to look up.

    Returns:
        A list of ``(language_code, language)`` tuples, one per transcript
        returned by ``YouTubeTranscriptApi.list_transcripts``. An empty list
        is returned when the lookup fails for any reason.
    """
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        return [(t.language_code, t.language) for t in transcript_list]
    except Exception as e:
        # Best-effort lookup: keep returning [] so callers are unaffected, but
        # report the reason instead of swallowing it silently (same style as
        # the error report in get_transcription).
        print("Transcript language lookup error:", e)
        return []



# `video_id` is not assigned until a later cell, so derive the id from `url`
# here; this keeps the cell runnable when the file is executed top to bottom.
print(get_available_languages(extract_video_id(url)))

[('hi', 'Hindi (auto-generated)')]


In [87]:
# Resolve the video id from the sample URL and show it.
video_id = extract_video_id(url)
print(video_id)

# Look up which transcript languages that video offers and show them.
languages = get_available_languages(video_id)
print(languages)

uahX_JSdA2Q
[('hi', 'Hindi (auto-generated)')]


In [89]:
def get_transcription(video_id, lang_code="en"):
    """Fetch the transcript of a video in one language as a single string.

    Args:
        video_id: The YouTube video id.
        lang_code: Language code of the transcript to fetch (default "en").

    Returns:
        The text of every transcript chunk joined with single spaces, or
        ``None`` when no listed transcript has ``lang_code`` or when any
        exception is raised (the exception is printed).
    """
    try:
        available = YouTubeTranscriptApi.list_transcripts(video_id)

        # First listed transcript whose language code matches, if any.
        match = next(
            (item for item in available if item.language_code == lang_code),
            None,
        )
        if not match:
            return None

        pieces = match.fetch()
        return " ".join(piece.text for piece in pieces)

    except Exception as err:
        print("Transcript fetch error:", err)
        return None



In [34]:
def create_chunks(transcript, chunk_size=500, chunk_overlap=100):
    """Split a transcript into overlapping chunks ready for embedding.

    Args:
        transcript: The full transcript text. ``None`` or an empty string
            (``get_transcription`` returns ``None`` when nothing could be
            fetched) produces no chunks instead of failing in the splitter.
        chunk_size: ``chunk_size`` forwarded to
            ``RecursiveCharacterTextSplitter`` (default 500, the value that
            was previously hard-coded).
        chunk_overlap: ``chunk_overlap`` forwarded to
            ``RecursiveCharacterTextSplitter`` (default 100, the value that
            was previously hard-coded).

    Returns:
        The list returned by ``splitter.create_documents([transcript])``, or
        an empty list when there is no text to split.
    """
    if not transcript:
        return []

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.create_documents([transcript])

# `transcript` is never assigned before this point, so fetch it here.
# Use the first language the video actually offers; if the language lookup
# returned nothing, fall back to get_transcription's default code ("en").
lang_code = languages[0][0] if languages else "en"
transcript = get_transcription(video_id, lang_code=lang_code)
if not transcript:
    # Fail with a clear message rather than letting the splitter or the
    # vector store fail later on missing text.
    raise RuntimeError(
        f"No transcript available for video {video_id!r} in language {lang_code!r}"
    )

chunks = create_chunks(transcript)
# print(chunks)

In [35]:
# Embedding model used to vectorise the transcript chunks.
embedding_model = HuggingFaceEmbeddings(
    model="sentence-transformers/all-MiniLM-L6-v2",
)

# Build a FAISS vector store from the chunks.
# To persist it locally: vector_store.save_local('vectostore/db_faiss')
vector_store = FAISS.from_documents(chunks, embedding_model)

# Similarity-search retriever over the store, configured with k=5.
# Quick manual check: print(retriever.invoke("What is Self Attention"))
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},
)

# Prompt for the question-answering step: the model is told to answer only
# from the supplied transcript context and to say so when it cannot.
prompt = PromptTemplate(
    input_variables=['context', 'question'],
    template="""
      You are a helpful assistant.
      Answer ONLY from the provided transcript context.
      If the context is insufficient, just say you don't know.

      {context}
      Question: {question}
    """,
)

  from .autonotebook import tqdm as notebook_tqdm


In [44]:
# Read the question from the user.
user_query = input("Enter the Query : ")

# The context is the set of documents the retriever returns for that question.
context_docs = retriever.invoke(user_query)

# Join the text of the retrieved documents, separated by blank lines.
context_text = "\n\n".join([doc.page_content for doc in context_docs])
# Bare expression carried over from the notebook cell; it has no effect when
# the file is run as a script.
context_text

# context_text
def format_docs(context_text):
    """Turn retriever output into the plain-text context for the prompt.

    In the chain this function receives whatever ``retriever`` emits, which is
    a list of documents, not text. Returning it unchanged would place the
    list's repr into the prompt's ``{context}`` slot, so the documents'
    ``page_content`` values are joined here with blank lines between them.

    Args:
        context_text: Either an already formatted string (returned as is, for
            backward compatibility) or an iterable of objects exposing a
            ``page_content`` attribute.

    Returns:
        The context as a single string.
    """
    if isinstance(context_text, str):
        return context_text
    return "\n\n".join(doc.page_content for doc in context_text)

In [48]:
# making the llm model using the ChatHuggingFace mistral model
def load_llm():
    """Build the chat model used to answer questions.

    Creates a ``HuggingFaceEndpoint`` for the
    ``mistralai/Mistral-7B-Instruct-v0.3`` repository, authenticated with the
    module-level ``HUGGINGFACE_API_TOKEN``, and wraps it in
    ``ChatHuggingFace``.

    Returns:
        The ``ChatHuggingFace`` instance wrapping the endpoint.
    """
    endpoint = HuggingFaceEndpoint(
        repo_id="mistralai/Mistral-7B-Instruct-v0.3",
        # Hugging Face task identifiers are hyphenated; "text generation"
        # (with a space) is not a valid task id.
        task="text-generation",
        huggingfacehub_api_token=HUGGINGFACE_API_TOKEN,
    )
    return ChatHuggingFace(llm=endpoint)

# final_response=llm.invoke(final_prompt)
# print(final_response)

In [46]:
# Output parser applied as the last step of the chain.
parser = StrOutputParser()

# Feed the incoming question to two branches at once: one retrieves documents
# and runs them through format_docs, the other passes the question through.
parallel_chain = RunnableParallel(
    {
        'context': retriever | RunnableLambda(format_docs),
        'question': RunnablePassthrough(),
    }
)

In [49]:
llm = load_llm()

# Pipeline: (context + question) -> prompt -> chat model -> output parser.
final_chain = parallel_chain | prompt | llm | parser

# Run the whole chain on the user's question and show the answer.
response = final_chain.invoke(user_query)
print(response)

 The provided context suggests that transformers are a significant invention in the field of AI, leading to a current boom in AI. They are a specific kind of neural network, a machine learning model, and are used to build various models, including those that take in audio and produce a transcript, or those that take in text and produce images (like DALL-E and Midjourney). However, the context does not directly explain how transformers specifically effect the AI field, but rather that they are a key component contributing to advancements in AI.
