In [99]:
import os
from dotenv import load_dotenv
load_dotenv()
groq_api_key = os.getenv("GROQ_API_KEY")

In [100]:
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_groq import ChatGroq
from langchain_classic.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_classic.document_loaders import YoutubeLoader
from langchain_huggingface import HuggingFaceEmbeddings


In [101]:
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import (
    TranscriptsDisabled,
    NoTranscriptFound,
    VideoUnavailable
)

api = YouTubeTranscriptApi()

video_id = "JxgmHe2NyeY"

try:
    transcript_list = api.fetch(
        video_id=video_id,
        languages=["en"]
    )
    transcript = " ".join(chunk.text for chunk in transcript_list)
    print(transcript)

except TranscriptsDisabled:
    print("Captions are disabled for this video")

except NoTranscriptFound:
    print("No English captions found")

except VideoUnavailable:
    print("Video is unavailable")

except Exception as e:
    print(f"Unexpected error: {e}")




In [102]:
transcript_list



In [103]:
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = splitter.create_documents([transcript])

In [104]:
len(chunks)

443

In [105]:
chunks[0]

Document(metadata={}, page_content="so today's session what all things we are basically going to discuss so first of all we going to discuss about different types of machine learning algorithm like how many different types of machine learning algor understand the purpose of taking this session is to clear the interviews okay clear the interviews once you go for a data science interviews and all the main purpose is to clear the interviews I've seen people who knew machine learning algorithms in a proper way okay they were definitely able to clear it because they just explain the algorithms in a better way to the recruiter so that they got hired first of all is the introduction to machine learning here I'm just specifically going to talk about AI versus ml versus DL versus data sign then the second thing that we are going to talk about over here is the difference between supervised MS and unsupervised ml the third thing that we are probably going to discuss about is something called as l

In [None]:
embeddings = HuggingFaceEmbeddings()
vector_store = FAISS.from_documents(chunks, embeddings)

In [None]:
vector_store.index_to_docstore_id

{0: '2b6cffc1-9f91-4de2-8351-91d151e89b7f',
 1: 'a29aec6c-b197-4d6c-be72-e5d3c37c1747',
 2: '6783f9be-dbd2-4731-b8e7-62bf42d5c57c',
 3: 'f5f1dff9-d608-4438-a8d0-675b16749142',
 4: '0613b6a1-9fc5-4a16-8c71-3624ca2084dd',
 5: 'dee14c92-40c1-4638-995e-c7ee8028ab9e',
 6: 'e6ef2923-b6eb-4e4e-925c-06d60c323561',
 7: '0c11188b-9aa7-4997-9d78-8bc71031f07f',
 8: 'e7039222-c490-48a3-97fc-f937c57eef0b',
 9: '0ae44639-d9ff-45e0-b43c-e06396242299',
 10: '4f5ac27a-dd04-4fcb-b33b-65bf09cd3a9d',
 11: '1814fe3a-0c06-4ea0-af3e-e14eb8311c09',
 12: 'e6cec08d-2da5-4006-af6d-78a68eafd467',
 13: '200cf7de-f7b8-4a7c-8713-235ce6417ac3',
 14: '5d6f61fc-8154-484a-aec8-098544d940b2',
 15: '007ef509-af56-4356-892b-7d851a584ce3',
 16: '7dd4eedf-57a1-4ffb-b533-2c54ba6a51d1',
 17: '767add63-09d1-40fb-8544-e80aa26f7db6',
 18: 'b4c30023-9238-42cd-b759-1b423e57cdd5',
 19: '9195e0de-e1cc-45a7-b108-24c14d9b38bb',
 20: '6f546c43-c20a-4391-b44e-d488bbb424b0',
 21: 'bc545a41-a92a-4d4f-a7fd-52835a1e3bd5',
 22: '20075b4c-c726-

In [None]:
vector_store.get_by_ids(['0a6c1836-aa5d-4eb7-a081-492251099765'])

[]

In [None]:
# Retreival
retreiver = vector_store.as_retriever(search_type="similarity", search_kwargs={"k":4})

In [None]:
retreiver

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000014B75FEA5D0>, search_kwargs={'k': 4})

In [None]:
retreiver.invoke("lasso in regression?")

[Document(id='72870a3b-afbd-45a6-9a39-27e5b9b76be4', metadata={}, page_content="in the case of lasso regression your cost function will be H Theta of X of IUS y of i² plus Lambda ultied mode of flow so here you have this specific thing and what is the purpose the purpose are two one is prevent overfitting and the second one is something called as feature selection so these two are the outcomes of the entire thing see with respect to this lasso right you have slopes slopes here you'll be having Theta 0 plus Theta 1 plus Theta 2 plus theta 3 like this up to Theta n now when you'll have this many number of thetas when you have many number of features and when you have many number of features that basically means you'll have multiple slopes right those features that are not performing well or that has no contribution in finding out your output that coefficient value will be almost nil right it will be very much near to zero in short you neglecting that value by using modulus you're not squ

In [None]:
# Augumentation
llm =  ChatGroq(
    model="openai/gpt-oss-120b",
    api_key=os.getenv("GROQ_API_KEY"),
    temperature=0.8,

)

In [None]:
prompt = PromptTemplate(
    template="""
You are a helpful assistant.
Answer ONLY from the provided transcript context.
If the content is insufficient, just say you don't know

{context}

Question: {question}

""",
input_variables=['content','question']
)

In [None]:
question = "Tell the Whole Concept of Support Vector Machine Discussed in this video."

retreived_docs = retreiver.invoke(question)

In [None]:
context_text = "\n\n".join(doc.page_content for doc in retreived_docs)
# jo docs mile the unko join kiya hai
context_text

"be changing remaining everything are same so just try to if you change this particular value that becomes an svr just try to explore and just try to find out and just try to let me know so overall uh did you like the entire session everyone okay in this one more thing is there which is called as kernel Matrix svm kernel we say it as svm kernel now in s VM kernel what happens suppose if I have a specific data points which looks like this which looks like this so we obviously cannot use a straight line and try to divide it so what we do we convert this two Dimension into three dimensions and then probably we push our Point like this one point will go like this and the white point will go down and then we can basically use a plane to split it so I uploaded a video around uh around that and uh you can definitely have a look onto that and I have also shown you practically how to do it that is the reason I've created that specific video so great uh this was it from my side I hope you like\n

In [None]:
final_prompt = prompt.invoke({"context":context_text, "question":question})
final_prompt

StringPromptValue(text="\nYou are a helpful assistant.\nAnswer ONLY from the provided transcript context.\nIf the content is insufficient, just say you don't know\n\nbe changing remaining everything are same so just try to if you change this particular value that becomes an svr just try to explore and just try to find out and just try to let me know so overall uh did you like the entire session everyone okay in this one more thing is there which is called as kernel Matrix svm kernel we say it as svm kernel now in s VM kernel what happens suppose if I have a specific data points which looks like this which looks like this so we obviously cannot use a straight line and try to divide it so what we do we convert this two Dimension into three dimensions and then probably we push our Point like this one point will go like this and the white point will go down and then we can basically use a plane to split it so I uploaded a video around uh around that and uh you can definitely have a look on

In [None]:
answer = llm.invoke(final_prompt)
print(answer.content)

**Support Vector Machine (SVM) – concepts covered in the video**

1. **Why SVM is needed**  
   - When data points cannot be separated by a straight line in the original (2‑D) space, a simple linear classifier (e.g., logistic regression) fails.

2. **Kernel trick – mapping to higher dimensions**  
   - The video shows a 2‑D dataset that is not linearly separable.  
   - By applying a **kernel transformation**, the points are lifted into a **3‑D space**.  
   - In this new space the previously tangled points become separable by a **plane** (a hyper‑plane in higher‑dimensional space).  
   - The video includes a practical demonstration of this transformation.

3. **Hyper‑plane and margin**  
   - SVM does not just find any separating line/plane; it looks for the one that **maximizes the margin** – the distance between the hyper‑plane and the nearest data points from each class.  
   - These nearest points lie on **margin (or marginal) planes**.  
   - The hyper‑plane with the **largest p

In [None]:
# Building a Chain
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser



In [None]:
def format_docs(final_prompt):
    context_text = "\n\n".join(doc.page_content for doc in retreived_docs)
    return context_text

In [None]:
parallel_chain = RunnableParallel({
    "context" : retreiver | RunnableLambda(format_docs),
    "question" :RunnablePassthrough()
})

In [None]:
parallel_chain.invoke('What is SVM in detail?')

{'context': "be changing remaining everything are same so just try to if you change this particular value that becomes an svr just try to explore and just try to find out and just try to let me know so overall uh did you like the entire session everyone okay in this one more thing is there which is called as kernel Matrix svm kernel we say it as svm kernel now in s VM kernel what happens suppose if I have a specific data points which looks like this which looks like this so we obviously cannot use a straight line and try to divide it so what we do we convert this two Dimension into three dimensions and then probably we push our Point like this one point will go like this and the white point will go down and then we can basically use a plane to split it so I uploaded a video around uh around that and uh you can definitely have a look onto that and I have also shown you practically how to do it that is the reason I've created that specific video so great uh this was it from my side I hop

In [None]:
parser = StrOutputParser()

In [None]:
main_chain = parallel_chain | prompt | parser

In [None]:
main_chain.invoke("Can you summarize the SVM from the video?")

ValidationError: 1 validation error for Generation
text
  Input should be a valid string [type=string_type, input_value=StringPromptValue(text="\...VM from the video?\n\n"), input_type=StringPromptValue]
    For further information visit https://errors.pydantic.dev/2.11/v/string_type