In [1]:
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
import sys
print(sys.version)

3.10.19 | packaged by Anaconda, Inc. | (main, Oct 21 2025, 16:41:31) [MSC v.1929 64 bit (AMD64)]


In [3]:
from langchain_huggingface import HuggingFaceEmbeddings, ChatHuggingFace
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from transformers import pipeline
from langdetect import detect

  from .autonotebook import tqdm as notebook_tqdm


## Step-1a = Indexing (Document Ingestion with Auto-Translation)

In [4]:
from transformers import pipeline

translator = pipeline(
    "translation", 
    model="facebook/nllb-200-distilled-600M", 
)
print("✓ Translation model loaded")

Device set to use cpu


✓ Translation model loaded


## Step-0 = Load Translation Model (Free & Open-Source)

In [5]:
from youtube_transcript_api import YouTubeTranscriptApi
from langdetect import detect

video_id = "KNfZrLKIufI"
languages = ['hi', 'en', 'bn', 'te', 'mr', 'ta', 'ur', 'gu', 'kn', 'ml', 'pa']
LANG_MAP = {
    'hi': 'hin_Deva',
    'ur': 'urd_Arab',
    'bn': 'ben_Beng',
    'ta': 'tam_Taml',
    'te': 'tel_Telu',
    'mr': 'mar_Deva',
    'ne': 'npi_Deva',
    'ar': 'arb_Arab'
}


def translate_text(text, src_lang):
    
    try:
        trans = translator(text, src_lang=src_lang, tgt_lang="eng_Latn")[
            0]['translation_text']
        return trans
    except Exception as e:
        print(f"Translation error: {e}")
        return text  


try:
    api = YouTubeTranscriptApi()
    transcript_snippets = api.fetch(video_id=video_id, languages=languages)

    first_text = " ".join(t.text for t in transcript_snippets[:10])
    detected = detect(first_text)
    print(f"✓ Transcript found, detected language: {detected}")

    if detected != 'en' and detected in LANG_MAP:
        print("Translating each snippet to English...")
        translated_snippets = []
        for i, snippet in enumerate(transcript_snippets):
            translated_text = translate_text(
                snippet.text, LANG_MAP[detected])
            translated_snippets.append(translated_text)
            if (i + 1) % 20 == 0:
                print(
                    f"  Translated {i + 1}/{len(transcript_snippets)} snippets")
        final_transcript = " ".join(translated_snippets)
    else:
        # No translation needed
        final_transcript = " ".join(t.text for t in transcript_snippets)

    print(
        f"✓ Translation complete. Transcript length: {len(final_transcript)} chars")

    # Save to file
    with open(f"transcript_{video_id}.txt", "w", encoding="utf-8") as f:
        f.write(final_transcript)

except Exception as e:
    print(f"✗ Error: {e}")

✓ Transcript found, detected language: hi
Translating each snippet to English...
  Translated 20/510 snippets
  Translated 40/510 snippets
  Translated 60/510 snippets
  Translated 80/510 snippets
  Translated 100/510 snippets
  Translated 120/510 snippets
  Translated 140/510 snippets
  Translated 160/510 snippets
  Translated 180/510 snippets
  Translated 200/510 snippets
  Translated 220/510 snippets
  Translated 240/510 snippets
  Translated 260/510 snippets
  Translated 280/510 snippets
  Translated 300/510 snippets
  Translated 320/510 snippets
  Translated 340/510 snippets
  Translated 360/510 snippets
  Translated 380/510 snippets
  Translated 400/510 snippets
  Translated 420/510 snippets
  Translated 440/510 snippets
  Translated 460/510 snippets
  Translated 480/510 snippets
  Translated 500/510 snippets
✓ Translation complete. Transcript length: 20140 chars


## STEP - 1b = Create chunks

In [6]:
splitter = RecursiveCharacterTextSplitter(chunk_size = 1000 , chunk_overlap = 200)
chunks = splitter.create_documents([final_transcript])

In [7]:
len(chunks)

25

## Step- 1c , 1d = (Embedding generation and storing in vectorstore)

In [8]:
embedding = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')


In [9]:
vector_store = FAISS.from_documents(chunks , embedding)

In [10]:
vector_store.index_to_docstore_id

{0: 'd2496ec7-2d27-4437-8aa9-68a96d4b71da',
 1: '2c49fcfe-d0f9-42c2-9502-4ae0af3471ce',
 2: '6172db1b-e647-4993-8ab5-d1427bf01f9f',
 3: '6dd5087a-47f1-40b0-831c-4889f24dbe3e',
 4: 'a6d33bea-c664-4edb-b080-cc0ca887d03b',
 5: '174dcd46-da91-40ae-8727-334f2356c29c',
 6: '5e4ea2e6-8be6-4a09-977a-5ff4d63ff935',
 7: '4b60ec51-6c48-4b24-8bdd-5bbbd3620596',
 8: 'a93fdd9e-cb34-4050-b375-199509306c63',
 9: '9baefe89-cba4-468d-a695-27dac044ac0b',
 10: 'b63f97b2-6ac8-42da-b1bd-6ac131541beb',
 11: '787babed-ec76-404f-a1dd-58a6056a83a1',
 12: 'eda00299-d837-4f6f-87c8-cdf5b0b747bb',
 13: 'd95e1f34-8e3e-48bf-8db7-dd14fee91122',
 14: '8f4e76f0-cbdc-4f10-9847-83d5d1452c93',
 15: 'a0904e7e-acd2-4ac5-ae10-4ff468c47915',
 16: '84f98ded-a728-401e-be93-711b8284250e',
 17: 'a9790958-522c-4a99-99a7-d32380938bda',
 18: '3eb42660-fce7-4a44-bd95-316b7ac52c1f',
 19: '4bfd6e20-3ecd-4caf-8e7c-f6f4ec5e9ede',
 20: 'ebd0be2b-ac51-4297-9303-7387ec7dbed2',
 21: '2629afed-19a1-439d-b468-f1417b5b6a0a',
 22: 'f8023f31-cc06-

In [11]:
vector_store.get_by_ids(['a2f074af-6d2f-45bd-95c9-29456ca33b95'])

[]

## Step-2 = Retriever

In [12]:
retriever = vector_store.as_retriever(
    search_type="mmr",
    search_kwargs={
        "k": 6,          # number of chunks sent to LLM
        "fetch_k": 20,   # pool to choose from
        "lambda_mult": 0.6
    }
)

In [13]:
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000018A379E20B0>, search_type='mmr', search_kwargs={'k': 6, 'fetch_k': 20, 'lambda_mult': 0.6})

In [14]:
retriever.invoke("What is the main summary of this video ?")

[Document(id='a30ae8d1-16b6-487a-821c-b8b8cf8cdabf', metadata={}, page_content="side of the triangle Now this is the same thing where 95% of students fail. They learn, they try, but Just for two days. Then he went back to his The old job looks like a new one. I watched the video that maybe they had some Have a new strategy, have a good strategy. That 's why I always say that goals don 't [Music] Make you rich. Systems make you rich. If your system is the same week, if you can get it If you can 't consistently do it, you You will never reach your goals. End third and last side of triangle This is the stage where But you 're exponential to your efforts Converts to growth. One page runs. The second page starts, the second begins with the third. started from the third to the fourth. That 's it. Exactly Where You Make the Most of Your Money. Now you don 't just have one thing in the last. Remember. AI is [music] All right. Next big thing. If only the use of AI Imagine if you could make peop

## Step-3 = Augmentation

In [15]:
from langchain_community.llms import Ollama

llm = Ollama(model="llama3")

  llm = Ollama(model="llama3")


In [16]:
prompt = PromptTemplate(
    template="""
You are a helpful, precise assistant answering questions using the provided context.

Rules:
- Base your answer ONLY on the context.
- Do NOT say "the transcript appears to" or similar meta phrases.
- Answer directly and confidently.
- If summarizing, extract key ideas and techniques.
- If answering a question, be specific and grounded.
- If the context is insufficient, say so clearly.

Context:
{context}

Question:
{question}

Answer:
""",
input_variables=['context' , 'question']
)

In [19]:
question = "What lesson did i get from this video?"

retrieved_docs = retriever.invoke(question)

In [20]:
retrieved_docs

[Document(id='f8023f31-cc06-4e0c-b63a-6621e88d977e', metadata={}, page_content="who just pocket Want to get some side for money So now you see all three methods. For. Theme Pages That Look for the Long Term Growth, skill cycling which is an asset A bicycle with a building. Freelancing Witch Is Kind of Your Instant cash flow [music] from where You can get money instantly. But now Here comes a very big catch of it. [Music] None of these methods worked until then. will do as long as the structure of wealth inside you that 's not right. That 's why I 'm doing all of my He gave the students a simple formula. The Wealth Triangle. No, there are three sides. Number one skill. For this specific video right now, you need to It is not necessary to be an expert in any skill. You just have to learn a micro-skill and AI. That skill will make you 10 times better. Then on the other side of the triangle Now this is the same thing where 95% of students fail. They learn, they try, but Just for two days. 

In [21]:
context_text = "\n\n".join(doc.page_content for doc in retrieved_docs)

In [22]:
context_text

"who just pocket Want to get some side for money So now you see all three methods. For. Theme Pages That Look for the Long Term Growth, skill cycling which is an asset A bicycle with a building. Freelancing Witch Is Kind of Your Instant cash flow [music] from where You can get money instantly. But now Here comes a very big catch of it. [Music] None of these methods worked until then. will do as long as the structure of wealth inside you that 's not right. That 's why I 'm doing all of my He gave the students a simple formula. The Wealth Triangle. No, there are three sides. Number one skill. For this specific video right now, you need to It is not necessary to be an expert in any skill. You just have to learn a micro-skill and AI. That skill will make you 10 times better. Then on the other side of the triangle Now this is the same thing where 95% of students fail. They learn, they try, but Just for two days. Then he went back to his The old job looks like a new one. I watched the video\

In [23]:
final_prompt = prompt.invoke({'context': context_text , "question": question})

## Step-4 = Generation

In [24]:
answer = llm.invoke(final_prompt)
print(answer)

Based on the context, I summarize that the main lesson is the importance of learning micro-skills and AI to create a digital product that can be sold online. The speaker emphasizes that one should not try to learn everything at once but rather focus on a specific skill and then move to another. They also highlight the power of AI in generating content and graphics, making it easier to create a social media post. Additionally, they mention the importance of scaling up a digital product and repeatedly learning new micro-skills to continue growing.


## Building Chain

In [25]:
from langchain_core.runnables import RunnableParallel , RunnablePassthrough , RunnableLambda
from langchain_core.output_parsers import StrOutputParser

In [26]:
def format_docs(retrieved_docs):
    context_text = "\n\n".join(doc.page_content for doc in retrieved_docs)
    return context_text

In [27]:
parallel_chain = RunnableParallel({
    'context': retriever | RunnableLambda(format_docs),
    'question': RunnablePassthrough()
})

In [28]:
parallel_chain.invoke(
    "How can a shy person use these techniques to improve public speaking?")

{'context': "to ₹1 lakh and at this moment You may also need a small team. Now that 's the priming strategy, isn 't it? This is the same strategy that I used in many of my old ones. All the videos have been told. They've tried it, they're all over it. You 're making a lot of money. Open the comment section of my videos. You see, you get a lot of people like that. So, I can say with certainty I think this strategy actually works. Now that 's the last method. Let me tell you the best part. This is where you need an audience. No. This is where you need followers. No. This is where you need branding. No and nothing needs investment No. Majorly that will be your workload. He'll handle 70 to 80 percent of AI. You get money from the project instantly It 's only after they 've done it . Perfect for students who just pocket Want to get some side for money So now you see all three methods. For. Theme Pages That Look for the Long Term Growth, skill cycling which is an asset A bicycle with a build

In [29]:
main_chain = parallel_chain | prompt | llm

In [30]:
main_chain.invoke("What lesson do i get from this video ?")

'The main lesson that can be taken away from this video is the importance of learning micro-skills and using AI to create a digital product that can be sold online, leading to long-term growth and wealth. The video emphasizes the need to learn in detail and not just dabble in different skills, as well as the importance of using AI tools such as ChatGPT to generate content and graphics quickly and efficiently.'