In [1]:
import os

from dotenv import load_dotenv

# Load environment variables from the .env file. One call is sufficient.
load_dotenv()

# Fail fast when the Cohere key is missing so later cells do not fail with a
# less obvious authentication error.
cohere_api_key = os.getenv("COHERE_API_KEY")
if not cohere_api_key:
    raise ValueError("COHERE_API_KEY not found in .env file")


In [2]:
# Identifier of the YouTube video whose transcript is fetched and indexed by
# the cells below.
youtube_vid_id = "tSrN8eRMEAE"

In [3]:
from youtube_transcript_api import YouTubeTranscriptApi

# Fetch the English transcript of the configured video, then convert it to raw
# data; the next cell reads the "text" key of every entry.
fetched_transcript = YouTubeTranscriptApi().fetch(
    youtube_vid_id,
    languages=["en"],
)
fetched_transcript_in_raw = fetched_transcript.to_raw_data()

In [4]:
# Flatten the transcript: keep only each entry's text and join the pieces with
# single spaces into one string.
plain_text = " ".join([entry["text"] for entry in fetched_transcript_in_raw])

In [5]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the full transcript into overlapping chunks (chunk_size=1000,
# chunk_overlap=100) that are later embedded and indexed.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=100, chunk_size=1000)
docs = text_splitter.create_documents([plain_text])

In [6]:
# Chat model shared by the QA chain and the summarization chain below.
from langchain_cohere import ChatCohere

llm = ChatCohere(
    model="command-a-03-2025",  # model selection can be changed
    cohere_api_key=cohere_api_key,
)



In [7]:
# Quick check that the configured chat model answers a plain string prompt;
# the returned message is displayed as the cell output.
llm.invoke("what is ai , in one 1 sentence")

AIMessage(content='AI, or Artificial Intelligence, is the simulation of human intelligence in machines programmed to think, learn, and make decisions autonomously.', additional_kwargs={'id': '6cda0d74-dbbe-4393-bbb8-27aac6daf6ff', 'finish_reason': 'COMPLETE', 'content': 'AI, or Artificial Intelligence, is the simulation of human intelligence in machines programmed to think, learn, and make decisions autonomously.', 'token_count': {'input_tokens': 504.0, 'output_tokens': 28.0}}, response_metadata={'id': '6cda0d74-dbbe-4393-bbb8-27aac6daf6ff', 'finish_reason': 'COMPLETE', 'content': 'AI, or Artificial Intelligence, is the simulation of human intelligence in machines programmed to think, learn, and make decisions autonomously.', 'token_count': {'input_tokens': 504.0, 'output_tokens': 28.0}}, id='run--8c32c887-6858-44ab-b62e-08c57563dba6-0', usage_metadata={'input_tokens': 504, 'output_tokens': 28, 'total_tokens': 532})

In [8]:
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model used for the sample sentence below and for building the
# FAISS index over the transcript chunks.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

In [9]:
# Embed one sample sentence and print elements 30..49 of the resulting vector
# as a sanity check of the embedding model.
text = "LangChain makes working with LLMs easier."
embedding_vector = embeddings.embed_query(text)
print(embedding_vector[slice(30, 50)])

[0.022974032908678055, 0.10711224377155304, -0.03132135421037674, 0.0007881922065280378, -0.13304010033607483, 0.06074553355574608, 0.012467825785279274, 0.02004380151629448, -0.05797906219959259, -0.05347032472491264, -0.014485558494925499, -0.013868660666048527, 0.06417139619588852, -0.020030349493026733, 0.0028082123026251793, -0.01605246588587761, -0.00022771619842387736, 0.05036063492298126, -0.12954315543174744, 0.014608489349484444]


In [10]:
from langchain_community.vectorstores import FAISS

# Build a FAISS vector store from the transcript chunks, embedding each chunk
# with the HuggingFace model defined above.
vector_store = FAISS.from_documents(documents=docs, embedding=embeddings)

In [11]:
# Inspect the mapping from integer index positions to docstore document IDs
# (one entry per indexed chunk).
vector_store.index_to_docstore_id

{0: 'cdf683ab-db56-426e-b678-f1a463a52f9b',
 1: 'e0b01735-ae29-44b4-a7a5-d285dcf8651f',
 2: '020d11ae-8024-48fa-a1c9-ebcd0bad3737',
 3: '9a29e75a-8169-4152-92ce-e9155d3202d0',
 4: 'f8c36ed6-c48f-4062-a754-5d46bf0bc6de',
 5: 'bf84875e-c276-4581-946f-3a3a6cb10ecd',
 6: '093d9fb0-d8f8-4651-940e-bfa63206bda7',
 7: '0d525e44-c4b5-471d-9f3d-1b7a97248002',
 8: 'b3212208-299f-4a0e-ac75-07852a0cf9d3',
 9: '75778185-9537-404b-bdfb-b0df4631c601',
 10: 'f94d71f2-fa93-4a6b-8b04-6f3c423c53a8',
 11: 'ad657e02-2a59-4e93-a79a-5f61b756e6d5',
 12: '62df98fd-ed95-4fd4-b467-e9a222ad6531',
 13: '87d318b2-cc40-49c0-afb6-6f91d2f09b60',
 14: '547aef3b-b527-4cc1-b0df-d6a99b2dd16c',
 15: '583d7077-ba37-45a4-8aff-ebbdc66a7323',
 16: 'df139815-1e51-4c12-8374-5e5a441c7dd0',
 17: '306aab1c-e0fe-4d7a-ab56-27939bf77b47',
 18: '4850a7a6-28ef-4f1a-b871-658f0cec2e77',
 19: 'ed251691-01b9-4b66-bc6e-21bec5076ec9',
 20: '7ac76e9b-a34a-4ddc-8b20-a967ed84ca04',
 21: '7690fb61-1a8e-40f5-9e1f-82c99a672d4a',
 22: 'a540f1f9-9c08-

In [12]:
# Ask the vector store directly for the five chunks most similar to a sample
# question, and display them.
similarity_search_with_vector_store = vector_store.similarity_search(
    query="what to learn to avoid ai taking my job",
    k=5,
)
similarity_search_with_vector_store

[Document(id='b0fd947a-5756-40f2-a0b8-1ffdea0af652', metadata={}, page_content="a number one, be someone's number two. When I did two years working for this mentor of mine, I just picked up so much. And then I I left at 21. I started my first company when I was 21. We did 1.3 million in the first 12 months. We did 11 million in uh year three. But there's no way I could have gotten off to a fast start if I hadn't have done two years under the wing of a of an experienced entrepreneur. So like whatever your passion is, let's say it's a passion for AI, if you're listening to something like this, then go bring that passion to an experienced entrepreneur and the value exchange is that you're bringing a passion for AI. They're bringing 20, 30 years worth of experience running businesses. So see if you can be a direct report to an entrepreneur first for one to two years. The three things that you want out of that apprenticeship. Self-awareness where you start to discover your strengths and wea

In [13]:
# Expose the vector store through the retriever interface: similarity search
# returning the top 5 chunks per query.
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},
)
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000024B70AB2660>, search_kwargs={'k': 5})

In [14]:
# Run the same sample question through the retriever. `invoke` is the
# supported entry point; `get_relevant_documents` is deprecated and emits a
# deprecation warning when called.
retriever.invoke("what to learn to avoid ai taking my job")

  retriever.get_relevant_documents("what to learn to avoid ai taking my job")


[Document(id='b0fd947a-5756-40f2-a0b8-1ffdea0af652', metadata={}, page_content="a number one, be someone's number two. When I did two years working for this mentor of mine, I just picked up so much. And then I I left at 21. I started my first company when I was 21. We did 1.3 million in the first 12 months. We did 11 million in uh year three. But there's no way I could have gotten off to a fast start if I hadn't have done two years under the wing of a of an experienced entrepreneur. So like whatever your passion is, let's say it's a passion for AI, if you're listening to something like this, then go bring that passion to an experienced entrepreneur and the value exchange is that you're bringing a passion for AI. They're bringing 20, 30 years worth of experience running businesses. So see if you can be a direct report to an entrepreneur first for one to two years. The three things that you want out of that apprenticeship. Self-awareness where you start to discover your strengths and wea

In [15]:
# Prompt text for question answering over retrieved transcript chunks.
# Placeholders: {context} (retrieved chunk text) and {question} (user query).
#
# The text deliberately contains no model-specific control tokens such as
# Llama-3 style `<|start_header_id|>` markers: the chat model used in this
# notebook is Cohere's, so such tokens would only be sent as literal text.
# It also does not describe "Text"/"start" transcript fields, because the
# context is built from the space-joined plain transcript text, which carries
# no timestamps or field labels.
qa_template = """
You are an expert assistant providing detailed and accurate answers based on the following video content. Your responses should be:
1. Precise and free from repetition
2. Consistent with the information provided in the video
3. Well-organized and easy to understand
4. Focused on addressing the user's question directly
If you encounter conflicting information in the video content, use your best judgment to provide the most likely correct answer based on context.

Relevant Video Context: {context}
Based on the above context, please answer the following question:
{question}
"""

In [16]:
from langchain.prompts import PromptTemplate

# Prompt object for the QA chain; it is filled with the retrieved context and
# the user's question.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=qa_template,
)

In [29]:
from langchain.chains import RetrievalQA

# Retrieval-augmented QA chain wiring together the chat model, the FAISS
# retriever and the QA prompt.
qa_chain = RetrievalQA.from_llm(
    llm=llm,
    prompt=prompt,
    retriever=retriever,
)

In [None]:
# Ask the RetrievalQA chain a question by passing the bare question string to
# `invoke`. With return_only_outputs=True the recorded output contains only
# the 'result' key.
question = "what to learn to avoid ai taking my job"

answer_questions = qa_chain.invoke(
    question,
    return_only_outputs=True,
)
print(answer_questions)

{'result': 'To avoid your job being taken over by AI, focus on developing skills and competencies that are difficult for AI to replicate. Here are key areas to prioritize based on the video context:\n\n1. **Entrepreneurial and Commercial Awareness**: Gain experience working closely with an experienced entrepreneur to understand business dynamics, decision-making, and how to scale ventures. This includes learning how to identify opportunities, build communities, and integrate AI into various business processes (e.g., marketing, hiring, operations).\n\n2. **Self-Awareness and Strengths Development**: Invest time in discovering your unique strengths and weaknesses. AI excels at repetitive tasks, so focus on developing skills that require creativity, emotional intelligence, and strategic thinking.\n\n3. **AI Upskilling**: Instead of competing against AI, learn to leverage it as a tool. Understand how AI can be applied in your industry to automate repetitive tasks, freeing you to focus on h

In [None]:
# Manual way: retrieve the documents and build the context string explicitly.

In [36]:
question = "is the topic of nuclear fusion discussed in this video? if yes then what was discussed"

# Step 1: retrieve the chunks most relevant to the question.
retrieved_docs = retriever.invoke(question)

# Step 2: concatenate the retrieved chunk texts into a single context string.
context = "\n".join(doc.page_content for doc in retrieved_docs)

# Step 3: fill the QA prompt with the manually built context and send it
# straight to the model. Passing `context=...` as an extra keyword to
# `qa_chain.invoke` has no effect (the chain performs its own retrieval and
# ignores that keyword), so the context has to go in through the prompt.
# The answer is wrapped in a {'result': ...} dict to keep the same shape as
# the RetrievalQA output used elsewhere in this notebook.
answer_questions = {
    "result": (prompt | llm).invoke({"context": context, "question": question}).content
}

In [37]:
# Display the answer produced for the nuclear-fusion question above.
answer_questions

{'result': 'The topic of nuclear fusion is **not discussed** in the provided video context. The conversation primarily revolves around the impact of AI on jobs, entrepreneurship, and the future of work, with references to historical technological shifts like electricity and the potential for Universal Basic Income (UBI). There is no mention of nuclear fusion or related topics.'}

In [None]:
# Prompt text for summarizing the whole transcript.
# Placeholder: {transcript} (the transcript text to summarize).
#
# The text deliberately contains no model-specific control tokens such as
# Llama-3 style `<|start_header_id|>` markers: the chat model used in this
# notebook is Cohere's, so such tokens would only be sent as literal text.
# It also does not describe "Text"/"start" transcript fields, because the
# transcript passed in is the space-joined plain text without field labels.
template = """
You are an AI assistant tasked with summarizing YouTube video transcripts. Provide concise, informative summaries that capture the main points of the video content.

Instructions:
1. Summarize the transcript in a single concise paragraph.
2. Ignore any timestamps in your summary.
3. Focus on the spoken content of the video.

Please summarize the following YouTube video transcript:

{transcript}
"""


# Prompt object for the summarization chain; its only input is the transcript.
prompt1 = PromptTemplate(
    input_variables=["transcript"],
    template=template,
)


In [44]:
# Summarization pipeline: render the summary prompt, then call the chat model.
summary_chain = prompt1 | llm

# A `prompt | model` sequence returns the model's message object rather than a
# chain-style output dict, so the Chain-only `return_only_outputs` flag is not
# passed here (it had no effect on this pipeline).
result = summary_chain.invoke({"transcript": plain_text})

In [45]:
# Display the message returned by the summarization chain.
result

AIMessage(content='The video features Daniel Brie, a successful entrepreneur and investor, discussing the impact of AI on the future of work and how individuals can adapt and thrive in this rapidly changing landscape. Brie highlights that AI will likely lead to a significant reduction in wages, particularly for repetitive and functional jobs, as these tasks can be automated. He emphasizes that the traditional schooling system, rooted in the industrial age, is ill-equipped to prepare people for the new economy, where entrepreneurship and plural careers are becoming essential. Brie suggests that the most valuable skills in the AI era will be entrepreneurial and soft skills, such as pitching, visioning, ideation, and rapid testing, which enable individuals to spot opportunities, assemble teams, and commercialize ideas quickly. He also stresses the importance of creating value through "loops" (completing projects from idea to result) and "groups" (building effective teams), which are found

In [None]:
# # Parallel chain (draft): one branch invokes the QA chain and the other invokes the summarization chain.
# from langchain.chains import SequentialChain

# parallel_chain = SequentialChain(
#     chains=[qa_chain, combined_document_chain], input_variables=["question"], output_variables=["answer"]
# )
# # CELL:
# parallel_chain({"question": "what to learn to avoid ai taking my job"})
