# CHAT WITH YOUTUBE

In [None]:
!pip install -q youtube-transcript-api langchain-community langchain-openai \
               faiss-cpu tiktoken python-dotenv

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/485.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m485.7/485.7 kB[0m [31m15.3 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.5 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m79.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.0/69.0 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m27.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate

### Extracting the video ID from a YouTube URL

In [None]:
import re

def get_video_id(url):
  """Extract the 11-character video id from a YouTube URL.

  Recognised forms include ``youtube.com/watch?v=<id>``, ``youtu.be/<id>``,
  ``youtube.com/embed/<id>``, ``youtube.com/v/<id>``, ``youtube.com/shorts/<id>``,
  ``youtube.com/live/<id>`` and URLs where ``v=<id>`` is not the first query
  parameter (e.g. ``watch?feature=share&v=<id>``).

  Args:
    url: The URL text to inspect.

  Returns:
    The video id as a string, or ``None`` when ``url`` is not a string or no
    id can be found in it.
  """
  if not isinstance(url, str):
    return None

  # Every fragment is a raw string so that `\.` and `\?` reach the regex engine
  # unchanged instead of being treated as (invalid) string escape sequences.
  youtube_regex = (
      r'(https?://)?(www\.)?'
      r'(youtube|youtu|youtube-nocookie)\.(com|be)/'
      r'(watch\?v=|embed/|v/|.+\?v=|.+&v=|shorts/|live/)?([a-zA-Z0-9_-]{11})'
  )
  match = re.search(youtube_regex, url)
  if match:
    # Group 6 is the 11-character id; groups 1-5 are scheme, www, host, TLD and path prefix.
    return match.group(6)
  return None


#### Put the url here

In [None]:
url = "https://www.youtube.com/watch?v=qrvK_KuIeJk"  # <-- Enter the YouTube video URL here
video_id = get_video_id(url)
if video_id is None:
    # Fail early with a clear message instead of passing None to the transcript API.
    raise ValueError(f"Could not extract a YouTube video id from: {url!r}")

# Defaults so both names exist even when the video has no captions.
transcript_list = []
transcript = ""
try:
    # Request the English transcript only (languages=["en"]).
    if hasattr(YouTubeTranscriptApi, "get_transcript"):
        transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])
    else:
        # Newer youtube-transcript-api releases no longer provide the static
        # get_transcript helper; fetch() + to_raw_data() yields the same list of
        # {'text', 'start', 'duration'} dicts.
        transcript_list = YouTubeTranscriptApi().fetch(video_id, languages=["en"]).to_raw_data()

    # Flatten it to plain text
    transcript = " ".join(chunk["text"] for chunk in transcript_list)
    print(transcript)

except TranscriptsDisabled:
    print("No captions available for this video.")



In [None]:
# Preview only the first few caption entries instead of dumping the whole transcript.
transcript_list[:5]

[{'text': 'whether you think artificial', 'start': 1.719, 'duration': 4.201},
 {'text': 'intelligence will save the world or end',
  'start': 3.24,
  'duration': 6.2},
 {'text': 'it you have Jeffrey Hinton to thank',
  'start': 5.92,
  'duration': 6.28},
 {'text': 'Hinton has been called The Godfather of',
  'start': 9.44,
  'duration': 6.24},
 {'text': 'AI a British computer scientist whose',
  'start': 12.2,
  'duration': 6.72},
 {'text': 'controversial ideas help make advanced',
  'start': 15.68,
  'duration': 6.48},
 {'text': 'artificial intelligence possible and so',
  'start': 18.92,
  'duration': 6.88},
 {'text': 'change the world Hinton believes that AI',
  'start': 22.16,
  'duration': 6.72},
 {'text': 'will do enormous good but tonight he has',
  'start': 25.8,
  'duration': 6.56},
  'start': 28.88,
  'duration': 6.359},
 {'text': 'more intelligent than we know and',
  'start': 32.36,
  'duration': 5.08},
 {'text': "there's a chance the machines could take",
  'start': 35.239

### Splitting the data and chunking

In [None]:
# Cut the transcript into 300-character pieces that overlap by 100 characters,
# wrapping each piece in a LangChain document.
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=300,
)
chunks = splitter.create_documents([transcript])

In [None]:
# Number of documents produced by the splitter.
len(chunks)

51

### Embedding

In [None]:
# Load an open-source sentence-embedding model (all-mpnet-base-v2).
# Imported from langchain_community (already used for FAISS above) because the
# `langchain.embeddings` path is only a deprecated re-export of this class.
from langchain_community.embeddings import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")


### Vector store using FAISS

In [None]:
# Build the FAISS vector store from the transcript chunks and the embedding model.
vectorstore = FAISS.from_documents(documents=chunks, embedding=embeddings)

### Retriever

In [None]:
# Similarity-search retriever configured with k=4 results per query.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4},
)

In [None]:
# Sanity check: display the documents the retriever returns for a sample query.
retriever.invoke('What is Geoffrey Hinton suggest')

 Document(id='6b93ee6a-156d-4cc8-9555-33afd082e0db', metadata={}, page_content='for what he was really studying the human brain but back then almost no one thought software could mimic the brain his PhD advisor told him to drop it before it ruined his career Hinton says he failed to figure out the human mind but the long Pursuit led to an artificial version it took much much'),
 Document(id='4fb0c07b-8b94-4110-bd30-aabef460b946', metadata={}, page_content="do it knowhow of the human kind runs in Jeffrey hinton's family his ancestors include mathematician George buou who invented the basis of computing and George Everest who surveyed India and got that mountain named after him but as a boy Hinton himself could never climb the peak of expectations"),
 Document(id='37aa6881-78cd-4c6b-8cb2-3595dee60ac9', metadata={}, page_content="at convincing people because they'll have learned from all the novels that were ever written all the books by makavelli all the political connives they'll know a

# The Brain LLM

In [None]:
%%capture
# %%capture (which must stay on the first line of the cell) silences the installer output.
import os
# The runtime is treated as Colab when the concatenated environment-variable
# names contain "COLAB_"; anywhere else a plain install is used.
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    # Do this only in Colab notebooks! Otherwise use pip install unsloth
    # Dependencies are installed explicitly (some pinned) with --no-deps, then
    # unsloth itself is installed without pulling in its own dependency set.
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl==0.15.2 triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1" huggingface_hub hf_transfer
    !pip install --no-deps unsloth

## Using Unsloth because it gives faster model loading and inference

In [None]:
from unsloth import FastLanguageModel
import torch
# Loader settings passed to FastLanguageModel.from_pretrained further down in this cell.
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
# Reference list only: nothing in this cell reads it; the from_pretrained call
# names its checkpoint explicitly via `model_name`.
fourbit_models = [
    "unsloth/Meta-Llama-3.1-8B-bnb-4bit",      # Llama-3.1 15 trillion tokens model 2x faster!
    "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    "unsloth/Meta-Llama-3.1-70B-bnb-4bit",
    "unsloth/Meta-Llama-3.1-405B-bnb-4bit",    # We also uploaded 4bit for 405b!
    "unsloth/Mistral-Nemo-Base-2407-bnb-4bit", # New Mistral 12b 2x faster!
    "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit",
    "unsloth/mistral-7b-v0.3-bnb-4bit",        # Mistral v3 2x faster!
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    "unsloth/Phi-3.5-mini-instruct",           # Phi-3.5 2x faster!
    "unsloth/Phi-3-medium-4k-instruct",
    "unsloth/gemma-2-9b-bnb-4bit",
    "unsloth/gemma-2-27b-bnb-4bit",            # Gemma 2x faster!
] # More models at https://huggingface.co/unsloth

# Load the Phi-3.5-mini-instruct checkpoint and its tokenizer with the settings above.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Phi-3.5-mini-instruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

==((====))==  Unsloth 2025.6.2: Fast Llama patching. Transformers: 4.52.4.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


# Testing LLM

In [None]:
from langchain.schema.runnable import RunnableLambda
from langchain_core.messages import HumanMessage, AIMessage ,SystemMessage
import torch

def LLM(messages):
    """Generate text for ``messages`` with the locally loaded model.

    Args:
        messages: One of
            * a plain ``str`` prompt,
            * a LangChain prompt value (any object exposing ``to_string()``,
              such as the result of ``PromptTemplate.invoke``), or
            * an iterable of LangChain messages, whose ``content`` fields are
              joined with newlines.

    Returns:
        str: The decoded first sequence returned by ``model.generate`` with
        special tokens skipped. As the saved output of this notebook shows, it
        starts with the prompt and continues with the sampled completion
        (at most 256 new tokens).
    """
    # Accept every input shape used in this notebook so the function keeps
    # working no matter which cell defined it last.
    if isinstance(messages, str):
        prompt = messages
    elif hasattr(messages, "to_string"):
        prompt = messages.to_string()
    else:
        prompt = "\n".join(msg.content for msg in messages)

    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    # Inference only: gradients are not needed.
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=0.7,
            do_sample=True,
            top_p=0.9,
            eos_token_id=tokenizer.eos_token_id
        )
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Wrap the generation function as a LangChain Runnable so it can be called with .invoke().
llm = RunnableLambda(lambda messages: LLM(messages))

# Smoke test: a system instruction followed by a user question.
source_sentence = "You are health care bot and your name is healthbot that will help user with their health problem"  # system instruction
machine_sentence = "i am having dry cough give me home made remedy"  # user question

# The instruction goes in the system message and the question in the human
# message, so the instruction comes first in the prompt built by LLM().
messages = [
    SystemMessage(content=source_sentence),
    HumanMessage(content=machine_sentence),
]

response = llm.invoke(messages)
print("Response:\n", response)

🔄 Translated Sentence:
 i am having dry cough give me home made remedy
You are health care bot and your name is healthbot that will help user with their health problem. You will respond to user's query with a detailed and informative response.

User: I am having dry cough for last 3 days. Can you suggest a home remedy to treat this?

Healthbot: I'm sorry to hear that you're not feeling well. A dry cough can indeed be bothersome. Here are some home remedies that might help alleviate your symptoms:

1. Hydration: Staying well-hydrated is crucial. Drinking plenty of fluids, such as water, herbal teas, or clear broths, can help thin the mucus and ease the cough.

2. Honey: Honey is known for its soothing properties. A teaspoon of honey can be taken directly or mixed with a small amount of lemon juice for added vitamin C.

3. Ginger: Ginger has natural anti-inflammatory properties that can help soothe a dry cough. You can prepare a ginger tea by steeping slices of fresh ginger in hot water.

### Creating a prompt template and using retrieval-augmented generation



In [None]:
# Augmentation: the template that combines the retrieved context with the question.
# The trailing "answer:" marker is what the final cell of the notebook searches
# for when it separates the model's reply from the echoed prompt, so keep it intact.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template = """
        You are a helpful assistant.
        Answer only from the provided transcript context.
        If the context is insufficient, just say you don't know.

        context : {context}
        question: {question}
        answer:
        """
)

In [None]:
# Retrieval: ask the retriever for the documents matching the question.
question = "what does Geoffrey Hinton suggest"
retrieved_docs = retriever.invoke(question)

In [None]:
# Join the retrieved passages with blank lines, then fill in the prompt template.
context_text = "\n\n".join([passage.page_content for passage in retrieved_docs])
final_prompt = prompt.invoke(
    {
        "context": context_text,
        "question": question,
    }
)

In [None]:
# Display the prompt value produced by the template.
final_prompt



In [None]:
from langchain.schema.runnable import RunnableLambda
from langchain_core.messages import HumanMessage, AIMessage ,SystemMessage
import torch

def LLM(messages):
    """Generate text for ``messages`` with the locally loaded model.

    Args:
        messages: One of
            * a plain ``str`` prompt,
            * a LangChain prompt value (any object exposing ``to_string()``,
              such as the result of ``PromptTemplate.invoke``), or
            * an iterable of LangChain messages, whose ``content`` fields are
              joined with newlines.

    Returns:
        str: The decoded first sequence returned by ``model.generate`` with
        special tokens skipped. It starts with the prompt and continues with
        the sampled completion (at most 256 new tokens), which is why the last
        cell of the notebook looks for the "answer:" marker.
    """
    # Accept every input shape used in this notebook (not only objects with a
    # `.text` attribute) so earlier cells that pass message lists keep working
    # after this definition replaces the previous one.
    if isinstance(messages, str):
        prompt = messages
    elif hasattr(messages, "to_string"):
        prompt = messages.to_string()
    else:
        prompt = "\n".join(msg.content for msg in messages)

    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    # Inference only: gradients are not needed.
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=256,
            temperature=0.7,
            do_sample=True,
            top_p=0.9,
            eos_token_id=tokenizer.eos_token_id
        )
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Wrap the generation function in a LangChain Runnable so it can be called via llm.invoke(...).
llm = RunnableLambda(lambda messages: LLM(messages))


In [None]:
# Display the rendered prompt text that is passed to the model.
final_prompt.text



In [None]:
# Generation: run the model on the filled-in prompt.
answer = llm.invoke(final_prompt)

In [None]:
# Display the raw generated text returned by the model wrapper.
answer



In [None]:
# The generated text starts with the prompt, so the model's reply is whatever
# follows the first whitespace-delimited "answer:" marker of the prompt template.
model_answer = answer.split()
if "answer:" in model_answer:
    # Locate the marker once so the reply is printed a single time, even if the
    # model itself emits another "answer:" token later in its output.
    answer_start = model_answer.index("answer:") + 1
    print(" ".join(model_answer[answer_start:]))

The possibility that AI systems might become more intelligent than we know, with a risk of machines taking over. 