# Imports

In [1]:
import os
from langchain_core.output_parsers import StrOutputParser
from langchain.llms import Ollama
from langchain.prompts import ChatPromptTemplate

import tempfile
import whisper
from pytube import YouTube

from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

from sentence_transformers import SentenceTransformer
from langchain.vectorstores import FAISS
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings

from langchain_core.runnables import RunnableParallel, RunnablePassthrough

from langchain_experimental.text_splitter import SemanticChunker
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_core.documents import Document

# URL of the YouTube video that the yt_dlp download cell further down fetches.
YOUTUBE_VIDEO = "https://youtu.be/T-D1OfcDW1M"


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# LLM handle created through the LangChain `Ollama` wrapper.
model = Ollama(model="hf.co/bartowski/Llama-3.2-1B-Instruct-GGUF")

# Smoke test: send a one-word prompt and print whatever the model returns.
response = model.invoke("hi")

print(response)


How are you today? Is there something I can help you with or would you like to chat?


In [5]:
# Output parser that is piped after the model in the chains of this notebook.
parser = StrOutputParser()

# Pipe the model into the parser and invoke the resulting chain once.
chain = model | parser 
chain.invoke('Hi')

'How are you today? Is there something I can help you with or would you like to chat?'

In [6]:
# Prompt text with two placeholders, {context} and {question}. It instructs the
# model to answer from the context and to reply "I don't know" otherwise.
template = """
You are a helpful assistant. Use the provided context to answer the question as accurately as possible.
If the answer cannot be found in the context, respond with "I don't know".

Context:
{context}

Question:
{question}

Answer:"""

# Wrap the raw text in a chat prompt template.
prompt = ChatPromptTemplate.from_template(template)


# Render the prompt with sample values to inspect the final string.
prompt.format(context="Komai's brother is Obai", question="Who is Komai's brother?")

'Human: \nYou are a helpful assistant. Use the provided context to answer the question as accurately as possible.\nIf the answer cannot be found in the context, respond with "I don\'t know".\n\nContext:\nKomai\'s brother is Obai\n\nQuestion:\nWho is Komai\'s brother?\n\nAnswer:'

In [9]:
# Full pipeline: fill the prompt, send it to the model, parse the model output.
chain = prompt | model | parser

# The input dict supplies the two placeholders used by the prompt template.
chain.invoke({
    "context": "Komai's brother is Obai",
    "question": "Who is Komai's brother?"
})

'Obai'

In [7]:
import yt_dlp

# Output filename template passed to yt_dlp as `outtmpl`.
output_path = "audio.%(ext)s"

# Post-processing step: FFmpegExtractAudio configured for mp3 at quality 192.
mp3_extraction = dict(
    key='FFmpegExtractAudio',
    preferredcodec='mp3',
    preferredquality='192',
)

# Download options: audio format preference, output template, post-processors.
ydl_opts = dict(
    format='bestaudio/best',
    outtmpl=output_path,
    postprocessors=[mp3_extraction],
)

# Download the configured video with the options above.
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download([YOUTUBE_VIDEO])

[youtube] Extracting URL: https://youtu.be/T-D1OfcDW1M
[youtube] T-D1OfcDW1M: Downloading webpage




[youtube] T-D1OfcDW1M: Downloading initial data API JSON
[youtube] T-D1OfcDW1M: Downloading tv client config
[youtube] T-D1OfcDW1M: Downloading player 4b357d1b-tv
[youtube] T-D1OfcDW1M: Downloading tv player API JSON
[youtube] T-D1OfcDW1M: Downloading ios player API JSON
[youtube] T-D1OfcDW1M: Downloading web player API JSON




[youtube] T-D1OfcDW1M: Downloading m3u8 information
[info] Testing format 234


[download] Got error: HTTP Error 403: Forbidden


[info] Testing format 233


[download] Got error: HTTP Error 403: Forbidden


[info] T-D1OfcDW1M: Downloading 1 format(s): 18
[download] Destination: audio.mp4
[download] 100% of   10.23MiB in 00:00:08 at 1.25MiB/s   
[ExtractAudio] Destination: audio.mp3
Deleting original file audio.mp4 (pass -k to keep)


In [9]:
# Whisper model ( "base", "small", "medium", or "large")
# NOTE(review): this rebinds `model`, which an earlier cell bound to the Ollama
# LLM; anything that reads `model` after this cell gets the Whisper model instead.
model = whisper.load_model("medium")



100%|█████████████████████████████████████| 1.42G/1.42G [08:55<00:00, 2.85MiB/s]


In [10]:
# Run Whisper on the downloaded audio and keep the full result around.
result = model.transcribe("audio.mp3")
transcript_text = result["text"]

# Save the transcript text so later cells can read it back from disk.
with open("transcription.txt", mode="w", encoding="utf-8") as transcript_file:
    transcript_file.write(transcript_text)

# Show the transcript in the cell output.
print(transcript_text)



 Large language models, they are everywhere. They get some things amazingly right and other things very interestingly wrong. My name is Marina Danilevsky. I am a senior research scientist here at IBM Research, and I want to tell you about a framework to help large language models be more accurate and more up-to-date. Retrieval Augmented Generation, or RAG. Let's just talk about the generation part for a minute. So, forget the retrieval augmented. So, the generation, this refers to large language models, or LLMs, that generate text in response to a user query referred to as a prompt. These models can have some undesirable behavior. I want to tell you an anecdote to illustrate this. So, my kids, they recently asked me this question. In our solar system, what planet has the most moons? And my response was, oh, that's really great that you're asking me this question. I loved space when I was your age. Of course, that was like 30 years ago. But I know this. I read an article, and the articl

In [None]:
# Optional: clear the Whisper model cache (uncomment the lines below to run)

# import shutil
# import os

# cache_dir = os.path.expanduser("~/.cache/whisper")
# if os.path.exists(cache_dir):
#     shutil.rmtree(cache_dir)

In [12]:
# Embedding model shared by the semantic chunker and the FAISS index built later.
embedder = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")


  embedder = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")


In [13]:
# Semantic chunker driven by `embedder`: percentile breakpoint at 95.0,
# buffer size 1, minimum chunk size 50.
chunker = SemanticChunker(
    embeddings=embedder,
    buffer_size=1,
    breakpoint_threshold_type="percentile",
    breakpoint_threshold_amount=95.0,
    min_chunk_size=50,
)

# Read the transcript through a context manager so the file handle is closed
# deterministically (a bare open(...).read() leaves it to the garbage collector).
with open("transcription.txt", encoding="utf-8") as transcript_file:
    raw = transcript_file.read()

# Whole transcript wrapped as a single Document; only needed for the
# split_documents(docs) alternative mentioned below.
docs = [Document(page_content=raw)]

# Semantic split
semantic_chunks = chunker.create_documents([raw])  # or split_documents(docs)

# Report how many chunks were produced and preview the first one.
print(f"عدد القطع الدلالية: {len(semantic_chunks)}")
print(semantic_chunks[0].page_content)

  return forward_call(*args, **kwargs)


عدد القطع الدلالية: 5
 Large language models, they are everywhere. They get some things amazingly right and other things very interestingly wrong. My name is Marina Danilevsky. I am a senior research scientist here at IBM Research, and I want to tell you about a framework to help large language models be more accurate and more up-to-date. Retrieval Augmented Generation, or RAG. Let's just talk about the generation part for a minute. So, forget the retrieval augmented. So, the generation, this refers to large language models, or LLMs, that generate text in response to a user query referred to as a prompt. These models can have some undesirable behavior.


In [14]:
# Build a FAISS vector store from the semantic chunks, embedded with `embedder`.
db = FAISS.from_documents(semantic_chunks,embedding=embedder)
# Persist the store to disk under the path "faiss.index".
db.save_local("faiss.index")


In [15]:
def fetch_context(question):
    """Build the context string for a question from the vector store `db`.

    Args:
        question: Query text passed to `db.similarity_search`.

    Returns:
        The `page_content` of the 4 documents returned by the search,
        joined with a blank line between them.
    """
    matches = db.similarity_search(question, k=4)
    passages = [match.page_content for match in matches]
    return "\n\n".join(passages)

# Fan the incoming question out into the two keys the prompt template expects:
# the question itself (passed through unchanged) and the retrieved context.
setup = RunnableParallel(
    question=RunnablePassthrough(),
    context=fetch_context,
)


In [16]:
# `model` is rebound to the Whisper speech model by the transcription cells, so on
# a top-to-bottom run it no longer refers to the LLM here. Build the RAG chain
# with a dedicated Ollama handle instead of relying on whatever `model` holds.
llm = Ollama(model="hf.co/bartowski/Llama-3.2-1B-Instruct-GGUF")

# Retrieval setup -> prompt -> LLM -> string output.
chain = setup | prompt | llm | parser

In [22]:
# Ask the RAG chain a question; the string is both the query and the search text.
chain.invoke("What are Large Language Models?")

  return forward_call(*args, **kwargs)


"Large language models (LLMs) are a type of artificial intelligence that can process and generate human-like text based on input from users, such as questions or prompts. They have been trained on vast amounts of data and use complex algorithms to understand the context and intent behind the user's query.\n\nIn this specific scenario, Large Language Models get some things amazingly right and other things very interestingly wrong. The question was about what Large Language Models are, but the response didn't accurately convey their capabilities or limitations.\n\nI don't know."

In [24]:
# Second query against the same RAG chain.
chain.invoke("what planet has the most moons?")

  return forward_call(*args, **kwargs)


'Saturn.'