In [2]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file (python-dotenv) before anything else runs.
load_dotenv()


# This is the YouTube video we're going to use.
# Alternative videos tried earlier (swap in by uncommenting one of them):
#YOUTUBE_VIDEO = "https://www.youtube.com/watch?v=cdiD-9MMpb0"
#YOUTUBE_VIDEO = "https://www.youtube.com/watch?v=NiKtZgImdlY"
# NOTE(review): the id after "v=" below is 12 characters long, while the two
# alternatives above are 11 — confirm there is no stray trailing character.
YOUTUBE_VIDEO = "https://www.youtube.com/watch?v=ufQEqi4LUZ4z"

In [4]:
from langchain_ollama.llms import OllamaLLM

from langchain_core.output_parsers import StrOutputParser

# The "llama2" model accessed through Ollama, followed by a parser that turns
# the model output into a plain string.
model = OllamaLLM(model="llama2")
parser = StrOutputParser()

chain = model | parser

# Baseline query: no context is supplied, the model answers from what it knows.
world_series_question = "What MLB team won the World Series during the COVID-19 pandemic?"
chain.invoke(world_series_question)


'During the COVID-19 pandemic, which began in 2020 and continued into the 2021 season, there was no MLB World Series played. The pandemic caused significant disruptions to the baseball season, including canceled games, modified schedules, and changes to the postseason format. As a result, there was no MLB team that won the World Series during this time period.'

In [5]:
from langchain.prompts import ChatPromptTemplate

# Prompt text assembled from explicit line literals. The trailing space after
# "can't" is part of the prompt text and is kept as-is.
template = (
    "\n"
    "Answer the question based on the context below. If you can't \n"
    "answer the question, reply \"I don't know\".\n"
    "\n"
    "Context: {context}\n"
    "\n"
    "Question: {question}\n"
)

prompt = ChatPromptTemplate.from_template(template)

# Render the template once to inspect the final prompt string.
sample_inputs = {
    "context": "Mary's sister is Susana",
    "question": "Who is Mary's sister?",
}
prompt.format(**sample_inputs)

'Human: \nAnswer the question based on the context below. If you can\'t \nanswer the question, reply "I don\'t know".\n\nContext: Mary\'s sister is Susana\n\nQuestion: Who is Mary\'s sister?\n'

In [6]:
# Question-answering pipeline: fill the prompt, run the model, parse to a string.
chain = prompt | model | parser

# Both template variables are supplied explicitly for this first test.
sister_inputs = {
    "context": "Mary's sister is Susana",
    "question": "Who is Mary's sister?",
}
chain.invoke(sister_inputs)

"Of course! Based on the context provided, Mary's sister is Susana."

In [7]:
# Second prompt: asks for {answer} to be translated into {language}.
translation_template = "Translate {answer} to {language}"
translation_prompt = ChatPromptTemplate.from_template(translation_template)

In [8]:
from operator import itemgetter

# First stage of the translation pipeline:
#   "answer"   -> produced by running the question-answering `chain` on the input dict
#   "language" -> picked out of the same input dict
translation_inputs = {
    "answer": chain,
    "language": itemgetter("language"),
}
translation_chain = translation_inputs | translation_prompt | model | parser

# One dict carries the inputs for both the inner chain and the translation step.
translation_request = {
    "context": "Mary's sister is Susana. She doesn't have any more siblings.",
    "question": "How many sisters does Mary have?",
    "language": "Spanish",
}
translation_chain.invoke(translation_request)

'\nSure! Here\'s the translation of "Based on the context provided, Mary has only one sister, which is Susana. Therefore, the answer to the question is \'1\'" in Spanish:\n\n"Based on the context provided, María solo tiene una hermana, que es Susana. Por lo tanto, la respuesta a la pregunta es \'1\'."\n\nI hope this helps! Let me know if you have any other questions.'

In [None]:
from pytubefix import YouTube
from pytubefix.cli import on_progress
 
# Reuse the notebook-wide YOUTUBE_VIDEO constant instead of repeating the URL
# literal here, so the two cannot drift apart when the video is changed.
url = YOUTUBE_VIDEO

yt = YouTube(url, on_progress_callback=on_progress)
print(yt.title)

# get_highest_resolution() may come back empty when no matching stream exists;
# fail with a clear message instead of an AttributeError on None.
ys = yt.streams.get_highest_resolution()
if ys is None:
    raise RuntimeError(f"No downloadable stream found for: {url}")

# Keep the value returned by download() (displayed as the cell output) so the
# saved file location is available for reference.
video_path = ys.download()
video_path

In [None]:
from moviepy.editor import VideoFileClip

# File produced by the download cell. Other videos used earlier:
#   "Andrej Karpathy Tesla AI, Self-Driving, Optimus, Aliens, and AGI  Lex Fridman Podcast #333.mp4"
#   "The danger of silence  Clint Smith  TED.mp4"
video_filename = "Mindset Reset Take Control of Your Mental Habits  The Mel Robbins Podcast.mp4"

# Destination for the extracted audio. A raw string keeps every backslash
# literal; the previous non-raw literal contained "\V", which is an invalid
# escape sequence and triggers a warning on recent Python versions.
audio_path = r"C:\Users\ahmed\Videos\RAGTRANS\podcast1H20.mp3"

# The context manager closes the clip when the block exits, even if the
# export raises.
with VideoFileClip(video_filename) as video:
    if video.audio is None:
        raise ValueError(f"No audio track found in: {video_filename}")

    # Extract audio and save it as an MP3 file
    video.audio.write_audiofile(audio_path)

print(f"Audio extracted and saved to: {audio_path}")


In [None]:
import whisper
import os

# Load the Whisper model
whisper_model = whisper.load_model("base")

# Raw string literal: each backslash is written once. Doubling them inside
# r"..." would put two literal backslashes between path components.
audio_path = r"C:\Users\ahmed\Videos\RAGTRANS\podcast1H20.mp3"

if not os.path.exists(audio_path):
    print("Audio file not found.")
else:
    print(f"Audio file found at: {audio_path}")

    try:
        # Transcribe the audio and keep only the stripped text of the result.
        transcription = whisper_model.transcribe(audio_path, fp16=False)["text"].strip()
    except Exception as e:
        print(f"An error occurred while transcribing: {e}")
    else:
        # Print the transcription
        print("Transcription: ", transcription)

        # Save the transcription to a text file. Reported separately so a
        # write failure is not mislabelled as a transcription failure.
        # NOTE: open() uses the platform default encoding here, and so do the
        # cells that read transcription.txt back; change them together.
        try:
            with open("transcription.txt", "w") as file:
                file.write(transcription)
        except (OSError, UnicodeError) as e:
            print(f"An error occurred while saving the transcription: {e}")


In [9]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document

# Load the saved transcript from the current working directory.
with open("transcription.txt") as transcript_file:
    transcription_text = transcript_file.read()

# Wrap the raw text so the splitter can treat it as a document.
document = Document(page_content=transcription_text)

# Small chunks (size 100, overlap 20) to make the overlap easy to see.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=20, chunk_size=100)
chunks = text_splitter.split_documents([document])

# Preview the first five chunks.
preview_count = 5
for piece in chunks[:preview_count]:
    print(piece.page_content)


Today we've got a crazy cool topic. We are talking about mindset. Your mind is either working for
either working for you or against you. That's what it's doing. So whether you're listening to this
listening to this episode because you struggle right now with overthinking or feeling unworthy or
feeling unworthy or maybe you have a really positive outlook but you just want to level up. You
to level up. You want to play a bigger game. That's where I am right now. So today you and I are


In [10]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document

# Load the saved transcript again; this cell repeats the split with larger chunks.
with open("transcription.txt") as transcript_file:
    transcription_text = transcript_file.read()

# Tag the document with the file it came from.
source_metadata = {'source': 'transcription.txt'}
document = Document(page_content=transcription_text, metadata=source_metadata)

# Chunk size 1000 with an overlap of 20.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=20, chunk_size=1000)
documents = text_splitter.split_documents([document])

# Preview the first five chunks.
preview_count = 5
for piece in documents[:preview_count]:
    print(piece.page_content)


Today we've got a crazy cool topic. We are talking about mindset. Your mind is either working for you or against you. That's what it's doing. So whether you're listening to this episode because you struggle right now with overthinking or feeling unworthy or maybe you have a really positive outlook but you just want to level up. You want to play a bigger game. That's where I am right now. So today you and I are going to get serious about making your mind work for you. Hey, it's your friend Mel and welcome to a mind bending and really cool episode of the Mel Robbins podcast. Okay, I wanted to just start today by saying thank you. Thank you, thank you, thank you to you. I often say that the Mel Robbins podcast is not my podcast, it's our podcast because this is a conversation between you and me and I wanted to start off by saying thank you because about 90 seconds ago, I got word that you have voted the Mel Robbins podcast as the most inspirational podcast of 2022. We have won the
We have

In [11]:

# Embedding client that uses the same "llama2" model name as the generation model.
from langchain_ollama import OllamaEmbeddings

embeddings = OllamaEmbeddings(model="llama2")

# Embed one sample question.
embedded_query = embeddings.embed_query("Who is Mary's sister?")

# Report the vector length and a preview of its first 10 values.
print(f"Embedding length: {len(embedded_query)}")
print(embedded_query[:10])


Embedding length: 4096
[0.02356694, 0.005193902, 0.0052631935, 0.031144299, 0.005877835, -0.017635036, 0.0054636756, 0.0056269797, -0.0033483005, -0.005124399]


In [12]:
# Embed two candidate sentences: one that answers the sample question and one
# that is unrelated to it.
sentence1, sentence2 = (
    embeddings.embed_query(text)
    for text in ("Mary's sister is Susana", "Pedro's mother is a teacher")
)

In [13]:
from sklearn.metrics.pairwise import cosine_similarity

# cosine_similarity works on 2-D inputs, so each vector is wrapped in a list
# and the single resulting score is read back out of the 1x1 result.
query_matrix = [embedded_query]
query_sentence1_similarity = cosine_similarity(query_matrix, [sentence1])[0, 0]
query_sentence2_similarity = cosine_similarity(query_matrix, [sentence2])[0, 0]

# NOTE(review): in the recorded run the unrelated sentence (sentence2) scores
# far higher than the matching one (sentence1). This suggests these embeddings
# may be a poor fit for retrieval — consider evaluating a dedicated embedding model.
(query_sentence1_similarity, query_sentence2_similarity)

(0.0030565065691788122, 0.5271359943533441)

In [14]:
# Record the installed langchain_community version; the vector store import
# in the next cell comes from this package.
import langchain_community
print(langchain_community.__version__)


0.3.13


In [20]:
from langchain_community.vectorstores import DocArrayInMemorySearch

# Toy knowledge base: six short, unrelated facts.
sample_facts = [
    "Mary's sister is Susana",
    "John and Tommy are brothers",
    "Patricia likes white cars",
    "Pedro's mother is a teacher",
    "Lucia drives an Audi",
    "Mary has two siblings",
]

# Build an in-memory vector store from the facts using the `embeddings` client.
vectorstore1 = DocArrayInMemorySearch.from_texts(sample_facts, embedding=embeddings)

In [21]:
# Ask the store for the 3 best matches to the question, each paired with its score.
vectorstore1.similarity_search_with_score(query="Who is Mary's sister?", k=3)


[(Document(metadata={}, page_content="Pedro's mother is a teacher"),
  0.5271360416397188),
 (Document(metadata={}, page_content='Patricia likes white cars'),
  0.5088044373421162),
 (Document(metadata={}, page_content='Mary has two siblings'),
  0.48031196976187845)]

In [22]:
# Expose the vector store as a retriever so it can be used as a chain step,
# then query it directly as a sanity check.
retriever1 = vectorstore1.as_retriever()
retriever1.invoke("Who is Mary's sister?")

[Document(metadata={}, page_content="Pedro's mother is a teacher"),
 Document(metadata={}, page_content='Patricia likes white cars'),
 Document(metadata={}, page_content='Mary has two siblings'),
 Document(metadata={}, page_content='John and Tommy are brothers')]

In [23]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

# Build the prompt inputs from a single question string:
#   "context"  -> documents returned by the retriever for that question
#   "question" -> the question itself, passed through unchanged
setup = RunnableParallel(
    {
        "context": retriever1,
        "question": RunnablePassthrough(),
    }
)
setup.invoke("What color is Patricia's car?")

{'context': [Document(metadata={}, page_content='Patricia likes white cars'),
  Document(metadata={}, page_content='Mary has two siblings'),
  Document(metadata={}, page_content="Pedro's mother is a teacher"),
  Document(metadata={}, page_content='John and Tommy are brothers')],
 'question': "What color is Patricia's car?"}

In [24]:
# Retrieval-augmented pipeline: `setup` produces context + question, which then
# flow through the prompt, the model and the string parser.
# This rebinds `chain`, replacing the earlier pipeline that had no retrieval step.
chain = setup.pipe(prompt, model, parser)
chain.invoke("What color is Patricia's car?")

"I don't know. The context you provided does not provide enough information to determine the color of Patricia's car."

In [25]:
# Second probe of the retrieval chain; the matching fact ("Lucia drives an Audi")
# is one of the texts stored in vectorstore1.
chain.invoke("What car does Lucia drive?")


"I don't know the answer to your question as there is no information in the provided context about Lucia or her car."