In [2]:
import os
from dotenv import load_dotenv

# Pull variables from a local .env file into the process environment
# before anything below reads them.
load_dotenv()

# Video that the transcription cell further down downloads and transcribes.
YOUTUBE_VIDEO = "https://www.youtube.com/watch?v=cdiD-9MMpb0"

# Resolves to None when the variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [3]:
from langchain_openai.chat_models import ChatOpenAI

# Chat model used by the chains built in the following cells; the key is
# the one read from the environment in the configuration cell.
model = ChatOpenAI(
    model="gpt-3.5-turbo",
    openai_api_key=OPENAI_API_KEY,
)

In [None]:
# Call the chat model directly with a plain string; the cell displays
# whatever invoke() returns.
model.invoke("What is the meaning of life?")

In [None]:
# Second direct call to the model, this time with a simple arithmetic question.
model.invoke("How much is 2 + 2?")

In [None]:
from langchain_core.output_parsers import StrOutputParser

# Parser stage that is appended after the model in the chains below.
parser = StrOutputParser()

# `|` composes the two steps: the model's output is handed to the parser,
# so a single invoke() runs both in order.
chain = model | parser
chain.invoke("What is the meaning of life?")


In [None]:
from langchain.prompts import ChatPromptTemplate

# Prompt that tells the model to answer only from the supplied context and
# to reply "I don't know" otherwise. {context} and {question} are the two
# placeholders filled in when the prompt is formatted or invoked.
template = """
Answer the question based on the context below. If you can't answer the question, reply "I don't know".

Context: {context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# Preview the rendered prompt with a sample context/question pair.
prompt.format(context="Mary's favorite food is pizza.", question="What is Mary's favorite food?")

In [None]:
# Full pipeline: fill the prompt, send it to the model, parse the reply.
chain = prompt | model | parser
chain.invoke(
    {
        "context": "Mary's favorite food is pizza.",
        "question": "What is Mary's favorite food?",
    }
)

In [9]:
# Prompt with two placeholders: the text to translate and the target language.
translation_prompt = ChatPromptTemplate.from_template("Translate {answer} to {language}")

In [None]:
from operator import itemgetter

# Stage 1: the Q&A chain produces the answer, the requested language is
# picked out of the input dict, and both are fed to the translation prompt.
initial_translation_chain = (
    {"answer": chain, "language": itemgetter("language")}
    | translation_prompt
    | model
    | parser
)

initial_answer = initial_translation_chain.invoke(
    {
        "context": "Mary's sister is Susana. She does not have any more siblings.",
        "question": "How many siblings does Mary have?",
        "language": "Polish",
    }
)

# Stage 2: take the stage-1 output and translate it again, asking the model
# to also mention the language it is translating from.
sec_translation_prompt = ChatPromptTemplate.from_template(
    "Translate {answer} to {language} and add info from what language are you translating from"
)

sec_translation_chain = (
    {"answer": itemgetter("answer"), "language": itemgetter("language")}
    | sec_translation_prompt
    | model
    | parser
)

final_translation = sec_translation_chain.invoke(
    {"answer": initial_answer, "language": "English"}
)

# Second translation first, then the stage-1 answer, one per line.
print(final_translation, initial_answer, sep="\n")

In [29]:
from operator import itemgetter

# The Q&A chain produces the answer, the requested language is picked out
# of the input dict, and both are fed to the translation prompt.
initial_translation_chain = (
    {"answer": chain, "language": itemgetter("language")}
    | translation_prompt
    | model
    | parser
)

initial_answer = initial_translation_chain.invoke(
    {
        "context": "Mary's sister is Susana. She does not have any more siblings.",
        "question": "How many siblings does Mary have?",
        "language": "Polish",
    }
)

In [30]:
import tempfile
import whisper
from pytube import YouTube

# Downloading and transcribing is slow, so the transcript is cached in
# transcription.txt and this whole step is skipped when that file exists.
if not os.path.exists("transcription.txt"):
    youtube = YouTube(YOUTUBE_VIDEO)
    audio = youtube.streams.filter(only_audio=True).first()
    # first() yields None when no stream matches the filter. Fail here with a
    # clear message (and before loading the Whisper model) instead of hitting
    # an AttributeError on audio.download() below.
    if audio is None:
        raise RuntimeError(f"No audio-only stream found for {YOUTUBE_VIDEO}")
    whisper_model = whisper.load_model("base")

    # The audio is only needed for the duration of the transcription, so it
    # is downloaded into a temporary directory that is removed afterwards.
    with tempfile.TemporaryDirectory() as tmpdir:
        file = audio.download(output_path=tmpdir)
        transcription = whisper_model.transcribe(file, fp16=False)["text"].strip()

        # Persist the transcript so later runs can skip this block.
        with open("transcription.txt", "w") as f:
            f.write(transcription)


In [None]:
# Load the transcript from the cache file, so `transcription` is defined
# whether or not the transcription step ran in this session.
with open("transcription.txt", "r") as f:
    transcription = f.read()
# Display only the first 100 characters as a quick sanity check.
transcription[:100]

In [None]:
# Pass the entire transcript as context. Any exception is caught and its
# message printed so the notebook keeps running.
try:
    chain.invoke(
        {"context": transcription, "question": "Do aliens exist?"}
    )
except Exception as err:
    print(err)

In [None]:
from langchain_community.document_loaders import TextLoader

# Load the cached transcript file through LangChain's text loader.
loader = TextLoader("transcription.txt")
text_documents = loader.load()
# Display what the loader returned.
text_documents

In [50]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Break the loaded transcript into smaller pieces: chunk size 1000 with an
# overlap of 20 between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=1000,
)
documents = text_splitter.split_documents(text_documents)

In [None]:
from langchain_openai.embeddings import OpenAIEmbeddings

# Pass the API key explicitly, the same way the chat model is configured,
# rather than relying on the client picking it up from the environment.
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# Embed a sample query to inspect what an embedding looks like.
embedded_query = embeddings.embed_query("Do aliens exist?")

print(f"Embedding length: {len(embedded_query)}")
# Only the first 10 components; the full vector is too long to display.
print(embedded_query[:10])

In [53]:
# Embed two more sentences so they can be compared against the query
# embedding in the next cell.
sentence1 = embeddings.embed_query("Are we alone in the universe?")
sentence2 = embeddings.embed_query("Are neural networks like human brains?")

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# cosine_similarity takes 2-D inputs, so each vector is wrapped in a list;
# the single score is then read out of the resulting 1x1 matrix.
query_sentence1_similarity = cosine_similarity([embedded_query], [sentence1])[0, 0]
query_sentence2_similarity = cosine_similarity([embedded_query], [sentence2])[0, 0]

# Show both scores side by side.
(query_sentence1_similarity, query_sentence2_similarity)

In [64]:
from langchain_community.vectorstores import DocArrayInMemorySearch
from langchain_core.runnables import RunnablePassthrough, RunnableParallel

# Build an in-memory vector store from the transcript chunks, embedding
# them with the embeddings client created earlier.
vectorstore = DocArrayInMemorySearch.from_documents(documents, embeddings)


In [None]:
# Fan the incoming question out to two branches: the retriever fills
# "context", while the passthrough forwards the question unchanged.
setup = RunnableParallel(
    context=vectorstore.as_retriever(),
    question=RunnablePassthrough(),
)

chain = setup | prompt | model | parser
chain.invoke("What is AGI?")

In [None]:
# Same retrieval pipeline written with a plain dict as the first step:
# the retriever supplies "context" and the question is passed through.
chain = (
    {
        "context": vectorstore.as_retriever(),
        "question": RunnablePassthrough(),
    }
    | prompt
    | model
    | parser
)
chain.invoke("Are we alone in the universe?")