# Setup

In [1]:
from dotenv import load_dotenv

In [2]:
# Load environment variables (e.g. OPENAI_API_KEY, used by the model below)
# from a local .env file. The result is bound to `_` so the cell shows no output.
_ = load_dotenv()

In [3]:
# URL of the YouTube video whose audio is downloaded and transcribed
# in the "Transcribing the YouTube Video" section.
YOUTUBE_VIDEO = "https://www.youtube.com/watch?v=cdiD-9MMpb0"

# First Simple Chain

In [4]:
from langchain_openai.chat_models import ChatOpenAI

In [5]:
# No API key is passed explicitly: OPENAI_API_KEY is automatically sourced
# from the environment.
model = ChatOpenAI(model="gpt-3.5-turbo")

# Example query. As the last expression of the cell, the returned AIMessage
# (content plus response/usage metadata) is displayed as the cell output.
model.invoke("What MLB team won the World Series during the COVID-19 pandemic?")

AIMessage(content='The Los Angeles Dodgers won the World Series during the COVID-19 pandemic in 2020.', response_metadata={'token_usage': {'completion_tokens': 19, 'prompt_tokens': 21, 'total_tokens': 40}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-894f9501-5c2c-435a-a79f-7503da8f08f6-0', usage_metadata={'input_tokens': 21, 'output_tokens': 19, 'total_tokens': 40})

# Output Parser

In [6]:
from langchain_core.output_parsers import StrOutputParser

In [7]:
# Reduces the model's AIMessage to its plain-text content (compare the output
# of this cell with the raw AIMessage shown by the previous one).
parser = StrOutputParser()

# Two-step chain: the model's output is piped into the parser.
chain = model | parser

# Same question as before; the result is now a plain string.
chain.invoke("What MLB team won the World Series during the COVID-19 pandemic?")

'The Los Angeles Dodgers won the World Series during the COVID-19 pandemic in 2020.'

# Chat Prompt Template

In [8]:
from langchain.prompts import ChatPromptTemplate

In [9]:
# Prompt text with two placeholders, {context} and {question}. The trailing
# backslash inside the string literal is a line continuation, so the first
# two lines are joined into a single sentence (see the printed output).
template = """
Answer the question based on the context below. If you can't \
answer the question, reply "I don't know".

Context: {context}

Question: {question}
"""

# Show the raw template, placeholders still unfilled.
print(template)

prompt = ChatPromptTemplate.from_template(template)
# Preview the rendered prompt with sample values; the output shows it is
# formatted as a single "Human:" message.
prompt.format(context="Mary's sister is Susana.", question="Who is Mary's sister?")


Answer the question based on the context below. If you can't answer the question, reply "I don't know".

Context: {context}

Question: {question}



'Human: \nAnswer the question based on the context below. If you can\'t answer the question, reply "I don\'t know".\n\nContext: Mary\'s sister is Susana.\n\nQuestion: Who is Mary\'s sister?\n'

In [10]:
# Three-step chain: prompt template -> chat model -> string parser.
# NOTE: this rebinds `chain`, replacing the two-step chain defined earlier.
chain = prompt | model | parser

# Supply a value for each of the template's two placeholders.
chain.invoke(dict(context="Mary's sister is Susana", question="Who is Mary's sister?"))

'Susana'

# Combining Chains

In [11]:
# Second prompt: takes an {answer} text and the target {language} to translate it into.
translation_prompt = ChatPromptTemplate.from_template(
    "Translate {answer} to {language}"
)

In [12]:
from operator import itemgetter

In [13]:
# Chain of chains. The leading dict builds the inputs of `translation_prompt`
# from the single dict passed to `invoke`:
#   - "answer":   produced by running the question-answering `chain` on that input
#   - "language": read from the input's "language" key via itemgetter
# The filled translation prompt is then sent to the model and parsed to a string.
translation_chain = (
    {"answer": chain, "language": itemgetter("language")}
    | translation_prompt
    | model
    | parser
)

In [14]:
# FIXME: this call is commented out because of a weird bug -- see later in the notebook.
# translation_chain.invoke(
#     {
#         "context": "Mary's sister is Susana. She doesn't have any more siblings.",
#         "question": "How many sisters does Mary have?",
#         "language": "German",
#     }
# )

# Transcribing the YouTube Video

In [15]:
import os
import tempfile

import whisper
from pytube import YouTube

> **NOTE**
> 
> To run the following cell, `ffmpeg` must be installed on your system. On Debian/Ubuntu-based Linux distributions, install it with:
> ```bash
> sudo apt update
> sudo apt install ffmpeg
> ```

In [17]:
# Transcribing is slow (audio download + Whisper inference), so the result is
# cached on disk and the whole step is skipped when the cache file exists.
TRANSCRIPTION_PATH = "transcription.txt"

if not os.path.exists(TRANSCRIPTION_PATH):
    youtube = YouTube(YOUTUBE_VIDEO)
    audio = youtube.streams.filter(only_audio=True).first()
    if audio is None:
        # `.first()` yields None when no stream matches the filter; fail with
        # a clear message instead of an AttributeError on `audio.download`.
        raise RuntimeError(f"No audio-only stream found for {YOUTUBE_VIDEO}")

    # Let's load the base model. This is not the most accurate
    # model but it's fast.
    whisper_model = whisper.load_model("base")

    # The downloaded audio is only needed while transcribing, so it lives in
    # a temporary directory that is removed when the block exits.
    with tempfile.TemporaryDirectory() as tmpdir:
        audio_path = audio.download(output_path=tmpdir)
        transcription = whisper_model.transcribe(audio_path, fp16=False)["text"].strip()

    # Explicit UTF-8: the transcript may contain non-ASCII characters and the
    # platform default encoding is not guaranteed to be able to encode them.
    with open(TRANSCRIPTION_PATH, "w", encoding="utf-8") as transcription_file:
        transcription_file.write(transcription)