In [None]:
import datetime
from typing import Literal, Optional, Tuple
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain.utils.math import cosine_similarity
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import YoutubeLoader

## Logical Routing

### Routerの作成

In [None]:
class RouteQuery(BaseModel):
    """Route a user query to the most relevant datasource."""

    # NOTE: the class docstring above is not just documentation. When this model is
    # handed to `with_structured_output`, the docstring is forwarded to the LLM as
    # the description of the schema, so keep it short and task-focused.

    # The Literal type limits the answer to exactly these three datasource names.
    # `...` marks the field as required, matching the style used by TutorialSearch.
    datasource: Literal["python_docs", "js_docs", "golang_docs"] = Field(
        ...,
        description="Given a user question choose which datasource would be most relevant for answering their question",
    )


In [None]:
# System prompt for the router. In short: the model is told it is an expert at
# routing a user question to the appropriate data source, and that it should pick
# the data source from the programming language the question refers to.
system = """You are an expert at routing a user question to the appropriate data source.

Based on the programming language the question is referring to, route it to the relevant data source."""

# Two-message chat prompt: the fixed system instruction plus the user's question,
# which is filled in through the {question} placeholder.
prompt = ChatPromptTemplate.from_messages([("system", system), ("human", "{question}")])

# Chat model whose reply is constrained to the RouteQuery schema.
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0125")
structured_llm = llm.with_structured_output(RouteQuery)

# Router chain: render the prompt, then ask the structured model for a RouteQuery.
router = prompt | structured_llm

In [None]:
# Run the router on a sample question that contains Python code.
question = """Why doesn't the following code work:

from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages(["human", "speak in {language}"])
prompt.invoke("french")
"""
result = router.invoke({"question": question})

# Show the whole structured result first, then only the chosen datasource name.
print(result)
print(result.datasource)

In [None]:
# Route selection
def choose_route(result):
    """Map a routing result to the name of the chain that should handle it.

    Args:
        result: Object exposing a string ``datasource`` attribute (the router's
            structured output).

    Returns:
        ``"chain_for_python_docs"`` or ``"chain_for_js_docs"`` when the
        lower-cased datasource contains the matching key; otherwise
        ``"chain_for_golang_docs"`` as the fallback.
    """
    datasource = result.datasource.lower()

    # Checked in order; the first key found inside the datasource name wins.
    known_routes = (
        ("python_docs", "chain_for_python_docs"),
        ("js_docs", "chain_for_js_docs"),
    )
    for key, chain_name in known_routes:
        if key in datasource:
            return chain_name

    # Anything that matched neither key falls through to the Go documentation chain.
    return "chain_for_golang_docs"

# Full routing chain: the router's structured output is passed to choose_route,
# so invoking this chain yields the name of the selected downstream chain.
full_chain = router | RunnableLambda(choose_route)

In [None]:
# Run the full routing chain on the sample question and print the chosen chain name.
print(full_chain.invoke({"question": question}))

## Semantic Routing

### 用途別プロンプトの作成

In [None]:
# Prompt used for physics questions; {query} is replaced with the user's question.
# The trailing backslashes join the sentences of the first paragraph into one line.
physics_template = """You are a very smart physics professor. \
You are great at answering questions about physics in a concise and easy to understand manner. \
When you don't know the answer to a question you admit that you don't know.

Here is a question:
{query}"""

# Prompt used for math questions; same {query} placeholder as the physics prompt.
math_template = """You are a very good mathematician. You are great at answering math questions. \
You are so good because you are able to break down hard problems into their component parts, \
answer the component parts, and then put them together to answer the broader question.

Here is a question:
{query}"""

In [None]:
# Build one embedding per purpose-specific prompt template.
embeddings = OpenAIEmbeddings()
# Order matters: prompt_embeddings[i] is the embedding of prompt_templates[i].
prompt_templates = [physics_template, math_template]
prompt_embeddings = embeddings.embed_documents(prompt_templates)

In [None]:
def prompt_router(input):
    """Pick the prompt template whose embedding is closest to the query.

    Args:
        input: Mapping with a ``"query"`` key holding the user's question.

    Returns:
        A ``PromptTemplate`` built from the template in ``prompt_templates`` with
        the highest cosine similarity to the query. Also prints which template
        was selected.
    """
    question_vector = embeddings.embed_query(input["query"])

    # One similarity score per entry of prompt_templates, in the same order.
    scores = cosine_similarity([question_vector], prompt_embeddings)[0]
    best_index = scores.argmax()
    chosen_template = prompt_templates[best_index]

    if chosen_template == math_template:
        label = "Using MATH"
    else:
        label = "Using PHYSICS"
    print(label)

    return PromptTemplate.from_template(chosen_template)

In [None]:
# Semantic-routing chain:
#   1. wrap the raw input string as {"query": ...}
#   2. let prompt_router choose and build the best-matching prompt template
#   3. send the rendered prompt to the chat model
#   4. reduce the model's reply to a plain string
chain = (
    {"query": RunnablePassthrough()}
    | RunnableLambda(prompt_router)
    | ChatOpenAI()
    | StrOutputParser()
)

# Alternative sample question (expected to be routed to the physics prompt):
# print(chain.invoke("What's a black hole?"))

# Sample math question. The mathematician's name is spelled "Fermat"; the previous
# text ("a Felmat's") was a typo in the string that is embedded and sent to the model.
print(chain.invoke("What's Fermat's last theorem?"))

## Query Structuring for Metadata Filters

### 動画メタデータ取得

In [None]:
# Load a single YouTube video as documents; add_video_info=True asks the loader
# to attach video metadata to each document as well.
docs = YoutubeLoader.from_youtube_url(
    "https://www.youtube.com/watch?v=pbAd8O1Lvm4", add_video_info=True
).load()

# Inspect how many documents were returned and what metadata the first one carries.
print(len(docs))
print(docs[0].metadata)

### データベース定義

In [None]:
class TutorialSearch(BaseModel):
    """Search over a database of tutorial videos about a software library."""

    # NOTE: the class docstring above is forwarded to the LLM as the description of
    # this schema when the model is used with `with_structured_output`, so it is
    # kept short and task-focused.

    # --- Required free-text queries -------------------------------------------
    content_search: str = Field(
        ...,
        description="Similarity search query applied to video transcripts.",
    )
    title_search: str = Field(
        ...,
        description=(
            "Alternative version of content search query to apply to video titles. "
            "Should be succinct and only include key words that could be in a video title."
        ),
    )

    # --- Optional metadata filters (None means "no filter") ------------------
    min_view_count: Optional[int] = Field(
        None,
        description="Minimum view count filter, inclusive. Only use if explicitly specified.",
    )
    max_view_count: Optional[int] = Field(
        None,
        description="Maximum view count filter, inclusive. Only use if explicitly specified.",
    )
    earliest_publish_date: Optional[datetime.date] = Field(
        None,
        description="Earliest publish date filter, inclusive. Only use if explicitly specified.",
    )
    latest_publish_date: Optional[datetime.date] = Field(
        None,
        description="Latest publish date filter, inclusive. Only use if explicitly specified.",
    )
    min_length_sec: Optional[int] = Field(
        None,
        description="Minimum video length in seconds, inclusive. Only use if explicitly specified.",
    )
    max_length_sec: Optional[int] = Field(
        None,
        description="Maximum video length in seconds, inclusive. Only use if explicitly specified.",
    )

    def pretty_print(self) -> None:
        """Print every field that carries a meaningful value.

        A field is printed as ``name: value`` only when its value is not ``None``
        and differs from the field's declared default, so unset optional filters
        are omitted from the output.
        """
        for name, model_field in self.__fields__.items():
            value = getattr(self, name)
            if value is None:
                continue
            # Fall back to None when the field object exposes no `default` attribute.
            if value != getattr(model_field, "default", None):
                print(f"{name}: {value}")


### クエリ作成プロンプト、チェーンの作成

In [None]:
# System prompt for query structuring. In short: the model is an expert at turning
# user questions into database queries; it has access to a database of tutorial
# videos about a software library for building LLM-powered applications; given a
# question it returns a query optimized to retrieve the most relevant results; and
# it must not try to rephrase acronyms or words it does not recognise.
system = """You are an expert at converting user questions into database queries. \
You have access to a database of tutorial videos about a software library for building LLM-powered applications. \
Given a question, return a database query optimized to retrieve the most relevant results.

If there are acronyms or words you are not familiar with, do not try to rephrase them."""

# Chat model whose reply is constrained to the TutorialSearch schema.
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0125")
structured_llm = llm.with_structured_output(TutorialSearch)

# Two-message chat prompt: the fixed system instruction plus the user's question.
prompt = ChatPromptTemplate.from_messages([("system", system), ("human", "{question}")])

# Query analyzer chain: render the prompt, then ask for a TutorialSearch object.
query_analyzer = prompt | structured_llm

#### キーワードによる検索

In [None]:
# Keyword-style question with no explicit filters; print the fields that were set.
query_analyzer.invoke({"question": "rag from scratch"}).pretty_print()

#### 時間を含む検索

In [None]:
# Question that names a publication year ("published in 2023"); print the fields
# that were set to see how the year was turned into date filters.
query_analyzer.invoke(
    {"question": "videos on chat langchain published in 2023"}
).pretty_print()

In [None]:
# Question with an upper bound on the publication date ("published before 2024").
query_analyzer.invoke(
    {"question": "videos that are focused on the topic of chat langchain that are published before 2024"}
).pretty_print()

In [None]:
# Question with a duration limit ("under 5 minutes"); the schema expresses video
# length in seconds, so check how the limit appears in the printed fields.
query_analyzer.invoke(
    {
        "question": "how to use multi-modal models in an agent, only videos under 5 minutes"
    }
).pretty_print()