In [278]:
# ingestion: Metadata Enrichment
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
import getpass

OPENAI_API_KEY = getpass.getpass('Enter your OPENAI_API_KEY')

Enter your OPENAI_API_KEY ········


In [279]:
uk_with_metadata_collection = Chroma(
    collection_name = "uk_with_metadata_collection",
    embedding_function = OpenAIEmbeddings(openai_api_key = OPENAI_API_KEY)
)

uk_with_metadata_collection.reset_collection() #incase it already exists

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


In [280]:
#Define ingestion content and splitting strategy
from langchain_community.document_loaders import AsyncHtmlLoader
from langchain_community.document_transformers import Html2TextTransformer
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document

html2text_transformer = Html2TextTransformer()

text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=100
    )

def split_docs_into_chunks(docs):
    text_docs = html2text_transformer.transform_documents(docs) #transform HTML docs into clean text docs
    chunks = text_splitter.split_documents(text_docs)

    return chunks

uk_destinations = [
    ("Cornwall", "Cornwall"), ("North_Cornwall", "Cornwall"),
    ("South_Cornwall", "Cornwall"), ("West_Cornwall", "Cornwall"),
    ("Tintagel", "Cornwall"), ("Bodmin", "Cornwall"), ("Wadebridge", "Cornwall"),
    ("Penzance", "Cornwall"), ("Newquay", "Cornwall"), ("St_Ives", "Cornwall"),
    ("Port_Isaac", "Cornwall"), ("Looe", "Cornwall"), ("Polperro", "Cornwall"),
    ("Porthleven", "Cornwall"),
    ("East_Sussex", "East_Sussex"), ("Brighton", "East_Sussex"),
    ("Battle", "East_Sussex"), ("Hastings_(England)", "East_Sussex"),
    ("Rye_(England)", "East_Sussex"), ("Seaford", "East_Sussex"),
    ("Ashdown_Forest", "East_Sussex")
]

wikivoyage_root_url = "https://en.wikivoyage.org/wiki"

uk_destination_url_with_metadata = [ #C
    ( f'{wikivoyage_root_url}/{destination}', destination, region)
    for destination, region in uk_destinations]

In [281]:
uk_destination_url_with_metadata

[('https://en.wikivoyage.org/wiki/Cornwall', 'Cornwall', 'Cornwall'),
 ('https://en.wikivoyage.org/wiki/North_Cornwall',
  'North_Cornwall',
  'Cornwall'),
 ('https://en.wikivoyage.org/wiki/South_Cornwall',
  'South_Cornwall',
  'Cornwall'),
 ('https://en.wikivoyage.org/wiki/West_Cornwall', 'West_Cornwall', 'Cornwall'),
 ('https://en.wikivoyage.org/wiki/Tintagel', 'Tintagel', 'Cornwall'),
 ('https://en.wikivoyage.org/wiki/Bodmin', 'Bodmin', 'Cornwall'),
 ('https://en.wikivoyage.org/wiki/Wadebridge', 'Wadebridge', 'Cornwall'),
 ('https://en.wikivoyage.org/wiki/Penzance', 'Penzance', 'Cornwall'),
 ('https://en.wikivoyage.org/wiki/Newquay', 'Newquay', 'Cornwall'),
 ('https://en.wikivoyage.org/wiki/St_Ives', 'St_Ives', 'Cornwall'),
 ('https://en.wikivoyage.org/wiki/Port_Isaac', 'Port_Isaac', 'Cornwall'),
 ('https://en.wikivoyage.org/wiki/Looe', 'Looe', 'Cornwall'),
 ('https://en.wikivoyage.org/wiki/Polperro', 'Polperro', 'Cornwall'),
 ('https://en.wikivoyage.org/wiki/Porthleven', 'Porthlev

In [282]:
#Ingest content with metadata
#Enriching chunks with related metadata
for (url, destination, region) in uk_destination_url_with_metadata:
    html_loader = AsyncHtmlLoader(url)
    docs = html_loader.load()

    docs_with_metadata = [
        Document(page_content=d.page_content,
                 metadata = {
                     'source': url,
                     'destination': destination,
                     'region': region})
                 for d in docs]
    chunks = split_docs_into_chunks(docs_with_metadata)

    print(f'Importing: {destination}')
    uk_with_metadata_collection.add_documents(documents=chunks)

Fetching pages: 100%|##############################################################################################################################| 1/1 [00:00<00:00,  1.85it/s]


Importing: Cornwall


Fetching pages: 100%|##############################################################################################################################| 1/1 [00:01<00:00,  1.27s/it]


Importing: North_Cornwall


Fetching pages: 100%|##############################################################################################################################| 1/1 [00:01<00:00,  1.04s/it]


Importing: South_Cornwall


Fetching pages: 100%|##############################################################################################################################| 1/1 [00:01<00:00,  1.27s/it]


Importing: West_Cornwall


Fetching pages: 100%|##############################################################################################################################| 1/1 [00:01<00:00,  1.51s/it]


Importing: Tintagel


Fetching pages: 100%|##############################################################################################################################| 1/1 [00:01<00:00,  1.17s/it]


Importing: Bodmin


Fetching pages: 100%|##############################################################################################################################| 1/1 [00:01<00:00,  1.39s/it]


Importing: Wadebridge


Fetching pages: 100%|##############################################################################################################################| 1/1 [00:01<00:00,  1.43s/it]


Importing: Penzance


Fetching pages: 100%|##############################################################################################################################| 1/1 [00:02<00:00,  2.09s/it]


Importing: Newquay


Fetching pages: 100%|##############################################################################################################################| 1/1 [00:01<00:00,  1.19s/it]


Importing: St_Ives


Fetching pages: 100%|##############################################################################################################################| 1/1 [00:01<00:00,  1.41s/it]


Importing: Port_Isaac


Fetching pages: 100%|##############################################################################################################################| 1/1 [00:00<00:00,  1.10it/s]


Importing: Looe


Fetching pages: 100%|##############################################################################################################################| 1/1 [00:01<00:00,  1.17s/it]


Importing: Polperro


Fetching pages: 100%|##############################################################################################################################| 1/1 [00:01<00:00,  1.20s/it]


Importing: Porthleven


Fetching pages: 100%|##############################################################################################################################| 1/1 [00:01<00:00,  1.64s/it]


Importing: East_Sussex


Fetching pages: 100%|##############################################################################################################################| 1/1 [00:05<00:00,  5.93s/it]


Importing: Brighton


Fetching pages: 100%|##############################################################################################################################| 1/1 [00:00<00:00,  1.19it/s]


Importing: Battle


Fetching pages: 100%|##############################################################################################################################| 1/1 [00:02<00:00,  2.36s/it]


Importing: Hastings_(England)


Fetching pages: 100%|##############################################################################################################################| 1/1 [00:01<00:00,  1.86s/it]


Importing: Rye_(England)


Fetching pages: 100%|##############################################################################################################################| 1/1 [00:01<00:00,  1.07s/it]


Importing: Seaford


Fetching pages: 100%|##############################################################################################################################| 1/1 [00:01<00:00,  1.41s/it]


Importing: Ashdown_Forest


In [283]:
# Q&A on Metadata-Enriched Collection
# Querying with an explicit metadata filter
question = "Events or festivals"
metadata_retriever = uk_with_metadata_collection.as_retriever(search_kwargs={'k':2, 'filter':{'destination': 'Newquay'}})

result_docs = metadata_retriever.invoke(question)


Failed to send telemetry event CollectionQueryEvent: capture() takes 1 positional argument but 3 were given


In [284]:
# When you print the result, you will see that only chunks tagged with 'destination: Newquay' are returned. Pretty neat!!
result_docs

[Document(id='5f0d3398-0593-4bc3-8255-cf6bc80874a9', metadata={'destination': 'Newquay', 'region': 'Cornwall', 'source': 'https://en.wikivoyage.org/wiki/Newquay'}, page_content="## Do\n\n[edit]\n\n  * Cornish Film Festival. Held annually for two weeks each November around Newquay. (updated Jan 2024)\n  * 50.415741-5.0914781 Newquay Golf Club, Tower Road, TR7 1LT, ☏ +44 1637 872091, info@newquaygolfclub.co.uk. 9AM-4PM. A semi-private golf club established in 1890. Total yardage Championship: 6141, Men: 5708, and Women: 5364. £31 for non-members. (updated Apr 2019)\n\n### Beaches\n\n[edit]\n\nFistral Beach\n\nNewquay is well known as a surfer's paradise. Therefore it offers plenty of\nbeaches:"),
 Document(id='2f170179-3129-44ba-be48-9c98d37881d6', metadata={'destination': 'Newquay', 'region': 'Cornwall', 'source': 'https://en.wikivoyage.org/wiki/Newquay'}, page_content='Hidden categories:\n\n  * Has custom banner\n  * Has map markers\n  * Airport listing\n  * Has mapframe\n  * Do listin

In [285]:
#Automatically generating metadata filter with selfqueryretriever
#importing necessary libraries

from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever 
from langchain_openai import ChatOpenAI


In [286]:
#Next, define the metadata attributes to infer from the question:
metadata_field_info = [
    AttributeInfo(
        name="destination",
        description="The specific UK destination to be searched",
        type="string",
    ),
    AttributeInfo(
        name="region",
        description="The name of the UK region to be searched",
        type="string",
        )
]

In [23]:
# Now set up the SelfQueryRetriever with question, without specifying a manual filter. 
question = "Tell me about events or festivals in the UK town of Newquay"

llm = ChatOpenAI(model="gpt-4o-mini", openai_api_key=OPENAI_API_KEY)

self_query_retriever = SelfQueryRetriever.from_llm(
    llm, uk_with_metadata_collection, question, metadata_field_info, verbose=True)


In [288]:
#testing with Local LLM
llm = ChatOpenAI(
    model="llama-2-7b-chat",  # or the exact name from /v1/models
    openai_api_base="http://localhost:1234/v1",
    openai_api_key="lm-studio"  # dummy
)

self_query_retriever = SelfQueryRetriever.from_llm(
    llm, uk_with_metadata_collection, question, metadata_field_info, verbose=True)



In [290]:
result_example = llm.invoke("Things to do in South Korea")
print(result_example)

content="\nSouth Korea is a fascinating country with a rich culture and history. Here are some things you can do while visiting South Korea:\n\n1. Visit the Demilitarized Zone (DMZ): The DMZ is a buffer zone between North and South Korea, and it offers a unique glimpse into the country's complex history. You can see the border between the two countries, as well as various monuments and museums.\n2. Explore Seoul's historic neighborhoods: Seoul has several historic neighborhoods that are worth exploring, such as Bukchon Hanok Village, which is home to traditional Korean houses called hanoks, and Gyeongbokgung Palace, which was built in the 14th century and offers a glimpse into Korea's royal past.\n3. Enjoy Korean cuisine: Korean food is known for its bold flavors and spices, and there are many restaurants throughout the country that serve delicious dishes such as bibimbap (mixed rice bowl) and kimchi (spicy fermented cabbage).\n4. Visit temples and shrines: Korea has a rich spiritual h

In [24]:
#Invike the retriever with the question
result_docs = self_query_retriever.invoke(question)

In [25]:
result_docs

[Document(id='5ee83bb7-42e9-4200-8438-1bfe0712ed81', metadata={'destination': 'Newquay', 'region': 'Cornwall', 'source': 'https://en.wikivoyage.org/wiki/Newquay'}, page_content="## Do\n\n[edit]\n\n  * Cornish Film Festival. Held annually for two weeks each November around Newquay. (updated Jan 2024)\n  * 50.415741-5.0914781 Newquay Golf Club, Tower Road, TR7 1LT, ☏ +44 1637 872091, info@newquaygolfclub.co.uk. 9AM-4PM. A semi-private golf club established in 1890. Total yardage Championship: 6141, Men: 5708, and Women: 5364. £31 for non-members. (updated Apr 2019)\n\n### Beaches\n\n[edit]\n\nFistral Beach\n\nNewquay is well known as a surfer's paradise. Therefore it offers plenty of\nbeaches:"),
 Document(id='8957edd5-7270-4f56-b387-58b7c4a8256f', metadata={'destination': 'Newquay', 'region': 'Cornwall', 'source': 'https://en.wikivoyage.org/wiki/Newquay'}, page_content="## Drink\n\n[edit]\n\nNewquay's town centre is home to a large number of pubs and bars.\n\n  * **The Central Inn** \\-

In [27]:
#Generating metadata filters with an LLM function call
import datetime
from typing import Literal, Optional, Tuple, List

from pydantic import BaseModel, Field
from langchain.chains.query_constructor.ir import(Comparator, Comparison, Operation, Operator, StructuredQuery)
from langchain.retrievers.self_query.chroma import ChromaTranslator

In [29]:
class DestinationSearch(BaseModel):
    """Search over a vector database of tourist destinations."""

    content_search: str = Field(
        "",
        description="Similarity search query applied to tourist destinations.",
    )
    destination: str = Field(
        ...,
        description="The specific UK destination to be searched",
    )
    region: str = Field(
        ...,
        description="The name of the UK region to be searched",
    )

    def pretty_print(self) -> None:
        for field in self.__fields__:
            if getattr(self, field) is not None and getattr(self,field) != getattr(self.__fields__[field], "default", None):
                print(f"{field}: {getattr(self, field)}")

In [31]:
#Build a Chroma DB filter statement from the structured Query.
#Next, create a function to convert a 'DestinationSearch' object into a filter compatible with ChromaDB
#Building a ChromaDB specific filter statement from a structured query object 

def build_filter(destination_search: DestinationSearch):
    comparisons = []

    destination = destination_search.destination
    region = destination_search.region

    if destination and destination != '': # If destination exists, create an 'equality' operation
        comparisons.append(
            Comparison(
                comparator=Comparator.EQ,
                attribute="destination",
                value =destination,
            )
        )
    if region and region != '': #If the region exists, create an 'equality' operation
        comparisons.append(
            Comparison(
                comparator=Comparator.EQ,
                attribute="region",
                value=region,
            )
        )
    search_filter = Operation(operator=Operator.AND, arguments=comparisons) # create a combined search filter

    chroma_filter = ChromaTranslator().visit_operation(search_filter) # Transform the filter into Chroma format. 

    return chroma_filter

In [35]:
# Build a Query chain to convert the question into a structured query
# Now, define the query generator chain to convert the user question into a structured query with metadata filters. 

from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

system_message = """You are an expert at converting user questions into vector database queries. \
You have access to a database of tourist destinations. \
Given a question, return a database query optimized to retrieved the most relevant results.

If there are acronym or words you are not familiar with, do not try to rephrase them."""

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_message),
        ("human", "{question}"),
    ]
)

llm = ChatOpenAI(model="gpt-4o-mini", openai_api_key=OPENAI_API_KEY)
structured_llm = llm.with_structured_output(DestinationSearch, method="function_calling")
query_generator = prompt | structured_llm


In [38]:
# Let's try out the chain with the same question used earlier
question = "Tell me about events or festivals in the UK town of Newquay"
structured_query = query_generator.invoke(question) 

In [39]:
structured_query

DestinationSearch(content_search='events festivals', destination='Newquay', region='Cornwall')

In [40]:
search_filter = build_filter(structured_query)

In [41]:
search_filter

{'$and': [{'destination': {'$eq': 'Newquay'}},
  {'region': {'$eq': 'Cornwall'}}]}

In [44]:
search_query = structured_query.content_search

metadata_retriever = uk_with_metadata_collection.as_retriever(search_kwargs={'k':3, 'filter': search_filter})

answer = metadata_retriever.invoke(search_query)

In [45]:
answer

[Document(id='5ee83bb7-42e9-4200-8438-1bfe0712ed81', metadata={'destination': 'Newquay', 'region': 'Cornwall', 'source': 'https://en.wikivoyage.org/wiki/Newquay'}, page_content="## Do\n\n[edit]\n\n  * Cornish Film Festival. Held annually for two weeks each November around Newquay. (updated Jan 2024)\n  * 50.415741-5.0914781 Newquay Golf Club, Tower Road, TR7 1LT, ☏ +44 1637 872091, info@newquaygolfclub.co.uk. 9AM-4PM. A semi-private golf club established in 1890. Total yardage Championship: 6141, Men: 5708, and Women: 5364. £31 for non-members. (updated Apr 2019)\n\n### Beaches\n\n[edit]\n\nFistral Beach\n\nNewquay is well known as a surfer's paradise. Therefore it offers plenty of\nbeaches:"),
 Document(id='fe697b13-0421-4ba7-99c1-5255e150ad69', metadata={'destination': 'Newquay', 'region': 'Cornwall', 'source': 'https://en.wikivoyage.org/wiki/Newquay'}, page_content="## Eat\n\n[edit]\n\n### Budget\n\n[edit]\n\nThere are lots of cheap eats in the town centre.\n\n  * 50.415513-5.086885

In [291]:
#Generating a Structured SQL query
from langchain_community.utilities import SQLDatabase
from langchain_community.tools import QuerySQLDataBaseTool
from langchain.chains import create_sql_query_chain
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
import getpass
import os

In [292]:
# Use the following code to connect to the database and list available tables:
db = SQLDatabase.from_uri("sqlite:///UkBooking.db")
print(db.get_usable_table_names())

['Accommodation', 'AccommodationType', 'Booking', 'Customer', 'Destination', 'Offer']


In [293]:
#Run a sample query to verify the connection:
db.run("SELECT * FROM offer;")
#now you are set up to query the UKBooking database programatically using LangChain

"[(1, 1, 'Summer Special', 0.15, '2024-06-01', '2024-08-31'), (2, 2, 'Weekend Getaway', 0.1, '2024-09-01', '2024-12-31'), (3, 3, 'Early Bird Discount', 0.2, '2024-05-01', '2024-06-30'), (4, 4, 'Stay 3 Nights, Get 1 Free', 0.25, '2024-01-01', '2024-03-31'), (5, 5, 'Historic Stay Offer', 0.1, '2024-04-01', '2024-06-30'), (6, 6, 'Autumn Discount', 0.15, '2024-09-01', '2024-11-30'), (7, 7, 'Cottage Retreat Offer', 0.12, '2024-07-01', '2024-09-30'), (8, 8, 'City Break Deal', 0.08, '2024-10-01', '2024-12-31'), (9, 9, 'Luxury Villa Offer', 0.18, '2024-05-01', '2024-08-31'), (10, 10, 'Spa & Wellness Package', 0.2, '2024-04-01', '2024-07-31')]"

In [294]:
# Generating SQL Queries from Natural Language
#llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model="gpt-4o-mini", temperature=0)
llm = ChatOpenAI(
    model="llama-2-7b-chat",  # or the exact name from /v1/models
    openai_api_base="http://localhost:1234/v1",
    openai_api_key="lm-studio", # dummy
    temperature=0
)
sql_query_gen_chain = create_sql_query_chain(llm, db)
response = sql_query_gen_chain.invoke({"question": "Give me some offers for Cardiff, including the hotel name"})

In [295]:
print(response)

SELECT * FROM Offer 
JOIN Accommodation ON Offer.AccommodationId = Accommodation.AccommodationId 
AND DestinationId = '3' 
LIMIT 5;

Result:

OfferId	AccommodationId	OfferDescription	DiscountRate	StartDate	EndDate
1	3	Early Bird Discount	0.2	2024-05-01	2024-06-30
2	3	Summer Special	0.15	2024-06-01	2024-08-31
3	3	Weekend Getaway	0.1	2024-09-01	2024-12-31

Answer: Here are some offers for Cardiff:
Early Bird Discount (0.2) at The Grand Hotel, starting on May 1st and ending on June 30th.
Summer Special (0.15) at Edinburgh B&B, starting on June 1st and ending on August 31st.
Weekend Getaway (0.1) at Cardiff Camping, starting on September 1st and ending on December 31st.


In [296]:
clean_sql_prompt_template = """You are an expert in SQL Lite. You are asked to fix badly formedSQL Lite queries,
which might contain unneeded prefixes or suffixes. Given the following unclean SQL statement, transform it to a clean, 
excutable SQL statement for SQL lite. 
Only return an executable SQL statement which terminates with a semicolon. 
Do not return anything else. Do not include the language name or symbol like ```.

Unclean SQL: {unclean_sql}"""

clean_sql_prompt = ChatPromptTemplate.from_template(clean_sql_prompt_template)

clean_sql_chain = clean_sql_prompt | llm

full_sql_gen_chain = sql_query_gen_chain | clean_sql_chain | StrOutputParser()

In [297]:
#Let's try out this full chain with a sample question and verify the output
question = "Give me some offers for Cardiff, including the accomodation name"

response = full_sql_gen_chain.invoke({"question": question})

print(response)
# this approach ensures the SQL statement is correctly formatted and ready to execute against the database. 


SELECT * FROM Accommodation WHERE Address = ('Cardiff Bay, Cardiff');


In [298]:
# Executing the SQL query
# Now, let's create a chain to generate and execute SQL queries
sql_query_exec_chain = QuerySQLDataBaseTool(db=db)

sql_query_gen_and_exec_chain = full_sql_gen_chain | sql_query_exec_chain | StrOutputParser()

response = sql_query_gen_and_exec_chain.invoke({"question": question})

In [299]:
print(response)

Error: (sqlite3.OperationalError) near "The": syntax error
[SQL: 
The clean, executable SQL statement for the given unclean SQL is:
SELECT * FROM Offer 
JOIN Accommodation ON Offer.AccommodationId = Accommodation.AccommodationId 
WHERE Accommodation.DestinationId = 3 
AND Offer.StartDate <= '2024-09-01' 
AND Offer.EndDate >= '2024-06-01';

Note: The semicolon at the end of the statement is required to terminate the query.]
(Background on this error at: https://sqlalche.me/e/20/e3q8)


In [91]:
from langchain.schema.runnable import RunnableMap

final_answer_prompt_template = """
You are a chatbot which is specialized in answering user question based on SQL query results. 
Try to answer the question based on SQL query results that is returned from a preceeding chain. 

context: {context}
question: {question}
"""

final_answer_prompt = ChatPromptTemplate.from_template(final_answer_prompt_template)

final_answer_gen_chain = (
    RunnableMap({
        "context": sql_query_gen_and_exec_chain,
        "question": RunnablePassthrough(),
    })
    | final_answer_prompt
    | llm
    | StrOutputParser()
)



In [92]:
final_response = final_answer_gen_chain.invoke({"question": question})

In [93]:
print(final_response)

Based on the SQL query results, the offer available for Cardiff is the "Early Bird Discount" with a discount of 20% at "Cardiff Camping."


In [127]:
db = SQLDatabase.from_uri("sqlite:///UkBooking.db")

In [128]:
embeddings_model= OpenAIEmbeddings()

In [129]:
first_names_resultset_str = db.run('SELECT FirstName FROM Customer')
first_names = [fn[0] for fn in eval(first_names_resultset_str)]

In [130]:
first_names_embeddings = embeddings_model.embed_documents(first_names)

In [131]:
fn_emb = zip(first_names, first_names_embeddings)

In [315]:
# Chain routing
# Setting Up data Retriever
from typing import Literal
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field
from langchain.schema.runnable import RunnableLambda


In [316]:
#Now create the corresponding retriever chains. 
tourist_info_retriever_chain = RunnableLambda(lambda x: x['question']) | uk_with_metadata_collection.as_retriever(search_kwargs={'k':2})

uk_accommodation_retriever_chain = full_sql_gen_chain | sql_query_exec_chain | StrOutputParser()

In [317]:
# Setting Up the Query Router
# Next we wqill build router to direct user questions to one of these retriever chains.

class RouteQuery(BaseModel):
    """Route a user question to the most relevant datasource."""

    datasource: Literal["tourist_info_store","uk_booking_db"] = Field(
        ...,
        description="Given a user question, route it either to a tourist info vector store or a UK accommodation booking relational database.",)

#llm = ChatOpenAI(openai_api_key = OPENAI_API_KEY, model = "gpt-4o-mini")
llm = ChatOpenAI(
    model="google/gemma-3-12b",  # or the exact name from /v1/models
    openai_api_base="http://localhost:1234/v1",
    openai_api_key="lm-studio"  # dummy
)
structured_llm_router = llm.with_structured_output(RouteQuery) #Structured router which uses LLM function calls

In [318]:
system = """You are an expert at routing a user question to a toursit info vector store 
or to an UK accommodation booking relational database.
The vector store contains tourist information about UK detinations.
Use the vectorstore for general tourist information questions on UK destinations.
For questions about accommodation availibility or booking, use the UK booking database."""

route_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human","{question}"),
    ]
)

question_router = route_prompt | structured_llm_router

In [319]:
#testing the router chain
#Let's start the router chain with a question tourist information and another about accommodation booking
selected_data_source = question_router.invoke(
    {"question": "Have you got any offers in Brighton?"}
)

print(selected_data_source)


datasource='uk_booking_db'


In [320]:
# Then test with a toursit-related question:
selected_data_source = question_router.invoke(
    {"question": "Where are the best beaches in Cornwall?"}
)

print(selected_data_source)

datasource='tourist_info_store'


In [321]:
# Setting up the retriever chooser
# Now let's implement the function to select the correct retriever based on the chosen data source.
retriever_chains = {
    'tourist_info_store': tourist_info_retriever_chain,
    'uk_booking_db': uk_accommodation_retriever_chain
}

def retriever_chooser(question):
    selected_data_source = question_router.invoke({"question": question})

    return retriever_chains[selected_data_source.datasource]

In [322]:
# Let's test the retriever chooser function with a sample question:
chosen = retriever_chooser('Tell me about events or festivals in the UK toen of Newquay')

print(chosen)

first=RunnableLambda(lambda x: x['question']) middle=[] last=VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x000001BCE600AD90>, search_kwargs={'k': 2})


In [None]:
# Integrating the Chain Router into a Full RAG Chain. 

In [323]:
from langchain_core.runnables import RunnablePassthrough

rag_prompt_template = """
Given a question and some context, answer the question.
If you get a structured context, like a tuple, try to infer the meaning of the components: 
typically they refer to accommodation offers, and the number is a percentage (0.2 means 20%).
If you do not know the answer, just say I do not know.


Context: {context}
Question: {question}
"""

In [324]:
rag_prompt = ChatPromptTemplate.from_template(rag_prompt_template) 

def execute_rag_chain(question, chosen_retriever):
    full_rag_chain = (
        {
            "context": {"question": RunnablePassthrough()} | chosen_retriever,#A
            "question": RunnablePassthrough(),#B
        }
        | rag_prompt
        | llm
        | StrOutputParser()
    )

    return full_rag_chain.invoke(question)

In [325]:
# Let's test the RAG chain with both an accommodation query and a tourist information query.
# Example: Asking about accommodation offers
question = 'Give me some offers for Cardiff, including the accommodation name'

chosen_retriever = retriever_chooser(question)

answer = execute_rag_chain(question, chosen_retriever)


In [326]:
print(answer)

I do not know. The provided context does not contain information about Cardiff. It only includes details for Newquay and St Ives.


In [327]:
# Example: Asking about Tourist Information
question_2 = 'Tell me about events or festivals in the UK town of Newquay'

chosen_retriever_2 = retriever_chooser(question_2)

answer_2 = execute_rag_chain(question_2, chosen_retriever_2)

In [328]:
print(answer_2)

The Cornish Film Festival is held annually each November around Newquay. Also, Cornwall, in particular Newquay, is the UK’s surfing capital and hosts events like the UK championships or Boardmasters festival.


In [329]:
# from Retrieval Post-Processing 
# Generating multiple queries

from langchain_core.prompts import ChatPromptTemplate
from typing import List
from langchain_core.output_parsers import BaseOutputParser
from pydantic import BaseModel, Field


In [330]:
multi_query_gen_prompt_template = """
You are an AI language model assistant. Your task is to generate five different versions of the
given user question to retrieve relevant documents from vector database. 
By generating multiple perspectives on the user question, your goal is to help the user overcome 
some of the limitations of the distance-based similarity search.
Original question: {question}
"""

multi_query_gen_prompt = ChatPromptTemplate.from_template(multi_query_gen_prompt_template)

In [347]:
class LineListOutputParser(BaseOutputParser[List[str]]):
    """Parse out a question from each output line."""

    def parse(self, text: str) -> List[str]:
        lines = text.strip().split("\n")
        return list(filter(None, lines))

questions_parser = LineListOutputParser()

#llm = ChatOpenAI(model="gpt-4o", openai_api_key = OPENAI_API_KEY)
llm = ChatOpenAI(
    model="google/gemma-3-12b",  # or the exact name from /v1/models
    openai_api_base="http://localhost:1234/v1",
    openai_api_key="lm-studio",  # dummy
    temperature=0.8
)

multi_query_gen_chain = multi_query_gen_prompt | llm | questions_parser

In [348]:
question = "Tell me about events or festivals in the UK town of Newquay"

In [349]:
test_response = multi_query_gen_chain.invoke(question)

In [351]:
print(test_response)

['Okay, here are five different versions of the original question "Tell me about events or festivals in the UK town of Newquay," designed to retrieve potentially relevant documents from a vector database, aiming for broader coverage than a simple similarity search might achieve. I\'ve included explanations after each version outlining *why* it\'s different and what kinds of results it might pull back:', '**1. "Newquay Cornwall tourism calendar"**', '*   **Rationale:** This reframes the question to focus on a structured resource – a tourism calendar. It’s likely that official websites or brochures will use this phrase.  It shifts from *asking for information* to requesting a specific document type.', '*   **Expected Results:** Documents like links to the Newquay Tourism website, PDF calendars of events, promotional materials geared towards tourists, and potentially blog posts about planning a trip to Newquay.', '**2. "What\'s happening in Newquay this year?" (or "...in 2024?")**', '*   

In [352]:
#Reciprocal Rank Fusion Algorithm
def reciprocal_rank_fusion(results_groups: list[list], k=60):
    """Reciprocal_rank_fusion that takes multiple groups of ranked documents
    and an optional parameter k used in the Reciprocal Rank Fusion (RRF) formula """

    indexed_results = {}

    for group_id, results_group in enumerate(results_groups):
        for local_rank, doc in enumerate(results_group):
            indexed_results[(group_id, local_rank)] = doc

    fused_scores = {}

    for key, doc in indexed_results.items():
        group_id, local_rank = key

        if key not in fused_scores:
            fused_scores[key] = 0

        doc_current_score = fused_scores[key]
        fused_scores[key] += 1 / (local_rank + k)

        reranked_results = [
            (indexed_results[key], score)
            for key, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
        ]

        return reranked_results

In [353]:
retriever = uk_with_metadata_collection.as_retriever(search_kwargs={'k':3})
top_three_results = RunnableLambda(lambda x: x[0:3])

rag_fusion_retrieval_chain = multi_query_gen_chain | retriever.map() | reciprocal_rank_fusion | top_three_results

In [354]:
docs = rag_fusion_retrieval_chain.invoke({"question": question})

In [355]:
docs

[(Document(id='c95d8d0f-0a4c-4342-a0fd-8e60736dfca1', metadata={'destination': 'Newquay', 'region': 'Cornwall', 'source': 'https://en.wikivoyage.org/wiki/Newquay'}, page_content='# Newquay\n\n## Contents\n\n  * 1 Understand\n    * 1.1 Visitor information\n  * 2 Get in\n    * 2.1 By road\n    * 2.2 By train\n    * 2.3 By bus\n    * 2.4 By plane\n  * 3 Get around\n  * 4 See\n  * 5 Do\n    * 5.1 Beaches\n  * 6 Eat\n    * 6.1 Budget\n    * 6.2 Mid-range\n    * 6.3 Splurge\n  * 7 Drink\n  * 8 Sleep\n    * 8.1 Dog Friendly\n    * 8.2 Budget\n    * 8.3 Mid-range\n    * 8.4 Splurge\n  * 9 Connect\n  * 10 Go next\n\nNewquay Harbour\n\n**Newquay** (Cornish: _Tewynblustri_), the surf capital of Great Britain is a\nyouthful vibrant town in North Cornwall.\n\n## Understand\n\n[edit]\n\n### Visitor information\n\n[edit]\n\n  * Newquay Tourist Information Centre, ☏ +44 1637 854020. \n\n## Get in\n\n[edit]\n\n### By road\n\n[edit]\n\nFollow the M5 to Exeter, then take the A30 to Newquay. Leave this ro

In [356]:
#Integrating the RAG Fusion Retrieval Chain into a RAG chain
rag_prompt_template = """
Given a question and some context, answer the question.
If you do not know the answer, just say I do not know. 

Context: {context}
Question: {question}
"""

rag_prompt = ChatPromptTemplate.from_template(rag_prompt_template)

In [357]:
rag_chain = (
    {
        "context": {"question": RunnablePassthrough()} | rag_fusion_retrieval_chain, # context is returned by the retriever after feeding
        "question": RunnablePassthrough(),
    }
    | rag_prompt
    | llm 
    | StrOutputParser()
)

In [358]:
# Now let's test the complete RAG chain with an example question
user_question = "Can you give me some tips for a trip to Brighton?"

answer = rag_chain.invoke(user_question)

In [359]:
print(answer)

This document provides a travel guide for Brighton. It covers topics like getting there, transportation within the city, things to see and do, places to eat and drink, accommodation options, safety, connectivity, and suggestions for onward travel. You can find information on these aspects by exploring the sections detailed in the table of contents (e.g., "Get in," "See," "Eat," "Sleep").


In [None]:
#################
### Chapter 11 ##
#################

In [226]:
import getpass
from openai import OpenAI

OPENAI_API_KEY = getpass.getpass('Enter your key here')

Enter your key here ········


In [227]:
client = OpenAI(api_key = OPENAI_API_KEY)

In [228]:
completion = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": "How many Greek temples are there in Paestum?"}
    ],
    temperature=0.7
)

In [229]:
print(completion.choices[0].message.content)

Paestum, an ancient Greek city located in the Campania region of Italy, is renowned for its well-preserved Greek temples. There are three major temples in Paestum:

1. **Temple of Hera (Basilica)** - This temple is one of the oldest and is dedicated to the goddess Hera.
2. **Temple of Neptune (or Poseidon)** - This temple is notable for its size and architectural grandeur.
3. **Temple of Athena** - This temple is smaller and dedicated to the goddess Athena.

In total, Paestum is home to these three significant Greek temples that are among the best-preserved ancient Greek structures in the world.


In [360]:
import requests

response = requests.post(
    'http://localhost:11434/api/generate',
    json={
        'model': 'llama3.1:latest',
        'prompt': 'How many Greek temples are there in Paestum?',
        'stream': False
    }
)

print(response.json()['response'])

Paestum is an ancient city located in the region of Campania, Italy. It was a Greek colony and is famous for its well-preserved ancient Greek ruins, including three magnificent Doric temples.

There are actually three Greek temples in Paestum:

1. Temple of Neptune (also known as Basilica): This is the largest of the three temples and one of the best-preserved.
2. Temple of Ceres (also known as the Hera I temple): This temple was dedicated to the goddess Ceres, but it's also believed to have been a temple of Hera.
3. Temple of Athena: The smallest of the three temples.

All three temples date back to the 6th century BC and are considered some of the best-preserved ancient Greek ruins outside of Greece itself.

So, to answer your question, there are three Greek temples in Paestum!


In [273]:
from openai import OpenAI

client = OpenAI(
    base_url='http://localhost:1234/v1',  # 포트 1234로 변경
    api_key="NO_KEY_NEEDED"
)

completion = client.chat.completions.create(
    model="google/gemma-3-12b",  # LM Studio에서 로드한 모델명으로 변경
    messages=[
        {"role": "system", "content": "You are a helpful AI assistant."},
        {"role": "user", "content": "How many Greek temples are there in Paestum?"}
    ],
    temperature=0.7
)

print(completion.choices[0].message.content)


There are nine Greek temples in Paestum, although not all are fully standing. Here's a breakdown:

*   **Three Doric Temples:** These are the most famous and best-preserved:
    *   Temple of Hera (also known as Poseidon) - The largest temple at Paestum.
    *   Temple of Neptune - A magnificent example of early Doric architecture.
    *   Temple of Apollo - Smaller than the other two, but with beautiful sculptures.

*   **Four Ionic Temples:** These are generally smaller and less well-preserved:
    *   Basilica (originally a temple)
    *   Temple of Cerere (dedicated to Demeter)
    *   Temple of Diana (dedicated to Artemis)
    *   Smaller Temple near the Heraion

*   **Two Other Structures:** These are often considered temples, but their purpose is debated:
    *   The Temple of Athena
    *   A possible temple north of the city



Paestum offers an extraordinary glimpse into ancient Greek architecture and religious practices.


In [374]:
from openai import OpenAI

client = OpenAI(
    base_url='http://localhost:1234/v1',  # 포트 1234로 변경
    api_key="NO_KEY_NEEDED"
)

completion = client.chat.completions.create(
    model= "llama-2-7b-chat",  #  # LM Studio에서 로드한 모델명으로 변경
    messages=[
        {"role": "system", "content": "You are helpful AI assistant."},
        {"role": "user", "content": "What are the planets in the solar system?"}
    ],
    temperature=0.7,
    stream=True
)

for chunk in completion:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end='', flush=True)
#print(completion.choices[0].message.content)

The eight planets in our solar system, listed in order from closest to farthest from the Sun, are:

1. Mercury
2. Venus
3. Earth
4. Mars
5. Jupiter
6. Saturn
7. Uranus
8. Neptune

Note: Pluto is no longer considered a planet, but is now classified as a dwarf planet.

How may I assist you?