### Installations

In [None]:
%pip install -qU langchain_community jq 

In [1]:
import warnings
warnings.filterwarnings("ignore")

### Initialization

In [2]:
from langchain_community.document_loaders import JSONLoader
import json
from pathlib import Path

file_path='./Journey_details.json'
data = json.loads(Path(file_path).read_text())
loader = JSONLoader(
         file_path=file_path,
         jq_schema=".user.flights[]",
         text_content=False)

In [3]:
# Load the data
file_path = Path("Journey.json")
json_data = json.loads(file_path.read_text())

json_data

{'ticket_info': {'ticket_id': 'ticket_227_3183',
  'namespace': 'flight_bookings',
  'pnr': 'HZAVJJ',
  'class': 'Economy',
  'user_id': 227,
  'flight_route': {'source_airport': {'name': 'Cape Town International Airport',
    'iata_code': 'CPT',
    'city': 'Cape Town',
    'country': 'South Africa'},
   'destination_airport': {'name': 'Indira Gandhi International Airport',
    'iata_code': 'DEL',
    'city': 'New Delhi',
    'country': 'India'},
   'intermediate_airports': [{'name': 'Addis Ababa Bole International Airport',
     'iata_code': 'ADD',
     'city': 'Addis Ababa',
     'country': 'Ethiopia'}],
   'layover_duration': '55 minutes'},
  'flight_schedule': {'departure': {'date': '2024-07-11',
    'time': '14:35:00 UTC',
    'full_datetime': '2024-07-11T14:35:00.000Z'},
   'arrival': {'date': '2024-07-12',
    'time': '08:10:00 UTC',
    'full_datetime': '2024-07-12T08:10:00.000Z'}},
  'flight_segments': [{'segment_number': 1,
    'flight_number': 'ET846',
    'departure': {'ai

### Loader

In [4]:
# Assuming loader is already defined and initialized
docs = loader.load()
docs[0]

Document(metadata={'source': 'C:\\Users\\tarak\\Downloads\\Assignments\\LangChain Chatbot\\Journey_details.json', 'seq_num': 1}, page_content='{"ticket_id": 3183, "pnr": "HZAVJJ", "class": "ECONOMY", "source": "Cape Town International Airport (CPT)", "destination": "Indira Gandhi International Airport (DEL)", "departure_date": "2024-07-11T14:35:00.000Z", "arrival_date": "2024-07-12T08:10:00.000Z", "layover_duration": "55m", "segments": [{"flight_number": "ET846", "departure": {"airport": "Cape Town International Airport", "iata": "CPT", "date": "2024-07-11T14:35:00.000Z"}, "arrival": {"airport": "Addis Ababa Bole International Airport", "iata": "ADD", "date": "2024-07-11T22:00:00.000Z"}, "passengers": [{"first_name": "surendra", "last_name": "singh", "seat_number": "21a", "cabin_baggage": "7kg", "check_in_baggage": "23kg"}, {"first_name": "narinder", "last_name": "kaur", "seat_number": "21b", "cabin_baggage": "7kg", "check_in_baggage": "23kg"}, {"first_name": "samik", "last_name": "sin

In [5]:
type(docs)

list

In [6]:
print(docs[0].metadata)

{'source': 'C:\\Users\\tarak\\Downloads\\Assignments\\LangChain Chatbot\\Journey_details.json', 'seq_num': 1}


In [7]:
print(docs[0].page_content)

{"ticket_id": 3183, "pnr": "HZAVJJ", "class": "ECONOMY", "source": "Cape Town International Airport (CPT)", "destination": "Indira Gandhi International Airport (DEL)", "departure_date": "2024-07-11T14:35:00.000Z", "arrival_date": "2024-07-12T08:10:00.000Z", "layover_duration": "55m", "segments": [{"flight_number": "ET846", "departure": {"airport": "Cape Town International Airport", "iata": "CPT", "date": "2024-07-11T14:35:00.000Z"}, "arrival": {"airport": "Addis Ababa Bole International Airport", "iata": "ADD", "date": "2024-07-11T22:00:00.000Z"}, "passengers": [{"first_name": "surendra", "last_name": "singh", "seat_number": "21a", "cabin_baggage": "7kg", "check_in_baggage": "23kg"}, {"first_name": "narinder", "last_name": "kaur", "seat_number": "21b", "cabin_baggage": "7kg", "check_in_baggage": "23kg"}, {"first_name": "samik", "last_name": "singh", "seat_number": "21c", "cabin_baggage": "7kg", "check_in_baggage": "23kg"}]}, {"flight_number": "ET686", "departure": {"airport": "Addis Ab

In [8]:
type(docs[0].page_content)

str

### Recursive JSON Splitter

In [9]:
from langchain_text_splitters import RecursiveJsonSplitter

splitter = RecursiveJsonSplitter(max_chunk_size=100)
splitter

<langchain_text_splitters.json.RecursiveJsonSplitter at 0x2970927fe10>

In [10]:
json_chunks = splitter.split_json(json_data=json_data)

In [11]:
len(json_chunks)

23

In [12]:
docs = splitter.create_documents(texts=[json_data],convert_lists=True)

for doc in docs[:3]:
    print(doc)

page_content='{"ticket_info": {"ticket_id": "ticket_227_3183", "namespace": "flight_bookings", "pnr": "HZAVJJ"}}'
page_content='{"ticket_info": {"class": "Economy", "user_id": 227}}'
page_content='{"ticket_info": {"flight_route": {"source_airport": {"name": "Cape Town International Airport"}}}}'


In [13]:
len(docs)

58

### Hugging Face Embeddings

In [14]:
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

In [15]:
text = "This is a test document."
query_result = embeddings.embed_query(text)
query_result[:3]

[-0.04895174875855446, -0.039861924946308136, -0.021562796086072922]

In [16]:
doc_result = embeddings.embed_documents([text])

### Hugging Face Inference API

In [17]:
import getpass

inference_api_key = getpass.getpass("Enter your HF Inference API Key:\n\n")

In [18]:
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings

embeddings = HuggingFaceInferenceAPIEmbeddings(
    api_key=inference_api_key, model_name="sentence-transformers/all-MiniLM-l6-v2"
)
text_1 = "This is a test document."
query_result = embeddings.embed_query(text_1)
query_result[:3]

[-0.03833853453397751, 0.12346471101045609, -0.028642931953072548]

### Hugging Face Hub

In [19]:
! pip install huggingface_hub



In [20]:
from langchain_huggingface.embeddings import HuggingFaceEndpointEmbeddings
embeddings = HuggingFaceEndpointEmbeddings()
text_1 = "This is a test document."
query_result = embeddings.embed_query(text_1)
query_result[:3]

[-0.048951830714941025, -0.03986202925443649, -0.021562786772847176]

### Pinecone

In [21]:
%pip install -qU langchain-pinecone pinecone-notebooks

Note: you may need to restart the kernel to use updated packages.


In [22]:
import getpass
import os
import time

from pinecone import Pinecone, ServerlessSpec

if not os.getenv("PINECONE_API_KEY"):
    os.environ["PINECONE_API_KEY"] = getpass.getpass("Enter your Pinecone API key: ")

pinecone_api_key = os.environ.get("PINECONE_API_KEY")

pc = Pinecone(api_key=pinecone_api_key)

In [23]:
import time

index_name = "langchain-chatbot"  # change if desired

existing_indexes = [index_info["name"] for index_info in pc.list_indexes()]

if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        dimension=768,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

index = pc.Index(index_name)

In [24]:
from langchain_pinecone import PineconeVectorStore

vector_store = PineconeVectorStore(index=index, embedding=embeddings)

In [25]:
from langchain_pinecone import PineconeVectorStore

namespace = "langchain-chatbot" 

docsearch = PineconeVectorStore.from_documents(
    documents=docs,
    index_name=index_name,
    embedding=embeddings,
    namespace=namespace
)

time.sleep(5)

# See how many vectors have been upserted
print("Index after upsert:")
print(pc.Index(index_name).describe_index_stats())
print("\n")
time.sleep(2)

Index after upsert:
{'dimension': 768,
 'index_fullness': 0.0,
 'namespaces': {'langchain-chatbot': {'vector_count': 58}},
 'total_vector_count': 58}




In [26]:
index = pc.Index(index_name)
namespace = "langchain-chatbot"

for ids in index.list(namespace=namespace):
    query = index.query(
        id=ids[0], 
        namespace=namespace, 
        top_k=1,
        include_values=True,
        include_metadata=True
    )
    # print(query)
    # print("\n")

In [27]:
from uuid import uuid4

from langchain_core.documents import Document
uuids = [str(uuid4()) for _ in range(len(docs))]

vector_store.add_documents(documents=docs, ids=uuids)

['37a8ea01-3234-4265-baa4-4e03b920f595',
 'f456249f-e4a6-4afa-9177-d81357c01191',
 '824def2e-ae18-4219-a64b-8a4fa37652fc',
 '7293683b-54a4-474f-9889-697fc57d4dd1',
 'e9747259-207f-4e00-a663-da05aad5abb2',
 'bcae3a69-3596-4dc0-9c82-cec4316b5661',
 '3123786e-7cb7-4d7b-a66a-671e5c486a35',
 'd9d83c2d-0cae-4137-880c-f8903918d40c',
 '160c1c36-e920-4e86-a72f-1659070a9699',
 'b24669de-a901-4ee5-969b-6b8499a0a52d',
 'e325c394-f1e8-4518-8c46-a743d1b62b1e',
 '3c9f3ba3-30ce-402d-8f91-28d51d800f5f',
 '3d88a097-3793-4936-88d3-d6ea46aa8c56',
 '119b8129-c12b-4700-b215-dd2a9c0a772f',
 '96d451c3-dd5a-4b2c-bafa-1c5b195e18c1',
 'ce783f33-1004-4f73-b74f-34db3ef96864',
 '60ce9e9f-9f56-4019-a8bb-d98a3429f76f',
 'ee16953b-ecbf-4655-9f77-cfb0223decd8',
 'd177058c-08be-4c27-8f6d-7cc544e42e9e',
 '62cecf5a-b02b-434f-ab15-c04181fa561c',
 '45eb09e9-749c-4a89-abd3-8cb2bbbafbba',
 '90f4b39e-8d30-4fdb-b949-44a28b860307',
 '51289e95-0018-4686-8f8b-d52d7750733f',
 '79585666-6d5a-4b1f-aa8b-d312ec5eee2e',
 'a24fedae-0932-

### Query vector store

##### Similarity search

In [28]:
results = vector_store.similarity_search(
    "What time is my flight from Cape Town to Addis Ababa, and what’s the arrival time?",
    k=2,
)
for res in results:
    print(f"* {res.page_content}")

##### Similarity search with score

In [29]:
results = vector_store.similarity_search_with_score(
    "What time is my flight from Cape Town to Addis Ababa, and what’s the arrival time?", k=1, 
)
for res, score in results:
    print(f"* [SIM={score:3f}] {res.page_content}")

* [SIM=0.547294] {"ticket_info": {"flight_segments": {"0": {"departure": {"airport": "Cape Town International Airport"}}}}}


### Query by turning into retriever

In [30]:
retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 3, "score_threshold": 0.5},
)
retriever.invoke("What time is my flight from Cape Town to Addis Ababa, and what’s the arrival time?")

[Document(id='62cecf5a-b02b-434f-ab15-c04181fa561c', metadata={}, page_content='{"ticket_info": {"flight_segments": {"0": {"departure": {"airport": "Cape Town International Airport"}}}}}'),
 Document(id='a24fedae-0932-4b64-8727-7d4dfc2cd5a8', metadata={}, page_content='{"ticket_info": {"flight_segments": {"0": {"arrival": {"airport": "Addis Ababa Bole International Airport"}}}}}'),
 Document(id='d62d40bb-e51b-49c3-8f09-959abff35497', metadata={}, page_content='{"ticket_info": {"embedded_text_descriptions": {"route_description": "Cape Town International Airport (CPT) to Indira Gandhi International Airport (DEL) via Addis Ababa Bole International Airport (ADD)"}}}')]

In [31]:
retrieved_docs = retriever.invoke("What time is my flight from Cape Town to Addis Ababa, and what’s the arrival time?")

In [32]:
len(retrieved_docs)

3

In [33]:
print(retrieved_docs[0].page_content)

{"ticket_info": {"flight_segments": {"0": {"departure": {"airport": "Cape Town International Airport"}}}}}


In [34]:
print(retrieved_docs[1].page_content)

{"ticket_info": {"flight_segments": {"0": {"arrival": {"airport": "Addis Ababa Bole International Airport"}}}}}


### LLM

In [35]:
import getpass
import os

os.environ["GROQ_API_KEY"] = getpass.getpass()

from langchain_groq import ChatGroq

llm = ChatGroq(model="llama3-8b-8192")

In [36]:
from langchain import hub

prompt = hub.pull("rlm/rag-prompt")

example_messages = prompt.invoke(
    {"context": "filler context", "question": "filler question"}
).to_messages()

example_messages

[HumanMessage(content="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: filler question \nContext: filler context \nAnswer:", additional_kwargs={}, response_metadata={})]

In [37]:
print(example_messages[0].content)

You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: filler question 
Context: filler context 
Answer:


### Runnables

In [38]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

for chunk in rag_chain.stream("Can you tell me about the layover time for my journey?"):
    print(chunk, end="", flush=True)

According to the ticket information, your layover to the ticket information, your layover time is 55 minutes. Your first flight arrives on July 12, 2024, at 08:10:00 UTC, and your next flight departs from the same destination at 22:00:00 UTC, which means you have a layover of 55 minutes.

### Built-in-chains

In [39]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

system_prompt = (
    "You are a travel assistant specialized in providing concise and accurate answers about flight itineraries. "
    "Use the following context to answer questions about flight segments, layovers, seat details, baggage allowances, and airport information. "
    "Respond to each question based on the retrieved context. If the answer isn’t available in the data, respond with “I’m not sure” or “That information is not available.” "
    "Keep answers to a maximum of three sentences and ensure they are clear and direct."
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)


question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

response = rag_chain.invoke({"input": "Do I have any checked baggage allowance for my flights?"})
print(response["answer"])

Based on the provided information, each passenger has a total check-in baggage allowance of 23kg.


##### Returning sources

In [40]:
for document in response["context"]:
    print(document)
    print()

page_content='{"ticket_info": {"baggage_summary": {"total_check_in_baggage": "69kg"}}}'

page_content='{"ticket_info": {"baggage_summary": {"total_cabin_baggage": "21kg"}}}'

page_content='{"ticket_info": {"embedded_text_descriptions": {"passenger_summary": "Passengers: Surendra Singh, Narinder Kaur, Samik Singh, traveling with 7kg cabin baggage and 23kg check-in baggage each."}}}'



### Customizing the prompt

In [42]:
from langchain_core.prompts import PromptTemplate

template = """"You are a travel assistant specialized in providing concise and accurate answers about flight itineraries. "
    "Use the following context to answer questions about flight segments, layovers, seat details, baggage allowances, and airport information. "
    "Respond to each question based on the retrieved context. If the answer isn’t available in the data, respond with “I’m not sure” or “That information is not available.” "
    "Keep answers to a maximum of three sentences and ensure they are clear and direct."
    "\n\n"
    

{context}

Question: {question}

Helpful Answer:"""
custom_rag_prompt = PromptTemplate.from_template(template)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | custom_rag_prompt
    | llm
    | StrOutputParser()
)

rag_chain.invoke("Where is my layover?")

'According to the flight itinerary, your layover is in Addis Ababa, Ethiopia, and it will last approximately 55 minutes.'

In [43]:
# Query the model

rag_chain.invoke("Do I have checked baggage?")

'Based on the provided context, the answer is:\n\nYes, you have checked baggage. According to the ticket information, the total checked-in baggage allowance is 69kg.'

In [45]:
rag_chain.invoke("What’s my seat for the first flight?")

'According to the provided ticket information, your seat for the first flight is 21A.'

In [47]:
rag_chain.invoke("Which airport am I arriving at in Delhi, and what’s the expected arrival time?")

"You are arriving at Indira Gandhi International Airport in Delhi. The expected arrival time is not provided in the available data, so I'm not sure of the exact arrival time."