# Set Up

In [1]:
from google.oauth2 import service_account
import vertexai
import json

# load the JSON file
with open('./probable-life-441114-n1-d2f8fa3aef61.json') as source:
    info = json.load(source)

vertex_cred = service_account.Credentials.from_service_account_info(info)

PROJECT_ID = "probable-life-441114-n1"
REGION = "asia-south1"
vertexai.init(project=PROJECT_ID,
             location=REGION,
             credentials=vertex_cred)

# Using Langchain official docs
from langchain_google_vertexai import VertexAIEmbeddings

# Initialize the a specific Embeddings Model version
embeddings = VertexAIEmbeddings(model_name="textembedding-gecko@003")

# Imports

In [2]:
import numpy as np
import torch
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema.document import Document
from langchain_core.vectorstores import InMemoryVectorStore

# Load and split data

In [3]:
DATA_PATH = "data"

def load_data(DATA_PATH):
    data_loader = PyPDFDirectoryLoader(DATA_PATH)
    return data_loader.load()

data = load_data(DATA_PATH)

In [4]:
type(data[0])

langchain_core.documents.base.Document

In [5]:
# Split them into chunks
def split_data(document):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size = 500,
        chunk_overlap= 50,
        length_function=len,
        is_separator_regex=False,  # use if your separators are plain text and not regex patterns.
    )
    return text_splitter.split_documents(document)
    

In [6]:
chunks = split_data(data)

In [7]:
chunks[0]

Document(metadata={'source': 'data/BA_FAQs_and_Policy.pdf', 'page': 0}, page_content='British Airways Policy\nOur Customer Commitment\nWe are fully committed to helping each and every one of our customers enjoy the\nbest possible experience, wherever you might be travelling. This page sets out the\nlevels of service that you can expect when travelling with British Airways, as well\nas relevant information on how we can help, should things not go according to\nplan.\nDelays and cancellations\nWhilst it is sometimes unavoidable, we understand that flight delays and')

In [8]:
# Validation for page_content type
for chunk in chunks:
    if not isinstance(chunk.page_content, str):
        raise ValueError(f"Invalid page_content: {chunk.page_content}")
    if not isinstance(chunk.metadata, dict):
        raise ValueError(f"Invalid metadata: {chunk.metadata}")

# Create Vector DB

In [9]:
# Preprocessing
# InMemoryVectorStore expects a list of string as input and our chunks consists of list of documsnts
# So we are going to grab page_content from chunks

texts = [chunk.page_content for chunk in chunks]
metadatas = [chunk.metadata for chunk in chunks]

In [10]:
from langchain_core.vectorstores import InMemoryVectorStore

vectorstore = InMemoryVectorStore.from_texts(
    texts,
    metadatas=metadatas,
    embedding=embeddings,
)

In [11]:
# Define a prompt
# PROMPT_TEMPLATE = """
# Answer the question based only on the following context:

# {context}

# ---

# Answer the question based on the above context: {question}
# """

# Retriever

In [12]:
from langchain.prompts import ChatPromptTemplate

In [13]:
# Search the DB and fetch the similar context
# Use the vectorstore as a retriever
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

# Put your query here...
query_text = "What to do if my flight is delayed?"

# Retrieve the most similar text
retrieved_documents = retriever.invoke(query_text)  # of type list

# show the retrieved document's content
print(retrieved_documents[0].page_content)

compensation if you're eligible.
We’ll be in touch as quickly as we can if your flight is delayed or cancelled – so be
sure to update your details onManage My
Booking (https://www.britishairways.com/travel/managebooking/public/en_gb) if
your contact information changes.
Delayed flights
Head to the airport ahead of your original departure time if your flight is delayed.
You cancheck the status of your flightand thelatest travel newsbefore you leave
for the airport.


In [26]:
PROMPT_TEMPLATE = """
Given the context below, answer the question in a concise manner.

Context: {context}

Question: {question}
"""

In [27]:
# Combine the content (page_content) from the retrieved documents into a single context string to be used in the prompt.
context = "\n".join([doc.page_content for doc in retrieved_documents])
# Put into prompt
prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
#Format the Prompt: Insert the context and query into your PROMPT_TEMPLATE to create the final prompt
prompt = prompt_template.format(context=context, question=query_text)

# Generate

In [16]:
from huggingface_hub import login
# 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
login() # You will be prompted for your HF key, which will then be saved locally

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [17]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

model_name = "microsoft/Phi-3-mini-4k-instruct"

# Load model in 8-bit precision
bnb_config = BitsAndBytesConfig(
    quantization_config=bnb_config,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",  # Automatically maps layers to GPU/CPU
    load_in_8bit=True,  # Enables 8-bit quantization
)

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Create a pipeline for text generation
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512, do_sample=False)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   3%|2         | 147M/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

In [18]:
model.save_pretrained("quantized_model")
tokenizer.save_pretrained("quantized_model")

('quantized_model/tokenizer_config.json',
 'quantized_model/special_tokens_map.json',
 'quantized_model/tokenizer.model',
 'quantized_model/added_tokens.json',
 'quantized_model/tokenizer.json')

In [61]:
from langchain.prompts import PromptTemplate
prompt_template = PromptTemplate(
    input_variables=["context", "query_text"],
    template="""Given the context:{context}
    ------
    Answer the following question:{query_text}"""
)

In [62]:
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain
# Create a LangChain LLM instance
llm = HuggingFacePipeline(pipeline=pipe)

# Use the LLM in a LangChain application
# chain = llm | prompt_template
chain = LLMChain(prompt=prompt_template, llm=llm)

In [63]:
response = chain.invoke({"context": context, "query_text": query_text})
print(response)

{'context': "compensation if you're eligible.\nWe’ll be in touch as quickly as we can if your flight is delayed or cancelled – so be\nsure to update your details onManage My\nBooking (https://www.britishairways.com/travel/managebooking/public/en_gb) if\nyour contact information changes.\nDelayed flights\nHead to the airport ahead of your original departure time if your flight is delayed.\nYou cancheck the status of your flightand thelatest travel newsbefore you leave\nfor the airport.\nplease get in touch with them directly if you've got any queries.\nCompensation\nYou may be able to claim back costs caused as a result of a delayed or cancelled\nflight – for example, accommodation costs or food and drink while waiting at an\nairport.\nYou could also be entitled to compensation for a delayed or cancelled flight under\ncertain circumstances.\nVisit our Expenses and compensation page\nhttps://www.britishairways.com/content/information/delayed-or-cancelled-\nflights/compensation.\nflights/

In [74]:
print(response.keys())

dict_keys(['context', 'query_text', 'text'])


In [75]:
# Assuming the answer is inside a key like 'text' or 'response'
question = response.get('query_text', '')
answer = response.get('text', '')
# question_answer = answer.split("Answer:")[-1].strip()
print(f"Human: {question}\n")
print(f"Alex: {answer}")

Human: What to do if my flight is delayed?

Alex: Given the context:compensation if you're eligible.
We’ll be in touch as quickly as we can if your flight is delayed or cancelled – so be
sure to update your details onManage My
Booking (https://www.britishairways.com/travel/managebooking/public/en_gb) if
your contact information changes.
Delayed flights
Head to the airport ahead of your original departure time if your flight is delayed.
You cancheck the status of your flightand thelatest travel newsbefore you leave
for the airport.
please get in touch with them directly if you've got any queries.
Compensation
You may be able to claim back costs caused as a result of a delayed or cancelled
flight – for example, accommodation costs or food and drink while waiting at an
airport.
You could also be entitled to compensation for a delayed or cancelled flight under
certain circumstances.
Visit our Expenses and compensation page
https://www.britishairways.com/content/information/delayed-or-cance

In [23]:
# from langchain_huggingface import HuggingFaceEndpoint,ChatHuggingFace

# llm = HuggingFaceEndpoint(
#     repo_id="microsoft/Phi-3-mini-4k-instruct",
#     task="text-generation",
#     max_new_tokens=512,
#     do_sample=False,
#     repetition_penalty=1.03,
# )

# chat = ChatHuggingFace(llm=llm, verbose=True)

In [33]:
# !pip install langchain-huggingface

In [67]:
# from transformers import AutoTokenizer, AutoModelForCausalLM

In [66]:
# # Load the LLM
# model_ckp ="EleutherAI/pythia-410m"
# tokenizer = AutoTokenizer.from_pretrained(model_ckp)
# model = AutoModelForCausalLM.from_pretrained(model_ckp)

In [64]:
# inputs = tokenizer(prompt, return_tensors='pt')

# outputs = model.generate(
#     inputs["input_ids"],
#     attention_mask = inputs["attention_mask"],
#     max_new_tokens = 500
# )
# # Decode output
# print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [65]:
# sources = [doc.metadata.get("id", None) for doc, _score in retrieved_documents]
# formatted_response = f"Response: {outputs}\nSources: {sources}"
# print(formatted_response)