In [5]:
from langchain_google_genai import GoogleGenerativeAI
from dotenv import load_dotenv
import os
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_community.docstore.in_memory import InMemoryDocstore
import faiss
import time
import numpy as np
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders.csv_loader import CSVLoader

In [6]:
load_dotenv()
GOOGLE_KEY = os.getenv("GOOGLE_API_KEY")
HF_TOKEN = os.getenv("HF_TOKEN")

In [3]:
# Loading the docs using CSVLoader

loader = CSVLoader("F:\Buyogo_assignment\hotel_bookings.csv")
documents = loader.load()

  loader = CSVLoader("F:\Buyogo_assignment\hotel_bookings.csv")


In [4]:
documents[0]

Document(metadata={'source': 'F:\\Buyogo_assignment\\hotel_bookings.csv', 'row': 0}, page_content='hotel: Resort Hotel\nis_canceled: 0\nlead_time: 342\narrival_date_year: 2015\narrival_date_month: July\narrival_date_week_number: 27\narrival_date_day_of_month: 1\nstays_in_weekend_nights: 0\nstays_in_week_nights: 0\nadults: 2\nchildren: 0\nbabies: 0\nmeal: BB\ncountry: PRT\nmarket_segment: Direct\ndistribution_channel: Direct\nis_repeated_guest: 0\nprevious_cancellations: 0\nprevious_bookings_not_canceled: 0\nreserved_room_type: C\nassigned_room_type: C\nbooking_changes: 3\ndeposit_type: No Deposit\nagent: NULL\ncompany: NULL\ndays_in_waiting_list: 0\ncustomer_type: Transient\nadr: 0\nrequired_car_parking_spaces: 0\ntotal_of_special_requests: 0\nreservation_status: Check-Out\nreservation_status_date: 01-07-15')

In [7]:
# Initializing the LLM and Embedding model using Google models.

llm = GoogleGenerativeAI(model="gemini-2.0-pro-exp-02-05",api_key=GOOGLE_KEY)

# embeddings = HuggingFaceEmbeddings(
#     model_name="sentence-transformers/all-MiniLM-L6-v2",
#     encode_kwargs={"batch_size": 64, "normalize_embeddings": True}
#     )

In [8]:
llm.invoke("Hi")

'Hi there! How can I help you today?'

In [6]:
# Adding metadata manually (useful for filtering/querying later)
for doc in documents:
    lines = doc.page_content.split("\n")
    meta = {}
    for line in lines:
        if ":" in line:
            key, value = line.split(":", 1)
            meta[key.strip()] = value.strip()
    doc.metadata.update(meta)

In [7]:
doc

Document(metadata={'source': 'F:\\Buyogo_assignment\\hotel_bookings.csv', 'row': 119389, 'hotel': 'City Hotel', 'is_canceled': '0', 'lead_time': '205', 'arrival_date_year': '2017', 'arrival_date_month': 'August', 'arrival_date_week_number': '35', 'arrival_date_day_of_month': '29', 'stays_in_weekend_nights': '2', 'stays_in_week_nights': '7', 'adults': '2', 'children': '0', 'babies': '0', 'meal': 'HB', 'country': 'DEU', 'market_segment': 'Online TA', 'distribution_channel': 'TA/TO', 'is_repeated_guest': '0', 'previous_cancellations': '0', 'previous_bookings_not_canceled': '0', 'reserved_room_type': 'A', 'assigned_room_type': 'A', 'booking_changes': '0', 'deposit_type': 'No Deposit', 'agent': '9', 'company': 'NULL', 'days_in_waiting_list': '0', 'customer_type': 'Transient', 'adr': '151.2', 'required_car_parking_spaces': '0', 'total_of_special_requests': '2', 'reservation_status': 'Check-Out', 'reservation_status_date': '07-09-17'}, page_content='hotel: City Hotel\nis_canceled: 0\nlead_tim

In [None]:
# Embedding all the document contents
texts = [doc.page_content for doc in documents]
doc_embeddings = embeddings.embed_documents(texts)

In [None]:
doc_embeddings[0]

[0.06092626601457596,
 0.01246693730354309,
 -0.09562069177627563,
 0.07169980555772781,
 -0.01373897772282362,
 0.02479085698723793,
 0.02508923038840294,
 -0.05124780163168907,
 -0.006339529063552618,
 0.0012486475752666593,
 0.10322640091180801,
 -0.08402818441390991,
 -0.02681547962129116,
 0.04489441215991974,
 -0.005366082768887281,
 -0.035926543176174164,
 -0.049154315143823624,
 -0.05279989168047905,
 0.04808090627193451,
 -0.014071335084736347,
 -0.041201550513505936,
 0.02626643143594265,
 -0.04100552573800087,
 -0.013863072730600834,
 0.028344837948679924,
 -0.033870451152324677,
 -0.017010202631354332,
 0.055755916982889175,
 -0.025947580114006996,
 -0.09697087854146957,
 -0.02002907544374466,
 0.11255601048469543,
 0.03562292084097862,
 -0.026950446888804436,
 0.128088116645813,
 0.0259380042552948,
 -0.07370774447917938,
 -0.07425805926322937,
 0.014979624189436436,
 0.005196178797632456,
 0.0018378598615527153,
 -0.03125954791903496,
 6.568845856236294e-05,
 -0.038695324

In [16]:
# Create FAISS index
dimension = len(doc_embeddings[0])
faiss_index = faiss.IndexFlatL2(dimension)

# Initialize docstore and id mapping
docstore = InMemoryDocstore()
index_to_docstore_id = {}

# Populate FAISS index and docstore
for i, (doc, embedding) in enumerate(zip(documents, doc_embeddings)):
    faiss_index.add(np.array(embedding).reshape(1, -1))
    doc_id = str(i)
    docstore.add({doc_id: doc})
    index_to_docstore_id[i] = doc_id

In [18]:
# Create FAISS vector store

vector_store = FAISS(
    embedding_function=embeddings,
    index=faiss_index,
    docstore=docstore,
    index_to_docstore_id=index_to_docstore_id,
)

# Embedding only once and save FAISS index for reuse (Optional)
vector_store.save_local("faiss_index_hotelbookings")

In [19]:
# Setup retriever and QA chain
retriever = vector_store.as_retriever()

In [20]:
prompt_template = PromptTemplate.from_template(
    "You are a helpful assistant for hotel booking analytics.\n"
    "Use the following context to answer the question.\n\n{context}\n\nQuestion: {input}"
)

In [25]:
# Creating the chains

document_chain = create_stuff_documents_chain(llm, prompt_template)
qa_chain = create_retrieval_chain(retriever, document_chain)

In [37]:
# Example query + API response time measurement
start_time = time.time()
response = qa_chain.invoke({"input": "What is the most common meal type?"})
end_time = time.time()
response_time = end_time - start_time

print("Response:", response['answer'])
print(f"API response time: {response_time:.2f} seconds")

Response: Based on the provided data, the most common meal type is BB.
API response time: 1.03 seconds
