In [1]:
#pip install langchain langchain-community faiss-cpu pandas openpyxl

In [2]:
#pip install langchain-groq

In [3]:
#pip install torch torchvision transformers

In [4]:
#%pip install --upgrade --quiet langchain-community unstructured openpyxl

In [5]:
#pip install faiss-cpu

In [6]:
#pip install sentence-transformers

In [7]:
#pip install tf-keras

In [8]:
import os
from dotenv import load_dotenv 

In [9]:
#load environment variable (API key) from a .env file
load_dotenv() 
groq_key = os.getenv('GROQ_API_KEY')

In [10]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq

In [11]:
#initialize the LLM with a specific model
chat = ChatGroq(temperature=0, model_name="mixtral-8x7b-32768")

In [12]:
import pandas as pd

In [13]:
#load data from an Excel file
df = pd.read_excel("intercontinental_cleaned_reviews.xlsx")
df

Unnamed: 0.1,Unnamed: 0,0,1,2,3
0,1,1,Andreas Lukasczyk,5/5,The Intercontinental Warsaw exceeded all expec...
1,3,3,Philip Smith,5/5,Stayed here on a journey around Europe. As a L...
2,4,4,Andrea Giovanni Mundo,5/5,Great hotel in a great position!\nRooms are cl...
3,5,5,Paul Coates,1/5,Everything you'd expect in a luxury hotel at t...
4,6,6,Adam Pietrasiak,1/5,"It looks old in a bad way, the staff was unfri..."
...,...,...,...,...,...
289,324,324,Melisa Kirsz,5/5,I recently spent some amazing moments at this ...
290,325,325,Jan R.,4/5,The swimming pool upstairs with a view of the ...
291,326,326,Marcin Huwer,2/5,"Another hotel stay, IHG membership, Platinum.\..."
292,328,328,Pawel Z,3/5,"Pros:\n- Very large, clean rooms, comfortable ..."


In [48]:
df[3][0]

'The Intercontinental Warsaw exceeded all expectations – a full 5-star experience!\n\nThe hotel’s central location is perfect, offering breathtaking views of the city that truly elevate the stay. A standout feature is the Club Lounge, which provides an exceptional atmosphere, outstanding service, and adds a special touch to the overall experience.\n\nThe rooms are spacious, impeccably clean, and incredibly comfortable. The beds ensure top-tier sleep quality, making it easy to relax after a busy day exploring Warsaw. The staff were consistently friendly, professional, and attentive, enhancing the entire stay.\n\nThe value for money is remarkable. The quality and comfort offered are worth every penny.\n\nA fantastic hotel that I would highly recommend to anyone visiting Warsaw. I’m already looking forward to my next stay!'

In [14]:
#import LangChain tools for text processing
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders.dataframe import DataFrameLoader

In [15]:
rating = df[2]
review = df[3]

# Concatenate the two columns as strings and put them into a list. 
concatenated_list = [str(a).replace("\n", " ") + " " + str(b).replace("\n", " ") for a, b in zip(review, rating)]

review_df = pd.DataFrame(concatenated_list)


loader = DataFrameLoader(review_df, 0)

# Initialize RecursiveCharacterTextSplitter with chunk_size and chunk_overlap
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=400,           #max chunk size
    chunk_overlap=30,         #overlap between chunks
    length_function=len,
    add_start_index=True,
    strip_whitespace=True
)

#split the documents
docs = loader.load_and_split(text_splitter=text_splitter)
print(len(docs))

525


In [16]:
for i in range(5):
    print(docs[i])
    print("-----------------------------\n")

page_content='The Intercontinental Warsaw exceeded all expectations – a full 5-star experience!  The hotel’s central location is perfect, offering breathtaking views of the city that truly elevate the stay. A standout feature is the Club Lounge, which provides an exceptional atmosphere, outstanding service, and adds a special touch to the overall experience.  The rooms are spacious, impeccably clean, and' metadata={'start_index': 0}
-----------------------------

page_content='impeccably clean, and incredibly comfortable. The beds ensure top-tier sleep quality, making it easy to relax after a busy day exploring Warsaw. The staff were consistently friendly, professional, and attentive, enhancing the entire stay.  The value for money is remarkable. The quality and comfort offered are worth every penny.  A fantastic hotel that I would highly recommend to anyone visiting' metadata={'start_index': 372}
-----------------------------

page_content='recommend to anyone visiting Warsaw. I’m alr

In [17]:
#import Transformer library to use embedding model
from transformers import AutoModel

In [18]:
#load embedding model
embedding_model = AutoModel.from_pretrained('jinaai/jina-embeddings-v2-base-en', trust_remote_code=True)

In [19]:
#define a function to generate embeddings from text
def get_embedding(text):
  return embedding_model.encode(text).tolist()

In [20]:
#embeddings for document chunks
embeddings = [get_embedding(elem.page_content) for elem in docs]
embeddings[0]

[-0.4270114302635193,
 -0.23767408728599548,
 0.7974557876586914,
 0.4106813967227936,
 -0.13096587359905243,
 0.3300248682498932,
 -0.37509825825691223,
 -0.952001690864563,
 0.7272571325302124,
 0.16498589515686035,
 -0.6879838109016418,
 -0.8207939863204956,
 -0.39522239565849304,
 0.5114736557006836,
 0.07909237593412399,
 0.5228529572486877,
 -0.09808088093996048,
 0.20104730129241943,
 0.1773175299167633,
 -0.19787117838859558,
 -0.31944283843040466,
 -0.16833817958831787,
 0.07277265191078186,
 -0.2855072617530823,
 0.11945456266403198,
 0.4611414074897766,
 0.3523777723312378,
 -0.15145157277584076,
 0.31761282682418823,
 0.1306292712688446,
 0.2524340748786926,
 0.3566110134124756,
 -0.5620642900466919,
 -0.2602449655532837,
 0.261967271566391,
 -0.07213827967643738,
 -0.047015853226184845,
 -0.21850094199180603,
 0.31190401315689087,
 0.6340776681900024,
 -0.24203233420848846,
 -0.07995321601629257,
 -0.43634575605392456,
 1.020460844039917,
 -0.6091925501823425,
 0.107097782

In [21]:
#import faiss for vector storage
import faiss

In [22]:
#create a faiss index to store embeddings
def create_faiss_index(embeddings):
    dimension = embeddings.shape[1] #define number of dimensions
    index = faiss.IndexFlatL2(dimension) #create a vector store
    index.add(embeddings) #add embeddings to the index
    return index

In [23]:
#function to search for the most relevant document embedding from query.
def search(query, index, docs, k=3):
    query_embedding = get_embedding(query) #convert query to embedding
    distances, indices = index.search(np.array([query_embedding]), k) #find top k closest embeddings
    return [docs[i].page_content for i in indices[0]]

In [24]:
import numpy as np

In [25]:
faiss_index = create_faiss_index(np.array(embeddings))

In [26]:
#define a query. query -> change to embedding -> take the closest embeddings for documents
query = "What are the downsides of the hotel?"
search(query, faiss_index, docs, k=4)

["empty.  In addition, a great location - right in the center, close to the metro and railway station, next to a large shopping center.  The biggest disadvantage of the hotel is apparently the passage of time. It's not bad here either, the design is not that anachronistic, but you can still feel it a bit in the rooms and restaurants (which are quite boring in terms of decor). Moreover, the rooms",
 'On the plus side, cleanliness, class of the hotel, beautiful view from every higher room, no matter which side, location, well-equipped gym, pleasant swimming pool with a view, saunas.  The disadvantages are the shower - leaking, bathroom flooded due to leaky shower door and water temperature impossible to set, no clear indication of which one to choose. And when it does, it can alternately fly',
 'example, additional points apart from those from IHG  To sum up, my negative rating is due to the experience with the front desk, which for me is unacceptable in a 5* hotel, especially in the Int

In [27]:
#import LangChain embedding model for vector storage
from langchain.embeddings import HuggingFaceEmbeddings

In [28]:
#load an embedding model using LangChain
embedding_model = HuggingFaceEmbeddings(model_name='jinaai/jina-embeddings-v2-base-en',
                                        model_kwargs={'trust_remote_code': True})

  embedding_model = HuggingFaceEmbeddings(model_name='jinaai/jina-embeddings-v2-base-en',


In [29]:
#import FAISS vector store from langchain
from langchain_community.vectorstores import FAISS

In [30]:
#build vector store with FAISS from langchain.
vector_store = FAISS.from_documents(docs, embedding_model)

In [31]:
#prompt template
prompt = ChatPromptTemplate.from_template("""Answer the following question based only on the provided context:
<context>
{context} 
</context>
Question: {input}""")

In [32]:
from langchain.chains.combine_documents import create_stuff_documents_chain

In [33]:
#document processing chain
document_chain = create_stuff_documents_chain(chat,prompt)

In [34]:
from langchain.chains import create_retrieval_chain

In [35]:
#create retriever from the FAISS vector store
retreiver = vector_store.as_retriever()

In [36]:
#combine retrieval and generation into a RAG 
retreival_chain = create_retrieval_chain(retreiver,document_chain)

In [38]:
def askChat(user_prompt):
    print(retreival_chain.invoke({"input": user_prompt})["answer"])

In [39]:
askChat("what are some good things about this hotel?")

Based on the provided context, here are some good things about this hotel:

1. Good location
2. Clean and spacious rooms
3. Good service
4. Excellent rooftop gym and swimming pool
5. Varied and tasty breakfast
6. Staff friendliness and excellent service
7. Highlight of swimming pool and spa on the rooftop
8. Comfortable beds
9. Great spa
10. Well-located for business trips
11. Superb cleanliness
12. Cozy and pleasant atmosphere in the One Bar.


In [42]:
askChat("What is the most reported downside of the Intercontinental Hotel?")

Based on the provided context, the most reported downside of the Intercontinental Hotel in Warsaw is the unprofessionalism and discriminatory treatment experienced during check-in. The reviewer mentions that this experience did not align with their expectations of exceptional service and hospitality that the Intercontinental brand usually symbolizes for them.


In [43]:
askChat("what are some breakfasts?")

Based on the provided context, some of the breakfast options include:

* Freshly prepared egg dishes, including pre-cooked fresh eggs and an omelette station
* Bacon and sausage
* A cheese and meat section
* Fresh fruit
* Jams and jellies
* Honeycomb
* Breads and pastries
* Vegan and vegetarian dishes
* Fresh juices and fruits
* Dumplings and miso soup
* Delicious yogurts
* Sweet pastries
* Warm savory dishes

The variety of dishes offered is described as impressive and the taste is said to remain at a high level.


In [44]:
askChat("what is the pool temperature?")

The context does not provide specific information about the temperature of the pool water. The first guest mentioned that the swimming pool was too cold to use, while the second guest did not comment on the temperature. Therefore, it is not possible to give a definitive answer based on the provided context.
