In [139]:
from langchain_chroma import Chroma
from text_retrieval import create_vector_embeddings
from image_retrieval_v2 import load_images_from_folder, generate_image_embedding, retrieve_images_clip
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_openai import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
import re

from langchain import hub
from langchain.agents import AgentExecutor, create_openai_functions_agent
from langchain.tools import tool
import json

import matplotlib.pyplot as plt
from PIL import Image
import os

from IPython.display import Image, display
import requests
import time
from openai import RateLimitError
from dotenv import load_dotenv
import psycopg2
from sqlalchemy import create_engine


In [140]:
from sentence_transformers import SentenceTransformer
import torch

# Sentence-embedding model identifier
model_id = "sentence-transformers/all-MiniLM-L6-v2"

# Use the GPU when torch reports one, otherwise stay on the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = SentenceTransformer(model_id)
# Last expression of the cell: moves the model to the device and displays it
model.to(device)

SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)

In [141]:
# Hotel whose data directories are selected below; also interpolated into the agent prompt
chosen_hotel = 'Sheffield Metropolitan'
# Contact address interpolated into the agent prompt for booking amendments
hotel_email = 'stay@sheffieldmet.co.uk'

In [129]:
# Load environment variables from .env file
load_dotenv()

# Get database credentials from environment variables
# (os.getenv returns None for any variable that is not set)
db_host = os.getenv("DB_HOST")
db_user = os.getenv("DB_USER")
db_password = os.getenv("DB_PASSWORD")
db_port = os.getenv("DB_PORT")
db_name = os.getenv("DB_NAME")

In [142]:
#Test DB Connection
# Bind both names up front: if connect() raises, the cleanup in `finally` must not
# hit an undefined name (or a stale object left over from an earlier run of this cell).
connection = None
cursor = None
try:
    connection = psycopg2.connect(
        host=db_host,
        database=db_name,
        user=db_user,
        password=db_password,
        port=db_port
    )

    # Create a cursor object to execute SQL queries
    cursor = connection.cursor()

    # Simple round-trip query to prove the connection works
    cursor.execute("SELECT version();")

    # Fetch the result
    db_version = cursor.fetchone()
    print(f"PostgreSQL database version: {db_version}")

except (Exception, psycopg2.Error) as error:
    print("Error while connecting to PostgreSQL:", error)

finally:
    # Close only what was actually opened
    if cursor is not None:
        cursor.close()
    if connection is not None:
        connection.close()
        print("PostgreSQL connection is closed")

PostgreSQL database version: ('PostgreSQL 16.3 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 7.3.1 20180712 (Red Hat 7.3.1-12), 64-bit',)
PostgreSQL connection is closed


In [160]:
#Choose File Directories
if chosen_hotel == 'Sheffield Metropolitan':
    image_dir = '/Users/lohithkonathala/hc-autoresponse/sheffield_image_data'
    response_info_dir = '/Users/lohithkonathala/hc-autoresponse/sheffield_response_data'
elif chosen_hotel == 'Mercure Hyde Park':
    response_info_dir = '/Users/lohithkonathala/hc-autoresponse/multimodal_agent/mercurehydepark/docs'
    image_dir = '/Users/lohithkonathala/hc-autoresponse/multimodal_agent/mercurehydepark/images'
else:
    # Fail fast: without this branch an unknown hotel would either raise a NameError
    # further down or silently reuse directories left over from a previous run.
    raise ValueError(f"No data directories configured for hotel: {chosen_hotel!r}")


#Generate Embeddings for Text Information and Initialise Chroma Vector Database
vectorstore = create_vector_embeddings(response_info_dir)
retriever = vectorstore.as_retriever()

#Load Images and Generate Image Embeddings
image_batch, image_paths = load_images_from_folder(image_dir)
img_embeddings = generate_image_embedding(image_batch)


print('Resources Intialised')

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Resources Intialised


In [241]:
from image_retrieval import caption_images, find_nearest_caption, get_image_path_from_caption

# Caption every image in the folder; the result is used as a mapping whose keys are
# treated as file names and whose values are the captions.
captioned_images = caption_images(image_dir)

# File names with their extension removed
image_names = [os.path.splitext(name)[0] for name in captioned_images.keys()]
captions = list(captioned_images.values())

# Prefix each caption with its image name ("<name>: <caption>")
captions_revised = [f"{stem}: {text}" for stem, text in zip(image_names, captions)]

# image name -> prefixed caption
captioned_images_revised = dict(zip(image_names, captions_revised))

In [239]:
# Ad-hoc check of the caption-based lookup: pick the caption nearest to the query,
# then map that caption back to an image path.
query = 'Show me a standard king'
# css_score is presumably a similarity score for the match -- TODO confirm in image_retrieval
best_caption, css_score = find_nearest_caption(query, captions_revised)
image_path = get_image_path_from_caption(best_caption, captioned_images_revised, image_dir)

Batches: 100%|██████████| 1/1 [00:00<00:00, 12.73it/s]


In [240]:
# Recompute the path for best_caption, then append the file extension.
# The keys of captioned_images_revised had their extension removed with os.path.splitext.
# NOTE(review): appending '.jpg' assumes every image is a .jpg file -- confirm.
image_path = get_image_path_from_caption(best_caption, captioned_images_revised, image_dir)
image_path += '.jpg'
print(image_path)

/Users/lohithkonathala/hc-autoresponse/sheffield_image_data/standard-king-room.jpg


In [132]:
def extract_and_remove_images(text, image_dir='/Users/lohithkonathala/hc-autoresponse/sheffield_image_data'):
    """
    Pull Markdown image references out of ``text``.

    Args:
        text (str): Text that may contain ``![caption](target)`` image markup.
        image_dir (str, optional): Directory prepended to targets that are not
            absolute paths. A falsy value leaves such targets untouched.

    Returns:
        tuple: ``(captions, paths, cleaned_text)``. ``captions`` and ``paths``
        are parallel lists with one entry per image, in order of appearance.
        ``cleaned_text`` is ``text`` with the image markup removed and the
        surrounding whitespace stripped.
    """
    pattern = r'!\[(.*?)\]\((.*?)\)'
    matches = re.findall(pattern, text)
    cleaned_text = re.sub(pattern, '', text).strip()

    captions = []
    paths = []

    for caption, target in matches:
        captions.append(caption)

        # Remove only the "file://" scheme and keep the slash that follows it, so
        # "file:///Users/a.jpg" stays the absolute path "/Users/a.jpg" instead of
        # turning into a relative path that gets re-joined under image_dir.
        path = re.sub(r'^file://(?=/)', '', target)

        # Windows-style URIs ("file:///C:/dir/a.jpg") leave "/C:/dir/a.jpg";
        # drop the slash in front of the drive letter.
        if re.match(r'^/[A-Za-z]:', path):
            path = path[1:]

        if not os.path.isabs(path) and image_dir:
            path = os.path.join(image_dir, path)
        paths.append(path)

    return captions, paths, cleaned_text

In [133]:
@tool
def get_information_retrieval_tool(query: str, chunk_size: int = 1000):
    """
    Retrieves relevant documents for a query and returns their text as chunks.

    The documents come from the notebook-level retriever. Their text is split into
    chunks of roughly chunk_size characters and the first five chunks are returned.

    Args:
        query (str): The search query to retrieve relevant documents.
        chunk_size (int, optional): The size of each text chunk. Defaults to 1000.

    Returns:
        str: Up to five text chunks from the retrieved documents, separated by
             double newlines. Empty when nothing is retrieved.
    """
    # Upper bound on the number of chunks handed back to the agent
    max_chunks = 5

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=0)

    def format_docs(docs):
        # Flatten all documents into one list of chunks, keeping document order
        chunks = []
        for doc in docs:
            chunks.extend(text_splitter.split_text(doc.page_content))
        return "\n\n".join(chunks[:max_chunks])

    # `invoke` is the Runnable entry point that replaces the deprecated
    # `get_relevant_documents` method on retrievers.
    relevant_docs = retriever.invoke(query)
    return format_docs(relevant_docs)


@tool
def get_relevant_images_tool(query: str):
    """
    Retrieve the file path of a relevant image based on a given query.
    """
    # Re-read the image folder to obtain the list of image paths
    _, folder_paths = load_images_from_folder(image_dir)

    # Shortlist three candidates using the precomputed image embeddings
    ranked = retrieve_images_clip(query, img_embeddings, folder_paths, top_k=3)
    candidate_paths = [path for path, _ in ranked]

    # Caption the shortlist and return the first key of the captioning result
    captioned = caption_images(candidate_paths)
    return list(captioned.keys())[0]

@tool
def get_booking_info(first_name, last_name, booking_reference):
    """
    Retrieves booking information from the operadashboard table for a given guest.
    Args:
    first_name (str): The first name of the guest.
    last_name (str): The last name of the guest.
    booking_reference (str): The booking reference number.
    Returns:
    list: A list of tuples, where each tuple contains the booking information for a row.
    Returns an empty list if no results are found or if an error occurs.
    Note:
    - This function queries a PostgreSQL database for booking information.
    - It filters results for hotel_id = 6, the provided first name, last name, and booking reference.
    - Results are ordered by stay_date in descending order.
    """
    booking_info = []
    # Bind both names before the try block: if connect() raises, the cleanup in
    # `finally` must not fail on an unbound local and mask the handled error.
    connection = None
    cursor = None
    try:
        connection = psycopg2.connect(
            host=db_host,
            database=db_name,
            user=db_user,
            password=db_password,
            port=db_port
        )
        # Create a cursor object to execute SQL queries
        cursor = connection.cursor()
        # Guest values are passed as query parameters, never formatted into the SQL
        sql_query = """
            SELECT *
            FROM operadashboard
            WHERE hotel_id = 6
            AND first_name = %s
            AND last_name = %s
            AND booking_reference = %s
            ORDER BY stay_date DESC;
        """
        # Execute the query with parameters
        cursor.execute(sql_query, (first_name, last_name, booking_reference))
        # Fetch the results
        booking_info = cursor.fetchall()
    except (Exception, psycopg2.Error) as error:
        # Best effort: report the problem and fall through to return the empty list
        print("Error while connecting to PostgreSQL or executing query:", error)
    finally:
        # Close only what was actually opened
        if cursor is not None:
            cursor.close()
        if connection is not None:
            connection.close()
    return booking_info

@tool
def get_room_rates(stay_date, room_type):
    """
    Retrieves room rates for a specific stay date and room type.
    Args:
    stay_date (date): The date of stay.
    room_type (str): The type of room.
    Returns:
    list: A list of dictionaries, where each dictionary contains the rate information for a row.
    Returns an empty list if no results are found or if an error occurs.
    Note:
    - This function queries a PostgreSQL database for room rate information.
    - It filters results for hotel_id = 6, the provided stay date, and room type.
    - Results include refundable and non-refundable rates, adult count, and meal information.
    """
    results = []
    # Bind both names before the try block: if connect() raises, the cleanup in
    # `finally` must not fail on an unbound local and mask the handled error.
    connection = None
    cursor = None
    try:
        connection = psycopg2.connect(
            host=db_host,
            database=db_name,
            user=db_user,
            password=db_password,
            port=db_port
        )
        cursor = connection.cursor()
        # Only rates belonging to the most recent rate update for the hotel are
        # considered; stay_date and room_type are passed as query parameters.
        sql_query = """
        WITH latest_rate_update AS (
            SELECT MAX(date_update) AS latest_update
            FROM rate_update
            WHERE hotel_id = 6
        ),
        ota_rooms AS (
            SELECT DISTINCT
                o.ota_room_id,
                r."name",
                o.room_id
            FROM ota_room o
            JOIN room r ON r.room_id = o.room_id
            WHERE o.hotel_id = 6
        ),
        latest_rates AS (
            SELECT
                MIN(CASE WHEN r.refundable THEN r.amount END) AS refundable_rate,
                MIN(CASE WHEN NOT r.refundable THEN r.amount END) AS non_refundable_rate,
                r.stay_date,
                o.name AS room_name,
                r.adultcount,
                r.breakfast,
                r.lunch,
                r.dinner
            FROM
                rate_new r
            JOIN
                rate_update u ON r.rate_update_id = u.rate_update_id
            JOIN
                ota_rooms o ON o.ota_room_id = r.ota_room_id
            JOIN
                latest_rate_update lru ON u.date_update = lru.latest_update
            WHERE
                u.hotel_id = 6
                AND r.stay_date = %s
                AND o.name = %s
            GROUP BY
                r.stay_date,
                o.name,
                r.adultcount,
                r.breakfast,
                r.lunch,
                r.dinner
        )
        SELECT *
        FROM latest_rates
        ORDER BY stay_date DESC, room_name, adultcount;
        """
        cursor.execute(sql_query, (stay_date, room_type))
        # Pair every row with the result column names to build one dict per row
        column_names = [desc[0] for desc in cursor.description]
        results = [dict(zip(column_names, row)) for row in cursor.fetchall()]
    except (Exception, psycopg2.Error) as error:
        # Best effort: report the problem and fall through to return the empty list
        print("Error while connecting to PostgreSQL or executing query:", error)
    finally:
        # Close only what was actually opened
        if cursor is not None:
            cursor.close()
        if connection is not None:
            connection.close()
    return results

@tool
def response_review_tool(query, response):
    """
    Reviews a multi-modal response for consistency and image relevance.

    This tool extracts images from a response, attaches them to a review request, and
    evaluates both the textual content and images for consistency and relevance using
    a language model.

    Args:
        query (str): The original query that prompted the response
        response (str): The complete response including text and embedded images

    Returns:
        str: A review analysis containing:
            - Identification of any inconsistencies in the response
            - Assessment of image relevance to the response
            - Overall evaluation of response quality
    """
    # Local imports: nothing else in the notebook needs these names
    import base64
    import mimetypes
    from langchain_core.messages import HumanMessage

    # Split the response into its image references and its plain text. Relative image
    # references are resolved against the active hotel's image folder rather than the
    # helper's hard-coded default directory.
    _, extracted_image_paths, cleaned_response = extract_and_remove_images(response, image_dir=image_dir)

    review_text = (
        "You will be provided with a query and a response from a Multi-Modal Response Agent.\n"
        "Your goal is to review the response check whether the attached image(s)\n"
        "are relevant to the response. If the image is not relevant remove it from the response.\n"
        f"Query: {query}\n"
        f"Response: {cleaned_response}\n"
        f"Images: {len(extracted_image_paths)} attached to this message"
    )

    # Send every image as an image content part. Pasting base64 into the prompt text
    # would only show the model an opaque string and can overflow the context window.
    message_parts = [{"type": "text", "text": review_text}]
    for image_path in extracted_image_paths:
        # Fall back to JPEG when the type cannot be guessed from the file name
        mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"
        with open(image_path, 'rb') as img_file:
            encoded = base64.b64encode(img_file.read()).decode('utf-8')
        message_parts.append({
            "type": "image_url",
            "image_url": {"url": f"data:{mime_type};base64,{encoded}"},
        })

    model = ChatOpenAI(model="gpt-4o-mini")
    chain = model | StrOutputParser()

    return chain.invoke([HumanMessage(content=message_parts)])

   



In [135]:
# Instructions given to the guest-services agent. This is an f-string: {chosen_hotel}
# and {hotel_email} are filled in when this cell runs, so re-run the cell after
# changing either value. Tool names in the text match the names of the registered
# tool functions, and every tagged section opens with <tag> and closes with </tag>.
baseModelInstruction = f"""
    You are an AI assistant specializing in hotel guest services. Your primary objectives are to provide professional, informative responses to guest inquiries while identifying upsell opportunities. Follow these steps precisely:

    1. Task Decomposition 
    Analyze the user's inquiry to determine:
    - Information required to respond (including relevant image data if applicable)
    - Potential upsell opportunities

    2. Tool Selection and Usage:

    <get_booking_info>
    Use when: Adding extra nights, pre-registering for early check-in, or requesting room upgrades.
    Retrieves: Guest's booking information (rates, room name, check-in/out dates).
    Required inputs: first_name, last_name, booking_reference
    If missing information: Request it from the guest.
    </get_booking_info>

    <get_room_rates>
    Use when: Quoting prices for additional nights or room upgrades.
    Required inputs: stay_date, room_type
    Returns: refundable and non-refundable room rates in Pound Sterling (£) - only return the non-refundable rate to the user and DO not disclose that it is non-refundable.
    If missing information: Request it from the guest.
    </get_room_rates>

    <get_information_retrieval_tool>
    Use when: Answering queries about hotel amenities, check-in, food & breakfast options.
    Retrieves: Relevant information from the hotel's knowledge base.
    Required input: [guest-query]
    Note: Use ONLY the retrieved information in your response.
    </get_information_retrieval_tool>

    <get_relevant_images_tool>
    Use when: Enhancing responses about hotel amenities, check-in, food & breakfast options.
    Required input: Context-based query from guest's inquiry
    Note: Use ONLY when an image adds meaningful value to the guest experience.
    </get_relevant_images_tool>

    <response_review_tool>
    Use when: Reviewing responses generated by the agent to ensure accuracy. 
    Required input: guest enquiry, generated text response, image data (if present)
    </response_review_tool>

    3. Example Workflows

    <example1>
    Guest request: Add an extra night to booking
    Expected outcome: Provide a quote for the additional night
    Steps:
    1. Check for required booking information
    2. Retrieve booking details with get_booking_info
    3. Get room rate for extra night using get_room_rates
    4. Provide quote to guest
    </example1>

    <example2>
    Guest request: Early check-in
    Expected outcome: Offer pre-registration option with pricing
    Steps:
    1. Retrieve early check-in information with get_information_retrieval_tool
    2. Get booking details with get_booking_info
    3. Calculate pre-registration cost using get_room_rates
    4. Present option to guest
    </example2>

    <example3>
    Guest request: Breakfast timings
    Expected outcome: Provide information with relevant image
    Steps:
    1. Retrieve breakfast details with get_information_retrieval_tool
    2. Select relevant image using get_relevant_images_tool
    3. Compose response with information and image
    </example3>

    <example4>
    Guest request: Room upgrade
    Expected outcome: offer details of potential room upgrades to guest and negotiate on price if needed
    Steps:
    1. Retrieve information on room-types using the get_information_retrieval_tool
    2. Get booking details with get_booking_info, specifically the room_name and rate at which the booking was made
    3. Use get_room_rates to find the new rate for the room_name requested for the desired stay-dates
    4. Calculate the upgrade cost/night and quote this to the guest. 
    5. If the guest refuses use negotiation tactices to convince them i.e. offer a complimentary free-breakfast (if applicable) or reduce the rate by 10%
    </example4>

    4. Request For Information:
    Structure:
    Dear [Guest Name/Guest],

    [Ordered list of information required e.g. 
    1. </info1>
    2. </info2>
    etc. ]

    Kind Regards,
    {chosen_hotel}

    5. Response Composition:
    Structure:
    Dear [Guest Name/Guest],

    [Direct answer to inquiry using retrieved information]

    [Upsell suggestion, if applicable]

    [Brief closing statement]
    [If booking amendment include: If you would like to proceed with this amendment please contact our hotel concierge at {chosen_hotel} via email: {hotel_email}]

    Kind regards,
    {chosen_hotel}

    - Personalize greeting when possible
    - Include relevant image file paths in the output
    - Maintain a professional, courteous tone throughout


    Key Guidelines:
    - Utilize the Memory Module for multi-turn conversations, ensuring context-aware responses
    - Provide clear, concise answers that directly address guest concerns
    - NEVER fabricate information; use only data from get_information_retrieval_tool
    - Identify and tactfully present relevant upsell opportunities
    - Ensure all image queries are specific and contextually appropriate
    - Optimize for clarity, accuracy, and guest satisfaction
    - Maintain consistency in tone and information across multiple interactions

    6. Check Response
    Call the response_review_tool with the query, response and any image data to review the response prior to displaying this to the user.
    Use the feedback from the response_review_tool to augment the response to improve its quality. 
    Accuracy is paramount and this tool will ensure the response is coherent, relevant and at the necessary standard for interacting with guests. 

    Execute these instructions precisely for each guest inquiry.
"""

In [137]:
def setup_response_agent():
    """
    Assemble the guest-response agent.

    Pulls the "langchain-ai/openai-functions-template" prompt from the hub, fills its
    instructions with baseModelInstruction, and wires the five notebook tools to a
    gpt-4o chat model.

    Returns:
        AgentExecutor: Executor for the OpenAI-functions agent, with verbose output off.
    """
    toolset = [
        get_information_retrieval_tool,
        get_relevant_images_tool,
        get_booking_info,
        get_room_rates,
        response_review_tool,
    ]

    # Hub template with the instructions slot filled in
    agent_prompt = hub.pull("langchain-ai/openai-functions-template").partial(
        instructions=baseModelInstruction
    )

    chat_model = ChatOpenAI(temperature=0.1, model="gpt-4o")
    functions_agent = create_openai_functions_agent(chat_model, toolset, agent_prompt)

    return AgentExecutor(agent=functions_agent, tools=toolset, verbose=False)

In [138]:
# Sample guest enquiry used to exercise the agent end to end
enquiry = 'Hi, I want to upgrade my room?'
agent = setup_response_agent()
# The enquiry is wrapped in a dict and passed as the agent's "input" value
response = agent.invoke({
    "input": {
        "email_content": enquiry

    }
})

# Print the agent's final answer (the "output" entry of the result)
output_text = response["output"]
print(output_text)

Dear Guest,

To assist you with upgrading your room, I will need the following information:

1. Your first name
2. Your last name
3. Your booking reference number

Once I have this information, I can provide you with details on available room upgrades and any associated costs.

Kind Regards,  
Sheffield Metropolitan
