In [61]:
import numpy as np
import pandas as pd
import time
import redis
import time

from redis.commands.search.field import VectorField
from redis.commands.search.field import TextField
from redis.commands.search.field import TagField
from redis.commands.search.query import Query

In [62]:
redis_conn = redis.Redis(
  host='localhost',
  port=6379,
  password='')

In [63]:
MAX_TEXT_LENGTH=512
NUMBER_PRODUCTS=10000

def auto_truncate(val):
    return str(val)[:MAX_TEXT_LENGTH]


In [64]:
room_data = pd.read_csv(
    'app/data/airbnb.csv', delimiter=',', encoding_errors='ignore', on_bad_lines='skip',
    converters={'bullet_point': auto_truncate,'amenities':auto_truncate,'name':auto_truncate}
)

  room_data = pd.read_csv(


In [65]:
room_data = room_data[
    [
        'listing_id', 'name', 'host_id', 'host_since', 'host_location',
       'host_is_superhost', 'host_total_listings_count',
       'host_has_profile_pic', 'host_identity_verified', 'neighbourhood',
       'district', 'city', 'property_type',
       'room_type', 'accommodates', 'bedrooms', 'amenities', 'price',
       'minimum_nights', 'maximum_nights', 'review_scores_rating',
       'review_scores_accuracy', 'review_scores_cleanliness',
       'review_scores_checkin', 'review_scores_communication',
       'review_scores_location', 'review_scores_value', 'instant_bookable'
    ]
]

In [66]:
# room_data = room_data.drop_duplicates(subset=['Area'], keep='first', ignore_index=True)

In [67]:
room_data = room_data.sample(10000).reset_index(drop=True)

In [68]:
room_data.iloc[0]

listing_id                                                              42047437
name                             Bright & spacious condo off Williamsburg bridge
host_id                                                                  6880965
host_since                                                            2013-06-12
host_location                                  New York, New York, United States
host_is_superhost                                                              f
host_total_listings_count                                                    0.0
host_has_profile_pic                                                           t
host_identity_verified                                                         f
neighbourhood                                                       Williamsburg
district                                                                Brooklyn
city                                                                    New York
property_type               

In [69]:
#get the first 1000 products with non-empty item keywords
rooms_metadata = room_data.head(NUMBER_PRODUCTS).to_dict(orient='index')

In [70]:
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('sentence-transformers/all-distilroberta-v1')


item_keywords =  [
    ', '.join(str(rooms_metadata[i][key]) for key in ['name', 'host_location', 'neighbourhood', 'property_type', 'amenities', 'price'])
    for i in rooms_metadata.keys()
]
room_context = {
    rooms_metadata[i]['listing_id']:', '.join(str(rooms_metadata[i][key]) for key in ['name', 'host_location', 'neighbourhood', 'property_type', 'amenities', 'price'])
    for i in rooms_metadata.keys()
}
item_keywords_vectors = [model.encode(sentence) for sentence in item_keywords]

In [71]:
item_keywords[0]

'Bright & spacious condo off Williamsburg bridge, New York, New York, United States, Williamsburg, Entire apartment, ["Refrigerator", "Microwave", "Shampoo", "Air conditioning", "Kitchen", "Oven", "Iron", "Hangers", "Smoke alarm", "Washer", "Dedicated workspace", "Host greets you", "Dryer", "Fire extinguisher", "Hot water", "Long term stays allowed", "Hair dryer", "Essentials", "Dishes and silverware", "TV", "Dishwasher", "Wifi", "Heating", "Cooking basics", "Stove", "Elevator", "Carbon monoxide alarm"], 175'

In [72]:
len(item_keywords_vectors)
len(rooms_metadata)
# Check one of the products
rooms_metadata[2]

{'listing_id': 26497309,
 'name': 'Sun Drenched One Bedroom in the East Village',
 'host_id': 40594187,
 'host_since': '2015-08-05',
 'host_location': 'New York, New York, United States',
 'host_is_superhost': 'f',
 'host_total_listings_count': 1.0,
 'host_has_profile_pic': 't',
 'host_identity_verified': 't',
 'neighbourhood': 'East Village',
 'district': 'Manhattan',
 'city': 'New York',
 'property_type': 'Entire apartment',
 'room_type': 'Entire place',
 'accommodates': 2,
 'bedrooms': 1.0,
 'amenities': '["Smoke alarm", "Essentials", "TV", "Wifi", "Shampoo", "Heating", "Air conditioning", "Long term stays allowed", "Carbon monoxide alarm", "Kitchen"]',
 'price': 150,
 'minimum_nights': 30,
 'maximum_nights': 1125,
 'review_scores_rating': 90.0,
 'review_scores_accuracy': 10.0,
 'review_scores_cleanliness': 9.0,
 'review_scores_checkin': 9.0,
 'review_scores_communication': 9.0,
 'review_scores_location': 9.0,
 'review_scores_value': 10.0,
 'instant_bookable': 'f'}

In [73]:
def load_vectors(client, product_metadata, vector_dict, vector_field_name):
    p = client.pipeline(transaction=False)
    for index in product_metadata.keys():    
        #hash key
        key='product_ID:'+ str(product_metadata[index]['listing_id'])
        
        #hash values
        item_metadata = product_metadata[index]
        item_keywords_vector = vector_dict[index].astype(np.float32).tobytes()
        item_metadata[vector_field_name]=item_keywords_vector
        
        # HSET
        p.hset(key,mapping=item_metadata)
            
    p.execute()

def create_flat_index (redis_conn,vector_field_name,number_of_vectors, vector_dimensions=512, distance_metric='L2'):
    redis_conn.ft().create_index([
        VectorField(vector_field_name, "FLAT", {"TYPE": "FLOAT32", "DIM": vector_dimensions, "DISTANCE_METRIC": distance_metric, "INITIAL_CAP": number_of_vectors, "BLOCK_SIZE":number_of_vectors }),
        TextField("property_type", as_name='property_type'),
        TextField("name", as_name='name'),
        TextField("amenities", as_name='amenities'),
        TextField("city", as_name='city')
    ]) 
    



In [74]:

ITEM_KEYWORD_EMBEDDING_FIELD='item_keyword_vector'
TEXT_EMBEDDING_DIMENSION=768
NUMBER_PRODUCTS=10000

print ('Loading and Indexing + ' +  str(NUMBER_PRODUCTS) + ' products')

#flush all data
redis_conn.flushall()

Loading and Indexing + 10000 products


True

In [75]:

#create flat index & load vectors
create_flat_index(
    redis_conn, ITEM_KEYWORD_EMBEDDING_FIELD,NUMBER_PRODUCTS,TEXT_EMBEDDING_DIMENSION,'COSINE'
)
load_vectors(redis_conn,rooms_metadata,item_keywords_vectors,ITEM_KEYWORD_EMBEDDING_FIELD)

In [76]:
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI

MODEL = 'gpt-3.5-turbo'
openai_api_key = "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"


llm = OpenAI(
    model_name=MODEL,
    temperature=0.3,
    openai_api_key=openai_api_key
)

booking_prompt = PromptTemplate(
    input_variables=["product_description"],
    template="Create comma seperated product keywords to perform a query on a airbnb dataset for this user input: "
             "{product_description}",
)


from langchain.chains import LLMChain
chain = LLMChain(llm=llm, prompt=booking_prompt)




In [26]:
userinput = input("Hey im a Roo(m)bot, how can i help you today? ")
print("User:", userinput)

# Run the chain only specifying the input variable.
keywords = chain.run(userinput)

topK=3
#vectorize the query
query_vector = model.encode(keywords).astype(np.float32).tobytes()

#prepare the query
q = Query(
    f'*=>[KNN {topK} @{ITEM_KEYWORD_EMBEDDING_FIELD} $vec_param AS vector_score]'
).sort_by('vector_score').paging(0,topK).return_fields(
        'vector_score','property_type','name','amenities', 'city'
    ).dialect(2)
params_dict = {"vec_param": query_vector}

#Execute the query
results = redis_conn.ft().search(q, query_params = params_dict)


Hey im a Roo(m)bot, how can i help you today?  barcelona apartment close by to some good bars


User: barcelona apartment close by to some good bars


In [53]:
full_result_string = ''
for product in results.docs:
    full_result_string += ' '.join(
        [
            product.property_type, product.name, f", amenities are:", product.amenities," Located in city:", product.city,
            'ID of this booking is:', product.id,
            "\n\n\n"
        ]
    )


from langchain.memory import ConversationBufferMemory

BOOKING_AGENT_TEMPLATE = """
You are a room booking assistant. Be kind, detailed and try to sell the booking of the apartment to me. 
Present the three given queried search result in a nice way as answer to the user input.

Collect entities after user confirms booking choice, we need these entities: booking_date_start, booking_date_end, name, surname. 

dont ask questions back! 

ALWAYS PROVIDE RESPONSE AS JSON STRING with keys:
ANSWER, BOOKING_CONFIRMED, CONFIRMED_BOOKING_DATES, NAME and the ID of this.

PLEASE ALWAYS HAVE THE JSON STRING PART



{chat_history}
Human: {user_msg}
Chatbot:"""


BOOKING_PROMPT = PromptTemplate(
    input_variables=["chat_history", "user_msg"],
    template=BOOKING_AGENT_TEMPLATE
)

memory = ConversationBufferMemory(memory_key="chat_history")

llm_chain = LLMChain(
    llm=OpenAI(model_name="gpt-3.5-turbo", temperature=0.8, openai_api_key=openai_api_key), 
    prompt=BOOKING_PROMPT, 
    verbose=False, 
    memory=memory,
)


answer = llm_chain.predict_and_parse(user_msg= f"{full_result_string} ---\n\n {userinput}")
print("Bot:", answer)
time.sleep(0.5)

while True:
    follow_up = input("")
    print("User:", follow_up)
    answer = llm_chain.predict(
        user_msg=follow_up
    )
    print("Bot:", answer)
    time.sleep(0.5)



Bot: ANSWER: I'd be happy to help you find a great apartment in Barcelona! Here are three options that match your search criteria:

1. Entire apartment in the heart of the city, close to great bars and restaurants. Amenities include heating, TV, kitchen, essentials, washer, smoke alarm, and Wi-Fi. ID of this booking is: product_ID:3719201
2. Cozy apartment in a lively neighborhood known for its nightlife, with amenities including heating, kitchen, essentials, washer, and Wi-Fi. ID of this booking is: product_ID:5478112
3. Spacious apartment located in the trendy Gothic Quarter, with easy access to bars and other attractions. Amenities include heating, TV, kitchen, essentials, washer, smoke alarm, and Wi-Fi. ID of this booking is: product_ID:6242839

Please let me know if you are interested in booking any of these options! If so, please provide me with the booking dates, your full name and surname so I can make it happen for you.

JSON STRING: {"ANSWER": "I'd be happy to help you find a

 the second one please


User: the second one please
Bot: ANSWER: Great choice! I have booked the cozy apartment in the lively neighborhood for you. The ID of this booking is: product_ID:5478112. Please confirm your booking dates and provide your full name and surname so I can finalize the booking for you.

JSON STRING: {"ANSWER": "Great choice! I have booked the cozy apartment in the lively neighborhood for you. The ID of this booking is: product_ID:5478112. Please confirm your booking dates and provide your full name and surname so I can finalize the booking for you.", "BOOKING_CONFIRMED": false, "CONFIRMED_BOOKING_DATES": null, "NAME": null, "ID": "product_ID:5478112"}


 Erik Berg, 15.04.2023 till 19.04.2023


User: Erik Berg, 15.04.2023 till 19.04.2023
Bot: ANSWER: Thank you for providing your booking details, Erik Berg! I have reserved the cozy apartment in the lively neighborhood for you from 15.04.2023 to 19.04.2023. Your booking is not yet confirmed, please confirm if these dates work for you. 

JSON STRING: {"ANSWER": "Thank you for providing your booking details, Erik Berg! I have reserved the cozy apartment in the lively neighborhood for you from 15.04.2023 to 19.04.2023. Your booking is not yet confirmed, please confirm if these dates work for you.", "BOOKING_CONFIRMED": false, "CONFIRMED_BOOKING_DATES": "15.04.2023 - 19.04.2023", "NAME": "Erik Berg", "ID": "product_ID:5478112"}


 That is perfect, thank you


User: That is perfect, thank you
Bot: ANSWER: Wonderful, I'm glad to hear that! Your booking for the cozy Barcelona apartment from 15.04.2023 to 19.04.2023 is now confirmed. Enjoy your stay! 

JSON STRING: {"ANSWER": "Wonderful, I'm glad to hear that! Your booking for the cozy Barcelona apartment from 15.04.2023 to 19.04.2023 is now confirmed. Enjoy your stay!", "BOOKING_CONFIRMED": true, "CONFIRMED_BOOKING_DATES": "15.04.2023 - 19.04.2023", "NAME": "Erik Berg", "ID": "product_ID:5478112"}


KeyboardInterrupt: Interrupted by user

In [45]:
answer

'ANSWER: Sure, I have found some great options for you in Barcelona! \n\nEntire apartment, cozy and modern 2BR near to bars, amenities are: ["Air conditioning", "Heating", "TV", "Kitchen", "Essentials", "Washer", "Wifi"] Located in city: Barcelona ID of this booking is: product_ID:6439201 \n\nPrivate room in stylish flat near to bars and restaurants, amenities are: ["Air conditioning", "Heating", "TV", "Kitchen", "Essentials", "Washer", "Wifi"] Located in city: Barcelona ID of this booking is: product_ID:7543129 \n\nEntire apartment, bright and modern 1BR near to bars and restaurants, amenities are: ["Air conditioning", "Heating", "TV", "Kitchen", "Essentials", "Washer", "Wifi"] Located in city: Barcelona ID of this booking is: product_ID:8352167 \n\nPlease let me know which one you would like to book, and provide me with your booking date start, booking date end, name, and surname. \n\nJSON STRING: {"ANSWER": "Sure, I have found some great options for you in Barcelona!", "BOOKING_CONF

In [52]:
print(re.findall(r'{.+}',answer)[0])

{"ANSWER": "Sure, I have found some great options for you in Barcelona!", "BOOKING_CONFIRMED": false, "CONFIRMED_BOOKING_DATES": null, "NAME": null, "ID": null}


In [41]:
import re

In [38]:
json_string = re

In [39]:
json.loads(json_string)

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [77]:
BOOKING_AGENT_GET_INFO_TEMPLATE = """
You are a room booking assistant. Be nice and helpful and get these informations from the customer:
BOOKING_START, BOOKING_END, FULL_NAME, CITY, BUDGET, GUEST_COUNT

Example of how such conversation may go, only generate responses to the users last question:
Human: Hi, I'm looking to book a room in CITY.
assistant: Great, I can help with that. What's your budget?
Human: I'm looking to spend around BUDGET euros per month.
assistant: Okay, I've found a few options that fit your budget. Would you prefer to be in the city center or in a quieter area?
Human: I want to be close to the main attractions.
assistant: Got it. How many people will be staying in the room?
Human: It's just me.


Collect this information and ALWAYS ADD JSON STRING AT THE END OF THE RESPONSE AS JSON STRING with keys:
BOOKING_START, BOOKING_END, FULL_NAME, CITY, BUDGET, GUEST_COUNT

PLEASE ALWAYS HAVE THE JSON STRING PART

{chat_history}
Human: {user_msg}
Chatbot:"""

BOOKING_AGENT_TEMPLATE = """
You are a room booking assistant. Be kind, detailed and try to sell the booking of the apartment to me. 
Present the three given queried search result in a nice way as answer to the user input.

Collect entities when user confirms booking choice, we need these entities: CONFIRMED_BOOKING_START, CONFIRMED_BOOKING_END, FULL_NAME

ALWAYS ADD JSON STRING AT THE END OF THE RESPONSE, ALWAYS PROVIDE RESPONSE AS JSON STRING with keys:
ANSWER, BOOKING_CONFIRMED, CONFIRMED_BOOKING_START, CONFIRMED_BOOKING_END, NAME and the ID of this.

PLEASE ALWAYS HAVE THE JSON STRING PART


{chat_history}
Human: {user_msg}
Chatbot:"""

BOOKING_PROMPT_SELL = PromptTemplate(
    input_variables=["chat_history", "user_msg"],
    template=BOOKING_AGENT_TEMPLATE
)

BOOKING_PROMPT_ASK = PromptTemplate(
    input_variables=["chat_history", "user_msg"],
    template=BOOKING_AGENT_GET_INFO_TEMPLATE
)

In [83]:
def booking_get_requirement_info(userinput):
    memory = ConversationBufferMemory(memory_key="chat_history")
    llm_chain = LLMChain(
        llm=OpenAI(model_name="gpt-3.5-turbo", temperature=0.8,
                   openai_api_key=openai_api_key),
        prompt=BOOKING_PROMPT_ASK,
        verbose=False,
        memory=memory,
    )

    answer = llm_chain.predict(user_msg=f"{userinput}")
    return "Bot:", answer

In [84]:
booking_get_requirement_info('I wanna book a room in Madrid')

('Bot:',
 'Sure, when would you like to check-in and check-out?\nJSON STRING: {"BOOKING_START": "", "BOOKING_END": "", "FULL_NAME": "", "CITY": "Madrid", "BUDGET": "", "GUEST_COUNT": ""}')