In [85]:
import json
import os
import re
import time

import numpy as np
import pandas as pd
import redis
from redis.commands.search.field import TagField
from redis.commands.search.field import TextField
from redis.commands.search.field import VectorField
from redis.commands.search.query import Query

In [86]:
# Redis client shared by the rest of the notebook (localhost, port 6379, empty password).
redis_conn = redis.Redis(host='localhost', port=6379, password='')

In [87]:
# Upper bound on the number of characters kept from a free-text CSV column.
MAX_TEXT_LENGTH = 512
# Number of listings to work with.
NUMBER_PRODUCTS = 10000


def auto_truncate(val):
    """Return ``val`` converted to ``str`` and cut to at most MAX_TEXT_LENGTH characters."""
    text = str(val)
    return text[:MAX_TEXT_LENGTH]


In [88]:
# Read the listings CSV. Undecodable bytes are ignored, malformed lines are skipped,
# and the listed text columns are passed through `auto_truncate` while parsing.
room_data = pd.read_csv(
    'app/data/airbnb.csv',
    delimiter=',',
    encoding_errors='ignore',
    on_bad_lines='skip',
    converters={
        column: auto_truncate
        for column in ('bullet_point', 'amenities', 'name')
    },
)

  room_data = pd.read_csv(


In [89]:
# Keep only the columns used further down, in this order.
room_data = room_data.loc[
    :,
    [
        'listing_id',
        'name',
        'host_id',
        'host_since',
        'host_location',
        'host_is_superhost',
        'host_total_listings_count',
        'host_has_profile_pic',
        'host_identity_verified',
        'neighbourhood',
        'district',
        'city',
        'property_type',
        'room_type',
        'accommodates',
        'bedrooms',
        'amenities',
        'price',
        'minimum_nights',
        'maximum_nights',
        'review_scores_rating',
        'review_scores_accuracy',
        'review_scores_cleanliness',
        'review_scores_checkin',
        'review_scores_communication',
        'review_scores_location',
        'review_scores_value',
        'instant_bookable',
    ],
]

In [90]:
# room_data = room_data.drop_duplicates(subset=['Area'], keep='first', ignore_index=True)

In [91]:
# Draw a reproducible random subset of the listings.
# - The size comes from NUMBER_PRODUCTS instead of a second hard-coded 10000, and is
#   capped at the number of rows present so `sample` cannot raise on a smaller file.
# - The fixed `random_state` makes a fresh Restart & Run All pick the same rows again.
room_data = room_data.sample(
    n=min(NUMBER_PRODUCTS, len(room_data)),
    random_state=42,
).reset_index(drop=True)

In [None]:
# Look up listings whose id contains the given digits. `listing_id` holds integers
# (see the metadata sample printed further down), and the `.str` accessor only works
# on string data, so the column is cast to `str` before the substring match.
room_data[room_data.listing_id.astype(str).str.contains('3444896')]

In [96]:
# Take at most NUMBER_PRODUCTS rows from the top of the frame and convert them
# into a {row_index: {column: value}} dictionary.
rooms_metadata = room_data.iloc[:NUMBER_PRODUCTS].to_dict(orient='index')

In [97]:
from sentence_transformers import SentenceTransformer

# Sentence-embedding model; the same `model` object is used later to embed the user query.
model = SentenceTransformer('sentence-transformers/all-distilroberta-v1')

# Columns whose values are concatenated into the text embedded for each listing.
KEYWORD_COLUMNS = ['name', 'host_location', 'neighbourhood', 'property_type', 'amenities', 'price']


def build_keyword_text(listing):
    """Join the KEYWORD_COLUMNS values of one listing dict into a ', '-separated string."""
    return ', '.join(str(listing[key]) for key in KEYWORD_COLUMNS)


# One keyword string per listing, in the iteration order of `rooms_metadata`.
item_keywords = [build_keyword_text(listing) for listing in rooms_metadata.values()]

# The same strings keyed by listing_id; a repeated listing_id keeps its last occurrence.
room_context = {
    listing['listing_id']: text
    for listing, text in zip(rooms_metadata.values(), item_keywords)
}

# Encode every sentence in a single batched call rather than one `encode` call per
# sentence. `list(...)` splits the returned 2-D array into one vector per listing, so
# `item_keywords_vectors[i]` is still the embedding of `item_keywords[i]`.
item_keywords_vectors = list(model.encode(item_keywords, show_progress_bar=True))

KeyboardInterrupt: 

In [98]:
# Peek at the keyword string built for the first listing.
item_keywords[0]

'Aluguel temporada olimpÃ\xadadas, Rio de Janeiro, State of Rio de Janeiro, Brazil, Barra da Tijuca, Entire apartment, ["Elevator", "Dedicated workspace", "Kitchen", "Washer", "Air conditioning", "Pool", "Long term stays allowed", "Gym", "Essentials", "Dryer", "Iron", "TV", "Wifi", "Hot tub", "Fire extinguisher", "Hangers", "Heating", "Free parking on premises", "Cable TV"], 3000'

In [99]:
# Every listing needs exactly one embedding before anything is written to Redis.
# (Bare `len(...)` expressions that are not the last line of a cell are evaluated and
# thrown away, so the comparison is made explicit as an assertion.)
assert len(item_keywords_vectors) == len(rooms_metadata), (
    f'{len(item_keywords_vectors)} vectors for {len(rooms_metadata)} listings'
)

# Check one of the products
rooms_metadata[2]

{'listing_id': 8325748,
 'name': 'Valmy',
 'host_id': 43888027,
 'host_since': '2015-09-10',
 'host_location': 'Paris, Ile-de-France, France',
 'host_is_superhost': 'f',
 'host_total_listings_count': 1.0,
 'host_has_profile_pic': 't',
 'host_identity_verified': 't',
 'neighbourhood': 'Palais-Bourbon',
 'district': nan,
 'city': 'Paris',
 'property_type': 'Entire apartment',
 'room_type': 'Entire place',
 'accommodates': 2,
 'bedrooms': 1.0,
 'amenities': '["Essentials", "Stove", "Hot water", "Hangers", "Smoke alarm", "Wifi", "Luggage dropoff allowed", "Long term stays allowed", "Dedicated workspace", "Host greets you", "TV", "Iron", "Dryer", "Refrigerator", "Oven", "Dishes and silverware", "Private entrance", "Heating", "Ethernet connection", "Kitchen", "Hair dryer", "Coffee maker", "Washer", "Microwave", "Cooking basics", "Cable TV", "Paid parking off premises", "Shampoo", "Shower gel", "Bed linens", "Carbon monoxide alarm", "Elevator"]',
 'price': 140,
 'minimum_nights': 3,
 'maximum

In [119]:
import logging

import numpy as np
import redis

from redis.commands.search.field import VectorField
from redis.commands.search.field import TextField
from redis.commands.search.query import Query


def get_redis_connector(host='localhost', port=6379, password=''):
    """Create a Redis client for the given server.

    Args:
        host: Server host name.
        port: Server port.
        password: Password passed to the client.

    Returns:
        The ``redis.Redis`` client object.

    Raises:
        Exception: whatever ``redis.Redis(...)`` raised. The error is logged with its
            traceback and then re-raised, so a failure is never turned into a silent
            ``None`` return that only breaks later at the first use of the client.
    """
    logging.warning('Connecting to redis...')
    try:
        return redis.Redis(
            host=host,
            port=port,
            password=password
        )
    except Exception as e:
        logging.error(e, exc_info=True)
        raise


def get_booking_query(topK):
    """Build the KNN search query over the `item_keyword_vector` field.

    The query takes the vector through the `$vec_param` parameter, exposes the distance
    as `vector_score`, sorts by it, pages to the first `topK` hits and returns the
    score plus the `property_type`, `name`, `amenities` and `city` fields.

    Args:
        topK: Number of neighbours requested and page size.

    Returns:
        The configured ``Query`` object (dialect 2).
    """
    logging.warning('Querying redis...')
    knn_expression = f'*=>[KNN {topK} @item_keyword_vector $vec_param AS vector_score]'
    returned_fields = ('vector_score', 'property_type', 'name', 'amenities', 'city')

    query = Query(knn_expression)
    query = query.sort_by('vector_score')
    query = query.paging(0, topK)
    query = query.return_fields(*returned_fields)
    return query.dialect(2)


def create_flat_index(redis_conn, vector_field_name, number_of_vectors, vector_dimensions=512, distance_metric='L2'):
    """Create a search index with one FLAT vector field and four text fields.

    Args:
        redis_conn: Client whose ``ft()`` handle is used to create the index.
        vector_field_name: Name of the vector field.
        number_of_vectors: Used for both ``INITIAL_CAP`` and ``BLOCK_SIZE``.
        vector_dimensions: Value of the vector field's ``DIM`` attribute.
        distance_metric: Value of the vector field's ``DISTANCE_METRIC`` attribute.
    """
    logging.warning(f'Generating vector index for `{number_of_vectors}` records.')

    vector_attributes = {
        "TYPE": "FLOAT32",
        "DIM": vector_dimensions,
        "DISTANCE_METRIC": distance_metric,
        "INITIAL_CAP": number_of_vectors,
        "BLOCK_SIZE": number_of_vectors,
    }

    # Vector field first, then the text fields, each aliased to its own name.
    schema = [VectorField(vector_field_name, "FLAT", vector_attributes)]
    for text_field_name in ("property_type", "name", "amenities", "city"):
        schema.append(TextField(text_field_name, as_name=text_field_name))

    redis_conn.ft().create_index(schema)


def load_vectors(client, product_metadata, vector_dict, vector_field_name):
    """Write every listing and its embedding to Redis as a hash.

    Each entry of `product_metadata` becomes one hash stored under the key
    ``'listing_id: <listing_id>'``. The hash holds the entry's metadata fields plus
    `vector_field_name`, which contains the embedding as raw float32 bytes. All HSET
    commands are queued on a non-transactional pipeline and sent with one `execute()`.

    Args:
        client: Redis client providing ``pipeline(transaction=False)``.
        product_metadata: Mapping ``index -> metadata dict``; each metadata dict must
            contain ``'listing_id'``. The dicts are not modified.
        vector_dict: Container indexable by the same ``index`` values, yielding numpy
            arrays.
        vector_field_name: Hash field under which the vector bytes are stored.
    """
    logging.warning('Loading vectors to redis...')
    p = client.pipeline(transaction=False)
    for index, metadata in product_metadata.items():
        # hash key
        key = 'listing_id: ' + str(metadata['listing_id'])

        # hash values: work on a shallow copy so the raw vector bytes are not written
        # back into the caller's metadata dict
        item_metadata = dict(metadata)
        item_metadata[vector_field_name] = vector_dict[index].astype(np.float32).tobytes()

        # HSET
        p.hset(key, mapping=item_metadata)

    p.execute()


In [108]:

# Name of the hash field / index field that stores the embedding.
ITEM_KEYWORD_EMBEDDING_FIELD = 'item_keyword_vector'
# Dimension passed to the vector index.
TEXT_EMBEDDING_DIMENSION = 768
NUMBER_PRODUCTS = 10000

print(f'Loading and Indexing + {NUMBER_PRODUCTS} products')

# flush all data
redis_conn.flushall()

Loading and Indexing + 10000 products


True

In [109]:

# Build the flat vector index (cosine distance) ...
create_flat_index(
    redis_conn=redis_conn,
    vector_field_name=ITEM_KEYWORD_EMBEDDING_FIELD,
    number_of_vectors=NUMBER_PRODUCTS,
    vector_dimensions=TEXT_EMBEDDING_DIMENSION,
    distance_metric='COSINE',
)

# ... then store every listing together with its embedding.
load_vectors(
    client=redis_conn,
    product_metadata=rooms_metadata,
    vector_dict=item_keywords_vectors,
    vector_field_name=ITEM_KEYWORD_EMBEDDING_FIELD,
)



In [103]:
from langchain.chains import LLMChain
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate

MODEL = 'gpt-3.5-turbo'

# The API key is read from the environment instead of being written into the notebook.
# A key that has ever been saved in a notebook must be treated as leaked and revoked.
openai_api_key = os.environ.get("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running this cell")

# LLM that turns the user's free-text request into search keywords.
# (Uses the `openai_api_key` variable defined above; the `openai` module is never imported here.)
llm = OpenAI(
    model_name=MODEL,
    temperature=0.3,
    openai_api_key=openai_api_key
)

# `{product_description}` is replaced by the user's input when the chain runs.
booking_prompt = PromptTemplate(
    input_variables=["product_description"],
    template="Create comma seperated product keywords to perform a query on a airbnb dataset for this user input: "
             "{product_description}",
)

# The chain must be given the prompt object defined in this cell.
chain = LLMChain(llm=llm, prompt=booking_prompt)


In [110]:
# Ask the user what they are looking for and echo the request.
userinput = input("Hey im a Roo(m)bot, how can i help you today? ")
print("User:", userinput)

# The keyword chain receives the raw request as its single input variable.
keywords = chain.run(userinput)

# Number of hits requested from the index.
topK = 3

# Embed the keywords and serialise the vector as raw float32 bytes.
query_vector = model.encode(keywords).astype(np.float32).tobytes()

# Build the query and bind the vector to its `vec_param` parameter.
q = get_booking_query(topK)
params_dict = dict(vec_param=query_vector)

# Run the search.
results = redis_conn.ft().search(q, query_params=params_dict)


Hey im a Roo(m)bot, how can i help you today?  apartment balcony madrid


User: apartment balcony madrid




In [115]:
# Display the result object returned by `redis_conn.ft().search(...)`.
results

Result{0 total, docs: []}

In [118]:
# Inspect the argument list produced by the query object `q`.
q.get_args()

['*=>[KNN 3 @item_keyword_vector $vec_param AS vector_score]',
 'RETURN',
 5,
 'vector_score',
 'property_type',
 'name',
 'amenities',
 'city',
 'SORTBY',
 'vector_score',
 'ASC',
 'DIALECT',
 2,
 'LIMIT',
 0,
 3]

In [53]:
# Concatenate one space-joined description per search hit; each description ends
# with three newlines.
full_result_string = ''.join(
    ' '.join(
        [
            product.property_type,
            product.name,
            ", amenities are:",
            product.amenities,
            " Located in city:",
            product.city,
            'ID of this booking is:',
            product.id,
            "\n\n\n",
        ]
    )
    for product in results.docs
)


from langchain.memory import ConversationBufferMemory

# Prompt text for the selling assistant. `{chat_history}` and `{user_msg}` are the two
# placeholders declared as input variables of the PromptTemplate built from this string.
BOOKING_AGENT_TEMPLATE = """
You are a room booking assistant. Be kind, detailed and try to sell the booking of the apartment to me. 
Present the three given queried search result in a nice way as answer to the user input.

Collect entities after user confirms booking choice, we need these entities: booking_date_start, booking_date_end, name, surname. 

dont ask questions back! 

ALWAYS PROVIDE RESPONSE AS JSON STRING with keys:
ANSWER, BOOKING_CONFIRMED, CONFIRMED_BOOKING_DATES, NAME and the ID of this.

PLEASE ALWAYS HAVE THE JSON STRING PART



{chat_history}
Human: {user_msg}
Chatbot:"""


# Prompt object wrapping the selling template and its two input variables.
BOOKING_PROMPT = PromptTemplate(
    template=BOOKING_AGENT_TEMPLATE,
    input_variables=["chat_history", "user_msg"],
)

# Conversation memory stored under the `chat_history` key used by the template.
memory = ConversationBufferMemory(memory_key="chat_history")

# Chain combining the chat model, the selling prompt and the memory.
llm_chain = LLMChain(
    llm=OpenAI(
        model_name="gpt-3.5-turbo",
        temperature=0.8,
        openai_api_key=openai_api_key,
    ),
    prompt=BOOKING_PROMPT,
    memory=memory,
    verbose=False,
)


# First turn: hand the search hits plus the original request to the assistant.
answer = llm_chain.predict_and_parse(user_msg= f"{full_result_string} ---\n\n {userinput}")
print("Bot:", answer)
time.sleep(0.5)

# Follow-up turns. An empty line or "exit"/"quit" ends the conversation; without an
# exit condition this cell would never finish and no cell below it could run.
while True:
    follow_up = input("")
    print("User:", follow_up)
    if follow_up.strip().lower() in ("", "exit", "quit"):
        break
    answer = llm_chain.predict(
        user_msg=follow_up
    )
    print("Bot:", answer)
    time.sleep(0.5)

In [None]:
# Display the most recent reply stored in `answer`.
answer

In [None]:
# Print the JSON object embedded in the bot answer.
# - DOTALL lets `.` match newlines, so an object spread over several lines is found too.
# - A missing object is reported instead of raising IndexError on an empty match list.
json_match = re.search(r'{.+}', answer, flags=re.DOTALL)
print(json_match.group(0) if json_match else 'No JSON object found in the answer')

In [None]:
import re

In [None]:
# Extract the JSON object embedded in the bot answer as text. (Assigning the `re`
# module itself to `json_string` cannot be parsed by `json.loads` afterwards.)
# DOTALL lets `.` match newlines, so a multi-line object is captured as a whole.
json_match = re.search(r'{.+}', answer, flags=re.DOTALL)
if json_match is None:
    raise ValueError('No JSON object found in the bot answer')
json_string = json_match.group(0)

In [None]:
# Parse the extracted JSON text into a Python object and display it.
json.loads(json_string)

In [None]:
# Prompt text for the requirement-gathering assistant. `{chat_history}` and
# `{user_msg}` are the placeholders declared as input variables of BOOKING_PROMPT_ASK.
BOOKING_AGENT_GET_INFO_TEMPLATE = """
You are a room booking assistant. Be nice and helpful and get these informations from the customer:
BOOKING_START, BOOKING_END, FULL_NAME, CITY, BUDGET, GUEST_COUNT

Example of how such conversation may go, only generate responses to the users last question:
Human: Hi, I'm looking to book a room in CITY.
assistant: Great, I can help with that. What's your budget?
Human: I'm looking to spend around BUDGET euros per month.
assistant: Okay, I've found a few options that fit your budget. Would you prefer to be in the city center or in a quieter area?
Human: I want to be close to the main attractions.
assistant: Got it. How many people will be staying in the room?
Human: It's just me.


Collect this information and ALWAYS ADD JSON STRING AT THE END OF THE RESPONSE AS JSON STRING with keys:
BOOKING_START, BOOKING_END, FULL_NAME, CITY, BUDGET, GUEST_COUNT

PLEASE ALWAYS HAVE THE JSON STRING PART

{chat_history}
Human: {user_msg}
Chatbot:"""

# Prompt text for the selling assistant, used by BOOKING_PROMPT_SELL. This assignment
# rebinds BOOKING_AGENT_TEMPLATE, which is also assigned in an earlier cell of this notebook.
BOOKING_AGENT_TEMPLATE = """
You are a room booking assistant. Be kind, detailed and try to sell the booking of the apartment to me. 
Present the three given queried search result in a nice way as answer to the user input.

Collect entities when user confirms booking choice, we need these entities: CONFIRMED_BOOKING_START, CONFIRMED_BOOKING_END, FULL_NAME

ALWAYS ADD JSON STRING AT THE END OF THE RESPONSE, ALWAYS PROVIDE RESPONSE AS JSON STRING with keys:
ANSWER, BOOKING_CONFIRMED, CONFIRMED_BOOKING_START, CONFIRMED_BOOKING_END, NAME and the ID of this.

PLEASE ALWAYS HAVE THE JSON STRING PART


{chat_history}
Human: {user_msg}
Chatbot:"""

# Both prompts take the same two input variables.
# Prompt used when presenting and selling search results.
BOOKING_PROMPT_SELL = PromptTemplate(
    template=BOOKING_AGENT_TEMPLATE,
    input_variables=["chat_history", "user_msg"],
)

# Prompt used when collecting the customer's requirements.
BOOKING_PROMPT_ASK = PromptTemplate(
    template=BOOKING_AGENT_GET_INFO_TEMPLATE,
    input_variables=["chat_history", "user_msg"],
)

In [None]:
def booking_get_requirement_info(userinput, memory=None):
    """Run one turn of the requirement-gathering assistant.

    Args:
        userinput: The user's message; it is formatted into the `user_msg` prompt variable.
        memory: Optional conversation memory exposing the `chat_history` key. Pass the
            same object on every call to carry the conversation across turns. When
            omitted, a fresh ``ConversationBufferMemory`` is created, so the turn starts
            with an empty history.

    Returns:
        The tuple ``("Bot:", answer)`` where ``answer`` is the chain's prediction.
    """
    if memory is None:
        memory = ConversationBufferMemory(memory_key="chat_history")

    llm_chain = LLMChain(
        llm=OpenAI(model_name="gpt-3.5-turbo", temperature=0.8,
                   openai_api_key=openai_api_key),
        prompt=BOOKING_PROMPT_ASK,
        verbose=False,
        memory=memory,
    )

    answer = llm_chain.predict(user_msg=f"{userinput}")
    return "Bot:", answer

In [None]:
# Try the requirement-gathering helper with a sample request; it returns ("Bot:", answer).
booking_get_requirement_info('I wanna book a room in Madrid')