In [None]:
import numpy as np
import pandas as pd
import time
import redis
import time

from redis.commands.search.field import VectorField
from redis.commands.search.field import TextField
from redis.commands.search.field import TagField
from redis.commands.search.field import NumericField
from redis.commands.search.query import Query

In [None]:
# Redis client for the local server that holds the listing hashes and the vector index.
redis_conn = redis.Redis(host='localhost', port=6379, password='')

In [None]:
# Default maximum number of characters kept from any free-text value.
MAX_TEXT_LENGTH = 512


def auto_truncate(val, max_length=None):
    """Return ``val`` converted to ``str`` and clipped to a maximum length.

    Args:
        val: Any value; it is passed through ``str()`` first, so non-string
            inputs (numbers, ``None``, NaN) are truncated by their text form.
        max_length: Optional character limit. When ``None`` (the default) the
            module-level ``MAX_TEXT_LENGTH`` is used, which keeps the function
            usable as a one-argument pandas converter.

    Returns:
        str: The first ``max_length`` characters of ``str(val)``.
    """
    limit = MAX_TEXT_LENGTH if max_length is None else max_length
    return str(val)[:limit]


In [None]:
# Load the tab-separated listings file; undecodable bytes are ignored, malformed
# rows are skipped, and the listed free-text columns are clipped by auto_truncate.
room_data = pd.read_csv(
    'app/data/tripath.csv',
    delimiter='\t',
    encoding_errors='ignore',
    on_bad_lines='skip',
    converters=dict.fromkeys(('bullet_point', 'amenities', 'name'), auto_truncate),
)

In [None]:
# Display the loaded listings frame (notebook rich repr).
room_data

In [None]:
# Upper bound on listings kept per city, and the seed that makes the random
# sample repeatable across kernel restarts.
MAX_ROOMS_PER_CITY = 2000
SAMPLE_SEED = 42

# Encode each city name as an integer category code.
room_data['city_id'] = room_data.city.astype('category').cat.codes

# Keep a random sample of at most MAX_ROOMS_PER_CITY rows per city and renumber the index.
room_data = room_data.groupby(
    'city', group_keys=False
).apply(
    lambda group: group.sample(min(len(group), MAX_ROOMS_PER_CITY), random_state=SAMPLE_SEED)
).reset_index(drop=True)

# Lookup table: city name -> city_id (one entry per distinct pair).
city_mapping = {
    city: city_id
    for (city, city_id), _count in room_data[['city', 'city_id']].value_counts().items()
}

# Restrict to the columns used downstream; .copy() makes the result an independent
# frame so later column assignments do not act on a slice of the original.
room_data = room_data[
    [
        'listing_id', 'description', 'city', 'city_id', 'status', 'CHECK-IN', 'CHECK-OUT', 'price'
    ]
].copy()
city_mapping

In [None]:
# Number of listings per city.
room_data['city'].value_counts()

In [None]:
# Re-derive the integer city code from the city column (same statement as in the preparation cell above).
room_data['city_id'] = room_data.city.astype('category').cat.codes

In [None]:
# Lookup table: city name -> city_id, built from the distinct (city, city_id) pairs.
city_mapping = {
    city: city_id
    for (city, city_id), _count in room_data[['city', 'city_id']].value_counts().items()
}

In [None]:
# Preview the per-city row counts after capping each city at 2000 sampled rows (room_data itself is not reassigned here).
room_data.groupby('city', group_keys=False).apply(lambda x: x.sample(min(len(x), 2000))).city.value_counts()

In [None]:
# Keep a random sample of at most 2000 rows per city and renumber the index from 0
# (same statement as in the preparation cell above; the sample is unseeded).
room_data = room_data.groupby('city', group_keys=False).apply(lambda x: x.sample(min(len(x), 2000))).reset_index(drop=True)

In [None]:
# Row count of the sampled listings frame; echoed as the cell output.
NUMBER_PRODUCTS = len(room_data)
NUMBER_PRODUCTS

In [None]:
# Convert the listings frame into a dict keyed by row index: {index: {column: value}}
rooms_metadata = room_data.to_dict(orient='index')

In [None]:
# Distinct city ids present in the metadata.
{listing['city_id'] for listing in rooms_metadata.values()}

In [None]:
from sentence_transformers import SentenceTransformer

# Sentence-embedding model; the same model later encodes the search query.
model = SentenceTransformer('sentence-transformers/all-distilroberta-v1')

rooms_metadata = room_data.to_dict(orient='index')

# One sentence per listing: the truncated city, description and price joined by ', '.
item_keywords = [
    ', '.join(auto_truncate(listing[column]) for column in ('city', 'description', 'price'))
    for listing in rooms_metadata.values()
]

# Encode every sentence in a single batched call instead of one model.encode()
# call per sentence. list(...) keeps the result a list of 1-D vectors, in the
# same order as rooms_metadata, as the loading code expects.
item_keywords_vectors = list(model.encode(item_keywords, show_progress_bar=True))

In [None]:
# Inspect the first sentence that was passed to the embedding model.
item_keywords[0]

In [None]:
# Every listing must have exactly one embedding; fail loudly if the two drifted apart
# (the bare len(...) expressions this replaces produced no output and checked nothing).
assert len(item_keywords_vectors) == len(rooms_metadata), (
    f'{len(item_keywords_vectors)} vectors for {len(rooms_metadata)} listings'
)
# Check one of the products
rooms_metadata[2]

In [None]:

# Name of the hash field / index attribute that stores each listing's embedding.
ITEM_KEYWORD_EMBEDDING_FIELD='item_keyword_vector'
# Vector size declared to the index; it has to equal the length of the vectors
# produced by `model` (NOTE(review): 768 is assumed for this model - confirm).
TEXT_EMBEDDING_DIMENSION=768
# Derived from the data instead of a hard-coded 19940, which silently went stale
# whenever the sample size changed.
NUMBER_PRODUCTS=room_data.shape[0]

print ('Loading and Indexing + ' +  str(NUMBER_PRODUCTS) + ' products')

# WARNING: destructive. FLUSHALL removes every key in every database of this
# Redis server, not only the data written by this notebook.
redis_conn.flushall()

In [None]:
import os

import openai
import logging

from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI
from langchain.chains import LLMChain

MODEL = 'gpt-3.5-turbo'

# Never commit API keys to a notebook: the key is read from the environment.
# Any key that was previously stored in this file must be treated as leaked
# and revoked in the OpenAI dashboard.
openai_api_key = os.environ.get("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running this notebook.")
openai.api_key = openai_api_key

# LLM used to turn a free-text user request into search keywords.
llm = OpenAI(
    model_name=MODEL,
    temperature=0.3,
    openai_api_key=openai_api_key
)

# Prompt with a single placeholder, {product_description}, for the user request.
booking_prompt = PromptTemplate(
    input_variables=["product_description"],
    template="Create comma seperated product keywords to perform a query on a airbnb dataset for this user input: "
             "{product_description}",
)


def get_booking_chain():
    """Build the keyword-extraction chain.

    Returns:
        LLMChain: A chain that combines the module-level ``llm`` with
        ``booking_prompt``.
    """
    logging.info('Building the chatgpt booking assistant object.')
    return LLMChain(llm=llm, prompt=booking_prompt)


booking_chain = get_booking_chain()


In [None]:
import logging

import numpy as np
import redis

from redis.commands.search.field import TextField, VectorField, NumericField
from redis.commands.search.query import Query, NumericFilter


def get_redis_connector(host='localhost', port=6379, password=''):
    """Create a Redis client and verify that the server is reachable.

    Args:
        host: Redis host name.
        port: Redis TCP port.
        password: Redis password ('' for none).

    Returns:
        A connected ``redis.Redis`` client, or ``None`` when the connection
        attempt fails (the error is logged with its traceback).
    """
    logging.warning('Connecting to redis...')
    try:
        client = redis.Redis(
            host=host,
            port=port,
            password=password
        )
        # Constructing the client does not open a connection; ping so that an
        # unreachable server or bad credentials are caught and logged here
        # rather than surfacing at the first real command.
        client.ping()
        return client
    except Exception as e:
        logging.error(e, exc_info=True)
        return None


def get_booking_query(topK, city_code):
    """Build the KNN vector-search query, optionally restricted to one city.

    Args:
        topK: Number of nearest neighbours to request and to page.
        city_code: Numeric city id to filter on (int or numeric string).
            ``None`` or ``''`` disables the filter. ``0`` is a valid city id
            (pandas category codes start at 0) and is filtered on like any
            other id; the previous truthiness check silently dropped it.

    Returns:
        Query: Sorted by ``vector_score``, returning ``vector_score``,
        ``description``, ``city``, ``city_id`` and ``price``, using dialect 2.
        The query expects the encoded vector under the ``vec_param`` parameter.
    """
    logging.warning('Querying redis...')
    query = Query(
        f'*=>[KNN {topK} @item_keyword_vector $vec_param AS vector_score]'
    )
    if city_code is not None and city_code != '':
        city_code = int(city_code)
        # min == max selects exactly this city id.
        query = query.add_filter(NumericFilter('city_id', city_code, city_code))
    return query.sort_by('vector_score').paging(0, topK).return_fields(
        'vector_score', 'description', 'city', 'city_id', 'price'
    ).dialect(2)


def create_flat_index(redis_conn, vector_field_name, number_of_vectors, vector_dimensions=512, distance_metric='L2'):
    """Create the search index: one FLAT vector field plus the listing attributes.

    Args:
        redis_conn: Redis client on which ``ft().create_index`` is called.
        vector_field_name: Name of the vector field in the index.
        number_of_vectors: Used for both ``INITIAL_CAP`` and ``BLOCK_SIZE``.
        vector_dimensions: Value of the vector field's ``DIM`` option.
        distance_metric: Value of the ``DISTANCE_METRIC`` option.
    """
    logging.warning(f'Generating vector index for `{number_of_vectors}` records.')

    vector_options = {
        "TYPE": "FLOAT32",
        "DIM": vector_dimensions,
        "DISTANCE_METRIC": distance_metric,
        "INITIAL_CAP": number_of_vectors,
        "BLOCK_SIZE": number_of_vectors
    }
    schema = [
        VectorField(vector_field_name, "FLAT", vector_options),
        TextField("description", as_name='description'),
        TextField("city", as_name='city'),
        NumericField("city_id", as_name='city_id'),
        TextField("price", as_name='price'),
    ]
    redis_conn.ft().create_index(schema)


def load_vectors(client, product_metadata, vector_dict, vector_field_name):
    """Write one Redis hash per listing, including its embedding, via a pipeline.

    Args:
        client: Redis client; only ``pipeline(transaction=False)`` is used.
        product_metadata: Mapping ``index -> {field: value}``; every entry
            must contain ``'listing_id'``. The mapping is not modified.
        vector_dict: Indexable by the same ``index`` values, yielding numpy
            arrays; each is stored as float32 bytes.
        vector_field_name: Hash field under which the vector bytes are stored.
    """
    logging.warning('Loading vectors to redis...')
    p = client.pipeline(transaction=False)
    for index, item_metadata in product_metadata.items():
        # hash key
        key = 'listing_id: ' + str(item_metadata['listing_id'])

        # hash values: build a fresh mapping so the caller's metadata dict is
        # not polluted with the raw vector bytes.
        item_keywords_vector = vector_dict[index].astype(np.float32).tobytes()
        hash_values = {**item_metadata, vector_field_name: item_keywords_vector}

        # HSET (queued; sent to the server by execute() below)
        p.hset(key, mapping=hash_values)

    p.execute()



In [None]:
# Build the FLAT vector index (cosine distance), then upload one hash per listing.
create_flat_index(
    redis_conn=redis_conn,
    vector_field_name=ITEM_KEYWORD_EMBEDDING_FIELD,
    number_of_vectors=room_data.shape[0],
    vector_dimensions=TEXT_EMBEDDING_DIMENSION,
    distance_metric='COSINE',
)
load_vectors(
    client=redis_conn,
    product_metadata=rooms_metadata,
    vector_dict=item_keywords_vectors,
    vector_field_name=ITEM_KEYWORD_EMBEDDING_FIELD,
)

In [None]:
# Distinct city ids present in the metadata.
{listing['city_id'] for listing in rooms_metadata.values()}

In [None]:
# Show the city name -> city_id lookup.
city_mapping

In [None]:
#keywords = booking_chain.run(
#    user_input + ', in city: ' + context_entities['CITY'] + ', these are reservation specification: ' + str(context_entities)
#)
#logging.warning(user_input + ', in city: ' + context_entities['CITY'] + ', these are reservation specification: ' + str(context_entities))
keywords = 'Alicante, accommodation, balcony,budget Cheap'

top_k = 3
# vectorize the query
query_vector = model.encode(keywords).astype(np.float32).tobytes()
# Bind the encoded query to the $vec_param placeholder of the KNN clause
# (this name was previously misspelled, which raised NameError).
params_dict = {"vec_param": query_vector}

query = get_booking_query(top_k, 0)
print(query.query_string())

# Execute the query
results = redis_conn.ft().search(query, query_params=params_dict)
logging.warning(results)


In [None]:
# Display the raw search result object.
results

In [None]:
#q.get_args()

In [None]:
from langchain.memory import ConversationBufferMemory

# Render every search hit as text for the prompt. Only fields that the query
# actually returns (description, price, city) plus the document id are used;
# property_type / name / amenities are neither indexed nor returned, so reading
# them raised AttributeError.
full_result_string = ''.join(
    f"{product.description}, price is: {product.price}, located in city: {product.city}, "
    f"ID of this booking is: {product.id}\n\n\n"
    for product in results.docs
)

BOOKING_AGENT_TEMPLATE = """
You are a room booking assistant. Be kind, detailed and try to sell the booking of the apartment to me. 
Present the three given queried search result in a nice way as answer to the user input.

Collect entities after user confirms booking choice, we need these entities: booking_date_start, booking_date_end, name, surname. 

dont ask questions back! 

ALWAYS PROVIDE RESPONSE AS JSON STRING with keys:
ANSWER, BOOKING_CONFIRMED, CONFIRMED_BOOKING_DATES, NAME and the ID of this.

PLEASE ALWAYS HAVE THE JSON STRING PART



{chat_history}
Human: {user_msg}
Chatbot:"""


BOOKING_PROMPT = PromptTemplate(
    input_variables=["chat_history", "user_msg"],
    template=BOOKING_AGENT_TEMPLATE
)

# Conversation memory feeding the {chat_history} placeholder on every turn.
memory = ConversationBufferMemory(memory_key="chat_history")

llm_chain = LLMChain(
    llm=OpenAI(model_name="gpt-3.5-turbo", temperature=0.8, openai_api_key=openai_api_key),
    prompt=BOOKING_PROMPT,
    verbose=False,
    memory=memory,
)

# The opening request was previously read from an undefined name; ask for it.
userinput = input("User: ")

# First turn: the search results are prepended to the user's request.
answer = llm_chain.predict(user_msg=f"{full_result_string} ---\n\n {userinput}")
print("Bot:", answer)
time.sleep(0.5)

# Follow-up turns. An empty line, "exit" or "quit" ends the chat so that the
# cells below can run; the loop previously had no way to terminate.
while True:
    follow_up = input("")
    if follow_up.strip().lower() in {"", "exit", "quit"}:
        break
    print("User:", follow_up)
    answer = llm_chain.predict(
        user_msg=follow_up
    )
    print("Bot:", answer)
    time.sleep(0.5)

In [None]:
# Show the most recent chatbot reply.
answer

In [None]:
import re

# Print the JSON object embedded in the reply. `re` is imported here because
# this cell runs before the cell that imports it; DOTALL lets the match span
# several lines; a reply without JSON is reported instead of raising IndexError.
json_match = re.search(r'{.+}', answer, flags=re.DOTALL)
print(json_match.group(0) if json_match else 'No JSON object found in the answer.')

In [None]:
import re

In [None]:
# Extract the JSON object (first '{' through last '}') from the chatbot reply.
# The original statement bound the `re` module itself to json_string.
json_match = re.search(r'{.+}', answer, flags=re.DOTALL)
if json_match is None:
    raise ValueError('The assistant answer does not contain a JSON object.')
json_string = json_match.group(0)

In [None]:
import json

# Parse the extracted JSON text into a Python object (json was never imported before).
json.loads(json_string)

In [None]:
# Prompt for the information-gathering step: asks the model to collect the
# reservation details and to append them as a JSON string. It has two
# placeholders, {chat_history} and {user_msg}.
BOOKING_AGENT_GET_INFO_TEMPLATE = """
You are a room booking assistant. Be nice and helpful and get these informations from the customer:
BOOKING_START, BOOKING_END, FULL_NAME, CITY, BUDGET, GUEST_COUNT

Example of how such conversation may go, only generate responses to the users last question:
Human: Hi, I'm looking to book a room in CITY.
assistant: Great, I can help with that. What's your budget?
Human: I'm looking to spend around BUDGET euros per month.
assistant: Okay, I've found a few options that fit your budget. Would you prefer to be in the city center or in a quieter area?
Human: I want to be close to the main attractions.
assistant: Got it. How many people will be staying in the room?
Human: It's just me.


Collect this information and ALWAYS ADD JSON STRING AT THE END OF THE RESPONSE AS JSON STRING with keys:
BOOKING_START, BOOKING_END, FULL_NAME, CITY, BUDGET, GUEST_COUNT

PLEASE ALWAYS HAVE THE JSON STRING PART

{chat_history}
Human: {user_msg}
Chatbot:"""

# Prompt for the presentation / selling step, with the same two placeholders.
# NOTE: this rebinds BOOKING_AGENT_TEMPLATE, which the chat cell further up
# also defines with different wording; the later definition wins once this cell runs.
BOOKING_AGENT_TEMPLATE = """
You are a room booking assistant. Be kind, detailed and try to sell the booking of the apartment to me. 
Present the three given queried search result in a nice way as answer to the user input.

Collect entities when user confirms booking choice, we need these entities: CONFIRMED_BOOKING_START, CONFIRMED_BOOKING_END, FULL_NAME

ALWAYS ADD JSON STRING AT THE END OF THE RESPONSE, ALWAYS PROVIDE RESPONSE AS JSON STRING with keys:
ANSWER, BOOKING_CONFIRMED, CONFIRMED_BOOKING_START, CONFIRMED_BOOKING_END, NAME and the ID of this.

PLEASE ALWAYS HAVE THE JSON STRING PART


{chat_history}
Human: {user_msg}
Chatbot:"""

# Prompt object wrapping the selling template.
BOOKING_PROMPT_SELL = PromptTemplate(
    input_variables=["chat_history", "user_msg"],
    template=BOOKING_AGENT_TEMPLATE
)

# Prompt object wrapping the information-gathering template.
BOOKING_PROMPT_ASK = PromptTemplate(
    input_variables=["chat_history", "user_msg"],
    template=BOOKING_AGENT_GET_INFO_TEMPLATE
)

In [None]:
def booking_get_requirement_info(userinput, memory=None):
    """Send one user message through the information-gathering prompt.

    Args:
        userinput: The user's message; it is formatted into the prompt as text.
        memory: Optional ``ConversationBufferMemory`` (memory key
            ``"chat_history"``) to carry the conversation across calls. When
            omitted a fresh, empty memory is created, so the call is a
            stand-alone single turn as before.

    Returns:
        tuple: ``("Bot:", answer)`` where ``answer`` is the model's reply text.
    """
    if memory is None:
        memory = ConversationBufferMemory(memory_key="chat_history")
    llm_chain = LLMChain(
        llm=OpenAI(model_name="gpt-3.5-turbo", temperature=0.8,
                   openai_api_key=openai_api_key),
        prompt=BOOKING_PROMPT_ASK,
        verbose=False,
        memory=memory,
    )

    answer = llm_chain.predict(user_msg=f"{userinput}")
    return "Bot:", answer

In [None]:
# Try the information-gathering prompt with a sample request.
booking_get_requirement_info('I wanna book a room in Madrid')