In [147]:
import numpy as np
import pandas as pd
import time
import redis
import time

from redis.commands.search.field import VectorField
from redis.commands.search.field import TextField
from redis.commands.search.field import TagField
from redis.commands.search.query import Query

In [148]:
redis_conn = redis.Redis(
  host='localhost',
  port=6379,
  password='')

In [149]:
MAX_TEXT_LENGTH=512
NUMBER_PRODUCTS=10000

def auto_truncate(val):
    return str(val)[:MAX_TEXT_LENGTH]

#Load Product data and truncate long text fields
all_prods_df = pd.read_csv("product_data.csv", converters={'bullet_point': auto_truncate,'item_keywords':auto_truncate,'item_name':auto_truncate})
all_prods_df['primary_key'] = all_prods_df['item_id'] + '-' + all_prods_df['domain_name']
all_prods_df['item_keywords'].replace('', np.nan, inplace=True)
all_prods_df.dropna(subset=['item_keywords'], inplace=True)
all_prods_df.reset_index(drop=True,inplace=True)

#get the first 1000 products with non-empty item keywords
product_metadata = all_prods_df.head(NUMBER_PRODUCTS).to_dict(orient='index')

all_prods_df.head()


Unnamed: 0,item_id,marketplace,country,main_image_id,domain_name,bullet_point,item_keywords,material,brand,color,item_name,model_name,model_number,product_type,primary_key
0,B07T6RZ2CM,Amazon,IN,71dZhpsferL,amazon.in,3D Printed Hard Back Case Mobile Cover for Len...,mobile cover back cover mobile case phone case...,,Amazon Brand - Solimo,Others,Amazon Brand - Solimo Designer Couples Sitting...,Lenovo K4 Note,gz8115-SL40423,CELLULAR_PHONE_CASE,B07T6RZ2CM-amazon.in
1,B07T2JY31Y,Amazon,IN,71vX7qIEAIL,amazon.in,3D Printed Hard Back Case Mobile Cover for Son...,mobile cover back cover mobile case phone case...,Wood,Amazon Brand - Solimo,others,Amazon Brand - Solimo Designer Leaf on Wood 3D...,Sony Xperia Z1 L39H,gz8056-SL40528,CELLULAR_PHONE_CASE,B07T2JY31Y-amazon.in
2,B0849YGSCZ,Amazon,AE,A1EZF-2mB5L,amazon.ae,,small de fur rooms navidad woven girls shag pa...,,Stone & Beam,,Stone & Beam Contemporary Doily Wool Farmhouse...,,I59I8044IVYGRYC00-Parent,HOME_FURNITURE_AND_DECOR,B0849YGSCZ-amazon.ae
3,B081K6TCML,Amazon,IN,81o9EyZ-fAL,amazon.in,Solimo Plastic Multipurpose Modular Drawer; sm...,drawer modular drawer 3 rack modular drawer ki...,Plastic,Amazon Brand - Solimo,Multicolor,Amazon Brand - Solimo Plastic Multipurpose Mod...,,sol_cujo_13,HOME,B081K6TCML-amazon.in
4,B0854774X5,Amazon,IN,81xaJCVnl3L,amazon.in,"Snug fit for Nokia 8.1, with perfect cut-outs ...",Back Cover Designer Case Designer Take It Easy...,Silicon,Amazon Brand - Solimo,Multicolor,Amazon Brand - Solimo Designer Take It Easy UV...,Nokia 8.1,UV10714-SL40617,CELLULAR_PHONE_CASE,B0854774X5-amazon.in


In [150]:
import requests

from bs4 import BeautifulSoup
from easynmt import EasyNMT

translator = EasyNMT("opus-mt")
translator.translate("", source_lang="es", target_lang="en")

def get_description(link: str) -> str:
    text = requests.get(link).text
    parsed = BeautifulSoup(text)
    return '\n'.join(item.text for item in parsed.find_all('p')[:6])


def translate_to_eng(text: str) -> str:
    
    return translator.translate(text, source_lang="es", target_lang="en")


def get_and_translate(link:str) -> str:
    return translate_to_eng(get_description(link))

In [151]:
room_data = pd.read_csv(
    'airbnb.csv', delimiter=',', encoding_errors='ignore', on_bad_lines='skip',
    converters={'bullet_point': auto_truncate,'amenities':auto_truncate,'name':auto_truncate}
)

  room_data = pd.read_csv(


In [152]:
room_data = room_data[
    [
        'listing_id', 'name', 'host_id', 'host_since', 'host_location',
       'host_is_superhost', 'host_total_listings_count',
       'host_has_profile_pic', 'host_identity_verified', 'neighbourhood',
       'district', 'city', 'property_type',
       'room_type', 'accommodates', 'bedrooms', 'amenities', 'price',
       'minimum_nights', 'maximum_nights', 'review_scores_rating',
       'review_scores_accuracy', 'review_scores_cleanliness',
       'review_scores_checkin', 'review_scores_communication',
       'review_scores_location', 'review_scores_value', 'instant_bookable'
    ]
]

In [153]:
# room_data = room_data.drop_duplicates(subset=['Area'], keep='first', ignore_index=True)

In [154]:
room_data = room_data.sample(10000).reset_index(drop=True)

In [155]:
room_data.iloc[0]

listing_id                                                              24398637
name                              Manly Beachside Living with Spectacular Views!
host_id                                                                 72701034
host_since                                                            2016-05-18
host_location                                   Montville, Queensland, Australia
host_is_superhost                                                              t
host_total_listings_count                                                    6.0
host_has_profile_pic                                                           t
host_identity_verified                                                         t
neighbourhood                                                              Manly
district                                                                     NaN
city                                                                      Sydney
property_type               

In [156]:
#get the first 1000 products with non-empty item keywords
rooms_metadata = room_data.head(NUMBER_PRODUCTS).to_dict(orient='index')

In [157]:
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('sentence-transformers/all-distilroberta-v1')


item_keywords =  [
    ', '.join(str(rooms_metadata[i][key]) for key in ['name', 'host_location', 'neighbourhood', 'property_type', 'amenities', 'price'])
    for i in rooms_metadata.keys()
]
room_context = {
    rooms_metadata[i]['listing_id']:', '.join(str(rooms_metadata[i][key]) for key in ['name', 'host_location', 'neighbourhood', 'property_type', 'amenities', 'price'])
    for i in rooms_metadata.keys()
}
item_keywords_vectors = [model.encode(sentence) for sentence in item_keywords]

In [158]:
item_keywords[0]

'Manly Beachside Living with Spectacular Views!, Montville, Queensland, Australia, Manly, Entire apartment, ["Patio or balcony", "Children\\u2019s books and toys", "Bed linens", "Coffee maker", "Hair dryer", "TV", "Fire extinguisher", "Heating", "Washer", "Iron", "Window guards", "Free parking on premises", "Essentials", "Single level home", "Cooking basics", "First aid kit", "Cable TV", "Extra pillows and blankets", "Kitchen", "Elevator", "Refrigerator", "Dishwasher", "Oven", "Stove", "Long term stays allowed", "Microwave", "BBQ grill", "Hot water", "Air conditioning", "Dryer", "Dedicated workspace", "Dishes and , 325'

In [159]:
len(item_keywords_vectors)
len(rooms_metadata)
# Check one of the products
rooms_metadata[2]

{'listing_id': 13846934,
 'name': 'Apartment near Central Park',
 'host_id': 81747071,
 'host_since': '2016-07-04',
 'host_location': 'Sterling Heights, Michigan, United States',
 'host_is_superhost': 'f',
 'host_total_listings_count': 1.0,
 'host_has_profile_pic': 't',
 'host_identity_verified': 't',
 'neighbourhood': 'Harlem',
 'district': 'Manhattan',
 'city': 'New York',
 'property_type': 'Entire apartment',
 'room_type': 'Entire place',
 'accommodates': 4,
 'bedrooms': 1.0,
 'amenities': '["Hair dryer", "Smoke alarm", "Essentials", "Hangers", "Washer", "TV", "Dryer", "Shampoo", "Heating", "Wifi", "Elevator", "Air conditioning", "Long term stays allowed", "Carbon monoxide alarm", "Kitchen"]',
 'price': 165,
 'minimum_nights': 30,
 'maximum_nights': 1125,
 'review_scores_rating': 93.0,
 'review_scores_accuracy': 9.0,
 'review_scores_cleanliness': 9.0,
 'review_scores_checkin': 10.0,
 'review_scores_communication': 10.0,
 'review_scores_location': 9.0,
 'review_scores_value': 9.0,
 '

In [160]:
def load_vectors(client, product_metadata, vector_dict, vector_field_name):
    p = client.pipeline(transaction=False)
    for index in product_metadata.keys():    
        #hash key
        key='product_ID:'+ str(product_metadata[index]['listing_id'])
        
        #hash values
        item_metadata = product_metadata[index]
        item_keywords_vector = vector_dict[index].astype(np.float32).tobytes()
        item_metadata[vector_field_name]=item_keywords_vector
        
        # HSET
        p.hset(key,mapping=item_metadata)
            
    p.execute()

def create_flat_index (redis_conn,vector_field_name,number_of_vectors, vector_dimensions=512, distance_metric='L2'):
    redis_conn.ft().create_index([
        VectorField(vector_field_name, "FLAT", {"TYPE": "FLOAT32", "DIM": vector_dimensions, "DISTANCE_METRIC": distance_metric, "INITIAL_CAP": number_of_vectors, "BLOCK_SIZE":number_of_vectors }),
        TextField("property_type", as_name='property_type'),
        TextField("name", as_name='name'),
        TextField("amenities", as_name='amenities'),
        TextField("city", as_name='city')
    ]) 
    



In [161]:

ITEM_KEYWORD_EMBEDDING_FIELD='item_keyword_vector'
TEXT_EMBEDDING_DIMENSION=768
NUMBER_PRODUCTS=10000

print ('Loading and Indexing + ' +  str(NUMBER_PRODUCTS) + ' products')

#flush all data
redis_conn.flushall()

Loading and Indexing + 10000 products


True

In [162]:

#create flat index & load vectors
create_flat_index(
    redis_conn, ITEM_KEYWORD_EMBEDDING_FIELD,NUMBER_PRODUCTS,TEXT_EMBEDDING_DIMENSION,'COSINE'
)
load_vectors(redis_conn,rooms_metadata,item_keywords_vectors,ITEM_KEYWORD_EMBEDDING_FIELD)

In [163]:
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI

llm = OpenAI(model_name="gpt-3.5-turbo", temperature=0.3, openai_api_key="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")
prompt = PromptTemplate(
    input_variables=["product_description"],
    template="Create comma seperated product keywords to perform a query on a amazon dataset for this user input: {product_description}",
)

from langchain.chains import LLMChain
chain = LLMChain(llm=llm, prompt=prompt)




In [164]:
userinput = input("Hey im a Roo(m)bot, how can i help you today? ")
print("User:", userinput)

# Run the chain only specifying the input variable.
keywords = chain.run(userinput)

topK=3
#vectorize the query
query_vector = model.encode(keywords).astype(np.float32).tobytes()

#prepare the query
q = Query(
    f'*=>[KNN {topK} @{ITEM_KEYWORD_EMBEDDING_FIELD} $vec_param AS vector_score]'
).sort_by('vector_score').paging(0,topK).return_fields(
        'vector_score','property_type','name','amenities', 'city'
    ).dialect(2)
params_dict = {"vec_param": query_vector}

#Execute the query
results = redis_conn.ft().search(q, query_params = params_dict)


Hey im a Roo(m)bot, how can i help you today?  Find me a lovely chic apartment in Vienna


User: Find me a lovely chic apartment in Vienna


In [167]:
full_result_string = ''
for product in results.docs:
    full_result_string += ' '.join(
        [
            product.property_type, product.name, f", amenities are:", product.amenities," Located in city:", product.city,
            'ID of this booking is:', product.id,
            "\n\n\n"
        ]
    )


from langchain.memory import ConversationBufferMemory

template = """
You are a room booking assistant. Be kind, detailed and nice, but also try to sell the booking of the apartment to me. 
Present the given queried search result in a nice way as answer to the user input. dont ask questions back! 
just take the given context

{chat_history}
Human: {user_msg}
Chatbot:"""

prompt = PromptTemplate(
    input_variables=["chat_history", "user_msg"], 
    template=template
)
memory = ConversationBufferMemory(memory_key="chat_history")
llm_chain = LLMChain(
    llm=OpenAI(model_name="gpt-3.5-turbo", temperature=0.8, openai_api_key="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"), 
    prompt=prompt, 
    verbose=False, 
    memory=memory,
)


answer = llm_chain.predict(user_msg= f"{full_result_string} ---\n\n {userinput}")
print("Bot:", answer)
time.sleep(0.5)

while True:
    follow_up = input("Anything else you want to ask about this topic?")
    print("User:", follow_up)
    answer = llm_chain.predict(
        user_msg=follow_up
    )
    print("Bot:", answer)
    time.sleep(0.5)

Bot: I found the perfect place for you in Vienna! This chic apartment is the perfect combination of style and comfort. It features amenities such as heating, a fully equipped kitchen, essentials, and high-speed wifi. Plus, long term stays are allowed so you can really settle in and enjoy your time in Vienna. The product ID for this booking is 4581297. Don't hesitate to book today and experience the beauty of Vienna in style!


KeyboardInterrupt: Interrupted by user

In [140]:
full_result_string

'product_ID:30267645: Cozy apartment in Paris, Barcelona, Catalonia, Spain, Passy, Entire apartment, ["Shampoo", "Heating", "Washer", "Dryer", "Hair dryer", "Wifi", "Kitchen", "Elevator", "Essentials", "Hangers", "Long term stays allowed", "Breakfast"], 83\n\n\nproduct_ID:6011954: Beautiful sunny apartment, Barcelona, Catalonia, Spain, III Monte Sacro, Entire apartment, ["Washer", "Dedicated workspace", "Fire extinguisher", "Hangers", "Iron", "Refrigerator", "Dishes and silverware", "Host greets you", "Cooking basics", "Wifi", "Stove", "Hair dryer", "Kitchen", "Heating", "TV", "Patio or balcony", "Air conditioning", "Coffee maker", "Long term stays allowed", "Hot water", "Free street parking", "Free parking on premises", "Oven", "Elevator", "Extra pillows and blankets", "Shampoo", "BBQ grill", "Essentials", "First aid kit", "Bed linens"], 43\n\n\nproduct_ID:7179955: Nice and quiet parisian flat, Paris, Ile-de-France, France, Batignolles-Monceau, Entire apartment, ["Shampoo", "First aid

In [135]:
[room_context[int(doc.id.split(':')[1])] for doc in results.docs]

['Best flat to enjoy Paris, Paris, Ile-de-France, France, Vaugirard, Entire apartment, ["Shampoo", "Heating", "TV", "Iron", "Kitchen", "Hair dryer", "Essentials", "Washer", "Carbon monoxide alarm", "Hangers", "Smoke alarm", "Wifi", "Long term stays allowed"], 85',
 'Nice and quiet parisian flat, Paris, Ile-de-France, France, Batignolles-Monceau, Entire apartment, ["Shampoo", "First aid kit", "Heating", "TV", "Kitchen", "Essentials", "Washer", "Wifi", "Long term stays allowed"], 37',
 'Nice flat in Paris city center, Paris, Ile-de-France, France, Popincourt, Entire apartment, ["Heating", "Kitchen", "Washer", "Smoke alarm", "Wifi", "Long term stays allowed"], 55']

In [174]:
results.docs[0].__dict__

{'id': 'product_ID:7609238',
 'payload': None,
 'vector_score': '0.325037419796',
 'property_type': 'Entire apartment',
 'name': 'Nice flat near by the Eiffel Tower',
 'amenities': '["Heating", "TV", "Kitchen", "Essentials", "Washer", "Wifi", "Elevator", "Long term stays allowed"]',
 'city': 'Paris'}