In [1]:
import os

from dotenv import load_dotenv

# Environment first: variables from .env, then the Hugging Face cache location.
# HF_HOME has to be in the environment before any library that pulls in
# Hugging Face code is imported, because those libraries resolve their cache
# paths at import time; assigning it afterwards can be silently ignored.
load_dotenv()
os.environ['HF_HOME'] = '/net/tscratch/people/plgkonkie311/cache/'

# Heavy / Hugging Face-dependent imports come after the environment is set,
# which is why they are intentionally placed below executable statements.
import torch
import numpy as np
from golemai.nlp.llm_resp_gen import LLMRespGen
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

In [2]:
cd spatial

/net/tscratch/people/plgkonkie311/spatial


In [17]:
from prompts import PROMPT_NER_DISTANCE_WHERE, PROMPT_FIND_LATITUDE_LONGITUDE, PROMPT_END

In [18]:
# Refresh variables from the .env file, then read the CLARIN API key
# (None when the variable is not set).
load_dotenv()
api_key = os.environ.get("CLARIN_KEY")

In [19]:
# Response generator in API mode, pointed at the CLARIN endpoint.
# The prompt template starts empty; each helper below installs its own
# template before generating.
llm_rg = LLMRespGen(
    id_col='id',
    model_type='api',
    system_msg='You are a helpful assistant.',
    prompt_template='',
    batch_size=1,
    api_url='https://services.clarin-pl.eu/api/v1/oapi',
    api_key=api_key,
    timeout=60,
)
# Keep the value returned by set_generation_config as `llm_rg`, exactly as the
# chained form would.
llm_rg = llm_rg.set_generation_config(model_id='mixtral-8x22B')


Model type is 'api'. Setting generation config for API.


In [20]:
# Embedding model used to query the FAISS indexes.
embeddings = HuggingFaceEmbeddings(model_name="sdadas/mmlw-roberta-large")

# Load the locally stored index of Wrocław places.
# NOTE(review): allow_dangerous_deserialization=True opts in to deserializing
# the stored index data; only load index folders from a trusted source.
location_vector_store = FAISS.load_local(
    folder_path='all_wroclaw',
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

In [21]:
import textwrap
# Column width used when wrapping prompts and model responses for display.
TEXT_WRAP_WIDTH = 120

def generate_response(QUERY, verbose=False, **kwargs):
    """Prepare a prompt for ``QUERY`` and return the LLM's response to it.

    Args:
        QUERY: Value passed to ``llm_rg.prepare_prompt`` as ``query``.
        verbose: When true, print the prepared prompt and the response, each
            wrapped to ``TEXT_WRAP_WIDTH`` columns.
        **kwargs: Forwarded unchanged to ``llm_rg.prepare_prompt``.

    Returns:
        The value returned by ``llm_rg._generate_llm_response`` for the
        prepared prompt.
    """
    prepared = llm_rg.prepare_prompt(query=QUERY, **kwargs)

    if verbose:
        print(textwrap.fill(prepared, width=TEXT_WRAP_WIDTH))
        print()

    response = llm_rg._generate_llm_response(inputs=prepared)

    if verbose:
        print(textwrap.fill(response, width=TEXT_WRAP_WIDTH))

    return response

def extract_entities_from_documents(query, documents):
    """Ask the LLM for the location, place type and distance in ``query``.

    The response is parsed by cutting it at the markers
    ```LOCATION_VECTOR_DB_PROMPT`:``, ```PLACE_VECTOR_DB_PROMPT`:`` and
    ```DISTANCE`:``; the distance is the text between the last marker and
    the first following ``km``.

    Args:
        query: The user's free-text query.
        documents: Unused; kept so existing callers keep working.

    Returns:
        A ``(location, place, distance)`` tuple. ``location`` and ``place``
        are stripped strings. ``distance`` is a ``float`` when the extracted
        text is a plain non-negative number (integer or decimal, with ``.``
        or ``,`` as decimal separator); otherwise the stripped text is
        returned unchanged.
    """
    import re

    llm_rg.set_prompt_template(PROMPT_NER_DISTANCE_WHERE)

    result = generate_response(query, verbose=True)

    location = result.split("`LOCATION_VECTOR_DB_PROMPT`:")[-1].split("`PLACE_VECTOR_DB_PROMPT`")[0].strip()
    place = result.split("`PLACE_VECTOR_DB_PROMPT`:")[-1].split("`DISTANCE`")[0].strip()
    distance = result.split("`DISTANCE`:")[-1].split("km")[0].strip()

    # str.isdigit() is False for "1.5" / "0,5", which left decimal distances as
    # strings, and True for digit-like characters such as "²" that float()
    # cannot parse. Match an explicit numeric pattern instead.
    if re.fullmatch(r"\d+(?:[.,]\d+)?", distance):
        distance = float(distance.replace(",", "."))

    return location, place, distance


def extract_lat_lon_from_documents(query, documents):
    """Ask the LLM for the coordinates of ``query`` given candidate documents.

    The latitude is the text between the last ```LATITUDE`:`` marker and the
    next ```LONGITUDE``` marker; the longitude is everything after the last
    ```LONGITUDE`:`` marker.

    Args:
        query: Description of the location to look up.
        documents: Document text handed to the prompt as ``documents``.

    Returns:
        A ``(latitude, longitude)`` tuple of floats.

    Raises:
        ValueError: If either extracted piece of text is not a valid float.
    """
    llm_rg.set_prompt_template(PROMPT_FIND_LATITUDE_LONGITUDE)

    response = generate_response(query, documents=documents, verbose=True)

    after_latitude_tag = response.rpartition("`LATITUDE`:")[2]
    latitude_text = after_latitude_tag.partition("`LONGITUDE`")[0].strip()
    longitude_text = response.rpartition("`LONGITUDE`:")[2].strip()

    return float(latitude_text), float(longitude_text)

def summarize_results(query, documents):
    """Generate the final answer for ``query`` from the retrieved documents.

    Installs ``PROMPT_END`` as the active template and returns the LLM
    response; the prompt and response are printed because generation runs in
    verbose mode.
    """
    llm_rg.set_prompt_template(PROMPT_END)
    return generate_response(query, documents=documents, verbose=True)

def documents_to_string(results):
    """Render documents as a numbered, newline-separated text listing.

    Each item contributes two lines: its zero-based index with its
    ``metadata``, then its ``page_content`` on an indented ``Rest:`` line.
    An empty input yields an empty string.
    """
    entries = []
    for index, document in enumerate(results):
        entries.append(f"{index}. Metadata: {document.metadata}\n   Rest: {document.page_content}")
    return "\n".join(entries)


def generate_square_filter(center, radius_km=5, amenity=None):
    """Build a metadata filter for a bounding box around ``center``.

    The box extends ``radius_km`` north/south and east/west of the centre,
    using an approximate kilometres-per-degree conversion.

    Args:
        center: ``(latitude, longitude)`` pair in degrees.
        radius_km: Half the side length of the box, in kilometres.
        amenity: Optional value; when truthy, an extra ``amenity`` equality
            condition is appended.

    Returns:
        A dict of the form ``{"$and": [...]}`` with four range conditions
        (min/max latitude, min/max longitude) plus the optional amenity one.
    """
    center_lat, center_lon = center

    # 1 degree of latitude = 110.574 km; 1 degree of longitude = 111.320 km * cos(latitude).
    half_height = radius_km / 110.574
    half_width = radius_km / (111.320 * np.cos(np.radians(center_lat)))

    # "lattitude" is spelled the way the stored document metadata spells it.
    bounds = [
        ("lattitude", "$gte", center_lat - half_height),  # min latitude
        ("lattitude", "$lte", center_lat + half_height),  # max latitude
        ("longitude", "$gte", center_lon - half_width),   # min longitude
        ("longitude", "$lte", center_lon + half_width),   # max longitude
    ]
    conditions = [{field: {operator: limit}} for field, operator, limit in bounds]

    if amenity:
        conditions.append({"amenity": amenity})

    return {"$and": conditions}

In [87]:
# Alternative example query (address-based):
# QUERY = "Find me a Polish restaurant near ulica Legnicka 65, Wrocław"

# Active example query: a place type searched near a named square.
QUERY = "Find me a place where i can buy a good coffee near to plac Grunwaldzki, Wrocław"

In [None]:
# Step 1: have the LLM split the query into location, place type and distance.
location, place, distance = extract_entities_from_documents(query=QUERY, documents=None)

 Instruction for LLM:  Please extract the following fields from the user's query:  1. `LOCATION_VECTOR_DB_PROMPT`:
Provide a vector representation of the location mentioned in the user's query. This should correspond to a specific
place or landmark (e.g., "Dworzec Wrocław Główny"). It should have been extracted in OSM format, e.g., 'amenity':
'restaurant', 'cuisine': 'polish'. if you can find address also pass it      2. `PLACE_VECTOR_DB_PROMPT`: Provide a
vector representation of the type of place or business mentioned in the query (e.g., "restaurant", "hotel", etc.). In
the given example, it would be a "Polish restaurant". It should have been extracted in OSM format, e.g., 'amenity':
'restaurant', 'cuisine': 'polish'.  3. `DISTANCE`: If the query contains information about the proximity or distance,
either explicitly or implicitly, include it. This could be a specific distance (e.g., "5 km away") or a general
description (e.g., "near"). If available if not thy to change it to a numbe

In [89]:
# Inspect the fields parsed from the LLM response.
location, place, distance

("'name': 'plac Grunwaldzki', 'description': 'city square', 'city': 'Wrocław'",
 "'amenity': 'cafe', 'description': 'coffee shop'",
 1.0)

In [90]:
# Step 2: fetch the five stored places most similar to the extracted location text.
results = location_vector_store.similarity_search(query=location, k=5)

results

[Document(id='f47e9bbe-97f2-4c3d-aaeb-3b74f1a8af69', metadata={'amenity': 'police', 'name': 'Komisariat Policji Wrocław-Stare Miasto', 'address': 'Unknown , Unknown', 'lattitude': 51.1124518, 'longitude': 17.0103449}, page_content='{"amenity": "police", "name": "Komisariat Policji Wroc\\u0142aw-Stare Miasto"}'),
 Document(id='0261c8db-9012-4f5e-b478-da110ccc6601', metadata={'amenity': 'police', 'name': 'I Rewir Dzielnicowych Komisariatu Policji Wrocław-Fabryczna', 'address': 'Unknown , Unknown', 'lattitude': 51.1285018, 'longitude': 16.9721794}, page_content='{"amenity": "police", "name": "I Rewir Dzielnicowych Komisariatu Policji Wroc\\u0142aw-Fabryczna"}'),
 Document(id='2d6afdcd-95aa-4fe0-af62-c87be9619b2b', metadata={'amenity': 'monastery', 'name': 'Refektarz', 'address': 'plac Dominikański 2-4, Wrocław', 'lattitude': 51.1092091, 'longitude': 17.0403065}, page_content='{"addr:city": "Wroc\\u0142aw", "addr:country": "PL", "addr:housenumber": "2-4", "addr:postcode": "50-159", "addr:s

In [91]:
# Step 3: let the LLM pick coordinates for the location from the candidate documents.
documents = documents_to_string(results)

latitude, longitude = extract_lat_lon_from_documents(query=location, documents=documents)

 From provided Documents, find the latitude and longitude of the location mentioned in the user's query. If it is not
explicitly mentioned, try to extract it from the metadata of the documents. It can be near the location mentioned in the
query.  QUERY: "'name': 'plac Grunwaldzki', 'description': 'city square', 'city': 'Wrocław'"  DOCUMENTS: 0. Metadata:
{'amenity': 'police', 'name': 'Komisariat Policji Wrocław-Stare Miasto', 'address': 'Unknown , Unknown', 'lattitude':
51.1124518, 'longitude': 17.0103449}    Rest: {"amenity": "police", "name": "Komisariat Policji Wroc\u0142aw-Stare
Miasto"} 1. Metadata: {'amenity': 'police', 'name': 'I Rewir Dzielnicowych Komisariatu Policji Wrocław-Fabryczna',
'address': 'Unknown , Unknown', 'lattitude': 51.1285018, 'longitude': 16.9721794}    Rest: {"amenity": "police", "name":
"I Rewir Dzielnicowych Komisariatu Policji Wroc\u0142aw-Fabryczna"} 2. Metadata: {'amenity': 'monastery', 'name':
'Refektarz', 'address': 'plac Dominikański 2-4, Wrocław', 'l

In [92]:
# Inspect the chosen coordinates together with the place description to search for.
latitude, longitude, place

(51.1092091, 17.0403065, "'amenity': 'cafe', 'description': 'coffee shop'")

In [98]:
# Step 4: search for the requested place type, restricted to a bounding box
# of `distance` km around the resolved coordinates.
results = location_vector_store.similarity_search(
    query=place,
    k=5,
    filter=generate_square_filter(center=(latitude, longitude), radius_km=distance),
)

results

[Document(id='2450cc3e-f5f7-481a-8b72-b0a10a4517a9', metadata={'amenity': 'cafe', 'name': 'Kawiarnia Muzealna', 'address': 'Unknown , Unknown', 'lattitude': 51.1107113, 'longitude': 17.0475408}, page_content='{"amenity": "cafe", "name": "Kawiarnia Muzealna"}'),
 Document(id='2f337e64-818a-4457-b7c1-bdaa9abf2f9f', metadata={'amenity': 'cafe', 'name': 'Café Kulisy', 'address': 'Unknown , Unknown', 'lattitude': 51.1013937, 'longitude': 17.0263401}, page_content='{"amenity": "cafe", "name": "Caf\\u00e9 Kulisy", "outdoor_seating": "yes"}'),
 Document(id='1f4f9c42-fa09-47d2-8ecd-78b6b4fcc134', metadata={'amenity': 'cafe', 'name': "Slivianczik's", 'address': 'Unknown , Unknown', 'lattitude': 51.106372, 'longitude': 17.043319}, page_content='{"amenity": "cafe", "name": "Slivianczik\'s"}'),
 Document(id='efd9935b-ced7-4890-90a0-f9cdf58ba20b', metadata={'amenity': 'cafe', 'name': 'Cafe de France', 'address': 'Unknown , Unknown', 'lattitude': 51.1103353, 'longitude': 17.0327545}, page_content='{"

In [94]:
# Step 5: summarise the filtered results into a recommendation for the original query.
documents = documents_to_string(results)
answer = summarize_results(query=QUERY, documents=documents)

 Provide short suummarization based on user query and the documents provided. Recommend the most relevant document.
QUERY: "Find me a place where i can buy a good coffee near to plac Grunwaldzki, Wrocław"  DOCUMENTS: 0. Metadata:
{'amenity': 'cafe', 'name': 'Kawiarnia Muzealna', 'address': 'Unknown , Unknown', 'lattitude': 51.1107113, 'longitude':
17.0475408}    Rest: {"amenity": "cafe", "name": "Kawiarnia Muzealna"} 1. Metadata: {'amenity': 'cafe', 'name': 'Café
Kulisy', 'address': 'Unknown , Unknown', 'lattitude': 51.1013937, 'longitude': 17.0263401}    Rest: {"amenity": "cafe",
"name": "Caf\u00e9 Kulisy", "outdoor_seating": "yes"} 2. Metadata: {'amenity': 'cafe', 'name': "Slivianczik's",
'address': 'Unknown , Unknown', 'lattitude': 51.106372, 'longitude': 17.043319}    Rest: {"amenity": "cafe", "name":
"Slivianczik's"} 3. Metadata: {'amenity': 'cafe', 'name': 'Cafe de France', 'address': 'Unknown , Unknown', 'lattitude':
51.1103353, 'longitude': 17.0327545}    Rest: {"amenity": "caf

In [95]:
# Show the final answer wrapped to the notebook's display width.
print(textwrap.fill(answer, width=TEXT_WRAP_WIDTH))

 Based on your query, I would recommend "Café de France" as it is a café with outdoor seating, which could be a nice
place to enjoy your coffee. It is also located relatively close to Plac Grunwaldzki in Wrocław. However, please note
that the exact addresses for these locations are not provided, so I recommend checking their specific locations for the
most accurate information.


# Booksy

In [24]:
# Embedding model for the Booksy index; this re-creates `embeddings` with the
# same model name that the location index was loaded with.
embeddings = HuggingFaceEmbeddings(model_name="sdadas/mmlw-roberta-large")

# Load the locally stored Booksy index.
# NOTE(review): allow_dangerous_deserialization=True opts in to deserializing
# the stored index data; only load index folders from a trusted source.
booksy_vector_store = FAISS.load_local(
    folder_path='booksy_wroclaw',
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

In [56]:
# Alternative example query (service-based):
# QUERY = "Find me a hairdresser that offers hair coloring near to plac Grunwaldzki, Wrocław"

# Active example query: a service with a price constraint near a landmark.
QUERY = "Find ma a hairdresser that costs less than 100 PLN for a men's haircut near to Galeria Dominikańska, Wrocław"

In [57]:
# Step 1: have the LLM split the query into location, place type and distance.
location, place, distance = extract_entities_from_documents(query=QUERY, documents=None)

 Please extract the following fields from the user's query:  1. `LOCATION_VECTOR_DB_PROMPT`: Provide a representation of
the location mentioned in the user's query. This should correspond to a specific place or landmark (e.g., "Dworzec
Wrocław Główny"). It should have been extracted in OSM format, e.g., 'amenity': 'restaurant', 'cuisine': 'polish'. if
you can find address also pass it      2. `PLACE_VECTOR_DB_PROMPT`: Provide a representation of the type of place or
business mentioned in the query (e.g., "restaurant", "hotel", etc.). In the given example, it would be a "Polish
restaurant". It should have been extracted in OSM format, e.g., 'amenity': 'restaurant', 'cuisine': 'polish'.  3.
`DISTANCE`: If the query contains information about the proximity or distance, either explicitly or implicitly, include
it. This could be a specific distance (e.g., "5 km away") or a general description (e.g., "near"). If available if not
thy to change it to a number e.g., "near" -> "1 km"] Provide on

In [58]:
# Inspect the fields parsed from the LLM response.
location, place, distance

("'name': 'Galeria Dominikańska', 'description': 'shopping mall', 'address': {'street': 'ul. Oławska', 'housenumber': '1', 'city': 'Wrocław'}",
 "'amenity': 'hairdresser', 'price': 'less than 100 PLN', 'service': 'men's haircut'",
 1.0)

In [60]:
# Step 2: resolve the landmark against the general places index (not the Booksy one).
results = location_vector_store.similarity_search(query=location, k=5)

results

[Document(id='2d6afdcd-95aa-4fe0-af62-c87be9619b2b', metadata={'amenity': 'monastery', 'name': 'Refektarz', 'address': 'plac Dominikański 2-4, Wrocław', 'lattitude': 51.1092091, 'longitude': 17.0403065}, page_content='{"addr:city": "Wroc\\u0142aw", "addr:country": "PL", "addr:housenumber": "2-4", "addr:postcode": "50-159", "addr:street": "plac Dominika\\u0144ski", "addr:street:sym_ul": "04103", "amenity": "monastery", "image": "https://photos.app.goo.gl/PnhinPXQh9QtAxwYA", "name": "Refektarz"}'),
 Document(id='78343a13-b4b7-4b7e-acd6-b133932cb780', metadata={'amenity': 'marketplace', 'name': 'C.H. TARGET', 'address': 'Henryka Michała Kamieńskiego 14, Wrocław', 'lattitude': 51.1412245, 'longitude': 17.0343133}, page_content='{"addr:city": "Wroc\\u0142aw", "addr:housenumber": "14", "addr:postcode": "51-124", "addr:street": "Henryka Micha\\u0142a Kamie\\u0144skiego", "amenity": "marketplace", "name": "C.H. TARGET"}'),
 Document(id='c17f5631-3238-4b57-a50d-472d8078093a', metadata={'amenity

In [61]:
# Step 3: let the LLM pick coordinates for the landmark from the candidate documents.
documents = documents_to_string(results)

latitude, longitude = extract_lat_lon_from_documents(query=location, documents=documents)

 From provided Documents, find the latitude and longitude of the location mentioned in the user's query. If it is not
explicitly mentioned, try to extract it from the metadata of the documents. It can be near the location mentioned in the
query.  QUERY: "'name': 'Galeria Dominikańska', 'description': 'shopping mall', 'address': {'street': 'ul. Oławska',
'housenumber': '1', 'city': 'Wrocław'}"  DOCUMENTS: 0. Metadata: {'amenity': 'monastery', 'name': 'Refektarz',
'address': 'plac Dominikański 2-4, Wrocław', 'lattitude': 51.1092091, 'longitude': 17.0403065}    Rest: {"addr:city":
"Wroc\u0142aw", "addr:country": "PL", "addr:housenumber": "2-4", "addr:postcode": "50-159", "addr:street": "plac
Dominika\u0144ski", "addr:street:sym_ul": "04103", "amenity": "monastery", "image":
"https://photos.app.goo.gl/PnhinPXQh9QtAxwYA", "name": "Refektarz"} 1. Metadata: {'amenity': 'marketplace', 'name':
'C.H. TARGET', 'address': 'Henryka Michała Kamieńskiego 14, Wrocław', 'lattitude': 51.1412245, 'longit

In [62]:
# Inspect the chosen coordinates together with the service description to search for.
latitude, longitude, place

(51.1092091,
 17.0403065,
 "'amenity': 'hairdresser', 'price': 'less than 100 PLN', 'service': 'men's haircut'")

In [63]:
# Step 4: search the Booksy index for the requested service, restricted to a
# bounding box of `distance` km around the resolved coordinates.
results = booksy_vector_store.similarity_search(
    query=place,
    k=5,
    filter=generate_square_filter(center=(latitude, longitude), radius_km=distance),
)

results

[Document(id='68a7772b-0ca6-4b17-9a09-d13004c1cea3', metadata={'name': 'THE MOST WANTED BARBERSHOP 2', 'rating': '4,9', 'lattitude': 51.1084243, 'longitude': 17.0313557, 'address': 'Ofiar Oświęcimskich 15, 50-059, Wrocław'}, page_content="Name: THE MOST WANTED BARBERSHOP 2 Rating: 4,9 Address: Ofiar Oświęcimskich 15, 50-059, Wrocław  Services: [{'price': '55,00 zł+', 'name': 'strzyżenie męskie krótkie włosy'}, {'price': '95,00 zł+', 'name': 'strzyżenie damskie włosy średnie'}, {'price': '120,00 zł+', 'name': 'strzyżenie damskie włosy długie'}, {'price': '20,00 zł', 'name': 'grzywka'}, {'price': 'Darmowa', 'name': 'bezpłatna konsultacja'}, {'price': '55,00 zł+', 'name': 'strzyżenie męskie krótkie włosy'}, {'price': '70,00 zł+', 'name': 'strzyżenie męskie włosy długie'}, {'price': '140,00 zł', 'name': 'zabieg odsiwiający strzyżenie'}, {'price': '50,00 zł+', 'name': 'strzyżenie maszynka'}, {'price': '160,00 zł+', 'name': 'męska trwała ondulacja włosy krótkie'}, {'price': '70,00 zł+', 'nam

In [64]:
# Step 5: summarise the filtered Booksy results into a recommendation for the original query.
documents = documents_to_string(results)
answer = summarize_results(query=QUERY, documents=documents)

 Provide short suummarization based on user query and the documents provided. Recommend the most relevant document.
QUERY: "Find ma a hairdresser that costs less than 100 PLN for a men's haircut near to Galeria Dominikańska, Wrocław"
DOCUMENTS: 0. Metadata: {'name': 'THE MOST WANTED BARBERSHOP 2', 'rating': '4,9', 'lattitude': 51.1084243, 'longitude':
17.0313557, 'address': 'Ofiar Oświęcimskich 15, 50-059, Wrocław'}    Rest: Name: THE MOST WANTED BARBERSHOP 2 Rating:
4,9 Address: Ofiar Oświęcimskich 15, 50-059, Wrocław  Services: [{'price': '55,00 zł+', 'name': 'strzyżenie męskie
krótkie włosy'}, {'price': '95,00 zł+', 'name': 'strzyżenie damskie włosy średnie'}, {'price': '120,00 zł+', 'name':
'strzyżenie damskie włosy długie'}, {'price': '20,00 zł', 'name': 'grzywka'}, {'price': 'Darmowa', 'name': 'bezpłatna
konsultacja'}, {'price': '55,00 zł+', 'name': 'strzyżenie męskie krótkie włosy'}, {'price': '70,00 zł+', 'name':
'strzyżenie męskie włosy długie'}, {'price': '140,00 zł', 'name':

In [65]:
# Show the final answer wrapped to the notebook's display width.
print(textwrap.fill(answer, width=TEXT_WRAP_WIDTH))

 Based on the provided documents, the most relevant hairdresser that meets your requirements is:  0. THE MOST WANTED
BARBERSHOP 2    - Address: Ofiar Oświęcimskich 15, 50-059, Wrocław    - Services: Men's haircut for short hair costs
55,00 zł+, which is less than 100 PLN.  This hairdresser is the best match for your query as it offers men's haircuts
for less than 100 PLN and is located near Galeria Dominikańska in Wrocław.
