In [1]:
import os
os.environ["SSL_CERT_FILE"] = "/mnt/d/Travel Assistant/Musafir/Fortinet_CA_SSL(15).cer"
os.environ["REQUESTS_CA_BUNDLE"] = "/mnt/d/Travel Assistant/Musafir/Fortinet_CA_SSL(15).cer"

In [2]:
import pandas as pd

In [3]:
df = pd.read_csv("../data/processed_data/all_cities_combined_clean.csv")

In [4]:
documents = df.to_dict(orient='records')

In [5]:
from elasticsearch import Elasticsearch
import requests
from tqdm.auto import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
from mistralai import Mistral
from mistralai.models import UserMessage
import os
from dotenv import load_dotenv
import tiktoken

In [7]:
es_client = Elasticsearch('http://localhost:9200')
print(es_client.info())

{'name': '9125275addfa', 'cluster_name': 'docker-cluster', 'cluster_uuid': 'M25oUGE6Qe6ICaWjniVYiw', 'version': {'number': '8.13.0', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '09df99393193b2c53d92899662a8b8b3c55b45cd', 'build_date': '2024-03-22T03:35:46.757803203Z', 'build_snapshot': False, 'lucene_version': '9.10.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}


In [8]:
for index, value in enumerate(documents[:5]):
    print(f"{index}, value: {value}")
    print("*" * 25)

0, value: {'id': 0, 'city': 'Cairo', 'section': 'See', 'subsection': 'Further afield', 'text': '29.8 31.233333 1 Dahshur Pyramids . For a contrast to touristy Pyramids of Giza, head south to the oldest known pyramid, the Red Pyramid. The neglected Dahshur Pyramids are interesting and worth a visit, considering its history and the hassle-free atmosphere. Also, see the weird Bent Pyramid there and hike around the area to the Black Pyramid. The Red Pyramid has an entrance to the inside, which you can climb down. ( updated Jan 2018 )'}
*************************
1, value: {'id': 1, 'city': 'Cairo', 'section': 'See', 'subsection': nan, 'text': '29.8 31.233333 5 Dahshur Pyramids . For a contrast to touristy Pyramids of Giza, head south to the oldest known pyramid, the Red Pyramid. The neglected Dahshur Pyramids are interesting and worth a visit, considering its history and the hassle-free atmosphere. Also, see the weird Bent Pyramid there and hike around the area to the Black Pyramid. The Red

In [9]:
index_name = "travel_assistant"

In [10]:
if es_client.indices.exists(index=index_name):
    es_client.indices.delete(index=index_name)
    print(f"Delete existing index: {index_name}")

Delete existing index: travel_assistant


In [11]:
list(documents[0].keys())

['id', 'city', 'section', 'subsection', 'text']

In [12]:
# Index doc by 'city' and 'section' value and the rest is the text 

index_settings = {
    "settings":{
        "number_of_shards":1,
        "number_of_replicas":0
    },
    "mappings":{
        "properties":{
            "city":{"type":"keyword"},
            "section":{"type":"keyword"},
            "subsection":{"type":"text"},
            "text":{"type":"text"},
        }
    }
}

es_client.indices.create(index=index_name, body=index_settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'travel_assistant'})

In [13]:
# replace none value with null
import math

def clean_doc(doc):
    for k, v in doc.items():
        if isinstance(v, float) and math.isnan(v):
            doc[k] = None  
    return doc

In [14]:
# Set the index for the whole docs

for doc in tqdm(documents):
    es_client.index(index=index_name, document=clean_doc(doc))


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 544/544 [01:38<00:00,  5.52it/s]


In [27]:
def elastic_search(query, size, city_filter=None):

    search_query = {
        "size": size,
        "query": {
            "bool": {
                "must": [
                    {
                        "multi_match": {
                            "query": query,
                            "fields": ['city', 'section', 'subsection', 'text'],
                            "type": "best_fields"
                        }
                    }
                ]
            }
        }
    }

    # Add filter 
    if city_filter:
        search_query["query"]["bool"]["filter"] = {
            "term": {
                "city": city_filter
            }
        }


    response = es_client.search(index=index_name, body=search_query)
    
    result_docs = []

    for hit in response['hits']['hits']:
        result_docs.append(hit)     
    
    return result_docs

In [32]:
# Searching 
f_query = "where can I find the best halal food in London?"
elastic_search(query=f_query, size=5, city_filter=None)

[{'_index': 'travel_assistant',
  '_id': '7hhIe5kBylxcUMKE_i9W',
  '_score': 10.650509,
  '_source': {'id': 318,
   'city': 'London',
   'section': 'Eat',
   'subsection': 'Restaurant streets',
   'text': 'While central London is full of restaurants and cafes, there are some areas where the majority of diners are Londoners, rather than tourists, and in general you will get a much more pleasant, better value, and less crowded eating experience than you will find in the West End. These places are best visited in the evenings.'}},
 {'_index': 'travel_assistant',
  '_id': 'YRhJe5kBylxcUMKERjAp',
  '_score': 10.606996,
  '_source': {'id': 433,
   'city': 'Rome',
   'section': 'Eat',
   'subsection': None,
   'text': 'Rome is full of good restaurants, many in attractive settings, particularly when you sit outside in the evening. No one location can be recommended to search for a good restaurant: some of the best places to eat are in the most unpromising locations while well-situated restaura

In [33]:
# loads variables from .env
load_dotenv()  

True

In [34]:
api_key = os.getenv("API_KEY")

In [36]:
client = Mistral(api_key = api_key)

In [41]:
def build_prompt(query, search_results):
    context_template = "Q: {question}\n A: {text}"

    context_parts = []
    for doc in search_results:
        source = doc['_source']
        context_parts.append(context_template.format(question=source.get('question', ''), text=source.get('text', '')))

    context = "\n\n".join(context_parts)

    prompt_template = """
You're a travel assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT:
{context}

    """.strip()
    
    prompt = prompt_template.format(question=query, context=context)
    print('The length of the resulting prompt:', len(prompt))

    encoding = tiktoken.encoding_for_model("gpt-4o")
    encoded_prompt = encoding.encode(prompt)
    num_token = len(encoded_prompt)

    # Number of token
    print("The number of token",num_token)
    return prompt


In [38]:
def llm(prompt):
    response = client.chat.complete(
        model= "mistral-medium-latest",
        messages=[UserMessage(content=prompt)],
    )


    return response.choices[0].message.content

In [39]:
def rag(query):
    search_results = elastic_search(query=query, size=5, city_filter=None)
    prompt = build_prompt(query, search_results)
    answer = llm(prompt)

    return answer

In [42]:
answer = rag(f_query)
print("\n Answer:", answer)

The length of the resulting prompt: 2907
The number of token 619

 Answer: Based on the **CONTEXT**, the best areas in London to find **halal food** include:

- **Whitechapel Road & Brick Lane** (East End)
- **Bayswater, Edgware Road, and Paddington**
- **Many parts of North London**

These areas have a high concentration of **halal restaurants and shops**, ranging from casual eateries to nicer dining options.


In [47]:
answer = rag("Plan two days trip to Roma")
print("\n Answer:", answer)

The length of the resulting prompt: 2324
The number of token 533

 Answer: Here’s a well-structured **2-day trip plan for Rome** based on the provided context, optimizing time and cost with the **Roma Pass 48-Hours (€36.50)**:

---
### **Day 1: Ancient Rome & Iconic Landmarks**
**Morning:**
- **Colosseum (Colosseo)** – Use your **Roma Pass for free entry + skip-the-line access** (arrive early to avoid crowds).
- **Palatine Hill (Palatino Hill)** – Included in the same ticket as the Colosseum (free entry with Roma Pass).
- **Roman Forum** – Walk through the heart of ancient Rome (entry included with Colosseum ticket).

**Afternoon:**
- **Lunch**: Quick bite near **Monti district** (try *La Carbonara* or *Trattoria Da Enzo*).
- **Baths of Caracalla (Terme di Caracalla)** – Free entry with Roma Pass (impressive Roman baths).
- **Circus Maximus** – Free outdoor site (no pass needed); great for photos.

**Evening:**
- **Piazza Venezia & Altare della Patria** – Climb the monument for panoram

In [49]:
answer = rag("Plan three-day itinerary for Dubai")
print("\n Answer:", answer)

The length of the resulting prompt: 1935
The number of token 421

 Answer: I’m unable to provide a three-day itinerary for Dubai based on the given **CONTEXT**, as the provided information only covers topics related to **Rome, London, airport shopping, and Cairo day trips**.

For a Dubai itinerary, I would typically include details on attractions like the **Burj Khalifa, Dubai Mall, Palm Jumeirah, Desert Safari, Dubai Marina, and cultural sites like Al Fahidi Historical District**. However, since the **CONTEXT** does not contain any relevant facts about Dubai, I cannot generate an answer.

Would you like me to assist with a general Dubai itinerary based on standard travel recommendations? Let me know!
