In [1]:
import os
import pandas as pd
from dotenv import load_dotenv
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from pymilvus import connections, Collection, utility



In [2]:
# Pull variables from a local .env file into the process environment.
load_dotenv()

# Fail early with a readable message when the key is absent. The previous
# `os.environ[...] = os.getenv(...)` round-trip was a no-op when the key was
# set and raised an opaque TypeError (None is not a str) when it was not.
if not os.getenv('GOOGLE_API_KEY'):
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; define it in the environment or in a .env file"
    )

In [None]:
# Milvus endpoint used by this notebook; kept as named values so they are
# easy to spot and change.
MILVUS_HOST = "localhost"
MILVUS_PORT = "19530"

connections.connect(host=MILVUS_HOST, port=MILVUS_PORT)
print("Connected to Milvus server")

Connected to Milvus server


In [None]:
# Name of the Milvus collection every later cell searches.
COLLECTION_NAME = "tourism_search"

# Stop the run with an exception rather than exit(): in a notebook, exit()
# targets the kernel/session instead of cleanly failing this cell, whereas an
# exception halts "Run All" at this point and leaves the kernel inspectable.
if not utility.has_collection(COLLECTION_NAME):
    raise RuntimeError(f"Collection {COLLECTION_NAME} does not exist")

In [None]:
def emb_texts(texts, task_type="RETRIEVAL_DOCUMENT"):
    """Embed a batch of texts with Google's ``models/embedding-001`` model.

    Args:
        texts: List of strings to embed.
        task_type: Task type forwarded to ``GoogleGenerativeAIEmbeddings``.
            Defaults to ``"RETRIEVAL_DOCUMENT"``, the value this function has
            always used.
            NOTE(review): the search code in this notebook embeds user
            *queries* through this function with the document task type; a
            query-specific task type may be more appropriate - confirm the
            accepted values against the library documentation before changing.

    Returns:
        A list with one embedding vector per input text, in input order.
        An empty input yields an empty list without contacting the API.
    """
    # Nothing to embed: skip building the client and the network round-trip.
    if not texts:
        return []

    embed_model = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        task_type=task_type
    )
    return embed_model.embed_documents(texts)

In [6]:
# Bind a handle to the collection named by COLLECTION_NAME and call load() on
# it; the search cells below use this `collection` object.
collection = Collection(COLLECTION_NAME)
collection.load()
print(f"Loaded collection {COLLECTION_NAME}")

Loaded collection tourism_search


In [None]:
# Example: embed a single question and fetch its nearest destination.
query = "What are the entertaining activates of Vasco da Gama?" 
query_embedding = emb_texts([query])[0]

# Search settings (L2 metric, nprobe=10) and the columns to return per hit.
search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
returned_fields = [
    "ID",
    "Destination",
    "State",
    "Description",
    "TouristAttractions",
    "Activities",
]

# limit=1 -> only the single closest match is requested.
results = collection.search(
    data=[query_embedding],
    anns_field="embedding",
    param=search_params,
    limit=1,
    output_fields=returned_fields,
)

# results[0] holds the hits for the one query vector submitted above.
print("Top matching destinations:")
for hit in results[0]:
    entity = hit.entity
    print(f"ID: {entity.get('ID')}")
    print(f"Destination: {entity.get('Destination')}")
    print(f"State: {entity.get('State')}")
    print(f"Description: {entity.get('Description')}")
    print(f"Tourist Attractions: {entity.get('TouristAttractions')}")
    print(f"Activities: {entity.get('Activities')}")
    print(f"Distance: {hit.distance:.4f}")
    print("-" * 50)

Top matching destinations:
ID: 85
Destination: Vasco da Gama
State: Goa
Description: A major port city known for its shipbuilding and beaches.
TouristAttractions: Bogmalo Beach, Japanese Garden, St. Andrew's Church.
Activities: Enjoy water sports and sunbathing at the beaches. Visit spice plantations and attend cooking classes. Explore the city's Portuguese colonial heritage. Shop for cashews, spices, and Goan handicrafts. Enjoy nightlife and live music at beach shacks and clubs.
Distance: 0.5088
--------------------------------------------------


In [10]:
# Dump the raw search result object to inspect its structure.
print(results)

data: [[{'ID': 85, 'distance': 0.508787989616394, 'entity': {'TouristAttractions': "Bogmalo Beach, Japanese Garden, St. Andrew's Church.", 'Activities': "Enjoy water sports and sunbathing at the beaches. Visit spice plantations and attend cooking classes. Explore the city's Portuguese colonial heritage. Shop for cashews, spices, and Goan handicrafts. Enjoy nightlife and live music at beach shacks and clubs.", 'Destination': 'Vasco da Gama', 'State': 'Goa', 'Description': 'A major port city known for its shipbuilding and beaches.', 'ID': 85}}]]


In [14]:
import google.generativeai as genai

# Shared conversation log; generate_answer() appends "User: ..." and
# "Assistant: ..." entries to this list.
chat_history = []

# System prompt prepended to every request. Adjacent string literals are
# concatenated, so every fragment must end with a space to keep sentences
# from running together.
system_message = (
    "You are a friendly and knowledgeable travel assistant. "
    "You answer questions only about destinations, their location (state), descriptions, tourist attractions and recreational activities during that trip, "
    "based on the provided travel dataset. "
    "If a query does not have an exact match in the data, provide the closest relevant information available. "
    "Use a warm, conversational tone, as if you are chatting with someone planning a trip. "
    "If the user asks about topics unrelated to travel or outside the dataset, respond with: "
    "'I can only provide answers related to the travel destination I know about, specifically tourist spots in India.'"
)

In [None]:
def _trim_history(history, limit):
    """Drop the oldest entries of ``history`` in place so at most ``limit`` remain."""
    excess = len(history) - limit
    if excess > 0:
        del history[:excess]


def generate_answer(system_message, chat_history, prompt, max_history=10):
    """Send the prompt plus recent chat history to Gemini and return the reply.

    Args:
        system_message: Instruction text placed at the top of the request.
        chat_history: Mutable list of "User: ..." / "Assistant: ..." strings.
            It is updated in place: the new prompt and the model's reply are
            appended and the oldest entries are discarded beyond
            ``max_history``.
        prompt: The user turn to send (already combined with any context).
        max_history: Maximum number of history entries to keep; should be at
            least 1. Defaults to 10 (five User + Assistant pairs).

    Returns:
        The text of the model's response.
    """
    genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
    model = genai.GenerativeModel('gemini-1.5-flash')

    # Record the user turn, then trim IN PLACE. Re-binding the name to a slice
    # (the previous approach) left the caller's list untouched and caused the
    # reply below to be appended to a throwaway copy once the limit was hit.
    chat_history.append(f"User: {prompt}")
    _trim_history(chat_history, max_history)

    # System message first, then the retained turns, then the cue for the reply.
    full_prompt = f"{system_message}\n\n" + "\n".join(chat_history) + "\nAssistant:"

    response = model.generate_content(full_prompt).text
    chat_history.append(f"Assistant: {response}")
    # Trim again so the stored history never exceeds the limit between calls.
    _trim_history(chat_history, max_history)

    print("Chat history:", chat_history)
    return response

In [None]:
def get_relevant_chunk(query, collection):
    """Return a text block describing the closest destination for ``query``.

    The query is embedded with ``emb_texts`` and used for a single-result
    search (L2 metric, nprobe=10) on the collection's "embedding" field.

    Args:
        query: The user's question.
        collection: Object exposing a ``search(...)`` method, as used below.

    Returns:
        A multi-line string listing the destination, state, description,
        tourist attractions and activities of the top hit, or a fixed
        "not found" message when the search yields no hit.
    """
    hits = collection.search(
        data=[emb_texts([query])[0]],
        anns_field="embedding",
        param={"metric_type": "L2", "params": {"nprobe": 10}},
        limit=1,
        output_fields=["Destination", "State", "Description", "TouristAttractions", "Activities"],
    )

    # Guard clause: nothing came back for the query.
    if not hits or len(hits[0]) == 0:
        return "No relevant search found in the dataset."

    # Take the first (and, with limit=1, only) hit of the first query.
    entity = hits[0][0].entity
    return (
        f"Destination: {entity.get('Destination')}\n"
        f"State: {entity.get('State')}\n"
        f"Description: {entity.get('Description')}\n"
        f"Tourist Attractions: {entity.get('TouristAttractions')}\n"
        f"Activities: {entity.get('Activities')}"
    )
    
def make_prompt(query, context):
    """Combine the user's query and the retrieved context into one prompt.

    Args:
        query: The user's question.
        context: Text describing the retrieved destination.

    Returns:
        A string with "Query:", "Context:" and "Answer:" sections separated
        by blank lines.
    """
    # "\n\nAnswer:" replaces the former "\n\Answer:", whose invalid "\A"
    # escape put a literal backslash into the prompt sent to the model.
    return f"Query: {query}\n\nContext:\n{context}\n\nAnswer: Please provide a warm, conversational response focusing on recreational activities if asked, using the context provided."

In [26]:
def main(query="Tell me about fun things to do in Mysore."):
    """Run one retrieve-then-generate round trip and print the answer.

    Args:
        query: Question to ask. Defaults to the Mysore example so that calling
            ``main()`` with no arguments behaves as before; pass another
            question instead of editing the function body.

    Uses the notebook-level ``collection``, ``system_message`` and
    ``chat_history`` objects.
    """
    relevant_text = get_relevant_chunk(query, collection)
    prompt = make_prompt(query, relevant_text)

    answer = generate_answer(system_message, chat_history, prompt)
    print("Answer:", answer)


if __name__=="__main__":
    main()

Chat history: ["User: Query: What are the entertaining activities in Vasco da Gama?\n\nContext:\nDestination: Vasco da Gama\nState: Goa\nDescription: A major port city known for its shipbuilding and beaches.\nTourist Attractions: Bogmalo Beach, Japanese Garden, St. Andrew's Church.\nActivities: Enjoy water sports and sunbathing at the beaches. Visit spice plantations and attend cooking classes. Explore the city's Portuguese colonial heritage. Shop for cashews, spices, and Goan handicrafts. Enjoy nightlife and live music at beach shacks and clubs.\n\\Answer: Please provide a warm, conversational response focusing on recreational activities if asked, using the context provided.", "Assistant: Hey there!  Planning a trip to Vasco da Gama, huh? That sounds fantastic!  Vasco da Gama is a really interesting place in Goa.\n\nSo, you're looking for entertaining activities?  Well, you're in luck!  Besides relaxing and soaking up the sun at Bogmalo Beach (which is great for water sports!), you ca