In [1]:
import pandas as pd
import os

# Load the places dataset from the CSV file (path is relative to the working directory).
places_df = pd.read_csv("data/places.csv")

In [5]:
# Inspect the column names available in the loaded dataset.
places_df.columns

Index(['id', 'name', 'location', 'description', 'rating', 'type', 'longitude',
       'latitude', 'required_time', 'activities', 'Image'],
      dtype='object')

In [6]:
def detailed_desc(place_id, name, desc, location, lon, lat, place_type, activities, time):
    """Render one place as a small Markdown snippet.

    The snippet has four lines: a heading with the name and id, a location
    line with the coordinates, a one-sentence general description, and the
    free-text description. The result ends with a trailing newline.

    Args:
        place_id: Identifier shown in the heading.
        name: Place name (used in the heading and the general description).
        desc: Free-text description placed after ``**About**``.
        location: Human-readable location text.
        lon: Longitude value, interpolated as-is.
        lat: Latitude value, interpolated as-is.
        place_type: Type label(s) of the place, interpolated as-is.
        activities: Activities text, interpolated as-is.
        time: Average visiting time in hours, interpolated as-is.

    Returns:
        str: The formatted Markdown text.
    """
    heading = f"# {name} (id = {place_id}):"
    where = f"**Location**: {location}, Longitude:{lon}, Latitude: {lat}."
    overview = (
        f"**General Description**: {name} is a {place_type} place where we can do "
        f"{activities} and takes around {time} hr. on average to explore."
    )
    about = f"**About**: {desc}"
    return "\n".join((heading, where, overview, about)) + "\n"

In [8]:
# Build the LLM-oriented description for every place, one row at a time.
places_df['description_llm'] = places_df.apply(
    lambda row: detailed_desc(
        place_id=row['id'],
        name=row['name'],
        desc=row['description'],
        location=row['location'],
        lon=row['longitude'],
        lat=row['latitude'],
        place_type=row['type'],
        activities=row['activities'],
        time=row['required_time'],
    ),
    axis=1,
)

# Generate embeddings and store

In [10]:
import numpy as np
# Re-create the `np.float_` alias as `np.float64`.
# NOTE(review): presumably a compatibility shim for a dependency that still
# references `np.float_` (the alias was removed in NumPy 2.0) — confirm which
# package needs it.
np.float_ = np.float64

In [11]:
from dotenv import load_dotenv
# Load environment variables from a local .env file, if one is found.
# NOTE(review): presumably supplies the API key for the embedding model — confirm.
load_dotenv()

True

In [13]:
from langchain_core.documents import Document
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

from uuid import uuid4
from typing import List, Tuple

In [29]:
# Name of the Chroma collection used by every read and write in this notebook.
DEFAULT_COLLECTION_NAME = 'all_places'
# Embedding model handed to the vector store as its embedding function.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Initialize Chroma vector store
def initialize_chroma_client():
    """Build a Chroma vector store handle for the default collection.

    The store is pointed at the ``./chroma_db`` directory and uses the
    module-level ``embeddings`` object as its embedding function.

    Returns:
        Chroma: A vector store bound to ``DEFAULT_COLLECTION_NAME``.
    """
    store_settings = {
        "collection_name": DEFAULT_COLLECTION_NAME,
        "embedding_function": embeddings,
        "persist_directory": "./chroma_db",
    }
    return Chroma(**store_settings)

# Store text chunks in Chroma vector store, replacing existing data if any
def store_chunks(data):
    """Embed one place record and write it to the Chroma collection.

    The document ID is derived from ``data['id']`` when that key is present,
    so storing the same place again reuses its ID rather than adding a
    duplicate under a fresh random one (previously every call generated a
    new UUID, so re-running the indexing cell duplicated every place).
    Records without an ``'id'`` fall back to a random UUID.

    NOTE(review): replacing an existing entry relies on the vector store
    upserting on a matching ID — confirm for the installed langchain_chroma
    version.

    Args:
        data: Dict for a single place. ``data['description_llm']`` becomes the
            document text; every other key/value pair is stored as metadata.

    Raises:
        KeyError: If ``data`` has no ``'description_llm'`` entry.
    """
    collection = initialize_chroma_client()

    # Everything except the text itself travels along as document metadata.
    metadata = {key: value for key, value in data.items() if key != 'description_llm'}

    document = Document(page_content=data['description_llm'], metadata=metadata)

    # Stable ID per place keeps repeated runs from piling up duplicates.
    place_id = data.get('id')
    doc_id = str(place_id) if place_id is not None else str(uuid4())

    collection.add_documents(documents=[document], ids=[doc_id])

# Retrieve top_n chunks similar to a query
def query_chunks(query: str, top_n: int = 10) -> List:
    """Return the text of the stored documents most similar to ``query``.

    Args:
        query: Free-text search query.
        top_n: Number of results requested from the similarity search.

    Returns:
        List: The ``page_content`` of each result, in the order the
        similarity search returned them.
    """
    store = initialize_chroma_client()
    return [match.page_content for match in store.similarity_search(query, k=top_n)]

In [30]:
# Index every place: each DataFrame row is converted to a plain dict and stored.
for _, place_row in places_df.iterrows():
    store_chunks(place_row.to_dict())


In [31]:
# Show the best match for a sample query; only the top 2 results are requested.
print(query_chunks("Historic place with lots of history", 2)[0])

# Narayanhiti Palace Museum (id = 7):
**Location**: 1 km north of central Kathmandu, Longitude:85.32, Latitude: 27.71.
**General Description**: Narayanhiti Palace Museum is a Historic, Cultural place where we can do Exploring and takes around 2.0 hr. on average to explore.
**About**: Narayanhiti Palace Museum, a former royal residence in Kathmandu, showcases Nepal's monarchy until its 2008 abolishment.  This museum offers visitors a look into royal life through its opulent rooms, including the Throne Room and Banquet Hall, and displays artifacts like gifts and personal belongings.  Its architecture blends traditional Nepali and modern styles, and the surrounding gardens contribute to a stately atmosphere.  It's a key destination for understanding Nepal's history and culture. 



In [11]:
# Re-check the available column names.
places_df.columns

Index(['id', 'name', 'location', 'description', 'rating', 'type', 'longitude',
       'latitude', 'required_time', 'activities', 'Image'],
      dtype='object')

In [12]:
# Collect the distinct labels found in the comma-separated 'type' column,
# printing each label the first time it is seen.
# NOTE: despite its name, `activities` holds values from the 'type' column.
activities = set()
for type_field in places_df['type']:
    for label in type_field.split(', '):
        if label in activities:
            continue
        activities.add(label)
        print(label)

Historic
Religious
Cultural
Commercial
Natural
Wildlife
