In [1]:
import hashlib
import os

import geojson
from IPython.display import Markdown, display
from langchain.chains import RetrievalQA, LLMChain
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_ollama import ChatOllama, OllamaEmbeddings

In [2]:
def load_geojson_from_directory(directory_path):
    """Collect the extracted feature records of every ``.geojson`` file in a directory.

    Only direct children of ``directory_path`` whose name ends with
    ``.geojson`` are read; sub-directories are not traversed.

    Files are visited in sorted filename order so the returned list has the
    same ordering on every run (``os.listdir`` gives no ordering guarantee),
    and each file is decoded as UTF-8, the encoding GeoJSON mandates, instead
    of the platform's locale default.

    Args:
        directory_path: Path of the directory to scan.

    Returns:
        A flat list holding, file after file, whatever
        ``extract_geojson_text`` returns for each parsed file.

    Raises:
        FileNotFoundError: If ``directory_path`` does not exist.
    """
    geojson_texts = []
    for filename in sorted(os.listdir(directory_path)):
        if not filename.endswith('.geojson'):
            continue
        file_path = os.path.join(directory_path, filename)
        # Explicit encoding: non-ASCII place names would otherwise be
        # mis-decoded (or fail) on systems whose default is not UTF-8.
        with open(file_path, encoding='utf-8') as f:
            data = geojson.load(f)
        # Extract text from each GeoJSON file
        geojson_texts.extend(extract_geojson_text(data))
    return geojson_texts

def extract_geojson_text(geojson_data):
    """Return the ``properties`` mapping of each feature in a parsed GeoJSON object.

    Args:
        geojson_data: A parsed GeoJSON mapping. A FeatureCollection (anything
            carrying a ``features`` list) contributes all of its features; a
            single ``Feature`` object is treated as a one-element collection;
            any other object (e.g. a bare geometry) contributes nothing.

    Returns:
        A list of the non-empty ``properties`` mappings, in feature order.
        Features whose ``properties`` is missing, ``null`` or empty are
        skipped: GeoJSON allows ``"properties": null``, and such entries
        would otherwise be stringified downstream into content-free
        documents like ``"None"``.
    """
    if geojson_data.get('type') == 'Feature':
        features = [geojson_data]
    else:
        # `or []` also covers an explicit `"features": null`.
        features = geojson_data.get('features') or []
    return [
        feature['properties']
        for feature in features
        if feature.get('properties')
    ]



In [3]:
# Folder (relative to the working directory) scanned for *.geojson inputs
directory_path = 'data'

# Gather the extracted feature records from every GeoJSON file found there
geojson_texts = load_geojson_from_directory(directory_path=directory_path)

In [4]:
# Chunking configuration: chunk_size=500 with an overlap of 50 between
# neighbouring chunks (lengths as measured by the splitter).
splitter = RecursiveCharacterTextSplitter(chunk_overlap=50, chunk_size=500)

# Stringify each extracted record, split it, and flatten all resulting
# chunks into one list that keeps the original record order.
split_geojson_texts = [
    chunk
    for record in geojson_texts
    for chunk in splitter.split_text(str(record))
]

In [5]:
# Embedding function handed to Chroma.from_texts to vectorise the text chunks.
# NOTE(review): presumably requires a reachable Ollama server with the
# 'nomic-embed-text' model already pulled — confirm before running.
embedding_fun = OllamaEmbeddings(model='nomic-embed-text')

In [6]:
# Directory passed to Chroma as `persist_directory` when the vector store is built.
persist_directory = 'db4'

In [7]:
# The store is persisted to `persist_directory`, so re-running this cell
# (e.g. Restart Kernel -> Run All) would otherwise add the same chunks again
# under fresh random ids and retrieval would start returning duplicates.
# Deriving each id from the chunk's content turns the insert into an upsert.
# Ids must be unique within one batch, so identical chunks are collapsed
# first (dict.fromkeys keeps first-seen order).
unique_texts = list(dict.fromkeys(split_geojson_texts))
chunk_ids = [
    hashlib.sha256(text.encode('utf-8')).hexdigest() for text in unique_texts
]
vectordb = Chroma.from_texts(
    unique_texts,
    embedding_fun,
    ids=chunk_ids,
    persist_directory=persist_directory,
)

In [20]:
# Chat model that the RetrievalQA chain uses to generate the answer.
# NOTE(review): presumably requires the "llama3.2:1b" model to be available
# in the local Ollama installation — confirm before running.
llm = ChatOllama(model="llama3.2:1b")

In [9]:
# Prompt given to the QA chain. {context} and {question} are the two
# placeholders it fills in. The template text is spelled out line by line so
# the indentation and whitespace-only lines that are part of the prompt are
# visible rather than hidden inside a triple-quoted block.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=(
        "You are an assistant. Use the following document to answer the user's question:\n"
        "    Document: {context}\n"
        "    \n"
        "    Question: {question}\n"
        "    \n"
        "    Answer:"
    ),
)

In [10]:
def format_context(docs):
    """Merge the text of several retrieved documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values in input order, separated by one blank
        line; an empty string when ``docs`` is empty.
    """
    passages = (document.page_content for document in docs)
    return "\n\n".join(passages)

In [11]:
# Retrieval-augmented QA chain: documents come from the Chroma store's
# retriever, and the custom prompt is forwarded to the underlying chain.
qa_pipeline = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectordb.as_retriever(),
    chain_type_kwargs={"prompt": prompt},
)

In [76]:
query = "What is the street address of Lifeline Hospital and Research Centre?"
# Fetch the documents the retriever considers relevant to the question.
# `get_relevant_documents` is deprecated in LangChain; retrievers are
# Runnables and `invoke` is the supported call returning the same documents.
retrieved_docs = qa_pipeline.retriever.invoke(query)

In [77]:
# Join the retrieved documents' page_content into a single string.
formatted_context = format_context(retrieved_docs)

In [78]:
# RetrievalQA consumes only the "query" key: it runs its own retrieval and
# fills the prompt's {context} from those documents, so a hand-built
# "context" entry in the input is never used and is therefore not sent.
# `run` is deprecated; `invoke` returns a dict with the answer text stored
# under "result", which keeps `response` a plain string.
response = qa_pipeline.invoke({"query": query})["result"]

In [79]:
# Render the model's answer as Markdown in the cell output.
display(Markdown(response))

The street address of Lifeline Hospital and Research Centre is 'Jagat Goswami Road'.