In [1]:
# Install the notebook's dependencies into the running kernel's environment.
# %pip (rather than !pip) targets the kernel's interpreter; the pins are the
# versions this notebook was last run with (see the recorded output below).
%pip install neo4j==5.28.1 llama-index==0.12.35 llama-index-embeddings-huggingface==0.5.3

Collecting neo4j
  Downloading neo4j-5.28.1-py3-none-any.whl.metadata (5.9 kB)
Collecting llama-index
  Downloading llama_index-0.12.35-py3-none-any.whl.metadata (12 kB)
Collecting llama-index-embeddings-huggingface
  Downloading llama_index_embeddings_huggingface-0.5.3-py3-none-any.whl.metadata (767 bytes)
Collecting llama-index-agent-openai<0.5,>=0.4.0 (from llama-index)
  Downloading llama_index_agent_openai-0.4.7-py3-none-any.whl.metadata (438 bytes)
Collecting llama-index-cli<0.5,>=0.4.1 (from llama-index)
  Downloading llama_index_cli-0.4.1-py3-none-any.whl.metadata (1.5 kB)
Collecting llama-index-core<0.13,>=0.12.35 (from llama-index)
  Downloading llama_index_core-0.12.35-py3-none-any.whl.metadata (2.4 kB)
Collecting llama-index-embeddings-openai<0.4,>=0.3.0 (from llama-index)
  Downloading llama_index_embeddings_openai-0.3.1-py3-none-any.whl.metadata (684 bytes)
Collecting llama-index-indices-managed-llama-cloud>=0.4.0 (from llama-index)
  Downloading llama_index_indices_manag

In [None]:
from neo4j import GraphDatabase
import json
# from sentence_transformers import SentenceTransformer # Replaced by llama_index
from llama_index.embeddings.huggingface import HuggingFaceEmbedding # New import
import numpy as np
import re # For basic slugification
import time # For potential delays if needed

# --- Embedding model configuration (LlamaIndex HuggingFaceEmbedding) ---
# The model itself is instantiated in Neo4jUploader.__init__. It is downloaded
# the first time it is used and is fairly large, so initialization may take a moment.
EMBEDDING_MODEL_NAME = "BAAI/bge-large-en-v1.5"  # default for Neo4jUploader(embedding_model_name=...)
# Vector size the Neo4j vector indexes have to be created with.
# NOTE(review): not referenced by the code visible in this cell - presumably used
# wherever the vector indexes are created; confirm.
EMBEDDING_DIMENSIONS = 1024 # Critical for Neo4j Vector Index

class Neo4jUploader:
    def __init__(self, uri, user, password, embedding_model_name=EMBEDDING_MODEL_NAME):
        """Open the Neo4j driver and load the HuggingFace embedding model.

        Args:
            uri: Connection URI handed to ``GraphDatabase.driver``.
            user: Neo4j user name.
            password: Neo4j password.
            embedding_model_name: Model name handed to ``HuggingFaceEmbedding``.

        Raises:
            Exception: Whatever ``HuggingFaceEmbedding`` raised. It is re-raised
                after the already opened driver has been closed again.
        """
        self.driver = GraphDatabase.driver(uri, auth=(user, password))
        self.embed_model = None
        print(f"Initializing LlamaIndex HuggingFace embedding model: {embedding_model_name}...")
        try:
            self.embed_model = HuggingFaceEmbedding(model_name=embedding_model_name)
        except Exception as e:
            print(f"Error initializing HuggingFaceEmbedding model: {e}")
            print("Please ensure the model name is correct and you have internet access for the first download.")
            print("You might need to install additional dependencies like 'pip install torch sentence-transformers'.")
            # Construction is about to fail, so the caller never receives an
            # instance it could call close() on; release the driver here,
            # otherwise the connection pool leaks.
            try:
                self.driver.close()
            finally:
                self.driver = None
            raise # Re-raise the exception to stop execution if model is critical
        print("Embedding model initialized successfully.")

    def close(self):
        if self.driver:
            self.driver.close()
            print("Neo4j connection closed.")

    def _run_query(self, query, parameters=None):
        try:
            with self.driver.session(database="neo4j") as session:
                result = session.run(query, parameters)
                return [record for record in result]
        except Exception as e:
            print(f"Error executing Cypher query: {e}")
            print(f"Query: {query}")
            print(f"Parameters: {parameters}")
            return []

    def generate_embedding(self, text):
        if not self.embed_model:
            print("Error: Embedding model not initialized. Cannot generate embedding.")
            return None
        if not text or not isinstance(text, str):
            # print(f"Warning: Cannot generate embedding for non-string or empty text: {text}")
            return None # Or handle as an error
        try:
            # LlamaIndex's HuggingFaceEmbedding typically returns a list of floats directly
            embedding_vector = self.embed_model.get_text_embedding(text)
            return embedding_vector # Should already be a list of floats
        except Exception as e:
            print(f"Error generating embedding for text '{text[:50]}...': {e}")
            return None

    def _basic_slugify(self, text_parts):
        slug = "-".join(str(part).strip() for part in text_parts if str(part).strip())
        slug = re.sub(r'[^\w\s-]', '', slug).strip().lower()
        slug = re.sub(r'[-\s]+', '-', slug)
        return slug if slug else "unknown"

    # --- Quran verses: embed each verse and MERGE it as a QuranVerse node ---
    def upload_quran_verses(self, quran_json_data):
        print("Starting Quran verse upload...")
        query_verse = """
        UNWIND keys($chapter_map) AS chapter_key
        UNWIND $chapter_map[chapter_key] AS verse_data
        MERGE (qv:QuranVerse {unique_id: 'Quran:' + verse_data.chapter + ':' + verse_data.verse})
        ON CREATE SET qv.chapter_number = toInteger(verse_data.chapter),
                      qv.verse_number = toInteger(verse_data.verse),
                      qv.text_english = verse_data.text,
                      qv.embedding = $embedding_map['Quran:' + verse_data.chapter + ':' + verse_data.verse]
        ON MATCH SET  qv.text_english = verse_data.text,
                      qv.embedding = $embedding_map['Quran:' + verse_data.chapter + ':' + verse_data.verse]
        RETURN count(qv) AS verses_processed
        """
        embedding_map = {}
        processed_count = 0
        if not quran_json_data:
            print("  No Quran data provided to upload_quran_verses.")
            return

        total_verses_to_embed = sum(len(verses) for verses in quran_json_data.values())
        print(f"  Generating embeddings for approximately {total_verses_to_embed} Quran verses...")

        for chapter_key, verses in quran_json_data.items():
            for verse in verses:
                unique_id = f"Quran:{verse['chapter']}:{verse['verse']}"
                embedding = self.generate_embedding(verse['text'])
                if embedding: # Only add if embedding was successful
                    embedding_map[unique_id] = embedding
                processed_count +=1
                if processed_count % 500 == 0 or processed_count == total_verses_to_embed:
                    print(f"    Generated embeddings for {processed_count}/{total_verses_to_embed} Quran verses...")

        if not embedding_map:
            print("  No embeddings were generated for Quran verses. Aborting upload for Quran.")
            return

        # Filter quran_json_data to only include verses for which embeddings were generated
        filtered_quran_data = {}
        for chapter_key, verses in quran_json_data.items():
            filtered_verses = []
            for verse in verses:
                unique_id = f"Quran:{verse['chapter']}:{verse['verse']}"
                if unique_id in embedding_map:
                    filtered_verses.append(verse)
            if filtered_verses:
                filtered_quran_data[chapter_key] = filtered_verses
        if not filtered_quran_data:
            print("  No Quran verses with successful embeddings to upload.")
            return

        result = self._run_query(query_verse, parameters={"chapter_map": filtered_quran_data, "embedding_map": embedding_map})
        if result:
            print(f"Quran verses and embeddings processed/uploaded: {result[0]['verses_processed']}")
        else:
            print("Quran verse upload might have encountered an issue or no verses were processed.")


    # --- Sahih Bukhari hadiths: embed each hadith and MERGE it as a Hadith node ---
    def upload_bukhari_hadiths(self, bukhari_json_data):
        print("Starting Sahih Bukhari Hadith upload...")
        query_hadith = """
        UNWIND $volume_list AS volume_data
        UNWIND volume_data.books AS book_data
        UNWIND book_data.hadiths AS hadith_item
        // Using pre-calculated unique_id and ensuring it exists in the embedding_map
        WITH volume_data.name AS volume_name, book_data.name AS book_name, hadith_item,
             $id_to_unique_id_map[hadith_item.info + '|' + hadith_item.text] AS hadith_unique_id
        WHERE hadith_unique_id IS NOT NULL AND $embedding_map[hadith_unique_id] IS NOT NULL
        MERGE (h:Hadith {unique_id: hadith_unique_id})
        ON CREATE SET h.volume_name = volume_name,
                      h.book_name_english = book_name,
                      h.info_text = hadith_item.info,
                      h.narrated_by = hadith_item.by,
                      h.text_english = hadith_item.text,
                      h.embedding = $embedding_map[hadith_unique_id]
        ON MATCH SET  h.text_english = hadith_item.text,
                      h.embedding = $embedding_map[hadith_unique_id]
        RETURN count(h) AS hadiths_processed
        """
        embedding_map = {}
        id_to_unique_id_map = {} # Maps original composite key to the generated unique_id
        hadith_counter = 0
        total_hadiths_to_embed = sum(len(book.get("hadiths", [])) for volume in bukhari_json_data for book in volume.get("books", []))
        print(f"  Generating embeddings for approximately {total_hadiths_to_embed} Hadiths...")

        for volume in bukhari_json_data:
            vol_name = volume.get("name", "UnknownVolume")
            for book in volume.get("books", []):
                book_name = book.get("name", "UnknownBook")
                for hadith_data in book.get("hadiths", []):
                    info_text = hadith_data.get("info", "")
                    hadith_text_content = hadith_data.get("text", "")
                    if not hadith_text_content: # Skip if no text for embedding
                        continue

                    # Create a temporary key from original data to map to the generated unique_id
                    original_data_key = info_text + '|' + hadith_text_content

                    slug_parts = [ vol_name, book_name, info_text, str(hadith_counter)]
                    unique_id_val = "Bukhari:" + self._basic_slugify(slug_parts)
                    id_to_unique_id_map[original_data_key] = unique_id_val

                    embedding = self.generate_embedding(hadith_text_content)
                    if embedding:
                        embedding_map[unique_id_val] = embedding
                    hadith_counter += 1
                    if hadith_counter % 500 == 0 or hadith_counter == total_hadiths_to_embed:
                        print(f"    Generated embeddings for {hadith_counter}/{total_hadiths_to_embed} Hadiths...")

        if not embedding_map:
            print("  No embeddings were generated for Hadiths. Aborting upload for Bukhari.")
            return

        # Filter bukhari_json_data to only include hadiths for which embeddings were generated
        # and reconstruct the list for UNWIND to ensure hadith_item matches what's in id_to_unique_id_map
        filtered_bukhari_data = []
        for volume in bukhari_json_data:
            filtered_volume = {"name": volume.get("name"), "books": []}
            for book in volume.get("books", []):
                filtered_book = {"name": book.get("name"), "hadiths": []}
                for hadith_item in book.get("hadiths", []):
                    original_data_key = hadith_item.get("info", "") + '|' + hadith_item.get("text", "")
                    # Check if this hadith's generated unique_id has an embedding
                    if original_data_key in id_to_unique_id_map and id_to_unique_id_map[original_data_key] in embedding_map:
                        filtered_book["hadiths"].append(hadith_item)
                if filtered_book["hadiths"]:
                    filtered_volume["books"].append(filtered_book)
            if filtered_volume["books"]:
                filtered_bukhari_data.append(filtered_volume)

        if not filtered_bukhari_data:
            print("  No Hadith data with successful embeddings to upload.")
            return

        result = self._run_query(query_hadith, parameters={
            "volume_list": filtered_bukhari_data,
            "embedding_map": embedding_map,
            "id_to_unique_id_map": id_to_unique_id_map
        })
        if result:
            print(f"Sahih Bukhari hadiths and embeddings processed/uploaded: {result[0]['hadiths_processed']}")
        else:
            print("Sahih Bukhari Hadith upload might have encountered an issue or no hadiths were processed.")


    # --- General Islamic concepts: embed name + definition and MERGE as IslamicConcept nodes ---
    def create_general_concepts_with_embeddings(self, concepts_list):
        print("Starting general Islamic concept upload...")
        # ... (implementation is the same as before, just ensure self.generate_embedding is called)
        query = """
        UNWIND $concepts AS concept_data
        MERGE (ic:IslamicConcept {name: concept_data.name})
        ON CREATE SET ic.definition_english = concept_data.definition_english,
                      ic.category = concept_data.category,
                      ic.embedding = $embedding_map[concept_data.name]
        ON MATCH SET  ic.definition_english = concept_data.definition_english,
                      ic.category = concept_data.category,
                      ic.embedding = $embedding_map[concept_data.name]
        RETURN count(ic) AS concepts_processed
        """
        embedding_map = {}
        for concept in concepts_list:
            text_to_embed = concept['name']
            if 'definition_english' in concept and concept['definition_english']:
                text_to_embed += ". " + concept['definition_english']
            embedding = self.generate_embedding(text_to_embed)
            if embedding:
                embedding_map[concept['name']] = embedding
        if not embedding_map:
            print("  No embeddings generated for general concepts. Skipping upload.")
            return
        
        # Filter concepts to only include those for which embeddings were generated
        filtered_concepts_list = [c for c in concepts_list if c['name'] in embedding_map]

        result = self._run_query(query, parameters={"concepts": filtered_concepts_list, "embedding_map": embedding_map})
        if result:
            print(f"General Islamic concepts processed/uploaded: {result[0]['concepts_processed']}")

    # --- Islamic finance products: embed a product summary and MERGE as IslamicFinanceProduct nodes ---
    def create_finance_product_concepts_with_embeddings(self, products_list):
        print("Starting Islamic finance product concept upload...")
        # ... (implementation is the same as before, just ensure self.generate_embedding is called)
        query = """
        UNWIND $products AS product_data
        MERGE (ifp:IslamicFinanceProduct {name: product_data.name})
        ON CREATE SET ifp.use_case = product_data.use_case,
                      ifp.scenario_summary = product_data.scenario_summary,
                      ifp.implementation_summary = product_data.implementation_summary,
                      ifp.popular_for = product_data.popular_for,
                      ifp.fas_reference = product_data.fas_reference,
                      ifp.embedding = $embedding_map[product_data.name]
        ON MATCH SET  ifp.use_case = product_data.use_case,
                      ifp.scenario_summary = product_data.scenario_summary,
                      ifp.implementation_summary = product_data.implementation_summary,
                      ifp.popular_for = product_data.popular_for,
                      ifp.fas_reference = product_data.fas_reference,
                      ifp.embedding = $embedding_map[product_data.name]
        RETURN count(ifp) AS products_processed
        """
        embedding_map = {}
        for product in products_list:
            text_to_embed = f"Product: {product['name']}. "
            text_to_embed += f"Use Case: {product.get('use_case', '')}. "
            text_to_embed += f"FAS Reference: {product.get('fas_reference', '')}. "
            text_to_embed += f"Scenario: {product.get('scenario_summary', '')}. "
            text_to_embed += f"Implementation: {product.get('implementation_summary', '')}. "
            text_to_embed += f"Popular for: {product.get('popular_for', '')}."
            embedding = self.generate_embedding(text_to_embed)
            if embedding:
                embedding_map[product['name']] = embedding
        if not embedding_map:
            print("  No embeddings generated for finance products. Skipping upload.")
            return

        filtered_products_list = [p for p in products_list if p['name'] in embedding_map]

        result = self._run_query(query, parameters={"products": filtered_products_list, "embedding_map": embedding_map})
        if result:
            print(f"Islamic finance product concepts processed/uploaded: {result[0]['products_processed']}")
    def upload_document_chunks(self, llama_index_nodes):
        """
        Uploads LlamaIndex Node objects (chunks) to Neo4j.
        Assumes nodes might or might not have pre-computed embeddings.
        If node.embedding is None, it will attempt to generate one.
        """
        if not self.embed_model:
            print("Error: Embedding model not initialized. Cannot process document chunks.")
            return
        if not llama_index_nodes:
            print("No document chunks provided to upload.")
            return

        print(f"Starting upload of {len(llama_index_nodes)} document chunks...")
        query_chunk = """
        UNWIND $chunk_batch AS chunk_data
        MERGE (dc:DocumentChunk {chunk_id: chunk_data.chunk_id})
        ON CREATE SET dc.text_content = chunk_data.text_content,
                      dc.source_document_name = chunk_data.source_document_name,
                      dc.page_number = chunk_data.page_number,
                      dc.extra_metadata = chunk_data.extra_metadata,
                      dc.embedding = chunk_data.embedding
        ON MATCH SET  dc.text_content = chunk_data.text_content,
                      dc.source_document_name = chunk_data.source_document_name,
                      dc.page_number = chunk_data.page_number,
                      dc.extra_metadata = chunk_data.extra_metadata,
                      dc.embedding = chunk_data.embedding
        RETURN count(dc) AS chunks_processed
        """

        batch_size = 50
        chunks_data_for_neo4j_batch = []
        total_uploaded_to_db = 0
        freshly_embedded_count = 0

        for i, node in enumerate(llama_index_nodes):
            chunk_id = node.node_id
            text_content = node.get_content()
            metadata = node.metadata or {}
            source_document_name = metadata.get('file_name', 'Unknown Source')
            page_number_str = metadata.get('page_label', None)
            page_number = None
            if page_number_str is not None:
                try: page_number = int(page_number_str)
                except ValueError: page_number = None # Keep as None if not a valid int

            embedding = getattr(node, 'embedding', None) # Safely get embedding
            if not embedding:
                # print(f"  Node {chunk_id} missing embedding. Generating fresh one...")
                embedding = self.generate_embedding(text_content)
                if embedding:
                    freshly_embedded_count += 1
                else:
                    print(f"  Skipping chunk {chunk_id} from {source_document_name} due to embedding generation failure.")
                    continue # Skip this node if embedding failed

            if not embedding: # Double check if embedding is still None
                print(f"  Skipping chunk {chunk_id} (final check) - no embedding.")
                continue

            chunks_data_for_neo4j_batch.append({
                "chunk_id": chunk_id,
                "text_content": text_content,
                "source_document_name": source_document_name,
                "page_number": page_number,
                "extra_metadata": json.dumps(metadata), # Store all metadata
                "embedding": embedding
            })

            if (i + 1) % batch_size == 0 or (i + 1) == len(llama_index_nodes):
                if chunks_data_for_neo4j_batch:
                    print(f"  Uploading batch of {len(chunks_data_for_neo4j_batch)} chunks to Neo4j ({i+1}/{len(llama_index_nodes)} processed)...")
                    result = self._run_query(query_chunk, parameters={"chunk_batch": chunks_data_for_neo4j_batch})
                    if result and result[0]['chunks_processed'] is not None:
                        total_uploaded_to_db += result[0]['chunks_processed']
                    else:
                        print(f"    Batch upload might have encountered an issue for {len(chunks_data_for_neo4j_batch)} chunks.")
                    chunks_data_for_neo4j_batch = [] # Clear batch

        if freshly_embedded_count > 0:
            print(f"  Generated fresh embeddings for {freshly_embedded_count} chunks.")
        print(f"Document chunk upload complete. Total chunks uploaded/updated in DB: {total_uploaded_to_db}")


    # --- Linking: connect nodes whose embeddings have a cosine similarity above a threshold ---
    def link_nodes_by_semantic_similarity(self, node1_label, node2_label, relationship_type, threshold=0.75, id_prop1='unique_id', id_prop2='unique_id'):
        print(f"Attempting to link {node1_label} with {node2_label} using relationship {relationship_type} (threshold: {threshold})...")
        # ... (implementation remains the same as the previous complete code example)
        query1 = f"MATCH (n1:{node1_label}) WHERE n1.embedding IS NOT NULL RETURN n1.{id_prop1} AS id1, n1.embedding AS emb1"
        nodes1_data = self._run_query(query1)

        query2 = f"MATCH (n2:{node2_label}) WHERE n2.embedding IS NOT NULL RETURN n2.{id_prop2} AS id2, n2.embedding AS emb2"
        nodes2_data = self._run_query(query2)

        if not nodes1_data or not nodes2_data:
            print(f"  No nodes found for one or both labels ({node1_label}, {node2_label}) with embeddings. Skipping linking.")
            return

        print(f"  Comparing {len(nodes1_data)} {node1_label}(s) with {len(nodes2_data)} {node2_label}(s)...")

        link_query = f"""
        MATCH (n1:{node1_label} {{{id_prop1}: $id1}})
        MATCH (n2:{node2_label} {{{id_prop2}: $id2}})
        MERGE (n1)-[r:{relationship_type}]->(n2)
        ON CREATE SET r.score = $score, r.method = 'embedding_similarity'
        ON MATCH SET r.score = $score, r.method = 'embedding_similarity_updated'
        """
        links_created_total = 0
        processed_node1_count = 0

        for node1 in nodes1_data:
            processed_node1_count += 1
            if not node1.get('emb1') or not node1.get('id1'): continue # Added .get for safety
            emb1_np = np.array(node1['emb1'])
            if emb1_np.ndim == 0 or emb1_np.size == 0: continue

            links_for_node1 = 0
            for node2 in nodes2_data:
                if not node2.get('emb2') or not node2.get('id2'): continue # Added .get for safety
                if node1_label == node2_label and node1['id1'] == node2['id2']:
                    continue

                emb2_np = np.array(node2['emb2'])
                if emb2_np.ndim == 0 or emb2_np.size == 0: continue

                try:
                    # Ensure embeddings are not zero vectors before normalization
                    norm_emb1 = np.linalg.norm(emb1_np)
                    norm_emb2 = np.linalg.norm(emb2_np)
                    if norm_emb1 == 0 or norm_emb2 == 0:
                        similarity = 0.0
                    else:
                        similarity = np.dot(emb1_np, emb2_np) / (norm_emb1 * norm_emb2)
                    if np.isnan(similarity):
                        similarity = 0.0
                except Exception as e:
                    similarity = 0.0

                if similarity >= threshold:
                    self._run_query(link_query, parameters={"id1": node1['id1'], "id2": node2['id2'], "score": float(similarity)})
                    links_for_node1 += 1
            links_created_total += links_for_node1
            if processed_node1_count % 100 == 0 or links_for_node1 > 0 :
                 print(f"  Processed {processed_node1_count}/{len(nodes1_data)} {node1_label}s. '{node1['id1']}' linked to {links_for_node1} {node2_label}(s). Total links so far: {links_created_total}")

        print(f"Finished linking {node1_label} and {node2_label}. Total links created/updated above threshold: {links_created_total}")

    # --- Semantic search over the Neo4j vector indexes ---

    def search_relevant_nodes_by_text(self, search_text, node_label, index_name, top_k=5):
        """
        Searches for nodes of a given label that are semantically similar to the search_text.

        Args:
            search_text (str): The text to search for.
            node_label (str): The Neo4j label of the nodes to search (e.g., "QuranVerse", "Hadith").
            index_name (str): The name of the Neo4j vector index for that node label.
            top_k (int): The number of top similar results to return.

        Returns:
            list: A list of dictionaries, where each dictionary contains the node's properties and the similarity score.
        """
        if not self.embed_model:
            print("Error: Embedding model not initialized. Cannot perform search.")
            return []
        if not search_text:
            print("Error: Search text cannot be empty.")
            return []

        # print(f"Generating embedding for search text: '{search_text[:50]}...'")
        query_embedding = self.generate_embedding(search_text)

        if not query_embedding:
            print("Error: Could not generate embedding for the search text.")
            return []

        # Ensure your Neo4j version supports db.index.vector.queryNodes (5.11+)
        # and that the vector index 'index_name' exists for 'node_label'
        # with the correct dimensions and similarity function.
        cypher_query = f"""
        CALL db.index.vector.queryNodes($index_name, $top_k, $query_embedding)
        YIELD node, score
        // Ensure the node has the expected label, though the index should handle this.
        // WHERE $node_label IN labels(node) // Optional safeguard
        RETURN node, score
        """
        # For older Neo4j without vector index, this would be very inefficient:
        # You'd have to MATCH all nodes, get all embeddings, and compute similarity in Python.

        # print(f"Querying Neo4j vector index '{index_name}' for '{node_label}' nodes similar to '{search_text[:30]}...'")
        results = self._run_query(cypher_query, parameters={
            "index_name": index_name,
            "top_k": top_k,
            "query_embedding": query_embedding
            # "node_label": node_label # Only if using the optional WHERE clause
        })

        formatted_results = []
        if results:
            for record in results:
                node_data = dict(record["node"]) # Convert Neo4j Node object to dictionary
                node_data["similarity_score"] = record["score"]
                formatted_results.append(node_data)
            # print(f"Found {len(formatted_results)} relevant {node_label} nodes.")
        # else:
            # print(f"No relevant {node_label} nodes found for the search text or an error occurred.")

        return formatted_results
    # --- Convenience wrappers around search_relevant_nodes_by_text ---
    def search_document_chunks_by_text(self, search_text, top_k=5):
        """
        Searches for DocumentChunk nodes semantically similar to the search_text.
        Assumes a vector index named 'document_chunk_embeddings' exists.
        """
        return self.search_relevant_nodes_by_text(
            search_text=search_text,
            node_label="DocumentChunk",
            index_name="document_chunk_embeddings", # Ensure this index is created
            top_k=top_k
        )
    
    def search_all_sources_by_text(self, search_text, top_k=6):
        results = []

        # Search in FAS Document Chunks
        doc_chunks = self.search_relevant_nodes_by_text(
            search_text=search_text,
            node_label="DocumentChunk",
            index_name="document_chunk_embeddings",
            top_k=top_k
        )
        for result in doc_chunks:
            result["source_type"] = "DocumentChunk"
            results.append(result)

        # Search in Hadiths
        hadiths = self.search_relevant_nodes_by_text(
            search_text=search_text,
            node_label="Hadith",
            index_name="hadith_embeddings",
            top_k=top_k
        )
        for result in hadiths:
            result["source_type"] = "Hadith"
            results.append(result)

        # Search in QuranVerses
        quran_verses = self.search_relevant_nodes_by_text(
            search_text=search_text,
            node_label="QuranVerse",
            index_name="quran_verse_embeddings",
            top_k=top_k
        )
        for result in quran_verses:
            result["source_type"] = "QuranVerse"
            results.append(result)

        # Sort all results by similarity score (descending)
        results.sort(key=lambda x: x.get("similarity_score", 0), reverse=True)

        return results[:top_k]
   



In [None]:

# --- Main Execution ---
if __name__ == "__main__":
    NEO4J_URI = "neo4j://localhost:7687"
    NEO4J_USER = "neo4j"
    NEO4J_PASSWORD = "yourStrongPassword123" # <<< CHANGE THIS!

    uploader = None
    try:
        # Initialize uploader (this will also initialize the embedding model)
        uploader = Neo4jUploader(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)
        print("Successfully connected to Neo4j and initialized uploader.")

        # --- Load your JSON data ---
        print("Loading JSON data files...")
        quran_json_data = None
        bukhari_json_data = None
        try:
            with open('data/en.json', 'r', encoding='utf-8') as f:
                quran_json_data = json.load(f)
            print("  quran.json loaded successfully.")
        except FileNotFoundError:
            print("Error: quran.json not found. Please ensure it's in the same directory.")
        except json.JSONDecodeError:
            print("Error: quran.json is not valid JSON.")

        try:
            with open('data/sahih_bukhari.json', 'r', encoding='utf-8') as f:
                bukhari_json_data = json.load(f)
            print("  bukhari.json loaded successfully.")
        except FileNotFoundError:
            print("Error: bukhari.json not found. Please ensure it's in the same directory.")
        except json.JSONDecodeError:
            print("Error: bukhari.json is not valid JSON.")


        # --- Define concepts and products ---
        general_islamic_concepts = [
            {"name": "Tawhid", "definition_english": "The oneness of God, the indivisible monotheistic concept of Islam.", "category": "Core Belief"},
            {"name": "Sunnah", "definition_english": "The verbally transmitted record of the teachings, deeds and sayings, silent permissions (or disapprovals) of the Islamic prophet Muhammad.", "category": "Source of Law"},
            {"name": "Intention (Niyyah)", "definition_english": "The underlying purpose or intent behind an action, crucial for its validity and reward in Islam.", "category": "Core Principle"},
            {"name": "Halal", "definition_english": "Permissible or lawful in Islam.", "category": "Ruling"},
            {"name": "Haram", "definition_english": "Forbidden or unlawful in Islam.", "category": "Ruling"}
        ]
        islamic_finance_products_from_fas = [
            {
                "name": "Musharaka Financing", "fas_reference": "FAS 4", "use_case": "Co-investment in a small manufacturing business",
                "scenario_summary": "IFI contributes $1,000,000 and client $500,000 for a water bottling plant.",
                "implementation_summary": "Sign Musharaka contract (profit/loss sharing), IFI disburses capital (Musharaka Financing), profit split recognized, loss shared proportionally, value distributed on termination.",
                "popular_for": "Project finance, SMEs, agricultural ventures, or real estate development."
            },
            {
                "name": "Murabaha and Deferred Payment Sale", "fas_reference": "FAS 28", "use_case": "Asset financing for equipment purchase",
                "scenario_summary": "Client wants machine ($150k). IFI buys and resells for $175k (deferred over 5 months).",
                "implementation_summary": "Client gives binding promise, IFI purchases (Murabaha Inventory), sells on credit (Murabaha Receivable = $175k), records profit ($25k in Deferred Profit), recognizes monthly income ($5k), receives payments.",
                "popular_for": "Consumer goods, vehicles, real estate, corporate equipment."
            },
            {
                "name": "Salam and Parallel Salam", "fas_reference": "FAS 7", "use_case": "Agricultural financing with a back-to-back contract",
                "scenario_summary": "Hilal Islamic Bank pays farmer $100k for 50k tons of barley in 3 months. Signs Parallel Salam to sell for $105k.",
                "implementation_summary": "Hilal Bank pays upfront (Salam Financing), enters Parallel Salam. On delivery, inventory recognized ($100k). Delivery to final buyer: revenue ($105k), profit ($5k). Failure: receivable or adjusted against security.",
                "popular_for": "Advance purchase of crops, minerals, or commodities with resale commitment."
            },
            {
                "name": "Istisna’a and Parallel Istisna’a", "fas_reference": "FAS 10", "use_case": "Construction of a custom building for a client",
                "scenario_summary": "Client wants custom warehouse. Bank agrees to build for $1M. Subcontracts for $800k.",
                "implementation_summary": "IFI enters Istisna’a with client, then Parallel Istisna’a with constructor. Records WIP. Revenue recognized by percentage of completion. Remaining revenue + profit on final delivery. Cost overruns/delays: bank may reduce payment/seek penalties.",
                "popular_for": "Infrastructure, housing projects, industrial manufacturing orders."
            },
            {
                "name": "Ijarah (Leasing)", "fas_reference": "FAS 32", "use_case": "Leasing of industrial equipment with ownership transfer at the end",
                "scenario_summary": "Bank leases truck ($100k) for 5 years with monthly rentals and ownership transfer at end.",
                "implementation_summary": "IFI purchases truck (Ijarah Asset). Client signs Ijarah Muntahia Bittamleek. IFI depreciates, recognizes rental income. Client records right-of-use asset & Ijarah liability. Ownership transferred (sale/gift at end).",
                "popular_for": "Equipment leasing, vehicles, aircrafts, medical machines."
            }
        ]

        # --- Step 1: Upload Nodes with Embeddings ---
        if quran_json_data:
            uploader.upload_quran_verses(quran_json_data)
        if bukhari_json_data:
            uploader.upload_bukhari_hadiths(bukhari_json_data)

        uploader.create_general_concepts_with_embeddings(general_islamic_concepts)
        uploader.create_finance_product_concepts_with_embeddings(islamic_finance_products_from_fas)

        print("\nNode uploading complete.")
        print("IMPORTANT: Create Neo4j Vector Indexes now if you haven't for efficient semantic search!")
        print(f"Example for QuranVerse (dimensions: {EMBEDDING_DIMENSIONS}):")
        print(f"CREATE VECTOR INDEX quran_verse_embeddings IF NOT EXISTS FOR (n:QuranVerse) ON (n.embedding) OPTIONS {{indexConfig: {{`vector.dimensions`: {EMBEDDING_DIMENSIONS}, `vector.similarity_function`: 'cosine'}}}}")
        # Add similar CREATE VECTOR INDEX commands for Hadith, IslamicConcept, IslamicFinanceProduct


        # --- Step 2: Link Nodes by Semantic Similarity (Run selectively and after indexing) ---
        print("\nStarting semantic linking (this can be time-consuming)...")
        if quran_json_data: # Only run if data was loaded
            uploader.link_nodes_by_semantic_similarity(
                node1_label="QuranVerse", node2_label="IslamicConcept",
                relationship_type="SEMANTICALLY_RELATES_TO_CONCEPT", threshold=0.65, id_prop2='name' # BGE models often need higher thresholds
            )
            uploader.link_nodes_by_semantic_similarity(
                node1_label="QuranVerse", node2_label="IslamicFinanceProduct",
                relationship_type="SEMANTICALLY_RELATES_TO_PRODUCT", threshold=0.60, id_prop2='name'
            )
        if bukhari_json_data: # Only run if data was loaded
            uploader.link_nodes_by_semantic_similarity(
                node1_label="Hadith", node2_label="IslamicConcept",
                relationship_type="SEMANTICALLY_RELATES_TO_CONCEPT", threshold=0.65, id_prop2='name'
            )
            uploader.link_nodes_by_semantic_similarity(
                node1_label="Hadith", node2_label="IslamicFinanceProduct",
                relationship_type="SEMANTICALLY_RELATES_TO_PRODUCT", threshold=0.60, id_prop2='name'
            )
        uploader.link_nodes_by_semantic_similarity(
            node1_label="IslamicConcept", node2_label="IslamicFinanceProduct",
            relationship_type="CONCEPT_APPLIES_TO_PRODUCT", threshold=0.7, id_prop1='name', id_prop2='name'
        )
        print("Semantic linking process finished (or skipped).")

    except Exception as e:
        print(f"An error occurred in the main execution: {e}")
    finally:
        if uploader:
            uploader.close()

    print("\nFull script execution finished.")

  from .autonotebook import tqdm as notebook_tqdm


Initializing LlamaIndex HuggingFace embedding model: BAAI/bge-large-en-v1.5...
Embedding model initialized successfully.
Successfully connected to Neo4j and initialized uploader.
Loading JSON data files...
  quran.json loaded successfully.
  bukhari.json loaded successfully.
Starting Quran verse upload...
  Generating embeddings for approximately 6236 Quran verses...
    Generated embeddings for 500/6236 Quran verses...
    Generated embeddings for 1000/6236 Quran verses...
    Generated embeddings for 1500/6236 Quran verses...
    Generated embeddings for 2000/6236 Quran verses...
    Generated embeddings for 2500/6236 Quran verses...
    Generated embeddings for 3000/6236 Quran verses...
    Generated embeddings for 3500/6236 Quran verses...
    Generated embeddings for 4000/6236 Quran verses...
    Generated embeddings for 4500/6236 Quran verses...
    Generated embeddings for 5000/6236 Quran verses...
    Generated embeddings for 5500/6236 Quran verses...
    Generated embeddings f

In [6]:
# Semantic linking pass. Every link is created by
# uploader.link_nodes_by_semantic_similarity; this cell only decides which
# label pairs to link and with which threshold.
print("\nStarting semantic linking (this can be time-consuming)...")

# (target label, relationship type, threshold) applied to each scripture source.
# BGE models often need higher thresholds
scripture_link_targets = (
    ("IslamicConcept", "SEMANTICALLY_RELATES_TO_CONCEPT", 0.65),
    ("IslamicFinanceProduct", "SEMANTICALLY_RELATES_TO_PRODUCT", 0.60),
)

for scripture_label in ("QuranVerse", "Hadith"):
    # Resolved lazily per source: a source is linked only if its JSON data was loaded.
    scripture_data = quran_json_data if scripture_label == "QuranVerse" else bukhari_json_data
    if not scripture_data:
        continue
    for target_label, link_type, min_similarity in scripture_link_targets:
        uploader.link_nodes_by_semantic_similarity(
            node1_label=scripture_label,
            node2_label=target_label,
            relationship_type=link_type,
            threshold=min_similarity,
            id_prop2='name',
        )

# Concept -> product linking runs regardless of the scripture data and passes
# 'name' as the id property for both sides.
uploader.link_nodes_by_semantic_similarity(
    node1_label="IslamicConcept",
    node2_label="IslamicFinanceProduct",
    relationship_type="CONCEPT_APPLIES_TO_PRODUCT",
    threshold=0.7,
    id_prop1='name',
    id_prop2='name',
)
print("Semantic linking process finished (or skipped).")



Starting semantic linking (this can be time-consuming)...
Attempting to link QuranVerse with IslamicConcept using relationship SEMANTICALLY_RELATES_TO_CONCEPT (threshold: 0.65)...


  with self.driver.session(database="neo4j") as session:


  Comparing 6236 QuranVerse(s) with 5 IslamicConcept(s)...
  Processed 65/6236 QuranVerses. 'Quran:112:1' linked to 1 IslamicConcept(s). Total links so far: 1
  Processed 66/6236 QuranVerses. 'Quran:112:2' linked to 1 IslamicConcept(s). Total links so far: 2
  Processed 100/6236 QuranVerses. 'Quran:91:1' linked to 0 IslamicConcept(s). Total links so far: 2
  Processed 200/6236 QuranVerses. 'Quran:10:14' linked to 0 IslamicConcept(s). Total links so far: 2
  Processed 235/6236 QuranVerses. 'Quran:10:49' linked to 1 IslamicConcept(s). Total links so far: 3
  Processed 241/6236 QuranVerses. 'Quran:10:55' linked to 1 IslamicConcept(s). Total links so far: 4
  Processed 245/6236 QuranVerses. 'Quran:10:59' linked to 1 IslamicConcept(s). Total links so far: 5
  Processed 300/6236 QuranVerses. 'Quran:98:5' linked to 0 IslamicConcept(s). Total links so far: 5
  Processed 386/6236 QuranVerses. 'Quran:11:83' linked to 1 IslamicConcept(s). Total links so far: 6
  Processed 389/6236 QuranVerses. 'Q

In [13]:
# --- In your __main__ execution block ---
# Runs four example vector searches (Quran verses, hadiths, finance products,
# concepts) through uploader.search_relevant_nodes_by_text and prints the hits.
if __name__ == "__main__":
    import os  # imported here so the cell runs on its own after a kernel restart

    # Connection settings: prefer environment variables so real credentials
    # never live in the notebook. The literals are local-development fallbacks.
    NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j://localhost:7687")
    NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
    NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "yourStrongPassword123")  # placeholder only; set NEO4J_PASSWORD

    uploader = None
    try:
        uploader = Neo4jUploader(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)
        print("Successfully connected to Neo4j and initialized uploader.")

        # ... (your existing data loading and uploading code) ...
        # ... (ensure quran_json_data, bukhari_json_data etc. are loaded) ...
        # ... (ensure nodes are uploaded via uploader.upload_quran_verses(), etc.) ...

        print("\n--- Starting Search Examples ---")

        # Create Vector Indexes in Neo4j Browser first if you haven't!
        # Example:
        # CREATE VECTOR INDEX quran_verse_embeddings IF NOT EXISTS
        # FOR (n:QuranVerse) ON (n.embedding)
        # OPTIONS {indexConfig: {
        #  `vector.dimensions`: 1024,
        #  `vector.similarity_function`: 'cosine'
        # }};
        # (Do this for Hadith, IslamicConcept, IslamicFinanceProduct as well)

        search_query = "what is riba"

        print(f"\nSearching QuranVerses for: '{search_query}'")
        quran_results = uploader.search_relevant_nodes_by_text(
            search_text=search_query,
            node_label="QuranVerse",
            index_name="quran_verse_embeddings", # Make sure this index exists!
            top_k=3
        )
        for result in quran_results:
            # The full verse text is printed (no truncation); the trailing "..." is only a visual separator.
            print(f"  ID: {result.get('unique_id')}, Score: {result.get('similarity_score'):.4f}, Text: {result.get('text_english', '')}...")

        print(f"\nSearching Hadiths for: '{search_query}'")
        hadith_results = uploader.search_relevant_nodes_by_text(
            search_text=search_query,
            node_label="Hadith",
            index_name="hadith_embeddings", # Make sure this index exists!
            top_k=3
        )
        for result in hadith_results:
            print(f"  ID: {result.get('unique_id')}, Score: {result.get('similarity_score'):.4f}, Text: {result.get('text_english', '')}...")

        search_query_product = "Financing for agricultural produce like barley"
        print(f"\nSearching IslamicFinanceProducts for: '{search_query_product}'")
        product_results = uploader.search_relevant_nodes_by_text(
            search_text=search_query_product,
            node_label="IslamicFinanceProduct",
            index_name="product_embeddings", # Make sure this index exists!
            top_k=2
        )
        for result in product_results:
            print(f"  Product: {result.get('name')}, FAS: {result.get('fas_reference')}, Score: {result.get('similarity_score'):.4f}")
            print(f"    Use Case: {result.get('use_case')}")

        search_query_concept = "The prohibition of excessive uncertainty in contracts"
        print(f"\nSearching IslamicConcepts for: '{search_query_concept}'")
        concept_results = uploader.search_relevant_nodes_by_text(
            search_text=search_query_concept,
            node_label="IslamicConcept",
            index_name="concept_embeddings", # Make sure this index exists!
            top_k=2
        )
        for result in concept_results:
            print(f"  Concept: {result.get('name')}, Score: {result.get('similarity_score'):.4f}")
            print(f"    Definition: {result.get('definition_english')}")


    except Exception as e:
        # Best-effort demo: report the failure and still close the driver below.
        print(f"An error occurred in the main execution: {e}")
    finally:
        if uploader:
            uploader.close()

    print("\nFull script execution finished.")

Initializing LlamaIndex HuggingFace embedding model: BAAI/bge-large-en-v1.5...
Embedding model initialized successfully.
Successfully connected to Neo4j and initialized uploader.

--- Starting Search Examples ---

Searching QuranVerses for: 'what is riba'
Generating embedding for search text: 'what is riba...'
Querying Neo4j vector index 'quran_verse_embeddings' for 'QuranVerse' nodes similar to 'what is riba...'
Found 3 relevant QuranVerse nodes.
  ID: Quran:2:257, Score: 0.7713, Text: Allah is the ally of those who believe. He brings them out from darknesses into the light. And those who disbelieve - their allies are Taghut. They take them out of the light into darknesses. Those are the companions of the Fire; they will abide eternally therein...
  ID: Quran:39:23, Score: 0.7702, Text: Allah has sent down the best statement: a consistent Book wherein is reiteration. The skins shiver therefrom of those who fear their Lord; then their skins and their hearts relax at the remembrance of 

# Islamic finance book

In [16]:
!pip install llama-index sentence-transformers transformers nltk 
!pip install llama-index llama-index-embeddings-huggingface
# !pip install llama-index-llms-ollama
# !pip install ollama
!pip install huggingface_hub

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




In [18]:
!pip install llama-index-llms-ollama

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting llama-index-llms-ollama
  Downloading llama_index_llms_ollama-0.5.4-py3-none-any.whl.metadata (3.8 kB)
Collecting ollama>=0.4.3 (from llama-index-llms-ollama)
  Downloading ollama-0.4.8-py3-none-any.whl.metadata (4.7 kB)
Downloading llama_index_llms_ollama-0.5.4-py3-none-any.whl (7.8 kB)
Downloading ollama-0.4.8-py3-none-any.whl (13 kB)
Installing collected packages: ollama, llama-index-llms-ollama
Successfully installed llama-index-llms-ollama-0.5.4 ollama-0.4.8


In [None]:
import os
import json
from llama_index.core import StorageContext, load_index_from_storage, Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Reload a persisted LlamaIndex index, re-attach each node's embedding from the
# persisted vector-store JSON, and upload the nodes to Neo4j through the
# `uploader` created in an earlier cell.

# Path where the index was persisted
PERSIST_DIR = "./data/output"  # Or "/kaggle/working/output"
VECTOR_STORE_PATH = os.path.join(PERSIST_DIR, "default__vector_store.json")

# Load embedding model
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5")
Settings.embed_model = embed_model  # Needed for embedding rehydration

# Load vector store embeddings manually.
# Read explicitly as UTF-8 so parsing does not depend on the platform's default encoding.
with open(VECTOR_STORE_PATH, "r", encoding="utf-8") as f:
    vector_data = json.load(f)
# Lookup of embeddings keyed by node id (see the .get(node.node_id) below).
embedding_dict = vector_data.get("embedding_dict", {})

# Load index from storage
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
index = load_index_from_storage(storage_context)

# Extract nodes
print("Extracting nodes from loaded index...")
all_nodes = list(index.docstore.docs.values())
print(f"Found {len(all_nodes)} nodes in the loaded index")

# Inject missing embeddings: nodes without one get theirs from the vector
# store; nodes with no (or an empty) entry there are counted as missing.
missing_embeddings = 0
for node in all_nodes:
    if not hasattr(node, 'embedding') or node.embedding is None:
        embedding = embedding_dict.get(node.node_id)
        if embedding:
            node.embedding = embedding
        else:
            missing_embeddings += 1

print(f"{missing_embeddings} nodes are still missing embeddings after injection")

# Upload only when every node has an embedding.
if missing_embeddings == 0:
    print("Uploading nodes to Neo4j database...")
    uploader.upload_document_chunks(all_nodes)
    print("Upload complete!")
else:
    print("Some nodes are still missing embeddings. Check the vector_store.json or node IDs.")


Extracting nodes from loaded index...
Found 683 nodes in the loaded index
0 nodes are still missing embeddings after injection
Uploading nodes to Neo4j database...
Starting upload of 683 document chunks...
  Uploading batch of 100 chunks to Neo4j...
    100 chunks processed in this batch.
  Uploading batch of 100 chunks to Neo4j...
    100 chunks processed in this batch.
  Uploading batch of 100 chunks to Neo4j...
    100 chunks processed in this batch.
  Uploading batch of 100 chunks to Neo4j...
    100 chunks processed in this batch.
  Uploading batch of 100 chunks to Neo4j...
    100 chunks processed in this batch.
  Uploading batch of 100 chunks to Neo4j...
    100 chunks processed in this batch.
  Uploading batch of 83 chunks to Neo4j...
    83 chunks processed in this batch.
Document chunk upload complete. Total chunks processed in DB: 683
Upload complete!


In [33]:
# Demo: query every source through uploader.search_all_sources_by_text and
# print a short summary of each hit.
pdf_search_query = "What are the accounting treatments for Murabaha inventory?"
# Ensure the index "document_chunk_embeddings" exists and is up-to-date
chunk_results = uploader.search_all_sources_by_text(pdf_search_query, top_k=2)
if not chunk_results:
    print(f"  No results or search failed for query: '{pdf_search_query}'")
else:
    for rank, hit in enumerate(chunk_results, start=1):
        print(f"  Chunk Result {rank} (Score: {hit.get('similarity_score'):.4f}):")
        print(f"    Chunk ID: {hit.get('chunk_id')}")
        print(f"    Source: {hit.get('source_document_name')} (Page: {hit.get('page_number')})")
        # Only the first 200 characters of the content are shown.
        preview = hit.get('text_content', '')[:200]
        print(f"    Content: {preview}...")


Generating embedding for search text: 'What are the accounting treatments for Murabaha in...'
Querying Neo4j vector index 'document_chunk_embeddings' for 'DocumentChunk' nodes similar to 'What are the accounting treatm...'
Found 2 relevant DocumentChunk nodes.
Generating embedding for search text: 'What are the accounting treatments for Murabaha in...'
Querying Neo4j vector index 'hadith_embeddings' for 'Hadith' nodes similar to 'What are the accounting treatm...'
Found 2 relevant Hadith nodes.
Generating embedding for search text: 'What are the accounting treatments for Murabaha in...'
Querying Neo4j vector index 'quran_verse_embeddings' for 'QuranVerse' nodes similar to 'What are the accounting treatm...'
Found 2 relevant QuranVerse nodes.
  Chunk Result 1 (Score: 0.8290):
    Chunk ID: 5dae1c69-06eb-4883-86e2-ed91f3a663fb
    Source: 152672_Al-Masri-Book.pdf (Page: 191)
    Content: Estimation of the quantity should be possible (quantity should not 
be so big or so small). When quan

In [48]:

def ask_compliance_question(question, uploader, llm, top_k=6):
    """Answer a compliance question from retrieved context and list the sources used.

    Retrieves matches for ``question`` via ``uploader.search_all_sources_by_text``,
    joins their text into a prompt, sends it to ``llm.complete`` and appends a
    "References:" section naming the Quran verses, hadiths and document chunks
    that were retrieved.

    Args:
        question: Natural-language question to answer.
        uploader: Object exposing ``search_all_sources_by_text(text, top_k=...)``
            returning an iterable of dict-like results (or a falsy value on failure).
        llm: Object exposing ``complete(prompt=...)`` whose result has a ``text`` attribute.
        top_k: Forwarded unchanged to the search call.

    Returns:
        The stripped LLM answer followed by ``"\\n\\nReferences:\\n"`` and one
        line per source type that contributed at least one reference.
    """
    # A failed search may come back as None/empty; treat it as "no context"
    # instead of crashing while iterating.
    results = uploader.search_all_sources_by_text(question, top_k=top_k) or []

    print("\n--- Retrieved Chunks ---")
    context_blocks = []
    refs = {"Hadith": [], "QuranVerse": [], "DocumentChunk": []}

    for result in results:
        source = result.get("source_type")

        # NOTE(review): per-label searches elsewhere in this notebook expose
        # `text_english`; the fallback covers the case where the combined search
        # does not normalise to `text_content` — confirm against the uploader.
        content = result.get("text_content") or result.get("text_english") or ""
        if content:
            # Accumulate context (str() keeps the later join safe for non-string values).
            context_blocks.append(str(content))

        # Track references
        if source in ("Hadith", "QuranVerse"):
            reference = result.get("unique_id")
        elif source == "DocumentChunk":
            reference = f"{result.get('source_document_name')} (Page {result.get('page_number')})"
        else:
            reference = None

        # Skip missing ids (a None would break ", ".join) and avoid listing
        # the same source twice.
        if reference is not None:
            reference = str(reference)
            if reference not in refs[source]:
                refs[source].append(reference)

    prompt = "\n".join(context_blocks) + f"\n\nQuestion: {question}\nAnswer with reference to Islamic jurisprudence and mention any related Hadiths or Quranic verses if present."

    response = llm.complete(prompt=prompt)

    # Attach references
    response_text = response.text.strip()
    response_text += "\n\nReferences:\n"
    if refs["QuranVerse"]:
        response_text += "- Quran Verses: " + ", ".join(refs["QuranVerse"]) + "\n"
    if refs["Hadith"]:
        response_text += "- Hadiths: " + ", ".join(refs["Hadith"]) + "\n"
    if refs["DocumentChunk"]:
        response_text += "- FAS Document Chunks: " + ", ".join(refs["DocumentChunk"]) + "\n"

    return response_text



In [41]:
import os
from getpass import getpass

# SECURITY: never store the API key in the notebook. A key that has appeared
# in a saved or shared copy of this file must be treated as compromised and
# revoked in the Google console.
# The key is taken from the environment when already set; otherwise it is
# prompted for without echoing, so it never lands in the cell source or output.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass("Enter GOOGLE_API_KEY: ")

In [56]:
import os

from llama_index.llms.gemini import Gemini

# 1. Initialize the Gemini LLM
# NOTE(review): the recorded output shows a warning pointing at this
# constructor call — confirm whether this integration has a recommended replacement.
llm = Gemini(
    model="models/gemini-1.5-flash",
    temperature=0.1
)

# 2. Connect to Neo4j. Settings come from the environment when set so real
#    credentials stay out of the notebook; the literals are local-development fallbacks.
NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "yourStrongPassword123")  # placeholder only; set NEO4J_PASSWORD
uploader = Neo4jUploader(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)

# 3. Ask a compliance question
question = "Does the use of riba interest for asset financing allowed in islam?"
response = ask_compliance_question(question, uploader, llm)

# 4. Output the answer
print("\n🧠 AI Answer:\n", response)


  llm = Gemini(


Initializing LlamaIndex HuggingFace embedding model: BAAI/bge-large-en-v1.5...
Embedding model initialized successfully.

--- Retrieved Chunks ---

🧠 AI Answer:
 No, the use of *riba* (interest) for asset financing is explicitly forbidden in Islam.  Islamic jurisprudence strictly prohibits the charging or receiving of interest on loans or any form of financial transaction.  This prohibition is a fundamental tenet of Islamic finance, rooted in both the Quran and the Sunnah (prophetic traditions).

While the provided text discusses attempts to differentiate between *riba* and interest, arguing for a free market approach to interest rates,  Islamic scholars overwhelmingly reject these arguments. The core principle remains that any increase in the principal amount of a loan solely due to the passage of time is considered *riba* and is haram (forbidden).

**Quranic Verses:**

The Quran explicitly condemns *riba* in several verses, most notably:

* **Surah Al-Baqarah (2:275):**  This verse i

In [38]:
!pip install google-generativeai
!pip install  llama-index-llms-gemini


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting llama-index-llms-gemini
  Downloading llama_index_llms_gemini-0.4.14-py3-none-any.whl.metadata (3.6 kB)
Collecting pillow<11.0.0,>=10.2.0 (from llama-index-llms-gemini)
  Downloading pillow-10.4.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (9.2 kB)
Downloading llama_index_llms_gemini-0.4.14-py3-none-any.whl (9.6 kB)
Downloading pillow-10.4.0-cp312-cp312-manylinux_2_28_x86_64.whl (4.5 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.5/4.5 MB[0m [31m403.0 kB/s[0m eta [36m0:00:00[0m[36m0:00:01[0mm eta [36m0:00:01[0m
[?25hInstalling collected packages: pillow, llama-index-llms-gemini
  Attempting uninstall: pillow
    Found existing installation: pillow 11.2.1
    Uninstalling pillow-11.2.1:
      Successfully uninstalled pillow-11.2.1
Successfully installed llama-index-llms-gemini-0.4.14 pillow-10.4.0


In [55]:
# Fetch the five QuranVerse nodes closest to an "interest riba" query using
# the quran_verse_embeddings vector index.
result = uploader.search_relevant_nodes_by_text(
    search_text="interest riba",
    node_label="QuranVerse",
    index_name="quran_verse_embeddings",
    top_k=5,
)

# Show only the English text of each returned record.
for record in result:
    print(record["text_english"])

Those who consume interest cannot stand [on the Day of Resurrection] except as one stands who is being beaten by Satan into insanity. That is because they say, "Trade is [just] like interest." But Allah has permitted trade and has forbidden interest. So whoever has received an admonition from his Lord and desists may have what is past, and his affair rests with Allah. But whoever returns to [dealing in interest or usury] - those are the companions of the Fire; they will abide eternally therein
Allah destroys interest and gives increase for charities. And Allah does not like every sinning disbeliever
And whatever you give for interest to increase within the wealth of people will not increase with Allah. But what you give in zakah, desiring the countenance of Allah - those are the multipliers
Let a man of wealth spend from his wealth, and he whose provision is restricted - let him spend from what Allah has given him. Allah does not charge a soul except [according to] what He has given it