In [None]:
import pandas as pd
import os
import csv
import time
from collections import defaultdict
from tqdm import tqdm
import nest_asyncio
nest_asyncio.apply()

from llama_index.core import Document, PropertyGraphIndex, Settings
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import StorageContext, load_index_from_storage


### Ollama and Neo4j

In [None]:

def load_csv(filepath):
    """Read a CSV file into a list of row dictionaries.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        list[dict]: One dict per data row, keyed by the column names found in
        the header row (as produced by ``csv.DictReader``).

    Raises:
        OSError: If the file cannot be opened.
    """
    # newline='' hands line-ending handling to the csv module, so quoted
    # fields that contain line breaks are returned exactly as written.
    # 'utf-8-sig' drops a leading byte-order mark that would otherwise be
    # glued to the first column name; files without a BOM read as before.
    with open(filepath, 'r', encoding='utf-8-sig', newline='') as f:
        return list(csv.DictReader(f))

def safe_float(value, default=0.0):
    """Convert ``value`` to a float, falling back to ``default``.

    The value is first turned into a string and stripped of surrounding
    whitespace, so numbers, numeric strings and padded strings all work.

    Args:
        value: Anything whose string form may be a number.
        default: Returned when the string form cannot be parsed as a float
            (for example ``None``, ``""`` or ``"1,234"``).

    Returns:
        float: The parsed number, or ``default`` if parsing fails.
    """
    try:
        return float(str(value).strip())
    # Only conversion failures are treated as "not a number"; a bare except
    # would also swallow KeyboardInterrupt and SystemExit.
    except (TypeError, ValueError):
        return default

def _clean_text(record, key):
    """Return the stripped text stored under ``key``; '' when missing or None."""
    # csv.DictReader fills short rows with None, which has no .strip().
    return str(record.get(key) or '').strip()


def _group_products_by_partner(records, value_key, share_key):
    """Group product rows by partner name, keeping input order per partner."""
    grouped = defaultdict(list)
    for record in records:
        partner = _clean_text(record, 'Partner Name')
        # Rows without a partner, or where the partner is India itself, are skipped.
        if not partner or partner == 'India':
            continue
        grouped[partner].append({
            'product': _clean_text(record, 'Product Group'),
            'value': safe_float(record.get(value_key, 0)),
            'share': safe_float(record.get(share_key, 0)),
        })
    return grouped


def _detail_documents(grouped, doc_type, sentence_prefix, id_prefix, first_id):
    """Build one detail Document per partner, numbering ids from ``first_id``."""
    docs = []
    for offset, (partner, products) in enumerate(grouped.items()):
        # Only the first five products go into the text; the metadata lists all.
        product_list = ", ".join(
            f"{p['product']} (${p['value']} thousand, {p['share']}% share)"
            for p in products[:5]
        )
        docs.append(Document(
            text=f"{sentence_prefix} {partner}: {product_list}.",
            metadata={
                'type': doc_type,
                'reporter': 'India',
                'partner': partner,
                'products': [p['product'] for p in products],
                'total_value': sum(p['value'] for p in products)
            },
            doc_id=f"{id_prefix}_{first_id + offset}"
        ))
    return docs


def create_trade_documents(export_import_data, export_data, import_data):
    """Create LlamaIndex documents from the three trade CSV record lists.

    Three kinds of documents are produced, in this order:

    * one ``trade_summary`` document per row of ``export_import_data``;
    * one ``export_detail`` document per partner found in ``export_data``;
    * one ``import_detail`` document per partner found in ``import_data``.

    Rows whose ``'Partner Name'`` is missing, blank, ``None`` or ``'India'``
    are skipped. Document ids are ``summary_<n>``, ``export_<n>`` and
    ``import_<n>``, where ``<n>`` is a single counter shared by all three
    kinds.

    Args:
        export_import_data: Row dicts with the per-partner summary columns
            (``'Export (US$ Thousand)'``, ``'Import (US$ Thousand)'``,
            ``'Trade Balance (US$ Thousand)'``, ...).
        export_data: Row dicts with ``'Product Group'``,
            ``'Export (US$ Thousand)'`` and ``'Export Product Share (%)'``.
        import_data: Row dicts with ``'Product Group'``,
            ``'Import (US$ Thousand)'`` and ``'Import Product Share (%)'``.

    Returns:
        list: The created ``Document`` objects.
    """
    documents = []

    # Process export-import summary
    for record in export_import_data:
        partner = _clean_text(record, 'Partner Name')
        if not partner or partner == 'India':
            continue

        year = record.get('Year', '2022')
        trade_balance = safe_float(record.get('Trade Balance (US$ Thousand)', 0))
        export_value = safe_float(record.get('Export (US$ Thousand)', 0))
        import_value = safe_float(record.get('Import (US$ Thousand)', 0))
        export_share = safe_float(record.get('Export Share in Total Products (%)', 0))
        import_share = safe_float(record.get('Import Share in Total Products (%)', 0))
        num_export_hs6 = safe_float(record.get('No Of exported HS6 digit Products', 0))
        num_import_hs6 = safe_float(record.get('No Of imported HS6 digit Products', 0))

        trade_status = "surplus" if trade_balance > 0 else "deficit" if trade_balance < 0 else "balanced"

        text = f"""India has a trade relationship with {partner} in {year}. 
        India exports ${export_value} thousand to {partner} ({export_share}% share, {num_export_hs6} products).
        India imports ${import_value} thousand from {partner} ({import_share}% share, {num_import_hs6} products).
        Trade balance is ${trade_balance} thousand ({trade_status})."""

        documents.append(Document(
            text=text,
            metadata={
                'type': 'trade_summary',
                'reporter': 'India',
                'partner': partner,
                'year': year,
                'trade_balance': trade_balance,
                'export_value': export_value,
                'import_value': import_value
            },
            # The shared id counter always equals the number of documents so far.
            doc_id=f"summary_{len(documents)}"
        ))

    # Process export products
    exports_by_partner = _group_products_by_partner(
        export_data, 'Export (US$ Thousand)', 'Export Product Share (%)'
    )
    documents.extend(_detail_documents(
        exports_by_partner, 'export_detail', "India exports to", 'export', len(documents)
    ))

    # Process import products
    imports_by_partner = _group_products_by_partner(
        import_data, 'Import (US$ Thousand)', 'Import Product Share (%)'
    )
    documents.extend(_detail_documents(
        imports_by_partner, 'import_detail', "India imports from", 'import', len(documents)
    ))

    print(f"‚úì Created {len(documents)} documents")
    return documents

def setup_llamaindex_with_neo4j(neo4j_url, neo4j_username, neo4j_password, 
                                  ollama_base_url="http://localhost:11434",
                                  ollama_model="llama3.2-vision",
                                  embed_model_name="sentence-transformers/all-MiniLM-L6-v2",
                                  neo4j_database="neo4j",
                                  request_timeout=120.0,
                                  chunk_size=512):
    """Create the Ollama LLM, HuggingFace embedding model and Neo4j graph store.

    As a side effect the LLM, the embedding model and the chunk size are
    also installed on the global LlamaIndex ``Settings`` object.

    Args:
        neo4j_url: URL passed to ``Neo4jPropertyGraphStore``.
        neo4j_username: Neo4j user name.
        neo4j_password: Neo4j password.
        ollama_base_url: Base URL of the Ollama server.
        ollama_model: Name of the Ollama model to use.
        embed_model_name: Model name passed to ``HuggingFaceEmbedding``.
        neo4j_database: Name of the Neo4j database to use.
        request_timeout: ``request_timeout`` value passed to ``Ollama``.
        chunk_size: Value assigned to ``Settings.chunk_size``.

    Returns:
        tuple: ``(llm, embed_model, graph_store)``.
    """
    # Configure local Ollama LLM
    llm = Ollama(model=ollama_model, base_url=ollama_base_url, request_timeout=request_timeout)
    print(f"‚úì Using local Ollama LLM: {ollama_model}")

    # Use HuggingFace embeddings
    embed_model = HuggingFaceEmbedding(model_name=embed_model_name)
    print("‚úì Using HuggingFace embeddings (local, no rate limits)")

    # Set global settings so components that are not given an explicit
    # llm/embed_model pick these up as well.
    Settings.llm = llm
    Settings.embed_model = embed_model
    Settings.chunk_size = chunk_size

    # Setup Neo4j graph store
    graph_store = Neo4jPropertyGraphStore(
        username=neo4j_username,
        password=neo4j_password,
        url=neo4j_url,
        database=neo4j_database
    )

    print("‚úì LlamaIndex configured with Ollama and Neo4j")
    return llm, embed_model, graph_store


def load_index(persist_dir="./storage", graph_store=None, llm=None, embed_model=None,documents=None):
    """Load a persisted index from ``persist_dir`` or build one from ``documents``.

    If ``persist_dir`` exists, the index is loaded from it and ``documents``
    is not used. Otherwise a ``PropertyGraphIndex`` is built by adding the
    documents one at a time; each document gets up to two attempts and is
    skipped if both fail. This function never writes to ``persist_dir``.

    Args:
        persist_dir: Directory that may hold a previously persisted index.
        graph_store: Property graph store passed to LlamaIndex.
        llm: LLM passed to LlamaIndex.
        embed_model: Embedding model passed to LlamaIndex.
        documents: Documents to index when nothing is persisted.

    Returns:
        The loaded or newly built index, or ``None`` when ``persist_dir``
        exists but loading from it fails.

    Raises:
        ValueError: If there is no persisted index and ``documents`` is None.
        RuntimeError: If an error message looks like an out-of-memory
            failure, or if no document could be indexed at all.
    """
    if os.path.exists(persist_dir):
        print(f"index found at {persist_dir}")

        try:
            # Load storage context
            storage_context = StorageContext.from_defaults(
                persist_dir=persist_dir,
                property_graph_store=graph_store
            )

            # Load index
            index = load_index_from_storage(
                storage_context,
                property_graph_store=graph_store,
                llm=llm,
                embed_model=embed_model
            )

            print(f"‚úì Index loaded from {persist_dir}")
            return index

        except Exception as e:
            # Best effort: report the problem and let the caller decide.
            print(f"‚ùå Error loading index: {e}")
            return None

    # Nothing persisted: create a PropertyGraphIndex from the documents.
    if documents is None:
        raise ValueError(
            f"No persisted index found at {persist_dir!r} and no documents "
            "were provided to build one"
        )

    total = len(documents)

    print("\n[Building Property Graph Index]")
    print(f"Processing {total} documents...")
    print("Using local LLM and embeddings - no API rate limits!")
    print("This will take 10-15 minutes...")

    # Process ONE document at a time
    index = None
    successful_count = 0
    max_retries = 2  # total attempts per document

    for doc_num, doc in enumerate(documents, start=1):
        print(f"\nüìÑ Processing document {doc_num}/{total}...", end=" ")

        for attempt in range(1, max_retries + 1):
            try:
                if index is None:
                    # No index yet - create it from this document
                    index = PropertyGraphIndex.from_documents(
                        [doc],
                        property_graph_store=graph_store,
                        llm=llm,
                        embed_model=embed_model,
                        show_progress=False
                    )
                else:
                    # Index already exists - add this document to it
                    index.insert(doc)
            except Exception as e:
                error_msg = str(e)
                lowered = error_msg.lower()

                # Substring heuristic for out-of-memory failures; retrying
                # would not help, so stop the whole build.
                if "memory" in lowered or "oom" in lowered:
                    print(f"\n   ‚ùå Memory error: Your Ollama model is too large!")
                    print(f"   Switch to a smaller model like 'llama3.2' or 'phi3'")
                    raise RuntimeError("Out of memory - use smaller model") from e

                if attempt < max_retries:
                    print(f"\n   ‚ö†Ô∏è Error: {error_msg[:100]}... Retrying in 5 seconds...")
                    time.sleep(5)
                else:
                    print(f"\n   ‚ùå Failed after {max_retries} attempts: {error_msg[:100]}")
                continue

            successful_count += 1
            print(f"‚úì (Total: {successful_count})")

            # Small delay to not overwhelm Ollama
            if doc_num < total:
                time.sleep(1)

            break  # Success, stop retrying this document

        # Show progress every 10 documents
        if doc_num % 10 == 0:
            print(f"\n{'='*50}")
            print(f"Progress: {successful_count}/{doc_num} documents processed successfully")
            print(f"{'='*50}")

    if index is None:
        raise RuntimeError("Failed to create index - all documents failed")

    print(f"\n‚úì Property Graph Index created successfully")
    print(f"‚úì Successfully processed {successful_count}/{total} documents")
    return index



def query_graph_index(index, question):
    """Ask ``question`` against ``index`` and return the engine's response.

    A new query engine is created from the index on every call, configured
    to include source text, summarise with ``tree_summarize`` and use a
    ``similarity_top_k`` of 5.

    Args:
        index: Object exposing ``as_query_engine(**options)``.
        question: The question text to send to the query engine.

    Returns:
        Whatever the query engine's ``query`` method returns.
    """
    engine_options = {
        "include_text": True,
        "response_mode": "tree_summarize",
        "similarity_top_k": 5,
    }
    return index.as_query_engine(**engine_options).query(question)

def run_sample_queries(index,questions):
    """Print each question together with the answer obtained from ``index``.

    A banner is printed first; then, for every question in order, the
    question, the result of ``query_graph_index`` and a dashed separator.

    Args:
        index: Index handed to ``query_graph_index``.
        questions: Iterable of question strings.
    """
    banner = "=" * 70
    separator = "-" * 70

    print("\n" + banner)
    print("SAMPLE QUERIES - LLAMAINDEX PROPERTY GRAPH")
    print(banner)

    for question in questions:
        print(f"\nüìä Q: {question}")
        answer = query_graph_index(index, question)
        print(f"üí° A: {answer}")
        print(separator)

def get_graph_statistics(graph_store):
    """Count the nodes and relationships stored in the Neo4j graph.

    Two Cypher count queries are run through ``graph_store._driver``. If
    either fails, the error is printed and both counts are reported as
    ``"Unknown"``.

    Args:
        graph_store: Object whose ``_driver`` exposes ``execute_query``.

    Returns:
        dict: ``{'total_nodes': ..., 'total_relationships': ...}``.
    """
    count_queries = (
        ('total_nodes', "MATCH (n) RETURN count(n) as count"),
        ('total_relationships', "MATCH ()-[r]->() RETURN count(r) as count"),
    )

    stats = {}
    try:
        for stat_name, cypher in count_queries:
            rows = graph_store._driver.execute_query(cypher).records
            # An empty result set is reported as zero.
            stats[stat_name] = rows[0]['count'] if rows else 0
    except Exception as exc:
        print(f"Could not fetch stats: {exc}")
        for stat_name, _ in count_queries:
            stats[stat_name] = "Unknown"

    return stats

def print_statistics(stats):
    """Print the node and relationship totals inside a banner.

    Args:
        stats: Mapping with ``'total_nodes'`` and ``'total_relationships'``.
    """
    rule = "=" * 70
    labelled_keys = (
        ("Total Nodes", 'total_nodes'),
        ("Total Relationships", 'total_relationships'),
    )

    print("\n" + rule)
    print("PROPERTY GRAPH STATISTICS")
    print(rule)
    for label, key in labelled_keys:
        print(f"{label}: {stats[key]}")
    print(rule)

def clear_neo4j_graph(graph_store):
    """Delete every node, and any relationships attached to them, from Neo4j.

    Failures are reported with a printed message instead of being raised.

    Args:
        graph_store: Object whose ``_driver`` exposes ``execute_query``.
    """
    wipe_all_nodes = "MATCH (n) DETACH DELETE n"
    try:
        driver = graph_store._driver
        driver.execute_query(wipe_all_nodes)
        print("‚úì Neo4j database cleared")
    except Exception as exc:
        print(f"Could not clear database: {exc}")



In [None]:
# Main execution: load the CSVs, build the documents, connect to Ollama and
# Neo4j, reset the graph, build (or load) the index and print graph statistics.
if __name__ == "__main__":

    # Connection settings can be overridden through environment variables so
    # real credentials never have to be written into the notebook; the
    # fallbacks are the original defaults/placeholders.
    OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "http://127.0.0.1:11434")
    OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "llama3.2-vision")

    # Neo4j connection parameters
    NEO4J_URL = os.environ.get("NEO4J_URL", "YOUR_NEO4J_URL")
    NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME", "YOUR_NEO4J_USERNAME")
    NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "YOUR_NEO4J_PASSWORD")

    # os.path.join keeps these paths valid on every OS (a hard-coded
    # backslash separator only works on Windows).
    TRADE_DATA_DIR = "Trade data"
    EXPORT_IMPORT_FILE = os.path.join(TRADE_DATA_DIR, "ex_im_data.csv")
    EXPORT_FILE = os.path.join(TRADE_DATA_DIR, "export_data.csv")
    IMPORT_FILE = os.path.join(TRADE_DATA_DIR, "import_data.csv")

    # Step 1: Load data
    print("\n[1] Loading CSV data...")
    export_import_data = load_csv(EXPORT_IMPORT_FILE)
    export_data = load_csv(EXPORT_FILE)
    import_data = load_csv(IMPORT_FILE)
    print(f"‚úì Loaded {len(export_import_data)} summary records")
    print(f"‚úì Loaded {len(export_data)} export records")
    print(f"‚úì Loaded {len(import_data)} import records")

    # Step 2: Turn the CSV rows into documents
    print("\n[2] Creating documents...")
    documents = create_trade_documents(export_import_data, export_data, import_data)

    # Step 3: Setup LlamaIndex with Neo4j and Ollama
    print("\n[3] Setting up LlamaIndex with Neo4j and local Ollama...")
    llm, embed_model, graph_store = setup_llamaindex_with_neo4j(
        NEO4J_URL, NEO4J_USERNAME, NEO4J_PASSWORD,
        OLLAMA_BASE_URL, OLLAMA_MODEL
    )

    if graph_store is None:
        print("‚ùå Failed to setup LlamaIndex")
        print("\nTo setup:")
        print("1. Install and start Neo4j")
        print("2. Install Ollama: https://ollama.ai")
        print("3. Pull model: ollama pull llama3.2-vision")
        print("4. Update NEO4J_PASSWORD in code")
        # Raising SystemExit works without relying on the interactive `exit` helper.
        raise SystemExit(1)

    # Step 4: Clear previous data
    # NOTE(review): this wipes the graph even when load_index later finds a
    # persisted index in ./storage and loads it instead of rebuilding;
    # confirm that is intended.
    print("\n[4] Clearing previous Neo4j data...")
    clear_neo4j_graph(graph_store)

    # Step 5: Build Property Graph Index
    print("\n[5] Building Property Graph Index...")
    index = load_index(documents =documents,graph_store=graph_store,llm=llm,embed_model=embed_model)
    if index is None:
        # load_index returns None when a persisted index exists but cannot be loaded.
        print("Index could not be loaded; queries in later cells will fail until this is fixed.")

    # Step 6: Get statistics
    print("\n[6] Getting graph statistics...")
    stats = get_graph_statistics(graph_store)
    print_statistics(stats)

    

In [None]:
# Questions to ask the index created by the main cell.
questions = [
    "What does India export to United Arab Emirates?. Enlist down the product too!",
]

# Patching the event loop does not depend on the question, so it is done
# once before the loop instead of once per question.
nest_asyncio.apply()

for q in questions:
    # Each question is run on its own, so the banner is printed per question.
    run_sample_queries(index,[q])
    print("Done!!!!\n")
    print("\n" + "="*70)