In [2]:
import pandas as pd
import os
from neo4j import GraphDatabase
from langchain_ollama import ChatOllama, OllamaEmbeddings
from dotenv import load_dotenv

# Load variables through python-dotenv before they are read below.
load_dotenv()

# Connection settings: every value is taken from the environment and falls
# back to the literal default given here when the variable is not set.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USERNAME", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "password")

# Shared clients referenced by the functions and cells further down.
driver = GraphDatabase.driver(
    NEO4J_URI,
    auth=(NEO4J_USER, NEO4J_PASSWORD),
)
llm = ChatOllama(model="qwen3:8b", temperature=0)
embeddings = OllamaEmbeddings(model="qwen3-embedding:0.6b")

def load_initial_graph(csv_path):
    """Import the company CSV into Neo4j as a Company -> Industry -> Sector graph.

    The file is read with ``;`` as the separator and must provide the columns
    ``Ticker``, ``Name``, ``Description``, ``Industry`` and ``Sector``. For
    every usable row a ``Company`` node (keyed by ticker), an ``Industry``
    node and a ``Sector`` node (both keyed by name) are merged and linked with
    ``BELONGS_TO`` and ``PART_OF`` relationships. All writes use ``MERGE``, so
    nodes and relationships that already exist are reused, not duplicated.

    Rows whose ticker, industry or sector is missing are skipped: pandas reads
    an empty CSV cell as NaN, which is not a meaningful node key. A missing
    name or description is sent as ``None`` instead of NaN.

    Args:
        csv_path: Path to the ``;``-separated CSV file.
    """
    df = pd.read_csv(csv_path, sep=';')
    print(f"–ó–∞–≥—Ä—É–∑–∫–∞ {len(df)} –∫–æ–º–ø–∞–Ω–∏–π...")

    # Turn the frame into plain dicts once, so the import is a single query.
    key_columns = ("Ticker", "Industry", "Sector")
    rows = []
    for _, row in df.iterrows():
        if any(pd.isna(row[column]) for column in key_columns):
            continue
        rows.append({
            "ticker": row["Ticker"],
            "name": None if pd.isna(row["Name"]) else row["Name"],
            "desc": None if pd.isna(row["Description"]) else row["Description"],
            "industry": row["Industry"],
            "sector": row["Sector"],
        })

    skipped = len(df) - len(rows)
    if skipped:
        print(f"Skipped {skipped} row(s) with a missing Ticker, Industry or Sector.")

    with driver.session() as session:
        # Uniqueness constraints on the properties used as merge keys.
        session.run("CREATE CONSTRAINT IF NOT EXISTS FOR (c:Company) REQUIRE c.ticker IS UNIQUE")
        session.run("CREATE CONSTRAINT IF NOT EXISTS FOR (i:Industry) REQUIRE i.name IS UNIQUE")
        session.run("CREATE CONSTRAINT IF NOT EXISTS FOR (s:Sector) REQUIRE s.name IS UNIQUE")

        # One UNWIND query replaces one round trip per CSV row.
        cypher = """
        UNWIND $rows AS row
        MERGE (c:Company {ticker: row.ticker})
        SET c.name = row.name, c.description = row.desc

        MERGE (i:Industry {name: row.industry})
        MERGE (s:Sector {name: row.sector})

        MERGE (c)-[:BELONGS_TO]->(i)
        MERGE (i)-[:PART_OF]->(s)
        """
        # consume() waits for the write to finish, so a failure is raised
        # here and not after the success message.
        session.run(cypher, rows=rows).consume()
    print("‚úÖ –ì—Ä–∞—Ñ –ø–æ—Å—Ç—Ä–æ–µ–Ω.")

# 1. Run the initial load (only if the graph has not been loaded yet)
# load_initial_graph('./data/sp500_graph_ready.csv')

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
def generate_industry_summaries(max_context_chars=10000):
    """Write an LLM-generated description onto every Industry node.

    For each ``Industry`` in the graph, the names and descriptions of the
    companies that ``BELONGS_TO`` it are collected into a bullet list, the
    LLM is asked for a 2-3 paragraph synthesis, and the answer is stored in
    the node's ``description`` property.

    Args:
        max_context_chars: Upper bound on the number of characters of company
            text placed into the prompt; longer text is cut off at this
            length. ``None`` disables the cut. Defaults to 10000.
    """
    print("üöÄ –ù–∞—á–∞–ª–æ –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –æ–ø–∏—Å–∞–Ω–∏–π –¥–ª—è –ò–Ω–¥—É—Å—Ç—Ä–∏–π...")

    with driver.session() as session:
        # 1. Names of all industries currently in the graph.
        industries = session.run("MATCH (i:Industry) RETURN i.name as name").value()

        for ind_name in industries:
            # 2. Descriptions of every company that belongs to this industry.
            result = session.run("""
                MATCH (c:Company)-[:BELONGS_TO]->(i:Industry {name: $name})
                RETURN c.name, c.description
            """, name=ind_name)

            companies_text = "\n".join(
                f"- {r['c.name']}: {r['c.description']}" for r in result
            )

            # Nothing to synthesize from: do not ask the model to invent a summary.
            if not companies_text:
                print(f"  skipping Industry without companies: {ind_name}")
                continue

            # Truncate here, not inside the f-string: a '#' note placed in the
            # template is not a comment and would be sent to the model.
            companies_text = companies_text[:max_context_chars]

            # 3. Summarization prompt.
            prompt = f"""
            Analyze the following companies in the '{ind_name}' industry:
            {companies_text}

            Create a comprehensive summary (2-3 paragraphs) of this Industry. 
            Describe what these companies typically do, the technologies they use, 
            and the markets they serve. Do not list companies, synthesize the trends.
            """

            # 4. Generation.
            print(f"  generating summary for Industry: {ind_name}...")
            summary = llm.invoke(prompt).content

            # 5. Write the summary back onto the Industry node.
            session.run("""
                MATCH (i:Industry {name: $name})
                SET i.description = $desc
            """, name=ind_name, desc=summary)

    print("‚úÖ –ò–Ω–¥—É—Å—Ç—Ä–∏–∏ –æ–ø–∏—Å–∞–Ω—ã.")
def generate_sector_summaries():
    """Summarize every Sector node from the descriptions of its industries.

    Each sector's prompt lists the name and ``description`` of every
    ``Industry`` linked to it through ``PART_OF``; the model's reply is saved
    as the sector's own ``description`` property.
    """
    print("üöÄ –ù–∞—á–∞–ª–æ –≥–µ–Ω–µ—Ä–∞—Ü–∏–∏ –æ–ø–∏—Å–∞–Ω–∏–π –¥–ª—è –°–µ–∫—Ç–æ—Ä–æ–≤...")

    with driver.session() as session:
        sector_names = session.run("MATCH (s:Sector) RETURN s.name as name").value()

        for sector in sector_names:
            # One bullet line per industry that is part of this sector.
            industry_rows = session.run("""
                MATCH (i:Industry)-[:PART_OF]->(s:Sector {name: $name})
                RETURN i.name, i.description
            """, name=sector)

            bullet_lines = []
            for row in industry_rows:
                bullet_lines.append(f"- Industry {row['i.name']}: {row['i.description']}")
            industries_block = "\n".join(bullet_lines)

            prompt = f"""
            Analyze the following industries within the '{sector}' sector:
            {industries_block}

            Create a high-level strategic summary of this Sector. 
            Explain the economic role of this sector, key drivers, and sub-verticals involved.
            """

            print(f"  generating summary for Sector: {sector}...")
            reply = llm.invoke(prompt)
            summary_text = reply.content

            # Store the generated text on the Sector node.
            session.run("""
                MATCH (s:Sector {name: $name})
                SET s.description = $desc
            """, name=sector, desc=summary_text)

    print("‚úÖ –°–µ–∫—Ç–æ—Ä—ã –æ–ø–∏—Å–∞–Ω—ã.")

In [None]:
import os
from langchain_community.vectorstores import Neo4jVector
from dotenv import load_dotenv

load_dotenv()

# 1. Embedding model setup: the `embeddings` object used here must be the same
#    model that is used later at search time.
print("‚è≥ –ù–∞—á–∏–Ω–∞—é —Å–æ–∑–¥–∞–Ω–∏–µ –≤–µ–∫—Ç–æ—Ä–Ω–æ–≥–æ –∏–Ω–¥–µ–∫—Å–∞ –∏ –≤—ã—á–∏—Å–ª–µ–Ω–∏–µ —ç–º–±–µ–¥–¥–∏–Ω–≥–æ–≤...")
print("–≠—Ç–æ –º–æ–∂–µ—Ç –∑–∞–Ω—è—Ç—å –≤—Ä–µ–º—è, –µ—Å–ª–∏ –∫–æ–º–ø–∞–Ω–∏–π –º–Ω–æ–≥–æ...")

# 2. Per the original author's note, this call does two things:
#    a) creates an index named 'global_knowledge_index';
#    b) walks all Company nodes, takes their description, turns it into
#       vectors and stores them in the 'embedding' property.
# NOTE(review): node_label is "Company" only, while the comment in
# graph_rag_search_unified expects Sector and Industry nodes to be returned by
# this index as well. Confirm which one is intended.
# NOTE(review): any failure is only printed, so `vector_store` is left
# undefined after an error.
try:
    vector_store = Neo4jVector.from_existing_graph(
        embedding=embeddings,
        url=NEO4J_URI,
        username=NEO4J_USER,
        password=NEO4J_PASSWORD,
        index_name="global_knowledge_index",  # <--- the index name an earlier error complained about
        node_label="Company",
        text_node_properties=["description"], # node properties whose text is embedded
        embedding_node_property="embedding",  # node property that receives the vector
    )
    print("‚úÖ –ò–Ω–¥–µ–∫—Å 'global_knowledge_index' —É—Å–ø–µ—à–Ω–æ —Å–æ–∑–¥–∞–Ω!")
    
except Exception as e:
    print(f"‚ùå –û—à–∏–±–∫–∞: {e}")

‚è≥ –ù–∞—á–∏–Ω–∞—é —Å–æ–∑–¥–∞–Ω–∏–µ –≤–µ–∫—Ç–æ—Ä–Ω–æ–≥–æ –∏–Ω–¥–µ–∫—Å–∞ –∏ –≤—ã—á–∏—Å–ª–µ–Ω–∏–µ —ç–º–±–µ–¥–¥–∏–Ω–≥–æ–≤...
–≠—Ç–æ –º–æ–∂–µ—Ç –∑–∞–Ω—è—Ç—å –≤—Ä–µ–º—è, –µ—Å–ª–∏ –∫–æ–º–ø–∞–Ω–∏–π –º–Ω–æ–≥–æ...
‚úÖ –ò–Ω–¥–µ–∫—Å 'global_knowledge_index' —É—Å–ø–µ—à–Ω–æ —Å–æ–∑–¥–∞–Ω!


In [None]:
# Order matters: generate_sector_summaries() builds its prompt from the
# i.description values that generate_industry_summaries() writes.
generate_industry_summaries()
generate_sector_summaries()    

In [3]:
from langchain_community.vectorstores import Neo4jVector

def graph_rag_search_unified(question, top_k=5, index_name="global_knowledge_index"):
    """Answer a question with the LLM, using vector-index hits as context.

    The question is embedded, the ``top_k`` most similar nodes are fetched
    from the Neo4j vector index, and their type, name and description are
    formatted into a context block that is sent to the LLM together with the
    question.

    The Neo4j session is closed before the LLM is called, so no database
    connection is held while the model generates its answer.

    NOTE(review): the prompt mentions Industries and Sectors, but the index
    creation cell in this notebook builds the index for ``Company`` nodes
    only. Confirm that the index really covers the other labels.

    Args:
        question: The user's question as plain text.
        top_k: Number of nearest nodes to retrieve. Defaults to 5.
        index_name: Name of the vector index to query. Defaults to
            ``"global_knowledge_index"``.

    Returns:
        The ``content`` of the LLM response.
    """
    print(f"üîé –í–æ–ø—Ä–æ—Å: {question}")

    # 1. Turn the user's question into a vector.
    query_vec = embeddings.embed_query(question)

    # 2. Look up the most similar nodes in the index; the first label of each
    #    node is returned as its type.
    cypher_query = """
        CALL db.index.vector.queryNodes($index_name, $top_k, $embedding)
        YIELD node, score
        
        // –î–∏–Ω–∞–º–∏—á–µ—Å–∫–∏ –æ–ø—Ä–µ–¥–µ–ª—è–µ–º —Ç–∏–ø —É–∑–ª–∞ –¥–ª—è –∫—Ä–∞—Å–∏–≤–æ–≥–æ –≤—ã–≤–æ–¥–∞
        RETURN 
            head(labels(node)) as type,
            node.name as name,
            node.description as description,
            score
        """

    with driver.session() as session:
        # Materialize the records before the session closes.
        records = list(
            session.run(
                cypher_query,
                index_name=index_name,
                top_k=top_k,
                embedding=query_vec,
            )
        )

    # Build the context block for the LLM.
    context_parts = [
        f"Type: {r['type']}\nName: {r['name']}\nInfo: {r['description']}\n"
        for r in records
    ]
    context_str = "\n---\n".join(context_parts)

    # 3. Send context and question to the LLM.
    prompt = f"""
        Based on the provided context, answer the user's question.
        If the context contains Industries or Sectors, use that high-level info to provide a broader answer.
        
        Context:
        {context_str}
        
        Question: {question}
        """

    response = llm.invoke(prompt)
    return response.content

# Sample query: run the retrieval + LLM pipeline once and print the answer.
print(graph_rag_search_unified("The Health Care sector Equipment companies"))

üîé –í–æ–ø—Ä–æ—Å: The Health Care sector Equipment companies
In the **Health Care sector**, **Equipment companies** primarily focus on manufacturing, distributing, or providing medical devices, diagnostic tools, and related technologies. Based on the context, the following companies are key players in this space:

---

### **1. GE HealthCare Technologies Inc.**  
- **Segments**: Imaging, Advanced Visualization Solutions (AVS), Patient Care Solutions (PCS), Pharmaceutical Diagnostics (PDx).  
- **Key Equipment/Products**:  
  - Imaging systems (CT, MRI, X-ray, ultrasound).  
  - Diagnostic agents (radiopharmaceuticals, contrast media).  
  - Patient monitoring devices, surgical tools, and digital solutions for treatment and monitoring.  
- **Focus**: Diagnostic and therapeutic equipment for hospitals, clinics, and research facilities.  

---

### **2. Cardinal Health, Inc.**  
- **Segments**: Pharmaceutical and Specialty Solutions, Global Medical Products and Distribution.  
- **Key Eq

бенчмарк:
    Найти набор опорных вопросов и на основе них сделать свой бенчмарк или сгенерировать
    Выделить несколько типов вопросов
    ЕЩЁ генерировать правильные ответы
