# Network Visualization using Neo4j 

In [7]:
import os
import sys
from langchain_community.document_loaders import WebBaseLoader
from langchain_openai import AzureChatOpenAI
import re 
from langchain_community.graphs import Neo4jGraph
import dotenv
from langchain_community.graphs import Neo4jGraph
from langchain_core.documents import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pathlib import Path
import numpy as np 
import pandas as pd
# from project import web_crawl
import subprocess
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# connection to API's 

In [8]:

#LLM API's 
dotenv.load_dotenv()
llm=AzureChatOpenAI(
    azure_deployment=os.getenv('AZURE_OPENAI_DEPLOYMENT_MODEL'),
    api_version=os.getenv('AZURE_OpenAI_API_VERSION'),
    temperature=0,
    
)

graph=Neo4jGraph(url=os.getenv('NEO4J_URI_ONLINE'),
                 username=os.getenv('NEO4J_USERNAME_ONLINE'),
                 password=os.getenv('NEO4J_PASSWORD_ONLINE')
                #  database=os.getenv('NEO4J_DATABASE')
)


In [9]:
# empty graph database
# DElete everything from the database 
# delete_cypher=""" 
# Match(n)
# DETACH DELETE n
# """
# graph.query(delete_cypher)

# extract information from weburl


In [10]:
from langchain_core.documents import Document
from helpers.clean_data import clean_text
url_list=[  
    'https://en.wikipedia.org/wiki/Elon_Musk',
    'https://en.wikipedia.org/wiki/Mark_Zuckerberg',
    'https://en.wikipedia.org/wiki/Bill_Gates',
    'https://en.wikipedia.org/wiki/Jeff_Bezos',
    'https://en.wikipedia.org/wiki/Steve_Jobs',
    'https://en.wikipedia.org/wiki/Sam_Altman',
    'https://en.wikipedia.org/wiki/Larry_Ellison',
    'https://en.wikipedia.org/wiki/Larry_Page',
    'https://en.wikipedia.org/wiki/Sundar_Pichai',
    'https://en.wikipedia.org/wiki/Satya_Nadella' 
    
]
   

# Define a function to clean the extracted data from web URL's


In [11]:
# define a function to clean the extracted web URL data
import re #for regular expression 

def clean_text(text):
    # Remove HTML tags
    text = re.sub(r'<[^>]*?>', '', text)
    # Remove URLs
    text = re.sub(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', '', text)
    # Remove special characters
    text = re.sub(r'[^a-zA-Z0-9 ]', '', text)
    # Replace multiple spaces with a single space
    text = re.sub(r'\s{2,}', ' ', text)
    # Trim leading and trailing whitespace
    text = text.strip()
    # Remove extra whitespace
    text = ' '.join(text.split())
    return text

# define a function to extract data frorm Web URL's using Lanchain Framework

In [12]:
# extract the data from the URLs
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from langchain_community.document_loaders import WebBaseLoader

def extract_data_from_URL(url):
    loader=WebBaseLoader([url])
    data=loader.load().pop().page_content
    data=clean_text(data)
    documents=[Document(page_content=data)]
    # print(documents)
    splitter=RecursiveCharacterTextSplitter(chunk_size=3000,chunk_overlap=100)
    smaller_doc=splitter.split_documents(documents)
    print(len(smaller_doc))
    return smaller_doc

# from the predefined prompts

In [None]:
from helpers.prompts import graphPrompt
from datetime import datetime
results=[]
start_time=datetime.now()
for url in url_list:
    
    try:
        smaller_doc=extract_data_from_URL(url)
        for doc in smaller_doc[:50]: 
            results.append(graphPrompt(doc.page_content))
    except Exception as e:
        print('Exception',e)
        errordata=e.args[0]            
end_time=datetime.now()
len(results)
print(f'extracted information in {end_time-start_time}')
for doc in smaller_doc[:30]:
    results.append(graphPrompt(doc.page_content))
len(results)

80
{
    "entities": [
        {
            "label": "Elon Musk",
            "id": "Elon Musk",
            "role": "Businessman",
            "description": "South African-born entrepreneur and business magnate"
        },
        {
            "label": "South Africa",
            "id": "South Africa",
            "role": "Birthplace",
            "description": "Country of Elon Musk's birth"
        },
        {
            "label": "Canada",
            "id": "Canada",
            "role": "Citizenship",
            "description": "Country of Elon Musk's citizenship"
        },
        {
            "label": "United States",
            "id": "United States",
            "role": "Citizenship",
            "description": "Country of Elon Musk's citizenship"
        }
    ]
}
{
    "entities": [
        {
            "label": "Elon Musk",
            "id": "Elon Musk",
            "role": "Founder, CEO, and Chief Engineer",
            "description": "Business magnate known for Space

[{'entities': [{'label': 'Elon Musk',
    'id': 'Elon Musk',
    'role': 'Businessman',
    'description': 'South African-born entrepreneur and business magnate'},
   {'label': 'South Africa',
    'id': 'South Africa',
    'role': 'Birthplace',
    'description': "Country of Elon Musk's birth"},
   {'label': 'Canada',
    'id': 'Canada',
    'role': 'Citizenship',
    'description': "Country of Elon Musk's citizenship"},
   {'label': 'United States',
    'id': 'United States',
    'role': 'Citizenship',
    'description': "Country of Elon Musk's citizenship"}]},
 {'entities': [{'label': 'Elon Musk',
    'id': 'Elon Musk',
    'role': 'Founder, CEO, and Chief Engineer',
    'description': 'Business magnate known for SpaceX and Tesla Inc.'},
   {'label': 'SpaceX',
    'id': 'SpaceX',
    'role': 'Spaceflight services company',
    'description': 'SpaceX was founded by Elon Musk in 2002'},
   {'label': 'Tesla Inc.',
    'id': 'Tesla Inc.',
    'role': 'Automotive company',
    'descriptio

In [22]:
# for res in results:
#     print(res)


In [16]:
#empty neo
# delete_cypher="MATCH (n) DETACH DELETE n"
# graph.query(delete_cypher)

[]

# inject data into neo4j

In [17]:
from neo4j import GraphDatabase


In [18]:
driver=GraphDatabase.driver(uri=os.getenv('NEO4J_URI_ONLINE'),auth=(os.getenv('NEO4J_USERNAME_ONLINE'),os.getenv('NEO4J_PASSWORD_ONLINE')))
existing_set=[]
def sanitize_label(label):
    sanitized=re.sub(r"[^\w]", "_", label)
    return re.sub(r"_+", "_", sanitized).strip("_")

def insert_entities(session,entities):
    for entity in entities:
        
        sanitize_labels=sanitize_label(entity['id'])
        query=f"""
            MERGE (n:{sanitize_labels} {{id:$id}})
            ON CREATE SET
                n.role=$role,
                n.description=$description
            ON MATCH SET
                n.role=$role,
                n.description=$description

            """
        session.run(query,label=entity['label'], id=entity['id'],role=entity['role'],description=entity['label'])
        print(f'Inserted entity: {entity["id"]}')
        existing_set.append(entity['label'])
    
def insert_relationship(session,relationships):
    for relation in relationships:
        query = f"""
        MATCH (a {{id: $source}}), (b {{id: $target}})
        MERGE (a)-[:{sanitize_label(relation['type'])} {{
            id: $id,
            description: $description
        }}]->(b)
        """
        session.run(query,source=relation['source'],target=relation['target'],id=relation['id'],description=relation['description'])
        print(f'Inserted relationship: {relation["id"]}')


In [19]:
with driver.session() as session:
    for result in results:
        try:
            session.execute_write(insert_entities,result['entities'])
            session.execute_write(insert_relationship,result['relationships'])
        except Exception as e:
            print(f'Error {e}')
driver.close()

Inserted entity: Elon Musk
Inserted entity: South Africa
Inserted entity: Canada
Inserted entity: United States
Error 'relationships'
Inserted entity: Elon Musk
Inserted entity: SpaceX
Inserted entity: Tesla Inc.
Inserted entity: X Corp
Inserted entity: The Boring Company
Inserted entity: xAI
Inserted entity: Neuralink
Inserted entity: OpenAI
Inserted entity: University of Pennsylvania
Inserted entity: Zip2
Inserted entity: PayPal
Inserted entity: Compaq
Inserted entity: eBay
Inserted relationship: relationship1
Inserted relationship: relationship2
Inserted relationship: relationship3
Inserted relationship: relationship4
Inserted relationship: relationship5
Inserted relationship: relationship6
Inserted relationship: relationship7
Inserted relationship: relationship8
Inserted relationship: relationship9
Inserted relationship: relationship10
Inserted relationship: relationship11
Inserted relationship: relationship12
Inserted entity: Elon Musk
Inserted entity: SolarCity
Inserted entity: T

# query through graph - future GRAPH RAG

In [81]:
from langchain_experimental.graph_transformers import LLMGraphTransformer
llm_transformer=LLMGraphTransformer(llm=llm)

In [None]:
from langchain_groq import ChatGroq
dotenv.load_dotenv()
llm=ChatGroq(groq_api_key=os.getenv('GROQ_API_KEY'),
             model=os.getenv('GROQ_MODEL'),
            #  max_tokens=1000
             )

graph=Neo4jGraph(url=os.getenv('NEO4J_URI_ONLINE'),
                 username=os.getenv('NEO4J_USERNAME_ONLINE'),
                 password=os.getenv('NEO4J_PASSWORD_ONLINE')
                #  database=os.getenv('NEO4J_DATABASE')
)


In [21]:
from langchain.chains import GraphCypherQAChain
chain=GraphCypherQAChain.from_llm(llm=llm,graph=graph,verbose=True,allow_dangerous_requests=True)
chain

GraphCypherQAChain(verbose=True, graph=<langchain_community.graphs.neo4j_graph.Neo4jGraph object at 0x000001BE3624AAA0>, cypher_generation_chain=LLMChain(verbose=False, prompt=PromptTemplate(input_variables=['question', 'schema'], input_types={}, partial_variables={}, template='Task:Generate Cypher statement to query a graph database.\nInstructions:\nUse only the provided relationship types and properties in the schema.\nDo not use any other relationship types or properties that are not provided.\nSchema:\n{schema}\nNote: Do not include any explanations or apologies in your responses.\nDo not respond to any questions that might ask anything else than for you to construct a Cypher statement.\nDo not include any text except the generated Cypher statement.\n\nThe question is:\n{question}'), llm=AzureChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x000001BE3B727790>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x000001BE3E48AE60>, 

In [20]:
import json
with open('neo4j_nodes&edges.json','w') as file:
    json.dump(results,file,indent=1)