# Requirements

In [None]:
!pip install -r requirements.txt

# Costruzione Knowledge Graph

### Import

In [16]:
import asyncio
import logging.config
import os

from dotenv import load_dotenv
from neo4j import GraphDatabase
from neo4j_graphrag.embeddings import OpenAIEmbeddings
from neo4j_graphrag.experimental.components.text_splitters.fixed_size_splitter import (
    FixedSizeSplitter,
)
from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline
from neo4j_graphrag.llm.openai_llm import OpenAILLM

from rdflib import Graph

# Load settings from a local .env file into the process environment;
# the cells below read them back with os.getenv (Neo4j and OpenAI credentials).
load_dotenv()

True

### Livello di debug

In [2]:
# Send log records to the console and set the log level to DEBUG for all
# neo4j_graphrag.* loggers.
# NOTE: "disable_existing_loggers" is left at its default (True), so loggers that
# already exist and are neither listed here nor children of a listed logger
# are disabled by this call.
logging.config.dictConfig(
    {
        "version": 1,
        "handlers": {
            "console": {
                "class": "logging.StreamHandler",
            }
        },
        # The root logger is configured through the dedicated top-level "root" key.
        # Listing it under "loggers" configures a logger literally named "root",
        # which is only the real root logger on Python >= 3.9.
        "root": {
            "handlers": ["console"],
        },
        "loggers": {
            "neo4j_graphrag": {
                "level": "DEBUG",
            },
        },
    }
)

### Connessione a Neo4j

In [12]:
# Open a Neo4j driver using the connection settings found in the environment.
# os.getenv yields None for any variable that is not set.
URI = os.getenv("NEO4J_URI")
AUTH = tuple(os.getenv(key) for key in ("NEO4J_USERNAME", "NEO4J_PASSWORD"))
driver = GraphDatabase.driver(URI, auth=AUTH)

### Inizializzazione Splitter ed Embedder

In [18]:
# Text splitting is intentionally left out: the fixed-size splitter below cut
# words in half, which was not useful.
# text_splitter = FixedSizeSplitter(chunk_size=150, chunk_overlap=20)

openaiKey = os.getenv("OPENAI_API_KEY")
# Never print the key itself: cell outputs are stored inside the notebook file
# and get shared/committed with it. Only report whether the key is available.
print("OPENAI_API_KEY is set" if openaiKey else "OPENAI_API_KEY is missing")
embedder = OpenAIEmbeddings(model="text-embedding-3-large")

[output redacted: this cell printed the OpenAI API key in clear text — revoke and rotate that key]


### Schema grafo

In [None]:
# Reduced, hand-written schema:
# - the properties are ignored
# - the listed relations and entities help a lot when building the graph
# - it is meant to be replaced by a complex ontology

# Node label -> property names declared for that label.
_entity_properties = {
    "Persona": ["età", "nome", "genere"],
    "Animale": ["nome", "specie", "età"],
    "Luogo": ["nome", "descrizione", "coordinata_geografica"],
    "Attività": ["descrizione", "categoria"],
    "Oggetto": ["nome", "descrizione", "materiale"],
    "Lavoro": ["descrizione", "settore"],
    "Hobby": ["descrizione", "frequenza"],
    "Emozione": ["tipo", "intensità"],
}
entities = {
    label: {"properties": property_names}
    for label, property_names in _entity_properties.items()
}

# Relationship types allowed in the graph.
relations = [
    "haAmico", "viveIn", "haPassione", "haLavoro", "haHobby",
    "haAnimale", "partecipaAAttività", "haSorella", "haFiglio", "haNipote",
    "possiede", "sente", "visita", "usa", "lavoraIn",
]

# Every allowed triple starts from a "Persona" node, so only the
# (relation, target label) pairs are spelled out here.
_persona_links = [
    ("sente", "Emozione"),
    ("haAmico", "Persona"),
    ("viveIn", "Luogo"),
    ("haPassione", "Attività"),
    ("haLavoro", "Lavoro"),
    ("haHobby", "Hobby"),
    ("haAnimale", "Animale"),
    ("possiede", "Oggetto"),
    ("partecipaAAttività", "Attività"),
    ("haSorella", "Persona"),
    ("haFiglio", "Persona"),
    ("haNipote", "Persona"),
    ("visita", "Luogo"),
    ("usa", "Oggetto"),
    ("lavoraIn", "Luogo"),
]
potential_schema = [
    ("Persona", relation, target) for relation, target in _persona_links
]

### Schema da Ontologia

In [None]:
from schemaFromOnto import getSchemaFromOnto

# NOTE(review): this empty rdflib graph is not used anywhere in this cell.
g = Graph()

# Derive the graph schema from the ontology file.
# Per the author's note, this only works if the ontology contains no unionOf.
neo4j_schema = getSchemaFromOnto("ontos/testOnt.ttl")

# Disabled alternative kept for reference:
# entities = list(neo4j_schema.entities.values())
# relations = list(neo4j_schema.relations.values())
# potential_schema = neo4j_schema.potential_schema

# Show the three parts of the derived schema, one per line.
for attribute_name in ("entities", "relations", "potential_schema"):
    print(getattr(neo4j_schema, attribute_name))


{'Event': {'label': 'Event', 'description': 'A specific occurrence that happens at a given time and place.', 'properties': [{'name': 'hasDescription', 'type': 'STRING', 'description': 'A textual description of an entity.'}]}, 'Activity': {'label': 'Activity', 'description': 'A task or action performed as part of an event or independently.', 'properties': []}, 'PlannedActivity': {'label': 'PlannedActivity', 'description': 'An activity that is scheduled to take place in the future.', 'properties': []}, 'Location': {'label': 'Location', 'description': 'A physical or virtual place where an event or activity takes place.', 'properties': []}, 'Time': {'label': 'Time', 'description': 'A specific time reference associated with an event or activity.', 'properties': [{'name': 'hasDate', 'type': 'STRING', 'description': 'The date when an event or activity occurs.'}, {'name': 'hasTime', 'type': 'STRING', 'description': 'The specific time of an event or activity.'}]}, 'Person': {'label': 'Person', 

### Importa Ontologia

In [58]:
from rdflib import Graph
from rdflib_neo4j import Neo4jStoreConfig, Neo4jStore, HANDLE_VOCAB_URI_STRATEGY


# Connection settings for the Neo4j-backed RDF store, read from the environment.
auth_data = dict(
    uri=os.getenv("NEO4J_URI"),
    database="neo4j",
    user=os.getenv("NEO4J_USERNAME"),
    pwd=os.getenv("NEO4J_PASSWORD"),
)

# Store configuration: vocabulary URIs use the IGNORE strategy and writes are batched.
config = Neo4jStoreConfig(
    auth_data=auth_data,
    handle_vocab_uri_strategy=HANDLE_VOCAB_URI_STRATEGY.IGNORE,
    batching=True,
)

file_path = './proto/ontos/eventOntology.ttl'

# Build an RDF graph on top of the Neo4j store and ingest the Turtle file.
neo4j_store = Neo4jStore(config=config)
g = Graph(store=neo4j_store)
# parse() implicitly opens the store.
g.parse(file_path, format="ttl")
# Because batching is enabled, the store has to be closed afterwards so that
# records not yet committed are not lost.
g.close(True)

Uniqueness constraint on :Resource(uri) is created.
The store is now: Open
The store is now: Closed
IMPORTED 113 TRIPLES


### LLM

In [39]:
# LLM used for graph extraction: gpt-4o, answering with a JSON object,
# temperature 0.0 and a fixed seed.
llm = OpenAILLM(
    model_name="gpt-4o",
    model_params=dict(
        max_tokens=2000,
        response_format={"type": "json_object"},
        temperature=0.0,
        seed=123,
    ),
)

### Pipeline per la costruzione
Utilizzo di una pipeline già presente per la costruzione di un grafo 

In [48]:
# Knowledge-graph construction pipeline fed with the schema derived from the ontology.
pipeline = SimpleKGPipeline(
    llm=llm,
    driver=driver,
    embedder=embedder,
    # text_splitter=text_splitter,  # not used
    entities=neo4j_schema.entities,
    relations=neo4j_schema.relations,
    potential_schema=neo4j_schema.potential_schema,
    from_pdf=False,  # the input is passed as plain text, not as a PDF
    # NOTE(review): presumably failures are skipped instead of raised — confirm against the library docs.
    on_error="IGNORE",
)

### Esecuzione (non possibile nel notebook)

In [None]:
# Read the source text. The encoding is set explicitly so accented characters
# do not depend on the platform default.
# NOTE(review): assumes the file is saved as UTF-8 — confirm.
with open('content/day1.txt', 'r', encoding='utf-8') as file:
    # Join lines with a space: removing the newline outright would glue the
    # last word of a line to the first word of the next one.
    content = file.read().replace('\n', ' ')

# asyncio.run() raises RuntimeError when an event loop is already running, as
# in a Jupyter kernel; inside a notebook use
# `await pipeline.run_async(text=content)` instead.
try:
    asyncio.run(
        pipeline.run_async(
            text=content
        )
    )
finally:
    # Release the driver even when the pipeline run fails.
    driver.close()

# QA sul grafo

### Import

In [None]:
from neo4j_graphrag.embeddings.openai import OpenAIEmbeddings
from neo4j_graphrag.retrievers import HybridRetriever
from neo4j_graphrag.llm import OpenAILLM
from neo4j_graphrag.generation import GraphRAG
from neo4j import GraphDatabase

import os
from dotenv import load_dotenv
# Load settings from a local .env file into the process environment;
# the connection cell below reads them back with os.getenv.
load_dotenv()

### Connessione al grafo

In [None]:
# Read the Neo4j connection settings from the environment and open a driver for the QA part.
uri, username, password = (
    os.getenv(key) for key in ("NEO4J_URI", "NEO4J_USERNAME", "NEO4J_PASSWORD")
)
driver = GraphDatabase.driver(uri, auth=(username, password))

### Embedder (lo stesso)

In [None]:
# Same embedding model as the one used while building the graph.
# NOTE(review): the vector index described below is declared with 3072
# dimensions — presumably this model's output size; confirm.
embedder = OpenAIEmbeddings(model="text-embedding-3-large")

### HybridRetriever (ricerca ibrida)
Presuppone l'esistenza del vector index sugli embedding dei chunk:

```cypher
CREATE VECTOR INDEX textChuck IF NOT EXISTS
FOR (c:Chunk)
ON c.embedding
OPTIONS {indexConfig: {
 `vector.dimensions`: 3072,
 `vector.similarity_function`: 'cosine'
}}
```

e del fulltext index sul testo dei chunk:

```cypher
CREATE FULLTEXT INDEX textFulltext IF NOT EXISTS
FOR (c:Chunk)
ON EACH [c.text]
```


In [None]:
# Hybrid retriever over the two indexes named in the markdown above
# (vector index "textChuck", full-text index "textFulltext"); only the "text"
# property is requested back.
retriever = HybridRetriever(
    driver=driver,
    embedder=embedder,
    vector_index_name="textChuck",
    fulltext_index_name="textFulltext",
    return_properties=["text"],
)

### GraphRAG

In [None]:
from neo4j_graphrag.generation import GraphRAG

# LLM for answering questions (gpt-4o, temperature 0); this rebinds `llm`,
# which earlier held the extraction LLM.
llm = OpenAILLM(
    model_name="gpt-4o",
    model_params=dict(temperature=0),
)
# Combine the retriever and the LLM into the question-answering object.
rag = GraphRAG(llm=llm, retriever=retriever)

### QA

In [None]:
# Ask one question, passing top_k=5 to the retriever, and print the answer text.
q = "chi è Alex"
response = rag.search(
    query_text=q,
    retriever_config=dict(top_k=5),
)
print(response.answer)