In [22]:
import nomic
from nomic import embed
import numpy as np
import os

import base64
import ollama
from ollama import chat
from langchain_experimental.graph_transformers.llm import LLMGraphTransformer
#from langchain.schema import Document
from langchain_ollama.llms import OllamaLLM
from langchain_community.graphs import Neo4jGraph
from langchain_community.graphs.graph_document import GraphDocument, Node, Relationship
from langchain_core.documents import Document
#from langchain_community.graphs.graph_store import GraphStore
#from langchain_community.graphs.graph_writer import GraphWriter

from neo4j import GraphDatabase

from llama_index.core import StorageContext
from llama_index.graph_stores.neo4j import Neo4jGraphStore
from llama_index.core import Settings

from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import KnowledgeGraphRAGRetriever
from llama_index.core.response_synthesizers import get_response_synthesizer


In [47]:
#import deeplake
#ds = deeplake.load('hub://activeloop/flickr30k')

In [23]:
NEO4J_SERVER_URL = "bolt://localhost:7687"
NEO4J_DB_NAME= "ragdb"
NEO4J_LOGIN = os.environ['NEO4J_USER_LOGIN']
#NEO4J_LOGIN = "dituser"
NEO4J_PWD = os.environ['NEO4J_USER_PWD']

In [24]:
TEXT_EMBEDDING_MODEL = 'DC1LEX/nomic-embed-text-v1.5-multimodal'
VISION_EMBEDDING_MODEL = 'nomic-embed-vision-v1.5'
MULTIMODAL_INFERENCE_MODEL = "gemma3:4b"

In [25]:
IMAGES = [
        "Man.jpg",
        "People.jpg",
    ]

In [26]:
def encode_images(img_paths):
    encoded_images = []
    for img_path in img_paths:
        with open(img_path, "rb") as f:
            encoded_images.append(base64.b64encode(f.read()).decode("utf-8"))
    
    return encoded_images



In [27]:
ollama.pull(TEXT_EMBEDDING_MODEL)
ollama.pull(MULTIMODAL_INFERENCE_MODEL)

ProgressResponse(status='success', completed=None, total=None, digest=None)

In [28]:
def get_img_embedding(img_path):
    nomic_api_key = os.environ['NOMIC_API_KEY'] 

    nomic.login(nomic_api_key)

    output = embed.image(
        images=[img_path
        ],
        model=VISION_EMBEDDING_MODEL,
    )

    #print(output['usage'])
    img_embeddings = np.array(output['embeddings'])
    #print(img_embeddings)
    #print(img_embeddings.shape)
    
    return img_embeddings[0]


In [29]:
def get_text_embedding(txt):
    response = ollama.embed(model=TEXT_EMBEDDING_MODEL, input=txt)
    txt_embedding = response["embeddings"]
    return txt_embedding[0]

In [30]:
def get_node_from_text_prop(graph_doc, txt):
    #result = None
    for node in graph_doc.nodes:
        txt_value = node.properties.get("text")
        if txt_value is not None and txt_value == txt:
            return node
    return None
            

In [40]:
def llm_txt_search(txt_search, ctx):
    response = chat(
        model=MULTIMODAL_INFERENCE_MODEL,
        messages=[
            {
                "role": "user",
                "content": f"Answer the question while taking into account the context. \n question:{txt_search} \n context:{ctx}",
                #"images":[img_b64]
            }
        ]
        
    )

    result = response['message']['content']
    return result

In [38]:
def format_context_for_rag(neo4j_results):
        context_list = []
        for record in neo4j_results:
            # Extract relevant information from the record
            source_entity = record["source_id"]
            relationship = record["r"]
            neighbor = record["dest_id"]

            # Format the extracted information into a string or structured data
            # suitable for your RAG model
            """context_string = (
                f"Source Entity: {source_entity.get('id', 'N/A')}, "
                f"Relationship: {relationship.type}, "
                f"Related Entity: {neighbor.get('id', 'N/A')}"
            )"""
            context_string = f"{source_entity} {relationship.type} {neighbor}"
            
            context_list.append(context_string)
        return "\n".join(context_list)

In [55]:
imgs_b64 = encode_images(IMAGES)

image_descriptions = []

for img_b64 in imgs_b64:
    print(img_b64)
    response = chat(
        model=MULTIMODAL_INFERENCE_MODEL,
        messages=[
            {
                "role": "user",
                "content": "Describe all people, organizations, and events in this image.",
                "images":[img_b64]
            }
        ]
        
    )

    image_description = response['message']['content']
    image_descriptions.append(image_description)

print(image_descriptions)

/9j/4AAQSkZJRgABAQAAAQABAAD/2wCEAAkGBxISEhUSEBAWFhUVFhAQEBIVFRcPEBUVFRcWFxUVFhUYHSggGBolGxYVITEhJSkrLi4uFx8zODMtNygtLisBCgoKDg0OGhAQGi0dHyUtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0rKystLS0tLS0tLSstLf/AABEIALIBGwMBIgACEQEDEQH/xAAcAAABBQEBAQAAAAAAAAAAAAADAAECBAUGBwj/xAA+EAABAwIEAgcECAQHAQAAAAABAAIRAwQSITFBBVEGEyJhcYGRMqGxwQcUI0JSgtHwM3Lh8RVDYnOSosLD/8QAGQEAAwEBAQAAAAAAAAAAAAAAAAECAwQF/8QAJhEAAgICAgICAgIDAAAAAAAAAAECEQMSITEEQSJREzJhgRQzcf/aAAwDAQACEQMRAD8A9PCkohTSGBKkFEqYQAwTlMFIpgME6YJ0AIJ0wToARCjWqtY0ue4NA1JMBc70o6YUbQ9WA6pWMYKbRJk6Z/Jcnf8ABeLXrZq1BTBwkMc4taPytBzz3QKzpeI/SFZUgYeXkfhGXkSs1n0pW+9F3d2h+kyuZqfRZWGf1hhJ1ycP7oB+jOuD/Fb3RKfAU2ekcF6a21dvbPUumMLzI7od+vNdM0zmF4HV6BXrCCwtcf5oP6Lu+iXGrm1Ao39NzWZNZVPaY2NpGyOPQ+V2ehoVQqbHggEGQcwUKuMkgB40g1ZxuYdBV6jVlTYFhiK1CaUVqoZYpIwQaSMExCSSSSASSSSAEkkkgBJJJIASSSSYDJJ0kAZYUlEKRSGCKmEJ2qICgQk5TAqRQBEKSYKSAGXL9NuMvphlvbH7esQGxnhbuTy/uuohefWP2t9cXJmJNOlPJsNJHccJPmk3Q6vg1eAcHo23a9usfbrP7T5OuEn2Qtj6yFkit2oVqnTJWayWX+NIuProT6oQzRKBcNMFNyY1BEal4J2U

In [56]:
# 1. Your normal Ollama LLM for graph extraction
llm = OllamaLLM(model=MULTIMODAL_INFERENCE_MODEL, temperature=0.0)

transformer = LLMGraphTransformer(
    llm=llm,
    #allowed_nodes=["Person", "Organization", "Event"],
    #node_properties=True
)

# 3) Prepare docs
#image docs 
img_docs = [Document(page_content=img_desc, metadata={"url": img_url, "embedding":get_text_embedding(img_desc)}) for img_desc, img_url in zip(image_descriptions, IMAGES)]
#Text docs then append img_docs
doc_text = [
    "Alice works at Acme Corp. She organized a conference in April 2024.",
    "Bob and Carol co-founded OpenWidgets, a startup focused on widgets.",
]

docs = [Document(page_content=txt, metadata={"embedding":get_text_embedding(txt)}) for txt in doc_text]

docs.extend(img_docs)


# 3. Extract graph
graph_docs = transformer.convert_to_graph_documents(docs)



In [63]:
# Add embeddings to the nodes of the graph
for graph_doc in graph_docs:
    
    for node in graph_doc.nodes:
        node_text = node.properties.get("text")
        if node_text is not None :
            node.properties["embedding"] = get_text_embedding(node_text)
        else : 
            node_id = node.id
            if node_id is not None :
                node.properties["embedding"] = get_text_embedding(node_id)



In [64]:
# Add Image nodes with properties like embedding then relate to the rest of the graph
img_id = 0

for graph_doc in graph_docs:
    # If the source of the graphDoc is the description of an image
    graph_source = graph_doc.source
    img_url = graph_source.metadata.get("url") 
       
    if img_url is not None:   
        #print(img_url) 
        # Créer un noeud image avec l'URL en question et l'embedding de l'image
        img_node = Node(id=f"img_{img_id}", type="Image", properties={"url": img_url, "embedding":get_img_embedding(img_url)})   
        
        new_relationships = []

        # Lier l'image à tous les noeuds du grapheDoc
        for node in graph_doc.nodes:
            new_relationships.append(Relationship(source=img_node,target=node, type="contains"))
        
        graph_doc.nodes.append(img_node)
        graph_doc.relationships.extend(new_relationships)
        img_id += 1

    #print(graph_doc)

In [None]:
# Store Knowledge Graph in Neo4j

graph_store = Neo4jGraph(url=NEO4J_SERVER_URL, username=NEO4J_LOGIN, password=NEO4J_PWD, database=NEO4J_DB_NAME)
#graph_store.write_graph(graph_docs)

#graph_store.
graph_store.add_graph_documents(graph_docs, include_source=True)

## Requête incluant le RAG

In [11]:
# Create a driver instance
driver = GraphDatabase.driver(uri=NEO4J_SERVER_URL, database=NEO4J_DB_NAME, auth=(NEO4J_LOGIN,NEO4J_PWD))

# Function to run a Cypher query
def run_query(query, parameters=None):
    with driver.session() as session:
        result = session.run(query, parameters or {})
        return [record for record in result]

In [44]:
# Query 1
search1 = "Where does Alice work?"

# Query 2
search2 = "Who works for OpenWidgets?"

search3 = "black"
search4 = "describe happy people"

In [45]:


# Example: match all nodes
search = search4

query = """
MATCH (a)-[r]->(b)
WITH a, b, gds.similarity.cosine(a.embedding, $query_embeddings) AS similarity, r
ORDER BY similarity DESC
LIMIT $top_k
RETURN a.id AS source_id,  a.url as source_url, similarity, r, b.id AS dest_id,  b.url as dest_url
"""


params = {"query_embeddings": get_text_embedding(search), "top_k":3}
results = run_query(query, params)

# Print results
for record in results:
    print(record)

search_ctx = format_context_for_rag(results)

print(llm_txt_search(search,search_ctx))

# Close driver when done
driver.close()

  with driver.session() as session:


<Record source_id='c20523608421863c3a78ff4411086b8c' source_url='People.jpg' similarity=0.610788709757223 r=<Relationship element_id='5:80c2cade-092b-451d-8eb1-e9319a127826:50' nodes=(<Node element_id='4:80c2cade-092b-451d-8eb1-e9319a127826:24' labels=frozenset() properties={}>, <Node element_id='4:80c2cade-092b-451d-8eb1-e9319a127826:25' labels=frozenset() properties={}>) type='MENTIONS' properties={}> dest_id='Group of Friends' dest_url=None>
<Record source_id='c20523608421863c3a78ff4411086b8c' source_url='People.jpg' similarity=0.610788709757223 r=<Relationship element_id='5:80c2cade-092b-451d-8eb1-e9319a127826:51' nodes=(<Node element_id='4:80c2cade-092b-451d-8eb1-e9319a127826:24' labels=frozenset() properties={}>, <Node element_id='4:80c2cade-092b-451d-8eb1-e9319a127826:26' labels=frozenset() properties={}>) type='MENTIONS' properties={}> dest_id='Woman with red hair' dest_url=None>
<Record source_id='c20523608421863c3a78ff4411086b8c' source_url='People.jpg' similarity=0.610788709

In [None]:
"""neo4j_graph_store = Neo4jGraphStore(
    url="bolt://localhost:7687",
    username=neo4j_login,
    password=neo4j_pwd
)

storage_context = StorageContext.from_defaults(graph_store=neo4j_graph_store)
Settings.llm = llm"""

In [None]:
#graph_rag_retriever = KnowledgeGraphRAGRetriever(storage_context=storage_context,verbose=True)
#query_engine = RetrieverQueryEngine.from_args(graph_rag_retriever)

  graph_rag_retriever = KnowledgeGraphRAGRetriever(storage_context=storage_context,verbose=True)


In [None]:
"""def query_and_synthesize(query):
    retrieved_context = query_engine.query(query)
    response = get_response_synthesizer.synthesize(query, retrieved_context)
    print(f"Query: {query}")
    print(f"Answer: {response}\n")"""

In [None]:
"""# Initialize the ResponseSynthesizer instance
response_synthesizer = get_response_synthesizer(llm)

# Query 1
query_and_synthesize("Where does Alice work?")

# Query 2
query_and_synthesize("Who works for OpenWidgets?")

# Query 3
query_and_synthesize("Does Bob work for the same company as Alice ?")"""

In [None]:
"""documents = [
  "Llamas are members of the camelid family meaning they're pretty closely related to vicuñas and camels",
  "Llamas were first domesticated and used as pack animals 4,000 to 5,000 years ago in the Peruvian highlands",
  "Llamas can grow as much as 6 feet tall though the average llama between 5 feet 6 inches and 5 feet 9 inches tall",
  "Llamas weigh between 280 and 450 pounds and can carry 25 to 30 percent of their body weight",
  "Llamas are vegetarians and have very efficient digestive systems",
  "Llamas live to be about 20 years old, though some only live for 15 years and others live to be 30 years old",
]

txt_embeddings = []

for i, d in enumerate(documents):
  response = ollama.embed(model=TEXT_EMBEDDING_MODEL, input=d)
  txt_embedding = response["embeddings"]

  txt_embeddings.append(txt_embedding)
  
  print(txt_embedding)
  print(len(txt_embedding[0]))"""